From ef5aeccf34b7996cc38f956a30c476bdb3b34aef Mon Sep 17 00:00:00 2001
From: LucaBrugnoli <luca.brugnoli1190@gmail.com>
Date: Fri, 3 Apr 2026 11:20:33 +0200
Subject: [PATCH 1/3] Add WiSE electrolyte benchmark (density, X-ray S(q), Li-O
 RDF)

Three sub-benchmarks for 21 m LiTFSI/H2O with 6 MLIP models
(matpes-r2scan, mace-mpa-0-medium, mace-omat-0-medium, mace-mp-0b3,
mace-mh-1-omat, mace-mh-1-omol):
- NPT density vs Gilbert et al. JCED 2017
- X-ray structure factor S(q) vs SAXS experiment
- Li-O RDF coordination numbers vs Watanabe et al. JPCB 2021
---
 .../wise_electrolytes/data/saxs_maginn.txt    | 301 ++++++++++++
 .../density/analyse_density.py                | 298 ++++++++++++
 .../wise_electrolytes/density/metrics.yml     |  15 +
 .../wise_electrolytes/rdf/analyse_rdf.py      | 434 ++++++++++++++++++
 .../wise_electrolytes/rdf/metrics.yml         |  15 +
 .../xray_sf/analyse_xray_sf.py                | 398 ++++++++++++++++
 .../wise_electrolytes/xray_sf/metrics.yml     |  15 +
 .../wise_electrolytes/density/app_density.py  |  68 +++
 ml_peg/app/wise_electrolytes/rdf/app_rdf.py   |  69 +++
 .../wise_electrolytes/wise_electrolytes.yml   |   7 +
 .../wise_electrolytes/xray_sf/app_xray_sf.py  |  66 +++
 .../wise_electrolytes/density/calc_density.py |  75 +++
 .../calcs/wise_electrolytes/rdf/calc_rdf.py   | 313 +++++++++++++
 .../wise_electrolytes/xray_sf/calc_xray_sf.py | 345 ++++++++++++++
 14 files changed, 2419 insertions(+)
 create mode 100644 ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt
 create mode 100644 ml_peg/analysis/wise_electrolytes/density/analyse_density.py
 create mode 100644 ml_peg/analysis/wise_electrolytes/density/metrics.yml
 create mode 100644 ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py
 create mode 100644 ml_peg/analysis/wise_electrolytes/rdf/metrics.yml
 create mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py
 create mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml
 create mode 100644 ml_peg/app/wise_electrolytes/density/app_density.py
 create mode 100644 ml_peg/app/wise_electrolytes/rdf/app_rdf.py
 create mode 100644 ml_peg/app/wise_electrolytes/wise_electrolytes.yml
 create mode 100644 ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py
 create mode 100644 ml_peg/calcs/wise_electrolytes/density/calc_density.py
 create mode 100644 ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py
 create mode 100644 ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py

diff --git a/ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt b/ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt
new file mode 100644
index 000000000..08d66b2cf
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt
@@ -0,0 +1,301 @@
+# 10.1021/acs.jpcb.1c02189
+# q / A-1   S(q)
+0.20849        -0.14530
+0.27792        -0.13506
+0.34735        -0.12067
+0.41678        -0.10336
+0.45728        -0.08727
+0.49200        -0.06776
+0.52671        -0.04801
+0.55178        -0.02997
+0.57493        -0.00949
+0.59036        0.01014
+0.61598        0.03412
+0.64243        0.06049
+0.65400        0.08160
+0.66557        0.10150
+0.67714        0.12052
+0.68293        0.13778
+0.69797        0.15855
+0.70800        0.17996
+0.72012        0.20988
+0.73170        0.24081
+0.74465        0.27042
+0.75484        0.30330
+0.73500        0.28700
+0.76641        0.34761
+0.76586        0.32650
+0.77798        0.38731
+0.77550        0.36819
+0.79286        0.42909
+0.78708        0.40842
+0.80113        0.46380
+0.78129        0.44792
+0.81022        0.50741
+0.81215        0.48840
+0.82427        0.56746
+0.82526        0.54096
+0.81601        0.52692
+0.83584        0.62473
+0.83336        0.60080
+0.81601        0.59129
+0.84741        0.67948
+0.84493        0.65675
+0.82758        0.64688
+0.85899        0.73424
+0.85651        0.71161
+0.83915        0.70247
+0.87001        0.79805
+0.86808        0.77562
+0.86808        0.75989
+0.87965        0.86242
+0.88312        0.83911
+0.87386        0.82146
+0.89370        0.93738
+0.89122        0.91350
+0.88544        0.89753
+0.88544        0.88241
+0.90527        0.99757
+0.89701        0.97555
+0.89701        0.96141
+0.91436        1.05943
+0.90858        1.03504
+0.90858        1.01993
+0.92842        1.12024
+0.92015        1.09454
+0.92015        1.07991
+0.93999        1.17479
+0.93172        1.15208
+0.92015        1.14135
+0.95156        1.21659
+0.94329        1.19840
+0.96258        1.25522
+0.95487        1.23790
+0.97470        1.28555
+0.98727        1.31134
+1.01272        1.33234
+1.05323        1.30934
+1.04744        1.33153
+1.06595        1.28501
+1.07637        1.26497
+1.08794        1.24156
+1.10067        1.21596
+1.11108        1.19365
+1.12150        1.17119
+1.13307        1.14808
+1.14580        1.12380
+1.15853        1.10039
+1.17010        1.07815
+1.18630        1.05913
+1.19980        1.03724
+1.23259        1.01286
+1.27226        0.99067
+1.34252        0.98092
+1.41195        0.97409
+1.46237        0.95577
+1.49874        0.93595
+1.51417        0.92045
+1.53577        0.90319
+1.54965        0.88446
+1.57395        0.86291
+1.59710        0.83755
+1.62024        0.81219
+1.64338        0.78586
+1.66653        0.76026
+1.69380        0.73737
+1.71667        0.71637
+1.75332        0.69589
+1.79299        0.67865
+1.86325        0.66980
+1.93846        0.67614
+1.99748        0.69413
+2.04261        0.71832
+2.07154        0.73685
+2.09468        0.75441
+2.12554        0.77245
+2.14868        0.79293
+2.17183        0.81439
+2.19497        0.83584
+2.21040        0.85462
+2.23354        0.87558
+2.26247        0.90216
+2.27983        0.92593
+2.30297        0.94776
+2.33190        0.97567
+2.35697        1.00091
+2.37587        1.01876
+2.39555        1.03748
+2.42448        1.05796
+2.46498        1.08027
+2.52284        1.09893
+2.59227        1.09722
+2.64665        1.08137
+2.68484        1.06363
+2.72534        1.04480
+2.76502        1.02682
+2.79477        1.01056
+2.82949        0.99506
+2.86420        0.97750
+2.89892        0.95995
+2.93859        0.94156
+2.96835        0.92630
+3.00885        0.90893
+3.05513        0.88918
+3.10060        0.87154
+3.15019        0.85357
+3.19400        0.83804
+3.25764        0.82126
+3.29236        0.80890
+3.34443        0.79342
+3.40807        0.77854
+3.47172        0.76148
+3.54115        0.74636
+3.61058        0.73295
+3.68001        0.72198
+3.74944        0.71491
+3.81887        0.71076
+3.88830        0.71101
+3.95773        0.71539
+4.02716        0.72612
+4.09080        0.74051
+4.14866        0.75709
+4.19660        0.77541
+4.23958        0.79547
+4.27132        0.81512
+4.30901        0.83455
+4.34075        0.85491
+4.37431        0.87607
+4.40787        0.89792
+4.43217        0.91752
+4.46303        0.93459
+4.48617        0.95214
+4.50932        0.97019
+4.53246        0.98921
+4.55560        1.00822
+4.57875        1.02724
+4.60189        1.04724
+4.62503        1.06698
+4.64818        1.08747
+4.66708        1.10595
+4.69446        1.12623
+4.71761        1.14476
+4.74075        1.16305
+4.76389        1.18231
+4.78511        1.20279
+4.80825        1.22298
+4.83718        1.24193
+4.86611        1.26180
+4.90083        1.28374
+4.93554        1.30447
+4.97604        1.32440
+5.02150        1.34407
+5.08019        1.36420
+5.14962        1.37444
+5.21905        1.37225
+5.28269        1.36225
+5.34055        1.34558
+5.38105        1.32586
+5.42155        1.30788
+5.45627        1.29057
+5.49098        1.27301
+5.52570        1.25546
+5.56041        1.23717
+5.59513        1.21742
+5.62985        1.19694
+5.66456        1.17719
+5.69928        1.15671
+5.73399        1.13647
+5.76871        1.11721
+5.80342        1.09795
+5.83814        1.07991
+5.87285        1.06235
+5.90757        1.04650
+5.94724        1.02975
+5.99353        1.01199
+6.06379        0.99359
+6.13322        0.97799
+6.20265        0.96604
+6.27208        0.96043
+6.34151        0.95726
+6.41094        0.95775
+6.48037        0.96043
+6.54980        0.96507
+6.61923        0.97043
+6.68866        0.97458
+6.75809        0.97897
+6.82752        0.98140
+6.89695        0.98287
+6.96638        0.98213
+7.03581        0.98067
+7.10524        0.97823
+7.17467        0.97531
+7.24410        0.97238
+7.31353        0.96872
+7.38296        0.96580
+7.45239        0.96287
+7.52182        0.96043
+7.59125        0.95775
+7.66068        0.95629
+7.73011        0.95434
+7.79954        0.95288
+7.86898        0.95214
+7.93841        0.95117
+8.00784        0.95214
+8.07727        0.95263
+8.14670        0.95385
+8.21613        0.95653
+8.28556        0.96019
+8.35499        0.96458
+8.42442        0.97019
+8.49385        0.97604
+8.56328        0.98238
+8.63271        0.98896
+8.70214        0.99530
+8.77157        1.00262
+8.84100        1.00944
+8.91043        1.01603
+8.97986        1.02432
+9.04929        1.03261
+9.11872        1.04163
+9.18815        1.05162
+9.25758        1.06284
+9.32701        1.07527
+9.39644        1.08747
+9.46587        1.10088
+9.53530        1.11404
+9.60473        1.12745
+9.67417        1.14038
+9.74360        1.15305
+9.81303        1.16329
+9.88246        1.17158
+9.95189        1.17646
+10.02132    1.18061
+10.09075    1.17987
+10.16018    1.17670
+10.22961    1.16890
+10.29904    1.15744
+10.36847    1.14184
+10.43211    1.12584
+10.48997    1.10917
+10.54204    1.08887
+10.60238    1.06946
+10.65583    1.05065
+10.70488    1.03288
+10.76046    1.01389
+10.81629    0.99652
+10.87184    0.98043
+10.92970    0.96346
+10.99334    0.94776
+11.06277    0.93240
+11.13220    0.92069
+11.20163    0.91265
+11.27106    0.90557
+11.34049    0.90314
+11.40992    0.90289
+11.47936    0.90411
+11.54879    0.90704
+11.61822    0.91167
+11.68765    0.91703
+11.75708    0.92362
+11.82651    0.92996
+11.89594    0.93678
diff --git a/ml_peg/analysis/wise_electrolytes/density/analyse_density.py b/ml_peg/analysis/wise_electrolytes/density/analyse_density.py
new file mode 100644
index 000000000..f2410484f
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/density/analyse_density.py
@@ -0,0 +1,298 @@
+"""Analyse density benchmark for LiTFSI/H2O electrolyte."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import plotly.graph_objects as go
+import pytest
+
+from ml_peg.analysis.utils.utils import load_metrics_config
+
+# --- Paths -------------------------------------------------------------------
+
+CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs"
+CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "density" / "outputs"
+APP_ROOT = Path(__file__).resolve().parents[3] / "app"
+OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density"
+
+MODELS = [
+    "matpes-r2scan",
+    "mace-mpa-0-medium",
+    "mace-omat-0-medium",
+    "mace-mp-0b3",
+    "mace-mh-1-omat",
+    "mace-mh-1-omol",
+]
+
+RHO_EXP = 1.7126  # g/cm³  (Gilbert et al., JCED 2017, DOI: 10.1021/acs.jced.7b00135)
+
+# Map benchmark model names → ml-peg registry names (for app summary table)
+MODEL_NAME_MAP = {
+    "matpes-r2scan": "mace-matpes-r2scan",
+    "mace-mpa-0-medium": "mace-mpa-0",
+    "mace-omat-0-medium": "mace-omat-0",
+}
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+# --- Helpers -----------------------------------------------------------------
+
+
+def load_density_results() -> dict[str, dict]:
+    """
+    Load density.json for each model.
+
+    Returns
+    -------
+    dict[str, dict]
+        Per-model density results keyed by model name.
+    """
+    results = {}
+    for model in MODELS:
+        json_path = CALC_PATH / model / "density.json"
+        if json_path.exists():
+            with open(json_path) as f:
+                results[model] = json.load(f)
+    return results
+
+
+def normalize_metric(value: float, good: float, bad: float) -> float:
+    """
+    Normalize metric to [0, 1] where 0=good, 1=bad.
+
+    Parameters
+    ----------
+    value : float
+        Raw metric value.
+    good : float
+        Value corresponding to score 0.
+    bad : float
+        Value corresponding to score 1.
+
+    Returns
+    -------
+    float
+        Normalized score clipped to [0, 1].
+    """
+    if bad == good:
+        return 0.0 if value <= good else 1.0
+    score = (value - good) / (bad - good)
+    return max(0.0, min(1.0, score))
+
+
+# --- Plot builders -----------------------------------------------------------
+
+
+def build_density_bar_chart(data: dict[str, dict]) -> go.Figure:
+    """
+    Build bar chart of density per model.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model density results.
+
+    Returns
+    -------
+    go.Figure
+        Plotly bar chart figure.
+    """
+    fig = go.Figure()
+    for model in MODELS:
+        if model not in data:
+            continue
+        fig.add_trace(
+            go.Bar(
+                x=[model],
+                y=[data[model]["rho_mean"]],
+                error_y={"type": "data", "array": [data[model]["rho_std"]]},
+                name=model,
+            )
+        )
+
+    fig.add_hline(
+        y=RHO_EXP,
+        line_dash="dash",
+        line_color="red",
+        annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3, Gilbert 2017)",
+    )
+    fig.update_layout(
+        title="Electrolyte Density (LiTFSI/H2O, 21 m)",
+        yaxis_title="Density / g cm⁻³",
+        showlegend=False,
+    )
+    return fig
+
+
+def build_density_timeseries(data: dict[str, dict]) -> go.Figure:
+    """
+    Build density vs time plot for all models.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model density results.
+
+    Returns
+    -------
+    go.Figure
+        Plotly timeseries figure.
+    """
+    fig = go.Figure()
+    for model in MODELS:
+        if model not in data:
+            continue
+        d = data[model]
+        fig.add_trace(
+            go.Scatter(
+                x=d["time_full"],
+                y=d["density_full"],
+                mode="lines",
+                name=model,
+                opacity=0.8,
+            )
+        )
+
+    fig.add_hline(
+        y=RHO_EXP,
+        line_dash="dash",
+        line_color="black",
+        annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3)",
+    )
+    fig.update_layout(
+        title="NPT Density vs Time (LiTFSI/H2O)",
+        xaxis_title="Time / ps",
+        yaxis_title="Density / g cm⁻³",
+    )
+    return fig
+
+
+# --- Metrics table -----------------------------------------------------------
+
+
+def build_metrics_table(data: dict[str, dict]) -> dict:
+    """
+    Build metrics table JSON compatible with ml-peg app format.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model density results.
+
+    Returns
+    -------
+    dict
+        Table with data, columns, tooltips, and thresholds.
+    """
+    rows = []
+    for model in MODELS:
+        if model not in data:
+            continue
+        d = data[model]
+        abs_err = d["rho_abs_error"]
+        pct_err = abs(d["rho_error_pct"])
+
+        score_abs = normalize_metric(
+            abs_err,
+            DEFAULT_THRESHOLDS["Density Error"]["good"],
+            DEFAULT_THRESHOLDS["Density Error"]["bad"],
+        )
+        score_pct = normalize_metric(
+            pct_err,
+            DEFAULT_THRESHOLDS["Density Error (%)"]["good"],
+            DEFAULT_THRESHOLDS["Density Error (%)"]["bad"],
+        )
+
+        w_abs = DEFAULT_WEIGHTS.get("Density Error", 1.0)
+        w_pct = DEFAULT_WEIGHTS.get("Density Error (%)", 1.0)
+        total_w = w_abs + w_pct
+        overall = (
+            1.0 - (score_abs * w_abs + score_pct * w_pct) / total_w
+            if total_w > 0
+            else 0.0
+        )
+
+        rows.append(
+            {
+                "MLIP": model,
+                "id": MODEL_NAME_MAP.get(model, model),
+                "Density (g/cm3)": round(d["rho_mean"], 4),
+                "Density Error": round(abs_err, 4),
+                "Density Error (%)": round(pct_err, 2),
+                "Score": round(overall, 3),
+            }
+        )
+
+    rows.sort(key=lambda r: r["Score"], reverse=True)
+
+    columns = [
+        {"name": c, "id": c}
+        for c in [
+            "MLIP",
+            "Density Error",
+            "Density Error (%)",
+            "Score",
+        ]
+    ]
+
+    return {
+        "data": rows,
+        "columns": columns,
+        "tooltip_header": DEFAULT_TOOLTIPS,
+        "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()},
+        "weights": dict(DEFAULT_WEIGHTS),
+        "model_name_map": MODEL_NAME_MAP,
+    }
+
+
+# --- Pytest interface --------------------------------------------------------
+
+
+@pytest.fixture
+def density_analysis():
+    """
+    Run full density analysis: table + plots.
+
+    Returns
+    -------
+    dict
+        Metrics table dictionary.
+    """
+    data = load_density_results()
+    if not data:
+        pytest.skip("No density data found")
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    table = build_metrics_table(data)
+    with open(OUT_PATH / "density_metrics_table.json", "w") as f:
+        json.dump(table, f, indent=2)
+
+    fig_bar = build_density_bar_chart(data)
+    with open(OUT_PATH / "figure_density_bar.json", "w") as f:
+        f.write(fig_bar.to_json())
+
+    fig_ts = build_density_timeseries(data)
+    with open(OUT_PATH / "figure_density_timeseries.json", "w") as f:
+        f.write(fig_ts.to_json())
+
+    return table
+
+
+def test_density(density_analysis) -> None:
+    """
+    Run density benchmark — generates table and plots.
+
+    Parameters
+    ----------
+    density_analysis : dict
+        Fixture providing the metrics table.
+    """
+    table = density_analysis
+    assert len(table["data"]) > 0, "No models produced density data"
diff --git a/ml_peg/analysis/wise_electrolytes/density/metrics.yml b/ml_peg/analysis/wise_electrolytes/density/metrics.yml
new file mode 100644
index 000000000..f4b2bc014
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/density/metrics.yml
@@ -0,0 +1,15 @@
+metrics:
+  Density Error:
+    good: 0.0
+    bad: 0.17
+    unit: "g/cm3"
+    tooltip: "Absolute error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)"
+    level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017)
+    weight: 1
+  Density Error (%):
+    good: 0.0
+    bad: 10.0
+    unit: "%"
+    tooltip: "Percent error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)"
+    level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017)
+    weight: 0
diff --git a/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py b/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py
new file mode 100644
index 000000000..8df3acd2a
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py
@@ -0,0 +1,434 @@
+"""Analyse Li-O RDF benchmark for LiTFSI/H2O electrolyte."""
+# Reads rdf.json and rdf.npz files from calc outputs and generates
+# scoring tables and g(r) plots for the ml-peg dashboard.
+#
+# Reference coordination numbers (Watanabe et al., J. Phys. Chem. B 125, 7477, 2021):
+#   CN(Li-O_water) = 2.0  }  ~18.5 m LiTFSI/H2O,
+#   CN(Li-O_TFSI)  = 2.0  }  neutron diffraction, 6Li/7Li + H/D substitution
+#
+# Integration cutoff: 2.83 Å (first minimum of Li-O_total g(r) from r2SCAN AIMD).
+# Trajectories: NVT 50 ps (dt=0.1 ps/frame, 501 frames, equilibrated 50-100 ps window).
+# Box length: 27.4938 Å (cubic, p64_w170 cell: 64 LiTFSI + 170 H2O = 1534 atoms).
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import plotly.graph_objects as go
+import pytest
+
+from ml_peg.analysis.utils.utils import load_metrics_config
+
+# --- Paths -------------------------------------------------------------------
+
+CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs"
+CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "rdf" / "outputs"
+APP_ROOT = Path(__file__).resolve().parents[3] / "app"
+OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf"
+
+MODELS = [
+    "matpes-r2scan",
+    "mace-mpa-0-medium",
+    "mace-omat-0-medium",
+    "mace-mp-0b3",
+    "mace-mh-1-omat",
+    "mace-mh-1-omol",
+]
+
+# --- Physical constants and reference values ---------------------------------
+
+# Box geometry (same for all models: p64_w170 NVT cell)
+L_BOX = 27.4938  # Å (cubic NVT cell at experimental density)
+V_BOX = L_BOX**3  # Å³
+
+# Integration cutoff: first minimum of Li-O_total g(r)
+R_CUT = 2.83  # Å
+DR = 0.02  # Å (bin width used in calc_rdf.py)
+
+# Experimental reference: Watanabe et al., JPCB 2021, ~18.5 m LiTFSI/H2O
+CN_EXP_WATER = 2.0
+CN_EXP_TFSI = 2.0
+
+# Map benchmark model names → ml-peg registry names (for app summary table)
+MODEL_NAME_MAP = {
+    "matpes-r2scan": "mace-matpes-r2scan",
+    "mace-mpa-0-medium": "mace-mpa-0",
+    "mace-omat-0-medium": "mace-omat-0",
+}
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+# --- Helpers -----------------------------------------------------------------
+
+
+def compute_cn_from_gr(
+    gr: np.ndarray,
+    r: np.ndarray,
+    n_neighbor: int,
+    volume: float,
+    r_cut: float,
+    dr: float,
+) -> float:
+    """
+    Integrate g(r) to coordination number: CN = 4pi rho int_0^r_cut g(r) r^2 dr.
+
+    Parameters
+    ----------
+    gr : np.ndarray
+        Radial distribution function values.
+    r : np.ndarray
+        Radial distance values in angstrom.
+    n_neighbor : int
+        Number of neighbor atoms of the relevant species in the simulation box.
+    volume : float
+        Volume of the simulation box in angstrom^3.
+    r_cut : float
+        Integration cutoff distance in angstrom.
+    dr : float
+        Bin width in angstrom.
+
+    Returns
+    -------
+    float
+        Coordination number obtained by integrating g(r) up to ``r_cut``.
+    """
+    rho = n_neighbor / volume
+    mask = r <= r_cut
+    return float(np.sum(4 * np.pi * rho * gr[mask] * r[mask] ** 2 * dr))
+
+
+def load_rdf_results() -> dict[str, dict]:
+    """
+    Load rdf.json and rdf.npz for each model, recompute CN at r_cut=2.83 A.
+
+    Returns
+    -------
+    dict[str, dict]
+        Mapping from model name to a dict containing coordination numbers,
+        errors, radial distances, g(r) arrays, and frame count.
+    """
+    results = {}
+    for model in MODELS:
+        json_path = CALC_PATH / model / "rdf.json"
+        npz_path = CALC_PATH / model / "rdf.npz"
+        if not json_path.exists() or not npz_path.exists():
+            continue
+
+        with open(json_path) as f:
+            meta = json.load(f)
+
+        data = np.load(npz_path)
+        r = data["r"]
+
+        n_ow = meta["n_O_water"]
+        n_of = meta["n_O_TFSI"]
+
+        cn_w = compute_cn_from_gr(data["gr_LiO_water"], r, n_ow, V_BOX, R_CUT, DR)
+        cn_f = compute_cn_from_gr(data["gr_LiO_TFSI"], r, n_of, V_BOX, R_CUT, DR)
+
+        results[model] = {
+            "cn_water": cn_w,
+            "cn_tfsi": cn_f,
+            "cn_total": cn_w + cn_f,
+            "err_water": abs(cn_w - CN_EXP_WATER),
+            "err_tfsi": abs(cn_f - CN_EXP_TFSI),
+            "r": r,
+            "gr_water": data["gr_LiO_water"],
+            "gr_tfsi": data["gr_LiO_TFSI"],
+            "gr_total": data["gr_LiO_total"],
+            "n_frames": meta["n_frames_used"],
+        }
+    return results
+
+
+def normalize_metric(value: float, good: float, bad: float) -> float:
+    """
+    Map value linearly: good to 1.0, bad to 0.0, clipped to [0, 1].
+
+    Parameters
+    ----------
+    value : float
+        The metric value to normalize.
+    good : float
+        Threshold value that maps to a score of 1.0.
+    bad : float
+        Threshold value that maps to a score of 0.0.
+
+    Returns
+    -------
+    float
+        Normalized score clipped to the range [0, 1].
+    """
+    if good == bad:
+        return 1.0 if value == good else 0.0
+    t = (value - bad) / (good - bad)
+    return max(0.0, min(1.0, float(t)))
+
+
+# --- Plot builders -----------------------------------------------------------
+
+
+def build_cn_bar_chart(data: dict[str, dict]) -> go.Figure:
+    """
+    Bar chart of CN_water and CN_TFSI per model with experimental reference.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model RDF results as returned by :func:`load_rdf_results`.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure with grouped bars and experimental reference lines.
+    """
+    fig = go.Figure()
+
+    x_models = [m for m in MODELS if m in data]
+    cn_water_vals = [data[m]["cn_water"] for m in x_models]
+    cn_tfsi_vals = [data[m]["cn_tfsi"] for m in x_models]
+
+    fig.add_trace(
+        go.Bar(
+            name="Li-O<sub>water</sub>",
+            x=x_models,
+            y=cn_water_vals,
+            marker_color="steelblue",
+        )
+    )
+    fig.add_trace(
+        go.Bar(
+            name="Li-O<sub>TFSI</sub>",
+            x=x_models,
+            y=cn_tfsi_vals,
+            marker_color="coral",
+        )
+    )
+
+    # Experimental reference lines
+    fig.add_hline(
+        y=CN_EXP_WATER,
+        line_dash="dash",
+        line_color="steelblue",
+        annotation_text=f"Exp. O<sub>water</sub> ({CN_EXP_WATER})",
+        annotation_position="top right",
+    )
+    fig.add_hline(
+        y=CN_EXP_TFSI,
+        line_dash="dash",
+        line_color="coral",
+        annotation_text=f"Exp. O<sub>TFSI</sub> ({CN_EXP_TFSI})",
+        annotation_position="bottom right",
+    )
+
+    fig.update_layout(
+        title="Li⁺ Coordination Numbers (LiTFSI/H₂O, 21 m)",
+        yaxis_title="Coordination Number",
+        barmode="group",
+        legend={"x": 0.01, "y": 0.99},
+    )
+    return fig
+
+
+def build_gr_plot(data: dict[str, dict]) -> go.Figure:
+    """
+    G(r) plot: Li-O_water and Li-O_TFSI for all models.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model RDF results as returned by :func:`load_rdf_results`.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure with g(r) curves and integration cutoff line.
+    """
+    fig = go.Figure()
+
+    colors = [
+        "#1f77b4",
+        "#ff7f0e",
+        "#2ca02c",
+        "#d62728",
+        "#9467bd",
+        "#8c564b",
+    ]
+
+    for i, model in enumerate(MODELS):
+        if model not in data:
+            continue
+        d = data[model]
+        c = colors[i % len(colors)]
+        fig.add_trace(
+            go.Scatter(
+                x=d["r"],
+                y=d["gr_water"],
+                mode="lines",
+                name=f"{model} O<sub>w</sub>",
+                line={"color": c, "dash": "solid"},
+                legendgroup=model,
+            )
+        )
+        fig.add_trace(
+            go.Scatter(
+                x=d["r"],
+                y=d["gr_tfsi"],
+                mode="lines",
+                name=f"{model} O<sub>TFSI</sub>",
+                line={"color": c, "dash": "dot"},
+                legendgroup=model,
+            )
+        )
+
+    # Mark integration cutoff
+    fig.add_vline(
+        x=R_CUT,
+        line_dash="dash",
+        line_color="gray",
+        annotation_text=f"r_cut={R_CUT} Å",
+        annotation_position="top right",
+    )
+
+    fig.update_layout(
+        title="Li-O Radial Distribution Functions (LiTFSI/H₂O, 21 m)",
+        xaxis_title="r / Å",
+        yaxis_title="g(r)",
+        xaxis_range=[1.0, 6.0],
+    )
+    return fig
+
+
+# --- Metrics table -----------------------------------------------------------
+
+
+def build_metrics_table(data: dict[str, dict]) -> dict:
+    """
+    Build metrics table JSON for ml-peg app.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model RDF results as returned by :func:`load_rdf_results`.
+
+    Returns
+    -------
+    dict
+        Table payload with keys ``data``, ``columns``, ``tooltip_header``,
+        ``thresholds``, and ``reference``.
+    """
+    rows = []
+    for model in MODELS:
+        if model not in data:
+            continue
+        d = data[model]
+
+        score_w = normalize_metric(
+            d["err_water"],
+            DEFAULT_THRESHOLDS["CN Li-O_water Error"]["good"],
+            DEFAULT_THRESHOLDS["CN Li-O_water Error"]["bad"],
+        )
+        score_f = normalize_metric(
+            d["err_tfsi"],
+            DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["good"],
+            DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["bad"],
+        )
+
+        w_w = DEFAULT_WEIGHTS.get("CN Li-O_water Error", 1.0)
+        w_f = DEFAULT_WEIGHTS.get("CN Li-O_TFSI Error", 1.0)
+        total_w = w_w + w_f
+        overall = (score_w * w_w + score_f * w_f) / total_w if total_w > 0 else 0.0
+
+        rows.append(
+            {
+                "MLIP": model,
+                "id": MODEL_NAME_MAP.get(model, model),
+                "CN Li-O_water": round(d["cn_water"], 3),
+                "CN Li-O_TFSI": round(d["cn_tfsi"], 3),
+                "CN Li-O_water Error": round(d["err_water"], 3),
+                "CN Li-O_TFSI Error": round(d["err_tfsi"], 3),
+                "Score": round(overall, 3),
+            }
+        )
+
+    rows.sort(key=lambda r: r["Score"], reverse=True)
+
+    columns = [
+        {"name": c, "id": c}
+        for c in [
+            "MLIP",
+            "CN Li-O_water Error",
+            "CN Li-O_TFSI Error",
+            "Score",
+        ]
+    ]
+
+    return {
+        "data": rows,
+        "columns": columns,
+        "tooltip_header": DEFAULT_TOOLTIPS,
+        "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()},
+        "weights": dict(DEFAULT_WEIGHTS),
+        "model_name_map": MODEL_NAME_MAP,
+        "reference": {
+            "CN_water": CN_EXP_WATER,
+            "CN_TFSI": CN_EXP_TFSI,
+            "citation": "Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)",
+            "concentration": "~18.5 m LiTFSI/H2O",
+            "method": "neutron diffraction with 6Li/7Li + H/D isotopic substitution",
+            "r_cut_angstrom": R_CUT,
+        },
+    }
+
+
+# --- Pytest interface --------------------------------------------------------
+
+
+@pytest.fixture
+def rdf_analysis():
+    """
+    Run full RDF analysis: table + plots.
+
+    Returns
+    -------
+    dict
+        Metrics table payload written to ``rdf_metrics_table.json``.
+    """
+    data = load_rdf_results()
+    if not data:
+        pytest.skip("No RDF data found")
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    table = build_metrics_table(data)
+    with open(OUT_PATH / "rdf_metrics_table.json", "w") as f:
+        json.dump(table, f, indent=2)
+
+    fig_cn = build_cn_bar_chart(data)
+    with open(OUT_PATH / "figure_cn_bar.json", "w") as f:
+        f.write(fig_cn.to_json())
+
+    fig_gr = build_gr_plot(data)
+    with open(OUT_PATH / "figure_gr.json", "w") as f:
+        f.write(fig_gr.to_json())
+
+    return table
+
+
+def test_rdf(rdf_analysis) -> None:
+    """
+    Run RDF benchmark -- generates table and plots.
+
+    Parameters
+    ----------
+    rdf_analysis : dict
+        Metrics table payload provided by the ``rdf_analysis`` fixture.
+    """
+    table = rdf_analysis
+    assert len(table["data"]) > 0, "No models produced RDF data"
diff --git a/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml b/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml
new file mode 100644
index 000000000..e6a67093a
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml
@@ -0,0 +1,15 @@
+metrics:
+  CN Li-O_water Error:
+    good: 0.0
+    bad: 0.5
+    unit: "-"
+    tooltip: "Absolute error in Li-O_water coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)"
+    level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021)
+    weight: 1
+  CN Li-O_TFSI Error:
+    good: 0.0
+    bad: 0.5
+    unit: "-"
+    tooltip: "Absolute error in Li-O_TFSI coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)"
+    level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021)
+    weight: 1
diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py b/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py
new file mode 100644
index 000000000..51b45c2b4
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py
@@ -0,0 +1,398 @@
+"""Analyse X-ray structure factor benchmark for LiTFSI/H2O electrolyte."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import plotly.graph_objects as go
+import pytest
+from scipy.interpolate import interp1d
+
+from ml_peg.analysis.utils.utils import load_metrics_config
+
+# --- Paths -------------------------------------------------------------------
+
+CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs"
+CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "xray_sf" / "outputs"
+APP_ROOT = Path(__file__).resolve().parents[3] / "app"
+OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf"
+
+# Experimental data bundled with the benchmark
+EXP_PATH = Path(__file__).resolve().parents[1] / "data" / "saxs_maginn.txt"
+
+MODELS = [
+    "matpes-r2scan",
+    "mace-mpa-0-medium",
+    "mace-omat-0-medium",
+    "mace-mp-0b3",
+    "mace-mh-1-omat",
+    "mace-mh-1-omol",
+]
+
+# Map benchmark model names → ml-peg registry names (for app summary table)
+MODEL_NAME_MAP = {
+    "matpes-r2scan": "mace-matpes-r2scan",
+    "mace-mpa-0-medium": "mace-mpa-0",
+    "mace-omat-0-medium": "mace-omat-0",
+}
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+# --- Helpers -----------------------------------------------------------------
+
+
+def load_experimental_sq() -> tuple[np.ndarray, np.ndarray]:
+    """
+    Load experimental SAXS S(q) data.
+
+    Returns
+    -------
+    q_exp : np.ndarray
+        Scattering vector values in inverse angstroms.
+    sq_exp : np.ndarray
+        Experimental structure factor values.
+    """
+    data = np.loadtxt(EXP_PATH)
+    return data[:, 0], data[:, 1]
+
+
+def load_computed_sq(model: str) -> tuple[np.ndarray, np.ndarray] | None:
+    """
+    Load computed S(q) for a model.
+
+    Parameters
+    ----------
+    model : str
+        Name of the MLIP model.
+
+    Returns
+    -------
+    tuple of (np.ndarray, np.ndarray) or None
+        Scattering vector and structure factor arrays, or None if the
+        output file does not exist.
+    """
+    json_path = CALC_PATH / model / "xray_sq.json"
+    if not json_path.exists():
+        return None
+    with open(json_path) as f:
+        d = json.load(f)
+    return np.array(d["q"]), np.array(d["Sq"])
+
+
+def compute_r_factor(
+    q_exp: np.ndarray,
+    sq_exp: np.ndarray,
+    q_calc: np.ndarray,
+    sq_calc: np.ndarray,
+) -> float:
+    """
+    Compute R-factor: sum|S_exp - S_calc| / sum|S_exp|.
+
+    The q range used is the overlap between experimental and calculated data:
+    q_min is the minimum q of the calculated S(q), set by the simulation cell
+    size (2*pi/L), and q_max is the maximum q of the experimental data.
+
+    Parameters
+    ----------
+    q_exp : np.ndarray
+        Experimental scattering vector values.
+    sq_exp : np.ndarray
+        Experimental structure factor values.
+    q_calc : np.ndarray
+        Calculated scattering vector values.
+    sq_calc : np.ndarray
+        Calculated structure factor values.
+
+    Returns
+    -------
+    float
+        R-factor value, or NaN if no overlapping valid data exists.
+    """
+    valid_calc = ~np.isnan(sq_calc)
+    q_min_calc = float(q_calc[valid_calc].min())
+    q_max_exp = float(q_exp.max())
+
+    f = interp1d(
+        q_calc[valid_calc],
+        sq_calc[valid_calc],
+        kind="linear",
+        bounds_error=False,
+        fill_value=np.nan,
+    )
+
+    mask = (q_exp >= q_min_calc) & (q_exp <= q_max_exp)
+    sq_interp = f(q_exp[mask])
+    sq_e = sq_exp[mask]
+
+    ok = ~np.isnan(sq_interp)
+    if not ok.any():
+        return float("nan")
+
+    return float(np.sum(np.abs(sq_e[ok] - sq_interp[ok])) / np.sum(np.abs(sq_e[ok])))
+
+
+def find_first_peak(
+    q: np.ndarray, sq: np.ndarray, q_min: float = 0.5, q_max: float = 2.0
+) -> float:
+    """
+    Find position of first peak in S(q).
+
+    Parameters
+    ----------
+    q : np.ndarray
+        Scattering vector values.
+    sq : np.ndarray
+        Structure factor values.
+    q_min : float, optional
+        Lower bound of the search window in inverse angstroms (default 0.5).
+    q_max : float, optional
+        Upper bound of the search window in inverse angstroms (default 2.0).
+
+    Returns
+    -------
+    float
+        Position of the first peak, or NaN if fewer than 3 valid points
+        exist in the search window.
+    """
+    mask = (q >= q_min) & (q <= q_max) & ~np.isnan(sq)
+    if mask.sum() < 3:
+        return float("nan")
+    q_sub = q[mask]
+    sq_sub = sq[mask]
+    return float(q_sub[np.argmax(sq_sub)])
+
+
+def normalize_metric(value: float, good: float, bad: float) -> float:
+    """
+    Normalize a metric value to [0, 1] where 0 is good and 1 is bad.
+
+    Parameters
+    ----------
+    value : float
+        Raw metric value.
+    good : float
+        Threshold corresponding to the best achievable score (0).
+    bad : float
+        Threshold corresponding to the worst score (1).
+
+    Returns
+    -------
+    float
+        Normalized score clipped to [0, 1].
+    """
+    if bad == good:
+        return 0.0 if value <= good else 1.0
+    score = (value - good) / (bad - good)
+    return max(0.0, min(1.0, score))
+
+
+# --- Plot builders -----------------------------------------------------------
+
+
+def build_sq_comparison_plot(
+    q_exp: np.ndarray,
+    sq_exp: np.ndarray,
+    model_data: dict[str, tuple[np.ndarray, np.ndarray]],
+) -> go.Figure:
+    """
+    Build S(q) overlay plot comparing computed and experimental data.
+
+    Parameters
+    ----------
+    q_exp : np.ndarray
+        Experimental scattering vector values.
+    sq_exp : np.ndarray
+        Experimental structure factor values.
+    model_data : dict of str to tuple of (np.ndarray, np.ndarray)
+        Mapping from model name to (q, S(q)) arrays.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure with experimental and computed S(q) traces.
+    """
+    fig = go.Figure()
+
+    fig.add_trace(
+        go.Scatter(
+            x=q_exp.tolist(),
+            y=sq_exp.tolist(),
+            mode="lines",
+            name="Exp. (Zhang et al. 2021)",
+            line={"color": "black", "width": 2},
+        )
+    )
+
+    for model, (q, sq) in model_data.items():
+        fig.add_trace(
+            go.Scatter(
+                x=q.tolist(),
+                y=sq.tolist(),
+                mode="lines",
+                name=model,
+                opacity=0.8,
+            )
+        )
+
+    fig.update_layout(
+        title="X-ray Structure Factor S(q) — LiTFSI/H2O (21 m)",
+        xaxis_title="q / Å⁻¹",
+        yaxis_title="S(q) (Faber-Ziman)",
+        legend={"x": 0.60, "y": 0.98},
+    )
+    return fig
+
+
+def build_metrics_table(results: dict[str, dict]) -> dict:
+    """
+    Build metrics table as a JSON-serialisable dictionary.
+
+    Parameters
+    ----------
+    results : dict of str to dict
+        Per-model results containing ``r_factor``, ``peak_position_error``,
+        ``peak_calc``, and ``peak_exp`` entries.
+
+    Returns
+    -------
+    dict
+        Dictionary with ``data`` (list of row dicts), ``columns``,
+        ``tooltip_header``, and ``thresholds`` keys.
+    """
+    rows = []
+    for model in MODELS:
+        if model not in results:
+            continue
+        r = results[model]
+        r_factor = r["r_factor"]
+        peak_err = r["peak_position_error"]
+
+        score_r = normalize_metric(
+            r_factor,
+            DEFAULT_THRESHOLDS["S(q) R-factor"]["good"],
+            DEFAULT_THRESHOLDS["S(q) R-factor"]["bad"],
+        )
+        score_p = normalize_metric(
+            peak_err,
+            DEFAULT_THRESHOLDS["First Peak Position Error"]["good"],
+            DEFAULT_THRESHOLDS["First Peak Position Error"]["bad"],
+        )
+
+        w_r = DEFAULT_WEIGHTS.get("S(q) R-factor", 1.0)
+        w_p = DEFAULT_WEIGHTS.get("First Peak Position Error", 1.0)
+        total_w = w_r + w_p
+        overall = (
+            1.0 - (score_r * w_r + score_p * w_p) / total_w if total_w > 0 else 0.0
+        )
+
+        rows.append(
+            {
+                "MLIP": model,
+                "id": MODEL_NAME_MAP.get(model, model),
+                "S(q) R-factor": round(r_factor, 4),
+                "First Peak Position Error": round(peak_err, 4),
+                "First Peak (calc)": round(r["peak_calc"], 2),
+                "First Peak (exp)": round(r["peak_exp"], 2),
+                "Score": round(overall, 3),
+            }
+        )
+
+    rows.sort(key=lambda r: r["Score"], reverse=True)
+
+    columns = [
+        {"name": c, "id": c}
+        for c in [
+            "MLIP",
+            "S(q) R-factor",
+            "First Peak Position Error",
+            "Score",
+        ]
+    ]
+
+    return {
+        "data": rows,
+        "columns": columns,
+        "tooltip_header": DEFAULT_TOOLTIPS,
+        "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()},
+        "weights": dict(DEFAULT_WEIGHTS),
+        "model_name_map": MODEL_NAME_MAP,
+    }
+
+
+# --- Pytest interface --------------------------------------------------------
+
+
+@pytest.fixture
+def xray_sf_analysis():
+    """
+    Run full X-ray S(q) analysis and write outputs.
+
+    Returns
+    -------
+    dict
+        Metrics table dictionary produced by ``build_metrics_table``.
+    """
+    q_exp, sq_exp = load_experimental_sq()
+    peak_exp = find_first_peak(q_exp, sq_exp)
+
+    model_data = {}
+    results = {}
+
+    for model in MODELS:
+        data = load_computed_sq(model)
+        if data is None:
+            continue
+        q_calc, sq_calc = data
+        model_data[model] = (q_calc, sq_calc)
+
+        r_factor = compute_r_factor(
+            q_exp, sq_exp, q_calc, sq_calc
+        )  # range: [q_min_calc, q_max_exp]
+        peak_calc = find_first_peak(q_calc, sq_calc)
+        peak_error = (
+            abs(peak_calc - peak_exp) if not np.isnan(peak_calc) else float("nan")
+        )
+
+        results[model] = {
+            "r_factor": r_factor,
+            "peak_calc": peak_calc,
+            "peak_exp": peak_exp,
+            "peak_position_error": peak_error,
+        }
+
+    if not results:
+        pytest.skip("No S(q) data found")
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    table = build_metrics_table(results)
+    with open(OUT_PATH / "xray_sf_metrics_table.json", "w") as f:
+        json.dump(table, f, indent=2)
+
+    fig = build_sq_comparison_plot(q_exp, sq_exp, model_data)
+    with open(OUT_PATH / "figure_xray_sq_comparison.json", "w") as f:
+        f.write(fig.to_json())
+
+    with open(OUT_PATH / "xray_sf_results.json", "w") as f:
+        json.dump(results, f, indent=2)
+
+    return table
+
+
+def test_xray_sf(xray_sf_analysis) -> None:
+    """
+    Run X-ray structure factor benchmark.
+
+    Parameters
+    ----------
+    xray_sf_analysis : dict
+        Metrics table fixture providing analysis results.
+    """
+    table = xray_sf_analysis
+    assert len(table["data"]) > 0, "No models produced S(q) data"
diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml b/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml
new file mode 100644
index 000000000..310fd8824
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml
@@ -0,0 +1,15 @@
+metrics:
+  S(q) R-factor:
+    good: 0.0
+    bad: 0.5
+    unit: "-"
+    tooltip: "R-factor between computed and experimental X-ray S(q): sum|S_exp - S_calc| / sum|S_exp|"
+    level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021)
+    weight: 1
+  First Peak Position Error:
+    good: 0.0
+    bad: 0.3
+    unit: "1/A"
+    tooltip: "Absolute error in position of first S(q) peak (~1.0 A-1)"
+    level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021)
+    weight: 0
diff --git a/ml_peg/app/wise_electrolytes/density/app_density.py b/ml_peg/app/wise_electrolytes/density/app_density.py
new file mode 100644
index 000000000..2a9055d4f
--- /dev/null
+++ b/ml_peg/app/wise_electrolytes/density/app_density.py
@@ -0,0 +1,68 @@
+"""Dash app for LiTFSI/H2O electrolyte density benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from dash import html
+
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.load import read_plot
+
+BENCHMARK_NAME = "wise_electrolytes_density"
+APP_ROOT = Path(__file__).resolve().parents[2]
+DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density"
+
+
+class DensityApp(BaseApp):
+    """Dash app for WiSE electrolyte density benchmark."""
+
+    def register_callbacks(self) -> None:
+        """Register interactive plot callbacks."""
+        bar_chart = read_plot(
+            DATA_PATH / "figure_density_bar.json",
+            id=f"{BENCHMARK_NAME}-bar",
+        )
+        timeseries = read_plot(
+            DATA_PATH / "figure_density_timeseries.json",
+            id=f"{BENCHMARK_NAME}-timeseries",
+        )
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={
+                "Density Error": bar_chart,
+                "Density Error (%)": bar_chart,
+                "Score": timeseries,
+            },
+        )
+
+
+def get_app() -> DensityApp:
+    """
+    Return configured density benchmark app.
+
+    Returns
+    -------
+    DensityApp
+        Configured app instance.
+    """
+    return DensityApp(
+        name="WiSE Density",
+        description=(
+            "NPT density of 21 m LiTFSI/H2O electrolyte at 298 K. "
+            "Experimental reference: 1.72 g/cm³ (Maginn et al., 2021)."
+        ),
+        docs_url="",
+        table_path=DATA_PATH / "density_metrics_table.json",
+        extra_components=[
+            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    app = get_app()
+    app.run(port=8060, debug=True)
diff --git a/ml_peg/app/wise_electrolytes/rdf/app_rdf.py b/ml_peg/app/wise_electrolytes/rdf/app_rdf.py
new file mode 100644
index 000000000..8de28f8be
--- /dev/null
+++ b/ml_peg/app/wise_electrolytes/rdf/app_rdf.py
@@ -0,0 +1,69 @@
+"""Dash app for LiTFSI/H2O Li-O RDF / coordination number benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from dash import html
+
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.load import read_plot
+
+BENCHMARK_NAME = "wise_electrolytes_rdf"
+APP_ROOT = Path(__file__).resolve().parents[2]
+DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf"
+
+
+class RdfApp(BaseApp):
+    """Dash app for WiSE electrolyte Li-O RDF benchmark."""
+
+    def register_callbacks(self) -> None:
+        """Register interactive plot callbacks."""
+        cn_bar = read_plot(
+            DATA_PATH / "figure_cn_bar.json",
+            id=f"{BENCHMARK_NAME}-cn-bar",
+        )
+        gr_plot = read_plot(
+            DATA_PATH / "figure_gr.json",
+            id=f"{BENCHMARK_NAME}-gr",
+        )
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={
+                "CN Li-O_water Error": cn_bar,
+                "CN Li-O_TFSI Error": cn_bar,
+                "Score": gr_plot,
+            },
+        )
+
+
+def get_app() -> RdfApp:
+    """
+    Return configured RDF benchmark app.
+
+    Returns
+    -------
+    RdfApp
+        Configured app instance.
+    """
+    return RdfApp(
+        name="WiSE Li-O RDF",
+        description=(
+            "Li-O coordination numbers from radial distribution functions "
+            "of 21 m LiTFSI/H2O electrolyte (NVT, 298 K). "
+            "Reference: Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)."
+        ),
+        docs_url="",
+        table_path=DATA_PATH / "rdf_metrics_table.json",
+        extra_components=[
+            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    app = get_app()
+    app.run(port=8062, debug=True)
diff --git a/ml_peg/app/wise_electrolytes/wise_electrolytes.yml b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml
new file mode 100644
index 000000000..dbfe8135c
--- /dev/null
+++ b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml
@@ -0,0 +1,7 @@
+title: WiSE Electrolytes
+description: Structural and thermodynamic properties of 21 m LiTFSI/H2O "water-in-salt" electrolytes from molecular dynamics
+weight: 0
+benchmark_weights:
+  WiSE Density: 0.5
+  WiSE X-ray S(q): 0.3
+  WiSE Li-O RDF: 0.2
diff --git a/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py b/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py
new file mode 100644
index 000000000..87e0f78f7
--- /dev/null
+++ b/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py
@@ -0,0 +1,66 @@
+"""Dash app for LiTFSI/H2O X-ray structure factor benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from dash import html
+
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.load import read_plot
+
+BENCHMARK_NAME = "wise_electrolytes_xray_sf"
+APP_ROOT = Path(__file__).resolve().parents[2]
+DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf"
+
+
+class XraySfApp(BaseApp):
+    """Dash app for WiSE electrolyte X-ray S(q) benchmark."""
+
+    def register_callbacks(self) -> None:
+        """Register interactive plot callbacks."""
+        sq_plot = read_plot(
+            DATA_PATH / "figure_xray_sq_comparison.json",
+            id=f"{BENCHMARK_NAME}-sq",
+        )
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={
+                "S(q) R-factor": sq_plot,
+                "First Peak Position Error": sq_plot,
+                "Score": sq_plot,
+            },
+        )
+
+
+def get_app() -> XraySfApp:
+    """
+    Return configured X-ray S(q) benchmark app.
+
+    Returns
+    -------
+    XraySfApp
+        Configured app instance.
+    """
+    return XraySfApp(
+        name="WiSE X-ray S(q)",
+        description=(
+            "X-ray structure factor S(q) of 21 m LiTFSI/H2O electrolyte from NVT MD. "
+            "Computed via dynasor with Cromer-Mann form factors "
+            "(Faber-Ziman normalization). "
+            "Experimental reference: SAXS (Maginn et al., J. Phys. Chem. B, 2021)."
+        ),
+        docs_url="",
+        table_path=DATA_PATH / "xray_sf_metrics_table.json",
+        extra_components=[
+            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    app = get_app()
+    app.run(port=8061, debug=True)
diff --git a/ml_peg/calcs/wise_electrolytes/density/calc_density.py b/ml_peg/calcs/wise_electrolytes/density/calc_density.py
new file mode 100644
index 000000000..2d52a63d7
--- /dev/null
+++ b/ml_peg/calcs/wise_electrolytes/density/calc_density.py
@@ -0,0 +1,75 @@
+"""
+Extract density from pre-computed NPT simulations of LiTFSI/H2O electrolyte.
+
+This benchmark reads pre-computed LAMMPS NPT thermo logs and extracts
+equilibrium density for each MLIP model. The simulations use a p16_w42
+cell (382 atoms: 16 LiTFSI + 42 H2O) at 298 K, 1 atm.
+
+System: 21 m LiTFSI / H2O electrolyte.
+Experimental density: 1.7126 g/cm3
+(Gilbert et al., J. Chem. Eng. Data 62, 2056 (2017),
+DOI: 10.1021/acs.jced.7b00135).
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+# --- Configuration -----------------------------------------------------------
+
+# Path to converted data (will be replaced with S3 download for PR)
+DATA_ROOT = Path(
+    os.environ.get(
+        "ML_PEG_WISE_DENSITY_DATA_ROOT",
+        "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/density",
+    )
+)
+OUT_PATH = Path(__file__).parent / "outputs"
+
+MODELS = [
+    "matpes-r2scan",
+    "mace-mpa-0-medium",
+    "mace-omat-0-medium",
+    "mace-mp-0b3",
+    "mace-mh-1-omat",
+    "mace-mh-1-omol",
+]
+
+RHO_EXP = 1.7126  # g/cm³  (Gilbert et al., JCED 2017)
+
+
+# --- Pytest interface (ml-peg convention) ------------------------------------
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_extract_density(model: str) -> None:
+    """
+    Extract and save density data for one model.
+
+    Parameters
+    ----------
+    model : str
+        Name of the MLIP model to extract density for.
+    """
+    # Read pre-computed density data
+    json_path = DATA_ROOT / model / "density.json"
+    if not json_path.exists():
+        pytest.skip(f"No density data for {model}")
+
+    with open(json_path) as f:
+        result = json.load(f)
+
+    # Save to outputs
+    out_dir = OUT_PATH / model
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    with open(out_dir / "density.json", "w") as f:
+        json.dump(result, f, indent=2)
+
+    # Basic sanity
+    assert result["rho_mean"] > 0, f"Negative density for {model}"
+    assert abs(result["rho_error_pct"]) < 50, f"Density error > 50% for {model}"
diff --git a/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py b/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py
new file mode 100644
index 000000000..7bc8de2ab
--- /dev/null
+++ b/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py
@@ -0,0 +1,313 @@
+"""
+Compute Li-O radial distribution functions for LiTFSI/H2O electrolyte.
+
+Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model
+and computes g(r) for Li-O_total, Li-O_water (O bonded to H, d_OH < 1.25 A),
+and Li-O_TFSI (O bonded to S, d_OS < 1.75 A), plus coordination numbers by
+integration of the first peak.
+
+System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms).
+Experimental reference for coordination numbers:
+Watanabe et al., J. Phys. Chem. B 125, 7477 (2021), DOI: 10.1021/acs.jpcb.1c04693.
+Neutron diffraction with 6Li/7Li + H/D isotopic substitution, ~18.5 m LiTFSI/H2O.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+
+from ase.geometry import get_distances
+from ase.io import iread
+import numpy as np
+import pytest
+
+# --- Configuration -----------------------------------------------------------
+
+DATA_ROOT = Path(
+    os.environ.get(
+        "ML_PEG_WISE_XRAY_DATA_ROOT",
+        "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf",
+    )
+)
+OUT_PATH = Path(__file__).parent / "outputs"
+
+MODELS = [
+    "matpes-r2scan",
+    "mace-mpa-0-medium",
+    "mace-omat-0-medium",
+    "mace-mp-0b3",
+    "mace-mh-1-omat",
+    "mace-mh-1-omol",
+]
+
+# RDF parameters
+R_MAX = 6.0  # Å — covers first + second shell
+DR = 0.02  # Å — bin width
+R_CUT_COORD = 2.83  # Å — first minimum of Li-O g(r) from r2SCAN AIMD reference
+
+# O-type identification cutoffs (applied on first frame only)
+D_OH_CUT = 1.25  # Å — O-H bond in water
+D_OS_CUT = 1.75  # Å — O-S bond in TFSI
+
+
+# --- Helpers -----------------------------------------------------------------
+
+
+def identify_o_types(atoms) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Return indices of O_water and O_TFSI from a single ASE Atoms frame.
+
+    Parameters
+    ----------
+    atoms : ase.Atoms
+        A single frame from the trajectory.
+
+    Returns
+    -------
+    o_water : np.ndarray
+        Indices of oxygen atoms bonded to hydrogen (water oxygens).
+    o_tfsi : np.ndarray
+        Indices of oxygen atoms bonded to sulfur (TFSI oxygens).
+    """
+    syms = np.array(atoms.get_chemical_symbols())
+    pos = atoms.get_positions()
+    cell = atoms.get_cell()
+    pbc = atoms.get_pbc()
+
+    o_idx = np.where(syms == "O")[0]
+    h_idx = np.where(syms == "H")[0]
+    s_idx = np.where(syms == "S")[0]
+
+    o_water, o_tfsi = [], []
+    for o in o_idx:
+        _, d_oh = get_distances(pos[o : o + 1], pos[h_idx], cell=cell, pbc=pbc)
+        _, d_os = get_distances(pos[o : o + 1], pos[s_idx], cell=cell, pbc=pbc)
+        if d_oh.min() < D_OH_CUT:
+            o_water.append(o)
+        elif d_os.min() < D_OS_CUT:
+            o_tfsi.append(o)
+
+    return np.array(o_water), np.array(o_tfsi)
+
+
+def compute_rdf(
+    traj_path: Path,
+    o_water_idx: np.ndarray,
+    o_tfsi_idx: np.ndarray,
+    r_max: float = R_MAX,
+    dr: float = DR,
+    skip_frames: int = 0,  # trajectories are pre-equilibrated (50-100 ps window)
+) -> dict:
+    """
+    Compute Li-O RDFs from an extxyz trajectory.
+
+    Parameters
+    ----------
+    traj_path : Path
+        Path to .extxyz trajectory file.
+    o_water_idx : np.ndarray
+        Atom indices of O_water (from first frame, fixed).
+    o_tfsi_idx : np.ndarray
+        Atom indices of O_TFSI (from first frame, fixed).
+    r_max : float
+        Maximum distance for RDF.
+    dr : float
+        Bin width.
+    skip_frames : int
+        Number of initial frames to skip (equilibration).
+
+    Returns
+    -------
+    dict
+        Dictionary with keys ``r``, ``gr_LiO_total``, ``gr_LiO_water``,
+        ``gr_LiO_TFSI``, ``coord_LiO_total``, ``coord_LiO_water``,
+        ``coord_LiO_TFSI``, ``n_li``, ``n_O_water``, ``n_O_TFSI``,
+        ``n_frames_used``, and ``r_cut_coord``.
+    """
+    bins = np.arange(0, r_max + dr, dr)
+    r_centers = 0.5 * (bins[:-1] + bins[1:])
+    n_bins = len(r_centers)
+
+    hist_total = np.zeros(n_bins)
+    hist_water = np.zeros(n_bins)
+    hist_tfsi = np.zeros(n_bins)
+
+    n_frames = 0
+    n_li = None
+    volume = None
+    o_all_idx = np.concatenate([o_water_idx, o_tfsi_idx])
+
+    for frame_idx, atoms in enumerate(
+        iread(str(traj_path), format="extxyz", index=":")
+    ):
+        if frame_idx < skip_frames:
+            continue
+
+        syms = np.array(atoms.get_chemical_symbols())
+        pos = atoms.get_positions()
+        cell = atoms.get_cell()
+        pbc = atoms.get_pbc()
+
+        li_idx = np.where(syms == "Li")[0]
+        if n_li is None:
+            n_li = len(li_idx)
+        if volume is None:
+            volume = atoms.get_volume()
+
+        pos_li = pos[li_idx]
+
+        for o_set, hist in [
+            (o_all_idx, hist_total),
+            (o_water_idx, hist_water),
+            (o_tfsi_idx, hist_tfsi),
+        ]:
+            pos_o = pos[o_set]
+            _, dists = get_distances(pos_li, pos_o, cell=cell, pbc=pbc)
+            dists_flat = dists.ravel()
+            dists_flat = dists_flat[dists_flat < r_max]
+            h, _ = np.histogram(dists_flat, bins=bins)
+            hist += h
+
+        n_frames += 1
+
+    if n_frames == 0 or n_li is None:
+        raise RuntimeError(f"No frames processed from {traj_path}")
+
+    # Normalize to g(r)
+    def normalize(hist, n_central, n_neighbor):
+        """
+        Normalize histogram to g(r).
+
+        Parameters
+        ----------
+        hist : np.ndarray
+            Raw pair-distance histogram.
+        n_central : int
+            Number of central atoms (Li).
+        n_neighbor : int
+            Number of neighbor atoms (O subset).
+
+        Returns
+        -------
+        np.ndarray
+            Normalized radial distribution function.
+        """
+        shell_vol = (4.0 / 3.0) * np.pi * (bins[1:] ** 3 - bins[:-1] ** 3)
+        rho = n_neighbor / volume
+        norm = n_central * n_frames * rho * shell_vol
+        return hist / norm
+
+    n_o_total = len(o_all_idx)
+    n_o_water = len(o_water_idx)
+    n_o_tfsi = len(o_tfsi_idx)
+
+    gr_total = normalize(hist_total, n_li, n_o_total)
+    gr_water = normalize(hist_water, n_li, n_o_water)
+    gr_tfsi = normalize(hist_tfsi, n_li, n_o_tfsi)
+
+    # Coordination numbers: integrate 4π ρ g(r) r² dr from 0 to R_CUT_COORD
+    def coord_number(gr, n_neighbor):
+        """
+        Compute coordination number from g(r).
+
+        Parameters
+        ----------
+        gr : np.ndarray
+            Radial distribution function.
+        n_neighbor : int
+            Number of neighbor atoms (O subset).
+
+        Returns
+        -------
+        float
+            Integrated coordination number up to R_CUT_COORD.
+        """
+        rho = n_neighbor / volume
+        integrand = 4.0 * np.pi * rho * gr * r_centers**2 * dr
+        mask = r_centers <= R_CUT_COORD
+        return float(np.sum(integrand[mask]))
+
+    return {
+        "r": r_centers.tolist(),
+        "gr_LiO_total": gr_total.tolist(),
+        "gr_LiO_water": gr_water.tolist(),
+        "gr_LiO_TFSI": gr_tfsi.tolist(),
+        "coord_LiO_total": coord_number(gr_total, n_o_total),
+        "coord_LiO_water": coord_number(gr_water, n_o_water),
+        "coord_LiO_TFSI": coord_number(gr_tfsi, n_o_tfsi),
+        "n_li": n_li,
+        "n_O_water": n_o_water,
+        "n_O_TFSI": n_o_tfsi,
+        "n_frames_used": n_frames,
+        "r_cut_coord": R_CUT_COORD,
+    }
+
+
+def find_trajectory(model: str) -> Path | None:
+    """
+    Find NVT extxyz trajectory for a model.
+
+    Parameters
+    ----------
+    model : str
+        Name of the MLIP model.
+
+    Returns
+    -------
+    Path or None
+        Path to the trajectory file, or None if not found.
+    """
+    p = DATA_ROOT / model / "nvt_trajectory.extxyz"
+    return p if p.exists() else None
+
+
+# --- Pytest interface --------------------------------------------------------
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_compute_rdf(model: str) -> None:
+    """
+    Compute and save Li-O RDFs and coordination numbers for one model.
+
+    Parameters
+    ----------
+    model : str
+        Name of the MLIP model to compute RDFs for.
+    """
+    traj_path = find_trajectory(model)
+    if traj_path is None:
+        pytest.skip(f"No NVT trajectory for {model}")
+
+    # Identify O types from first frame (indices fixed throughout NVT)
+    from ase.io import read as ase_read
+
+    first_frame = ase_read(str(traj_path), index=0, format="extxyz")
+    o_water_idx, o_tfsi_idx = identify_o_types(first_frame)
+
+    assert len(o_water_idx) > 0, f"No O_water found for {model}"
+    assert len(o_tfsi_idx) > 0, f"No O_TFSI found for {model}"
+
+    result = compute_rdf(traj_path, o_water_idx, o_tfsi_idx)
+
+    out_dir = OUT_PATH / model
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    with open(out_dir / "rdf.json", "w") as f:
+        json.dump(
+            {k: v for k, v in result.items() if not isinstance(v, list)}, f, indent=2
+        )
+
+    np.savez(
+        out_dir / "rdf.npz",
+        r=np.array(result["r"]),
+        gr_LiO_total=np.array(result["gr_LiO_total"]),
+        gr_LiO_water=np.array(result["gr_LiO_water"]),
+        gr_LiO_TFSI=np.array(result["gr_LiO_TFSI"]),
+    )
+
+    # Sanity checks
+    assert 2.0 < result["coord_LiO_total"] < 8.0, (
+        f"Unexpected Li-O_total CN={result['coord_LiO_total']:.2f} for {model}"
+    )
diff --git a/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py b/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py
new file mode 100644
index 000000000..c4e173ffd
--- /dev/null
+++ b/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py
@@ -0,0 +1,345 @@
+"""
+Compute X-ray structure factor S(q) for LiTFSI/H2O electrolyte.
+
+Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model
+and computes S(q) via dynasor using TRAVIS-matching settings: Cromer-Mann
+4-Gaussian form factors (including H), Faber-Ziman normalization with Laue
+monotonic term subtraction, and Savitzky-Golay smoothing
+(window=27, order=3, dq=0.02 A^-1).
+
+System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms).
+Experimental reference: SAXS, Zhang, Lewis, Mars, Wan et al.,
+J. Phys. Chem. B 125, 4501 (2021), DOI: 10.1021/acs.jpcb.1c02189.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+import warnings
+
+import numpy as np
+import pytest
+from scipy.signal import savgol_filter
+
+warnings.filterwarnings("ignore")
+
+# --- Configuration -----------------------------------------------------------
+
+# Path to converted .extxyz trajectories (will be S3 download for PR)
+DATA_ROOT = Path(
+    os.environ.get(
+        "ML_PEG_WISE_XRAY_DATA_ROOT",
+        "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf",
+    )
+)
+OUT_PATH = Path(__file__).parent / "outputs"
+
+MODELS = [
+    "matpes-r2scan",
+    "mace-mpa-0-medium",
+    "mace-omat-0-medium",
+    "mace-mp-0b3",
+    "mace-mh-1-omat",
+    "mace-mh-1-omol",
+]
+
+# S(q) computation parameters (TRAVIS-matching)
+Q_MAX = 13.0  # Å⁻¹
+Q_MIN = 0.5  # Å⁻¹
+MAX_QPOINTS = 50000
+DQ_BIN = 0.02  # Å⁻¹
+SAVGOL_WINDOW = 27  # optimal for matching TRAVIS
+SAVGOL_ORDER = 3
+
+# LAMMPS type → element (for fallback)
+TYPE_TO_ELEMENT = {1: "Li", 2: "C", 3: "F", 4: "S", 5: "N", 6: "O", 7: "H"}
+
+# System composition (p64_w170: 64 LiTFSI + 170 H2O)
+COMPOSITION = {"Li": 64, "C": 128, "F": 384, "S": 128, "N": 64, "O": 426, "H": 340}
+N_ATOMS = sum(COMPOSITION.values())  # 1534
+CONC = {k: v / N_ATOMS for k, v in COMPOSITION.items()}
+
+# TRAVIS Cromer-Mann 4-Gaussian form factors (International Tables)
+TRAVIS_FF = {
+    "S": {
+        "a": [6.905, 5.203, 1.438, 1.586],
+        "b": [1.468, 22.215, 0.254, 56.172],
+        "c": 0.867,
+    },
+    "F": {
+        "a": [3.539, 2.641, 1.517, 1.024],
+        "b": [10.283, 4.294, 0.262, 26.148],
+        "c": 0.278,
+    },
+    "O": {
+        "a": [3.049, 2.287, 1.546, 0.867],
+        "b": [13.277, 5.701, 0.324, 32.909],
+        "c": 0.251,
+    },
+    "N": {
+        "a": [12.213, 3.132, 2.013, 1.166],
+        "b": [0.006, 9.893, 28.997, 0.583],
+        "c": -11.529,
+    },
+    "C": {
+        "a": [2.310, 1.020, 1.589, 0.865],
+        "b": [20.844, 10.208, 0.569, 51.651],
+        "c": 0.216,
+    },
+    "Li": {
+        "a": [1.128, 0.751, 0.618, 0.465],
+        "b": [3.955, 1.052, 85.391, 168.261],
+        "c": 0.038,
+    },
+    "H": {
+        "a": [0.493, 0.323, 0.140, 0.041],
+        "b": [10.511, 26.126, 3.142, 57.800],
+        "c": 0.003,
+    },
+}
+
+
+# --- Form factor helpers -----------------------------------------------------
+
+
+def compute_fq_travis(elem: str, q_arr: np.ndarray) -> np.ndarray:
+    """
+    Compute X-ray form factor f(q) using TRAVIS 4-Gaussian (Cromer-Mann) params.
+
+    Parameters
+    ----------
+    elem : str
+        Chemical element symbol (e.g. ``"Li"``, ``"O"``).
+    q_arr : np.ndarray
+        Array of q values in inverse angstroms.
+
+    Returns
+    -------
+    np.ndarray
+        Form factor values evaluated at each q.
+    """
+    ff = TRAVIS_FF[elem]
+    s2 = (q_arr / (4 * np.pi)) ** 2
+    return sum(ff["a"][i] * np.exp(-ff["b"][i] * s2) for i in range(4)) + ff["c"]
+
+
+# --- S(q) computation -------------------------------------------------------
+
+
+def compute_sq_travis_style(traj_path: Path) -> dict:
+    """
+    Compute S(q) in TRAVIS Faber-Ziman convention using dynasor.
+
+    Steps:
+
+    1. Read trajectory with dynasor (LAMMPS dump via MDAnalysis).
+    2. Compute partial S_ab(q) on a fine spherical q-grid.
+    3. Apply TRAVIS Cromer-Mann form factors: I_xray = sum f_a * f_b * S_ab.
+    4. Spherical binning at dq = 0.02 A^-1.
+    5. Faber-Ziman normalization with Laue term: S_FZ = I/<f>^2 - <f^2>/<f>^2 + 1.
+    6. Savitzky-Golay smoothing.
+
+    Parameters
+    ----------
+    traj_path : Path
+        Path to the trajectory file (.extxyz or LAMMPS dump).
+
+    Returns
+    -------
+    dict
+        Dictionary with keys ``q``, ``Sq``, ``n_qpoints``,
+        ``n_qpoints_used``, ``cell``, ``atom_types``,
+        ``particle_counts``, and ``params``.
+    """
+    from dynasor import (
+        Trajectory,
+        compute_static_structure_factors,
+        get_spherical_qpoints,
+    )
+
+    traj_str = str(traj_path)
+    is_extxyz = traj_str.endswith(".extxyz") or traj_str.endswith(".xyz")
+
+    if is_extxyz:
+        # For .extxyz: dynasor reads it directly, atomic_indices from element symbols
+        from ase.io import read as ase_read
+
+        first_frame = ase_read(traj_str, index=0)
+        symbols = first_frame.get_chemical_symbols()
+        atomic_indices = {}
+        for i, sym in enumerate(symbols):
+            atomic_indices.setdefault(sym, []).append(i)
+
+        traj = Trajectory(
+            traj_str,
+            trajectory_format="ase",
+            atomic_indices=atomic_indices,
+        )
+    else:
+        # For LAMMPS dump: use MDAnalysis backend
+        import MDAnalysis as mda  # noqa: N813
+
+        u = mda.Universe(traj_str, format="LAMMPSDUMP")
+        types = u.atoms.types
+        atomic_indices = {}
+        for t, elem in TYPE_TO_ELEMENT.items():
+            mask = types == str(t)
+            idx = np.where(mask)[0].tolist()
+            if idx:
+                atomic_indices[elem] = idx
+
+        traj = Trajectory(
+            traj_str,
+            trajectory_format="lammps_mdanalysis",
+            atomic_indices=atomic_indices,
+        )
+
+    q_points = get_spherical_qpoints(traj.cell, q_max=Q_MAX, max_points=MAX_QPOINTS)
+    sample = compute_static_structure_factors(traj, q_points)
+
+    q_norms = np.linalg.norm(sample.q_points, axis=1)
+    atom_types = list(sample.particle_counts.keys())
+
+    # Manual X-ray weighting with TRAVIS form factors (including H)
+    ff_at_q = {at: compute_fq_travis(at, q_norms) for at in atom_types}
+    i_xray = np.zeros(len(q_norms))
+    for s1, s2 in sample.pairs:
+        sq_ab = sample[f"Sq_{s1}_{s2}"].flatten()
+        i_xray += ff_at_q[s1] * ff_at_q[s2] * sq_ab
+
+    # Spherical binning
+    q_bins = np.arange(0.0, Q_MAX + DQ_BIN, DQ_BIN)
+    q_centers = 0.5 * (q_bins[:-1] + q_bins[1:])
+    i_xray_binned = np.full(len(q_centers), np.nan)
+    counts = np.zeros(len(q_centers), dtype=int)
+
+    for i in range(len(q_centers)):
+        mask = (q_norms >= q_bins[i]) & (q_norms < q_bins[i + 1])
+        n = mask.sum()
+        if n > 0:
+            i_xray_binned[i] = np.mean(i_xray[mask])
+            counts[i] = n
+
+    # Faber-Ziman normalization with Laue subtraction
+    f_avg = np.zeros(len(q_centers))
+    f2_avg = np.zeros(len(q_centers))
+    for elem, c in CONC.items():
+        fq = compute_fq_travis(elem, q_centers)
+        f_avg += c * fq
+        f2_avg += c * fq**2
+    f_avg_sq = f_avg**2
+
+    sq_fz = np.where(
+        f_avg_sq > 0,
+        i_xray_binned / f_avg_sq - f2_avg / f_avg_sq + 1.0,
+        np.nan,
+    )
+
+    # Savitzky-Golay smoothing
+    valid = ~np.isnan(sq_fz) & (q_centers >= 0.3) & (q_centers <= Q_MAX)
+    q_v = q_centers[valid]
+    sq_v = sq_fz[valid]
+    if len(sq_v) > SAVGOL_WINDOW:
+        sq_smooth = savgol_filter(sq_v, SAVGOL_WINDOW, SAVGOL_ORDER)
+    else:
+        sq_smooth = sq_v
+
+    # Restrict to useful range
+    rmask = (q_v >= Q_MIN) & (q_v <= 12.0)
+
+    return {
+        "q": q_v[rmask].tolist(),
+        "Sq": sq_smooth[rmask].tolist(),
+        "n_qpoints": len(q_points),
+        "n_qpoints_used": len(sample.q_points),
+        "cell": traj.cell.tolist(),
+        "atom_types": traj.atom_types,
+        "particle_counts": {k: int(v) for k, v in sample.particle_counts.items()},
+        "params": {
+            "q_max": Q_MAX,
+            "q_min": Q_MIN,
+            "dq_bin": DQ_BIN,
+            "max_qpoints": MAX_QPOINTS,
+            "savgol_window": SAVGOL_WINDOW,
+            "savgol_order": SAVGOL_ORDER,
+            "form_factors": "cromer-mann-4gaussian",
+            "normalization": "faber-ziman",
+        },
+    }
+
+
+# --- Trajectory finding ------------------------------------------------------
+
+
+def find_trajectory(model: str) -> Path | None:
+    """
+    Find NVT trajectory for a model.
+
+    Prefers converted .extxyz, falls back to LAMMPS dump.
+
+    Parameters
+    ----------
+    model : str
+        Name of the MLIP model.
+
+    Returns
+    -------
+    Path or None
+        Path to the trajectory file, or None if not found.
+    """
+    # Check for converted .extxyz first
+    extxyz = DATA_ROOT / model / "nvt_trajectory.extxyz"
+    if extxyz.exists():
+        return extxyz
+
+    # Fall back to original LAMMPS dump
+    runs_root = Path("/lus/work/CT9/cin1387/lbrugnoli/prove/runs")
+    model_cell = runs_root / model / "p64_w170"
+    for d in sorted(model_cell.glob("nvt_*"), reverse=True):
+        traj = d / "nvt_long_trajectory.lammpstrj"
+        if traj.exists():
+            return traj
+    for d in sorted(model_cell.glob("pipeline_*"), reverse=True):
+        traj = d / "nvt_trajectory.lammpstrj"
+        if traj.exists():
+            return traj
+
+    return None
+
+
+# --- Pytest interface (ml-peg convention) ------------------------------------
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_compute_xray_sq(model: str) -> None:
+    """
+    Compute and save X-ray S(q) in Faber-Ziman convention for one model.
+
+    Parameters
+    ----------
+    model : str
+        Name of the MLIP model to compute S(q) for.
+    """
+    traj_path = find_trajectory(model)
+    if traj_path is None:
+        pytest.skip(f"No NVT trajectory for {model}")
+
+    result = compute_sq_travis_style(traj_path)
+    result["model"] = model
+    result["traj_path"] = str(traj_path)
+
+    out_dir = OUT_PATH / model
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    with open(out_dir / "xray_sq.json", "w") as f:
+        json.dump(result, f, indent=2)
+
+    np.savez(
+        out_dir / "xray_sq.npz",
+        q=np.array(result["q"]),
+        Sq=np.array(result["Sq"]),
+    )
+
+    assert len(result["q"]) > 10, f"Too few q-points for {model}"

From 8387cec3391a6e2ae92f537b9475d89449a18b00 Mon Sep 17 00:00:00 2001
From: luca <luca@Luke-EBook.localdomain>
Date: Sun, 26 Apr 2026 17:07:01 +0200
Subject: [PATCH 2/3] Consolidate WiSE benchmark per PR #445 review

- Merge density / rdf / xray_sf into a single litfsi_h2o_21m benchmark
  under ml_peg/{calcs,analysis,app}/wise_electrolytes/litfsi_h2o_21m/.
- Adopt the standard ml-peg patterns in the analysis and calc scripts:
  load_models(current_models) for model discovery, @build_table for
  the metrics table, and metrics.yml for thresholds/tooltips/weights.
- Add a Janus recast of the LAMMPS+symmetrix Adastra production protocol
  in ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
  (pytest-skipped reference; documents the exact MD parameters).
- Add the docs page docs/source/user_guide/benchmarks/wise_electrolytes.rst
  (and toctree entry) and wire the app docs_url to it.
- Update the parent wise_electrolytes.yml to a single benchmark weight.
---
 docs/source/user_guide/benchmarks/index.rst   |   1 +
 .../benchmarks/wise_electrolytes.rst          |  87 +++
 .../density/analyse_density.py                | 298 --------
 .../wise_electrolytes/density/metrics.yml     |  15 -
 .../litfsi_h2o_21m/analyse_litfsi_h2o_21m.py  | 721 ++++++++++++++++++
 .../litfsi_h2o_21m/metrics.yml                |  43 ++
 .../wise_electrolytes/rdf/analyse_rdf.py      | 434 -----------
 .../wise_electrolytes/rdf/metrics.yml         |  15 -
 .../xray_sf/analyse_xray_sf.py                | 398 ----------
 .../wise_electrolytes/xray_sf/metrics.yml     |  15 -
 .../wise_electrolytes/density/app_density.py  |  68 --
 .../litfsi_h2o_21m/app_litfsi_h2o_21m.py      |  91 +++
 ml_peg/app/wise_electrolytes/rdf/app_rdf.py   |  69 --
 .../wise_electrolytes/wise_electrolytes.yml   |   4 +-
 .../wise_electrolytes/xray_sf/app_xray_sf.py  |  66 --
 .../wise_electrolytes/density/calc_density.py |  75 --
 .../litfsi_h2o_21m/calc_litfsi_h2o_21m.py     | 616 +++++++++++++++
 .../md_reference/calc_md_reference.py         | 238 ++++++
 .../calcs/wise_electrolytes/rdf/calc_rdf.py   | 313 --------
 .../wise_electrolytes/xray_sf/calc_xray_sf.py | 345 ---------
 20 files changed, 1798 insertions(+), 2114 deletions(-)
 create mode 100644 docs/source/user_guide/benchmarks/wise_electrolytes.rst
 delete mode 100644 ml_peg/analysis/wise_electrolytes/density/analyse_density.py
 delete mode 100644 ml_peg/analysis/wise_electrolytes/density/metrics.yml
 create mode 100644 ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py
 create mode 100644 ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml
 delete mode 100644 ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py
 delete mode 100644 ml_peg/analysis/wise_electrolytes/rdf/metrics.yml
 delete mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py
 delete mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml
 delete mode 100644 ml_peg/app/wise_electrolytes/density/app_density.py
 create mode 100644 ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py
 delete mode 100644 ml_peg/app/wise_electrolytes/rdf/app_rdf.py
 delete mode 100644 ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py
 delete mode 100644 ml_peg/calcs/wise_electrolytes/density/calc_density.py
 create mode 100644 ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py
 create mode 100644 ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
 delete mode 100644 ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py
 delete mode 100644 ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py

diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst
index 8e907ce78..1dbd28e8c 100644
--- a/docs/source/user_guide/benchmarks/index.rst
+++ b/docs/source/user_guide/benchmarks/index.rst
@@ -17,3 +17,4 @@ Benchmarks
     tm_complexes
     conformers
     molecular_dynamics
+    wise_electrolytes
diff --git a/docs/source/user_guide/benchmarks/wise_electrolytes.rst b/docs/source/user_guide/benchmarks/wise_electrolytes.rst
new file mode 100644
index 000000000..ebee388de
--- /dev/null
+++ b/docs/source/user_guide/benchmarks/wise_electrolytes.rst
@@ -0,0 +1,87 @@
+==================
+WiSE electrolytes
+==================
+
+LiTFSI/H2O 21 m
+===============
+
+Summary
+-------
+
+Performance in predicting structural and thermodynamic properties of the 21 molal
+LiTFSI/H2O "water-in-salt" electrolyte (WiSE). The benchmark consolidates three
+experimental observables for the same chemistry: NPT equilibrium density, Li-O
+coordination from the radial distribution function, and the X-ray static
+structure factor S(q).
+
+Two simulation cells are used: a small cell (16 LiTFSI + 42 H2O, 382 atoms) for
+the NPT density, and a large cubic cell (64 LiTFSI + 170 H2O, 1534 atoms,
+27.4938 A) for the NVT trajectories that feed the RDF and S(q).
+
+Metrics
+-------
+
+1. Density error
+
+   Absolute error in NPT density vs the experimental value
+   (1.7126 g/cm3, Gilbert et al., *J. Chem. Eng. Data* 62, 2056 (2017)).
+   The density is the average over the production NPT trajectory at
+   298.15 K and 1 atm.
+
+2. Li-O_water coordination number error
+
+   Absolute error in the Li-O_water coordination number, computed by
+   integrating the Li-O_water radial distribution function up to the first
+   minimum (R_cut = 2.83 A, from r2SCAN AIMD reference). Water oxygens are
+   identified from the first frame as those bonded to a hydrogen
+   (d_OH < 1.25 A). Reference value: 2.0 (Watanabe et al.,
+   *J. Phys. Chem. B* 125, 7477 (2021), neutron diffraction with isotopic
+   substitution at ~18.5 m).
+
+3. Li-O_TFSI coordination number error
+
+   Same definition as above, restricted to TFSI oxygens (those bonded to a
+   sulfur, d_OS < 1.75 A). Reference value: 2.0 (same source).
+
+4. S(q) R-factor
+
+   R-factor between the computed and experimental X-ray structure factor,
+   ``sum(|S_exp - S_calc|) / sum(|S_exp|)``. S(q) is computed via dynasor
+   in the Faber-Ziman convention with Cromer-Mann 4-Gaussian form factors
+   (including hydrogen) and Savitzky-Golay smoothing
+   (window = 27, order = 3, dq = 0.02 A^-1). Reference: SAXS data of
+   Zhang et al., *J. Phys. Chem. B* 125, 4501 (2021).
+
+Computational cost
+------------------
+
+High: production trajectories require a GPU MD code (e.g. LAMMPS + symmetrix
+or Janus + MACE) and several hours per model on a single MI250X / A100 GPU.
+Re-extracting metrics from pre-computed trajectories is cheap (seconds per
+model).
+
+Data availability
+-----------------
+
+Input structures:
+
+* p64_w170 cubic cell (1534 atoms, 27.4938 A) for NVT trajectories.
+* p16_w42 cell (382 atoms) for NPT density.
+
+Production trajectories were generated with LAMMPS + symmetrix on Adastra
+(MI250X) using a MACE foundation model. The reference Janus recast of the
+same protocol (Min -> NVT 50 ps -> NPT 200 ps, dt = 0.5 fs, T = 298.15 K,
+P = 1.01325 bar, NH thermostat tau = 50 fs, NH barostat tau = 500 fs) lives
+in ``ml_peg/calcs/wise_electrolytes/md_reference/``.
+
+Reference data:
+
+* Density: Gilbert et al., *J. Chem. Eng. Data* 62, 2056 (2017),
+  DOI: 10.1021/acs.jced.7b00135.
+* Li-O coordination numbers: Watanabe et al., *J. Phys. Chem. B* 125, 7477
+  (2021), DOI: 10.1021/acs.jpcb.1c04693.
+* X-ray S(q): Zhang et al., *J. Phys. Chem. B* 125, 4501 (2021),
+  DOI: 10.1021/acs.jpcb.1c02189.
+
+Further details on the simulation protocol and MLIP assessment for this
+system: L. Brugnoli, arXiv:2603.22099 (2026).
diff --git a/ml_peg/analysis/wise_electrolytes/density/analyse_density.py b/ml_peg/analysis/wise_electrolytes/density/analyse_density.py
deleted file mode 100644
index f2410484f..000000000
--- a/ml_peg/analysis/wise_electrolytes/density/analyse_density.py
+++ /dev/null
@@ -1,298 +0,0 @@
-"""Analyse density benchmark for LiTFSI/H2O electrolyte."""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import plotly.graph_objects as go
-import pytest
-
-from ml_peg.analysis.utils.utils import load_metrics_config
-
-# --- Paths -------------------------------------------------------------------
-
-CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs"
-CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "density" / "outputs"
-APP_ROOT = Path(__file__).resolve().parents[3] / "app"
-OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density"
-
-MODELS = [
-    "matpes-r2scan",
-    "mace-mpa-0-medium",
-    "mace-omat-0-medium",
-    "mace-mp-0b3",
-    "mace-mh-1-omat",
-    "mace-mh-1-omol",
-]
-
-RHO_EXP = 1.7126  # g/cm³  (Gilbert et al., JCED 2017, DOI: 10.1021/acs.jced.7b00135)
-
-# Map benchmark model names → ml-peg registry names (for app summary table)
-MODEL_NAME_MAP = {
-    "matpes-r2scan": "mace-matpes-r2scan",
-    "mace-mpa-0-medium": "mace-mpa-0",
-    "mace-omat-0-medium": "mace-omat-0",
-}
-
-METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
-DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
-    METRICS_CONFIG_PATH
-)
-
-
-# --- Helpers -----------------------------------------------------------------
-
-
-def load_density_results() -> dict[str, dict]:
-    """
-    Load density.json for each model.
-
-    Returns
-    -------
-    dict[str, dict]
-        Per-model density results keyed by model name.
-    """
-    results = {}
-    for model in MODELS:
-        json_path = CALC_PATH / model / "density.json"
-        if json_path.exists():
-            with open(json_path) as f:
-                results[model] = json.load(f)
-    return results
-
-
-def normalize_metric(value: float, good: float, bad: float) -> float:
-    """
-    Normalize metric to [0, 1] where 0=good, 1=bad.
-
-    Parameters
-    ----------
-    value : float
-        Raw metric value.
-    good : float
-        Value corresponding to score 0.
-    bad : float
-        Value corresponding to score 1.
-
-    Returns
-    -------
-    float
-        Normalized score clipped to [0, 1].
-    """
-    if bad == good:
-        return 0.0 if value <= good else 1.0
-    score = (value - good) / (bad - good)
-    return max(0.0, min(1.0, score))
-
-
-# --- Plot builders -----------------------------------------------------------
-
-
-def build_density_bar_chart(data: dict[str, dict]) -> go.Figure:
-    """
-    Build bar chart of density per model.
-
-    Parameters
-    ----------
-    data : dict[str, dict]
-        Per-model density results.
-
-    Returns
-    -------
-    go.Figure
-        Plotly bar chart figure.
-    """
-    fig = go.Figure()
-    for model in MODELS:
-        if model not in data:
-            continue
-        fig.add_trace(
-            go.Bar(
-                x=[model],
-                y=[data[model]["rho_mean"]],
-                error_y={"type": "data", "array": [data[model]["rho_std"]]},
-                name=model,
-            )
-        )
-
-    fig.add_hline(
-        y=RHO_EXP,
-        line_dash="dash",
-        line_color="red",
-        annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3, Gilbert 2017)",
-    )
-    fig.update_layout(
-        title="Electrolyte Density (LiTFSI/H2O, 21 m)",
-        yaxis_title="Density / g cm⁻³",
-        showlegend=False,
-    )
-    return fig
-
-
-def build_density_timeseries(data: dict[str, dict]) -> go.Figure:
-    """
-    Build density vs time plot for all models.
-
-    Parameters
-    ----------
-    data : dict[str, dict]
-        Per-model density results.
-
-    Returns
-    -------
-    go.Figure
-        Plotly timeseries figure.
-    """
-    fig = go.Figure()
-    for model in MODELS:
-        if model not in data:
-            continue
-        d = data[model]
-        fig.add_trace(
-            go.Scatter(
-                x=d["time_full"],
-                y=d["density_full"],
-                mode="lines",
-                name=model,
-                opacity=0.8,
-            )
-        )
-
-    fig.add_hline(
-        y=RHO_EXP,
-        line_dash="dash",
-        line_color="black",
-        annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3)",
-    )
-    fig.update_layout(
-        title="NPT Density vs Time (LiTFSI/H2O)",
-        xaxis_title="Time / ps",
-        yaxis_title="Density / g cm⁻³",
-    )
-    return fig
-
-
-# --- Metrics table -----------------------------------------------------------
-
-
-def build_metrics_table(data: dict[str, dict]) -> dict:
-    """
-    Build metrics table JSON compatible with ml-peg app format.
-
-    Parameters
-    ----------
-    data : dict[str, dict]
-        Per-model density results.
-
-    Returns
-    -------
-    dict
-        Table with data, columns, tooltips, and thresholds.
-    """
-    rows = []
-    for model in MODELS:
-        if model not in data:
-            continue
-        d = data[model]
-        abs_err = d["rho_abs_error"]
-        pct_err = abs(d["rho_error_pct"])
-
-        score_abs = normalize_metric(
-            abs_err,
-            DEFAULT_THRESHOLDS["Density Error"]["good"],
-            DEFAULT_THRESHOLDS["Density Error"]["bad"],
-        )
-        score_pct = normalize_metric(
-            pct_err,
-            DEFAULT_THRESHOLDS["Density Error (%)"]["good"],
-            DEFAULT_THRESHOLDS["Density Error (%)"]["bad"],
-        )
-
-        w_abs = DEFAULT_WEIGHTS.get("Density Error", 1.0)
-        w_pct = DEFAULT_WEIGHTS.get("Density Error (%)", 1.0)
-        total_w = w_abs + w_pct
-        overall = (
-            1.0 - (score_abs * w_abs + score_pct * w_pct) / total_w
-            if total_w > 0
-            else 0.0
-        )
-
-        rows.append(
-            {
-                "MLIP": model,
-                "id": MODEL_NAME_MAP.get(model, model),
-                "Density (g/cm3)": round(d["rho_mean"], 4),
-                "Density Error": round(abs_err, 4),
-                "Density Error (%)": round(pct_err, 2),
-                "Score": round(overall, 3),
-            }
-        )
-
-    rows.sort(key=lambda r: r["Score"], reverse=True)
-
-    columns = [
-        {"name": c, "id": c}
-        for c in [
-            "MLIP",
-            "Density Error",
-            "Density Error (%)",
-            "Score",
-        ]
-    ]
-
-    return {
-        "data": rows,
-        "columns": columns,
-        "tooltip_header": DEFAULT_TOOLTIPS,
-        "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()},
-        "weights": dict(DEFAULT_WEIGHTS),
-        "model_name_map": MODEL_NAME_MAP,
-    }
-
-
-# --- Pytest interface --------------------------------------------------------
-
-
-@pytest.fixture
-def density_analysis():
-    """
-    Run full density analysis: table + plots.
-
-    Returns
-    -------
-    dict
-        Metrics table dictionary.
-    """
-    data = load_density_results()
-    if not data:
-        pytest.skip("No density data found")
-
-    OUT_PATH.mkdir(parents=True, exist_ok=True)
-
-    table = build_metrics_table(data)
-    with open(OUT_PATH / "density_metrics_table.json", "w") as f:
-        json.dump(table, f, indent=2)
-
-    fig_bar = build_density_bar_chart(data)
-    with open(OUT_PATH / "figure_density_bar.json", "w") as f:
-        f.write(fig_bar.to_json())
-
-    fig_ts = build_density_timeseries(data)
-    with open(OUT_PATH / "figure_density_timeseries.json", "w") as f:
-        f.write(fig_ts.to_json())
-
-    return table
-
-
-def test_density(density_analysis) -> None:
-    """
-    Run density benchmark — generates table and plots.
-
-    Parameters
-    ----------
-    density_analysis : dict
-        Fixture providing the metrics table.
-    """
-    table = density_analysis
-    assert len(table["data"]) > 0, "No models produced density data"
diff --git a/ml_peg/analysis/wise_electrolytes/density/metrics.yml b/ml_peg/analysis/wise_electrolytes/density/metrics.yml
deleted file mode 100644
index f4b2bc014..000000000
--- a/ml_peg/analysis/wise_electrolytes/density/metrics.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-metrics:
-  Density Error:
-    good: 0.0
-    bad: 0.17
-    unit: "g/cm3"
-    tooltip: "Absolute error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)"
-    level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017)
-    weight: 1
-  Density Error (%):
-    good: 0.0
-    bad: 10.0
-    unit: "%"
-    tooltip: "Percent error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)"
-    level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017)
-    weight: 0
diff --git a/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py
new file mode 100644
index 000000000..d71a48cee
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py
@@ -0,0 +1,721 @@
+"""Analyse the WiSE 21 m LiTFSI/H2O electrolyte benchmark."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import plotly.graph_objects as go
+import pytest
+from scipy.interpolate import interp1d
+
+from ml_peg.analysis.utils.decorators import build_table
+from ml_peg.analysis.utils.utils import load_metrics_config
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+# --- Paths -------------------------------------------------------------------
+
+CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "litfsi_h2o_21m" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "litfsi_h2o_21m"
+EXP_SAXS_PATH = (
+    Path(__file__).resolve().parents[1] / "data" / "saxs_maginn.txt"
+)
+
+MODELS = load_models(current_models)
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+# --- Physical constants and reference values ---------------------------------
+
+# Box geometry (same for all models: p64_w170 NVT cell)
+L_BOX = 27.4938  # Å (cubic NVT cell at experimental density)
+V_BOX = L_BOX**3  # Å³
+
+# Integration cutoff: first minimum of Li-O_total g(r) (r2SCAN AIMD)
+R_CUT = 2.83  # Å
+DR = 0.02  # Å (bin width used in calc_rdf.py)
+
+# Experimental references
+RHO_EXP = 1.7126     # g/cm³ — Gilbert et al., JCED 62, 2056 (2017)
+CN_EXP_WATER = 2.0   # Watanabe et al., JPCB 125, 7477 (2021)
+CN_EXP_TFSI = 2.0
+
+
+# =============================================================================
+# Density helpers and figures
+# =============================================================================
+
+
+def load_density_results() -> dict[str, dict]:
+    """
+    Load density.json for each registered model that has calc output.
+
+    Returns
+    -------
+    dict[str, dict]
+        Per-model density results keyed by registry model name.
+    """
+    results = {}
+    for model in MODELS:
+        json_path = CALC_PATH / model / "density.json"
+        if json_path.exists():
+            with open(json_path) as f:
+                results[model] = json.load(f)
+    return results
+
+
+def build_density_bar_chart(data: dict[str, dict]) -> go.Figure:
+    """
+    Build bar chart of density per model.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model density results.
+
+    Returns
+    -------
+    go.Figure
+        Plotly bar chart figure.
+    """
+    fig = go.Figure()
+    for model in MODELS:
+        if model not in data:
+            continue
+        fig.add_trace(
+            go.Bar(
+                x=[model],
+                y=[data[model]["rho_mean"]],
+                error_y={"type": "data", "array": [data[model]["rho_std"]]},
+                name=model,
+            )
+        )
+
+    fig.add_hline(
+        y=RHO_EXP,
+        line_dash="dash",
+        line_color="red",
+        annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3, Gilbert 2017)",
+    )
+    fig.update_layout(
+        title="Electrolyte Density (LiTFSI/H2O, 21 m)",
+        yaxis_title="Density / g cm⁻³",
+        showlegend=False,
+    )
+    return fig
+
+
+def build_density_timeseries(data: dict[str, dict]) -> go.Figure:
+    """
+    Build density vs time plot for all models.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model density results.
+
+    Returns
+    -------
+    go.Figure
+        Plotly timeseries figure.
+    """
+    fig = go.Figure()
+    for model in MODELS:
+        if model not in data:
+            continue
+        d = data[model]
+        fig.add_trace(
+            go.Scatter(
+                x=d["time_full"],
+                y=d["density_full"],
+                mode="lines",
+                name=model,
+                opacity=0.8,
+            )
+        )
+
+    fig.add_hline(
+        y=RHO_EXP,
+        line_dash="dash",
+        line_color="black",
+        annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3)",
+    )
+    fig.update_layout(
+        title="NPT Density vs Time (LiTFSI/H2O)",
+        xaxis_title="Time / ps",
+        yaxis_title="Density / g cm⁻³",
+    )
+    return fig
+
+
+# =============================================================================
+# RDF / coordination number helpers and figures
+# =============================================================================
+
+
+def compute_cn_from_gr(
+    gr: np.ndarray,
+    r: np.ndarray,
+    n_neighbor: int,
+    volume: float,
+    r_cut: float,
+    dr: float,
+) -> float:
+    """
+    Integrate g(r) to coordination number: CN = 4pi rho int_0^r_cut g(r) r^2 dr.
+
+    Parameters
+    ----------
+    gr : np.ndarray
+        Radial distribution function values.
+    r : np.ndarray
+        Radial distance values in angstrom.
+    n_neighbor : int
+        Number of neighbor atoms of the relevant species in the simulation box.
+    volume : float
+        Volume of the simulation box in angstrom^3.
+    r_cut : float
+        Integration cutoff distance in angstrom.
+    dr : float
+        Bin width in angstrom.
+
+    Returns
+    -------
+    float
+        Coordination number obtained by integrating g(r) up to ``r_cut``.
+    """
+    rho = n_neighbor / volume
+    mask = r <= r_cut
+    return float(np.sum(4 * np.pi * rho * gr[mask] * r[mask] ** 2 * dr))
+
+
+def load_rdf_results() -> dict[str, dict]:
+    """
+    Load rdf.json and rdf.npz for each registered model that has calc output.
+
+    Returns
+    -------
+    dict[str, dict]
+        Mapping from registry model name to a dict containing coordination
+        numbers, errors, radial distances, g(r) arrays, and frame count.
+    """
+    results = {}
+    for model in MODELS:
+        json_path = CALC_PATH / model / "rdf.json"
+        npz_path = CALC_PATH / model / "rdf.npz"
+        if not json_path.exists() or not npz_path.exists():
+            continue
+
+        with open(json_path) as f:
+            meta = json.load(f)
+
+        data = np.load(npz_path)
+        r = data["r"]
+
+        n_ow = meta["n_O_water"]
+        n_of = meta["n_O_TFSI"]
+
+        cn_w = compute_cn_from_gr(data["gr_LiO_water"], r, n_ow, V_BOX, R_CUT, DR)
+        cn_f = compute_cn_from_gr(data["gr_LiO_TFSI"], r, n_of, V_BOX, R_CUT, DR)
+
+        results[model] = {
+            "cn_water": cn_w,
+            "cn_tfsi": cn_f,
+            "cn_total": cn_w + cn_f,
+            "err_water": abs(cn_w - CN_EXP_WATER),
+            "err_tfsi": abs(cn_f - CN_EXP_TFSI),
+            "r": r,
+            "gr_water": data["gr_LiO_water"],
+            "gr_tfsi": data["gr_LiO_TFSI"],
+            "gr_total": data["gr_LiO_total"],
+            "n_frames": meta["n_frames_used"],
+        }
+    return results
+
+
+def build_cn_bar_chart(data: dict[str, dict]) -> go.Figure:
+    """
+    Bar chart of CN_water and CN_TFSI per model with experimental reference.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model RDF results as returned by :func:`load_rdf_results`.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure with grouped bars and experimental reference lines.
+    """
+    fig = go.Figure()
+
+    x_models = [m for m in MODELS if m in data]
+    cn_water_vals = [data[m]["cn_water"] for m in x_models]
+    cn_tfsi_vals = [data[m]["cn_tfsi"] for m in x_models]
+
+    fig.add_trace(
+        go.Bar(
+            name="Li-O<sub>water</sub>",
+            x=x_models,
+            y=cn_water_vals,
+            marker_color="steelblue",
+        )
+    )
+    fig.add_trace(
+        go.Bar(
+            name="Li-O<sub>TFSI</sub>",
+            x=x_models,
+            y=cn_tfsi_vals,
+            marker_color="coral",
+        )
+    )
+
+    fig.add_hline(
+        y=CN_EXP_WATER,
+        line_dash="dash",
+        line_color="steelblue",
+        annotation_text=f"Exp. O<sub>water</sub> ({CN_EXP_WATER})",
+        annotation_position="top right",
+    )
+    fig.add_hline(
+        y=CN_EXP_TFSI,
+        line_dash="dash",
+        line_color="coral",
+        annotation_text=f"Exp. O<sub>TFSI</sub> ({CN_EXP_TFSI})",
+        annotation_position="bottom right",
+    )
+
+    fig.update_layout(
+        title="Li⁺ Coordination Numbers (LiTFSI/H₂O, 21 m)",
+        yaxis_title="Coordination Number",
+        barmode="group",
+        legend={"x": 0.01, "y": 0.99},
+    )
+    return fig
+
+
+def build_gr_plot(data: dict[str, dict]) -> go.Figure:
+    """
+    G(r) plot: Li-O_water and Li-O_TFSI for all models.
+
+    Parameters
+    ----------
+    data : dict[str, dict]
+        Per-model RDF results as returned by :func:`load_rdf_results`.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure with g(r) curves and integration cutoff line.
+    """
+    fig = go.Figure()
+
+    colors = [
+        "#1f77b4",
+        "#ff7f0e",
+        "#2ca02c",
+        "#d62728",
+        "#9467bd",
+        "#8c564b",
+        "#e377c2",
+        "#7f7f7f",
+    ]
+
+    for i, model in enumerate(MODELS):
+        if model not in data:
+            continue
+        d = data[model]
+        c = colors[i % len(colors)]
+        fig.add_trace(
+            go.Scatter(
+                x=d["r"],
+                y=d["gr_water"],
+                mode="lines",
+                name=f"{model} O<sub>w</sub>",
+                line={"color": c, "dash": "solid"},
+                legendgroup=model,
+            )
+        )
+        fig.add_trace(
+            go.Scatter(
+                x=d["r"],
+                y=d["gr_tfsi"],
+                mode="lines",
+                name=f"{model} O<sub>TFSI</sub>",
+                line={"color": c, "dash": "dot"},
+                legendgroup=model,
+            )
+        )
+
+    fig.add_vline(
+        x=R_CUT,
+        line_dash="dash",
+        line_color="gray",
+        annotation_text=f"r_cut={R_CUT} Å",
+        annotation_position="top right",
+    )
+
+    fig.update_layout(
+        title="Li-O Radial Distribution Functions (LiTFSI/H₂O, 21 m)",
+        xaxis_title="r / Å",
+        yaxis_title="g(r)",
+        xaxis_range=[1.0, 6.0],
+    )
+    return fig
+
+
+# =============================================================================
+# X-ray S(q) helpers and figures
+# =============================================================================
+
+
+def load_experimental_sq() -> tuple[np.ndarray, np.ndarray]:
+    """
+    Load experimental SAXS S(q) data.
+
+    Returns
+    -------
+    q_exp : np.ndarray
+        Scattering vector values in inverse angstroms.
+    sq_exp : np.ndarray
+        Experimental structure factor values.
+    """
+    data = np.loadtxt(EXP_SAXS_PATH)
+    return data[:, 0], data[:, 1]
+
+
+def load_computed_sq(model: str) -> tuple[np.ndarray, np.ndarray] | None:
+    """
+    Load computed S(q) for a model.
+
+    Parameters
+    ----------
+    model : str
+        Registry name of the MLIP model.
+
+    Returns
+    -------
+    tuple of (np.ndarray, np.ndarray) or None
+        Scattering vector and structure factor arrays, or None if the
+        output file does not exist.
+    """
+    json_path = CALC_PATH / model / "xray_sq.json"
+    if not json_path.exists():
+        return None
+    with open(json_path) as f:
+        d = json.load(f)
+    return np.array(d["q"]), np.array(d["Sq"])
+
+
+def compute_r_factor(
+    q_exp: np.ndarray,
+    sq_exp: np.ndarray,
+    q_calc: np.ndarray,
+    sq_calc: np.ndarray,
+) -> float:
+    """
+    Compute R-factor: sum|S_exp - S_calc| / sum|S_exp|.
+
+    Parameters
+    ----------
+    q_exp : np.ndarray
+        Experimental scattering vector values.
+    sq_exp : np.ndarray
+        Experimental structure factor values.
+    q_calc : np.ndarray
+        Calculated scattering vector values.
+    sq_calc : np.ndarray
+        Calculated structure factor values.
+
+    Returns
+    -------
+    float
+        R-factor value, or NaN if no overlapping valid data exists.
+    """
+    valid_calc = ~np.isnan(sq_calc)
+    q_min_calc = float(q_calc[valid_calc].min())
+    q_max_exp = float(q_exp.max())
+
+    f = interp1d(
+        q_calc[valid_calc],
+        sq_calc[valid_calc],
+        kind="linear",
+        bounds_error=False,
+        fill_value=np.nan,
+    )
+
+    mask = (q_exp >= q_min_calc) & (q_exp <= q_max_exp)
+    sq_interp = f(q_exp[mask])
+    sq_e = sq_exp[mask]
+
+    ok = ~np.isnan(sq_interp)
+    if not ok.any():
+        return float("nan")
+
+    return float(np.sum(np.abs(sq_e[ok] - sq_interp[ok])) / np.sum(np.abs(sq_e[ok])))
+
+
+def find_first_peak(
+    q: np.ndarray, sq: np.ndarray, q_min: float = 0.5, q_max: float = 2.0
+) -> float:
+    """
+    Find position of first peak in S(q).
+
+    Parameters
+    ----------
+    q : np.ndarray
+        Scattering vector values.
+    sq : np.ndarray
+        Structure factor values.
+    q_min : float, optional
+        Lower bound of the search window in inverse angstroms (default 0.5).
+    q_max : float, optional
+        Upper bound of the search window in inverse angstroms (default 2.0).
+
+    Returns
+    -------
+    float
+        Position of the first peak, or NaN if fewer than 3 valid points
+        exist in the search window.
+    """
+    mask = (q >= q_min) & (q <= q_max) & ~np.isnan(sq)
+    if mask.sum() < 3:
+        return float("nan")
+    q_sub = q[mask]
+    sq_sub = sq[mask]
+    return float(q_sub[np.argmax(sq_sub)])
+
+
+def build_sq_comparison_plot(
+    q_exp: np.ndarray,
+    sq_exp: np.ndarray,
+    model_data: dict[str, tuple[np.ndarray, np.ndarray]],
+) -> go.Figure:
+    """
+    Build S(q) overlay plot comparing computed and experimental data.
+
+    Parameters
+    ----------
+    q_exp : np.ndarray
+        Experimental scattering vector values.
+    sq_exp : np.ndarray
+        Experimental structure factor values.
+    model_data : dict of str to tuple of (np.ndarray, np.ndarray)
+        Mapping from model name to (q, S(q)) arrays.
+
+    Returns
+    -------
+    go.Figure
+        Plotly figure with experimental and computed S(q) traces.
+    """
+    fig = go.Figure()
+
+    fig.add_trace(
+        go.Scatter(
+            x=q_exp.tolist(),
+            y=sq_exp.tolist(),
+            mode="lines",
+            name="Exp. (Zhang et al. 2021)",
+            line={"color": "black", "width": 2},
+        )
+    )
+
+    for model, (q, sq) in model_data.items():
+        fig.add_trace(
+            go.Scatter(
+                x=q.tolist(),
+                y=sq.tolist(),
+                mode="lines",
+                name=model,
+                opacity=0.8,
+            )
+        )
+
+    fig.update_layout(
+        title="X-ray Structure Factor S(q) — LiTFSI/H2O (21 m)",
+        xaxis_title="q / Å⁻¹",
+        yaxis_title="S(q) (Faber-Ziman)",
+        legend={"x": 0.60, "y": 0.98},
+    )
+    return fig
+
+
+# =============================================================================
+# Pytest fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def density_results() -> dict[str, dict]:
+    """
+    Load density results and write density plot JSONs.
+
+    Returns
+    -------
+    dict[str, dict]
+        Per-model density results keyed by registry model name.
+    """
+    data = load_density_results()
+    if not data:
+        pytest.skip("No density data found")
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    fig_bar = build_density_bar_chart(data)
+    with open(OUT_PATH / "figure_density_bar.json", "w") as f:
+        f.write(fig_bar.to_json())
+
+    fig_ts = build_density_timeseries(data)
+    with open(OUT_PATH / "figure_density_timeseries.json", "w") as f:
+        f.write(fig_ts.to_json())
+
+    return data
+
+
+@pytest.fixture
+def rdf_results() -> dict[str, dict]:
+    """
+    Load RDF results and write RDF plot JSONs.
+
+    Returns
+    -------
+    dict[str, dict]
+        Per-model RDF results keyed by registry model name.
+    """
+    data = load_rdf_results()
+    if not data:
+        pytest.skip("No RDF data found")
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    fig_cn = build_cn_bar_chart(data)
+    with open(OUT_PATH / "figure_cn_bar.json", "w") as f:
+        f.write(fig_cn.to_json())
+
+    fig_gr = build_gr_plot(data)
+    with open(OUT_PATH / "figure_gr.json", "w") as f:
+        f.write(fig_gr.to_json())
+
+    return data
+
+
+@pytest.fixture
+def xray_sf_results() -> dict[str, dict]:
+    """
+    Compute per-model R-factor and peak-position errors and write S(q) plot.
+
+    Returns
+    -------
+    dict[str, dict]
+        Per-model results keyed by registry model name, containing
+        ``r_factor``, ``peak_calc``, ``peak_exp``, ``peak_position_error``.
+    """
+    q_exp, sq_exp = load_experimental_sq()
+    peak_exp = find_first_peak(q_exp, sq_exp)
+
+    model_data: dict[str, tuple[np.ndarray, np.ndarray]] = {}
+    results: dict[str, dict] = {}
+
+    for model in MODELS:
+        data = load_computed_sq(model)
+        if data is None:
+            continue
+        q_calc, sq_calc = data
+        model_data[model] = (q_calc, sq_calc)
+
+        r_factor = compute_r_factor(q_exp, sq_exp, q_calc, sq_calc)
+        peak_calc = find_first_peak(q_calc, sq_calc)
+        peak_error = (
+            abs(peak_calc - peak_exp) if not np.isnan(peak_calc) else float("nan")
+        )
+
+        results[model] = {
+            "r_factor": r_factor,
+            "peak_calc": peak_calc,
+            "peak_exp": peak_exp,
+            "peak_position_error": peak_error,
+        }
+
+    if not results:
+        pytest.skip("No S(q) data found")
+
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    fig = build_sq_comparison_plot(q_exp, sq_exp, model_data)
+    with open(OUT_PATH / "figure_xray_sq_comparison.json", "w") as f:
+        f.write(fig.to_json())
+
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "litfsi_h2o_21m_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    weights=DEFAULT_WEIGHTS,
+)
+def metrics(
+    density_results: dict[str, dict],
+    rdf_results: dict[str, dict],
+    xray_sf_results: dict[str, dict],
+) -> dict[str, dict]:
+    """
+    Build per-metric dicts consumed by the ``build_table`` decorator.
+
+    Parameters
+    ----------
+    density_results
+        Per-model density results.
+    rdf_results
+        Per-model RDF / coordination-number results.
+    xray_sf_results
+        Per-model R-factor and peak-position results.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names mapped to ``{model_name: value}`` dicts. The metric
+        order matches ``metrics.yml`` (Density Error, Density Error (%),
+        CN Li-O_water Error, CN Li-O_TFSI Error, S(q) R-factor,
+        First Peak Position Error).
+    """
+    return {
+        "Density Error": {
+            model: d["rho_abs_error"] for model, d in density_results.items()
+        },
+        "Density Error (%)": {
+            model: abs(d["rho_error_pct"]) for model, d in density_results.items()
+        },
+        "CN Li-O_water Error": {
+            model: d["err_water"] for model, d in rdf_results.items()
+        },
+        "CN Li-O_TFSI Error": {
+            model: d["err_tfsi"] for model, d in rdf_results.items()
+        },
+        "S(q) R-factor": {
+            model: r["r_factor"] for model, r in xray_sf_results.items()
+        },
+        "First Peak Position Error": {
+            model: r["peak_position_error"] for model, r in xray_sf_results.items()
+        },
+    }
+
+
+def test_litfsi_h2o_21m(metrics: dict[str, dict]) -> None:
+    """
+    Run the consolidated WiSE LiTFSI/H2O benchmark.
+
+    Parameters
+    ----------
+    metrics
+        Per-metric values for all models.
+    """
+    assert metrics, "No models produced data for the WiSE LiTFSI/H2O benchmark"
diff --git a/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml
new file mode 100644
index 000000000..0f41d821a
--- /dev/null
+++ b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml
@@ -0,0 +1,43 @@
+metrics:
+  Density Error:
+    good: 0.0
+    bad: 0.17
+    unit: "g/cm3"
+    tooltip: "Absolute error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)"
+    level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017)
+    weight: 1
+  Density Error (%):
+    good: 0.0
+    bad: 10.0
+    unit: "%"
+    tooltip: "Percent error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)"
+    level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017)
+    weight: 0
+  CN Li-O_water Error:
+    good: 0.0
+    bad: 0.5
+    unit: "-"
+    tooltip: "Absolute error in Li-O_water coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)"
+    level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021)
+    weight: 1
+  CN Li-O_TFSI Error:
+    good: 0.0
+    bad: 0.5
+    unit: "-"
+    tooltip: "Absolute error in Li-O_TFSI coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)"
+    level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021)
+    weight: 1
+  S(q) R-factor:
+    good: 0.0
+    bad: 0.5
+    unit: "-"
+    tooltip: "R-factor between computed and experimental X-ray S(q): sum|S_exp - S_calc| / sum|S_exp|"
+    level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021)
+    weight: 1
+  First Peak Position Error:
+    good: 0.0
+    bad: 0.3
+    unit: "1/A"
+    tooltip: "Absolute error in position of first S(q) peak (~1.0 A-1)"
+    level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021)
+    weight: 0
diff --git a/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py b/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py
deleted file mode 100644
index 8df3acd2a..000000000
--- a/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py
+++ /dev/null
@@ -1,434 +0,0 @@
-"""Analyse Li-O RDF benchmark for LiTFSI/H2O electrolyte."""
-# Reads rdf.json and rdf.npz files from calc outputs and generates
-# scoring tables and g(r) plots for the ml-peg dashboard.
-#
-# Reference coordination numbers (Watanabe et al., J. Phys. Chem. B 125, 7477, 2021):
-#   CN(Li-O_water) = 2.0  }  ~18.5 m LiTFSI/H2O,
-#   CN(Li-O_TFSI)  = 2.0  }  neutron diffraction, 6Li/7Li + H/D substitution
-#
-# Integration cutoff: 2.83 Å (first minimum of Li-O_total g(r) from r2SCAN AIMD).
-# Trajectories: NVT 50 ps (dt=0.1 ps/frame, 501 frames, equilibrated 50-100 ps window).
-# Box length: 27.4938 Å (cubic, p64_w170 cell: 64 LiTFSI + 170 H2O = 1534 atoms).
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import numpy as np
-import plotly.graph_objects as go
-import pytest
-
-from ml_peg.analysis.utils.utils import load_metrics_config
-
-# --- Paths -------------------------------------------------------------------
-
-CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs"
-CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "rdf" / "outputs"
-APP_ROOT = Path(__file__).resolve().parents[3] / "app"
-OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf"
-
-MODELS = [
-    "matpes-r2scan",
-    "mace-mpa-0-medium",
-    "mace-omat-0-medium",
-    "mace-mp-0b3",
-    "mace-mh-1-omat",
-    "mace-mh-1-omol",
-]
-
-# --- Physical constants and reference values ---------------------------------
-
-# Box geometry (same for all models: p64_w170 NVT cell)
-L_BOX = 27.4938  # Å (cubic NVT cell at experimental density)
-V_BOX = L_BOX**3  # Å³
-
-# Integration cutoff: first minimum of Li-O_total g(r)
-R_CUT = 2.83  # Å
-DR = 0.02  # Å (bin width used in calc_rdf.py)
-
-# Experimental reference: Watanabe et al., JPCB 2021, ~18.5 m LiTFSI/H2O
-CN_EXP_WATER = 2.0
-CN_EXP_TFSI = 2.0
-
-# Map benchmark model names → ml-peg registry names (for app summary table)
-MODEL_NAME_MAP = {
-    "matpes-r2scan": "mace-matpes-r2scan",
-    "mace-mpa-0-medium": "mace-mpa-0",
-    "mace-omat-0-medium": "mace-omat-0",
-}
-
-METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
-DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
-    METRICS_CONFIG_PATH
-)
-
-
-# --- Helpers -----------------------------------------------------------------
-
-
-def compute_cn_from_gr(
-    gr: np.ndarray,
-    r: np.ndarray,
-    n_neighbor: int,
-    volume: float,
-    r_cut: float,
-    dr: float,
-) -> float:
-    """
-    Integrate g(r) to coordination number: CN = 4pi rho int_0^r_cut g(r) r^2 dr.
-
-    Parameters
-    ----------
-    gr : np.ndarray
-        Radial distribution function values.
-    r : np.ndarray
-        Radial distance values in angstrom.
-    n_neighbor : int
-        Number of neighbor atoms of the relevant species in the simulation box.
-    volume : float
-        Volume of the simulation box in angstrom^3.
-    r_cut : float
-        Integration cutoff distance in angstrom.
-    dr : float
-        Bin width in angstrom.
-
-    Returns
-    -------
-    float
-        Coordination number obtained by integrating g(r) up to ``r_cut``.
-    """
-    rho = n_neighbor / volume
-    mask = r <= r_cut
-    return float(np.sum(4 * np.pi * rho * gr[mask] * r[mask] ** 2 * dr))
-
-
-def load_rdf_results() -> dict[str, dict]:
-    """
-    Load rdf.json and rdf.npz for each model, recompute CN at r_cut=2.83 A.
-
-    Returns
-    -------
-    dict[str, dict]
-        Mapping from model name to a dict containing coordination numbers,
-        errors, radial distances, g(r) arrays, and frame count.
-    """
-    results = {}
-    for model in MODELS:
-        json_path = CALC_PATH / model / "rdf.json"
-        npz_path = CALC_PATH / model / "rdf.npz"
-        if not json_path.exists() or not npz_path.exists():
-            continue
-
-        with open(json_path) as f:
-            meta = json.load(f)
-
-        data = np.load(npz_path)
-        r = data["r"]
-
-        n_ow = meta["n_O_water"]
-        n_of = meta["n_O_TFSI"]
-
-        cn_w = compute_cn_from_gr(data["gr_LiO_water"], r, n_ow, V_BOX, R_CUT, DR)
-        cn_f = compute_cn_from_gr(data["gr_LiO_TFSI"], r, n_of, V_BOX, R_CUT, DR)
-
-        results[model] = {
-            "cn_water": cn_w,
-            "cn_tfsi": cn_f,
-            "cn_total": cn_w + cn_f,
-            "err_water": abs(cn_w - CN_EXP_WATER),
-            "err_tfsi": abs(cn_f - CN_EXP_TFSI),
-            "r": r,
-            "gr_water": data["gr_LiO_water"],
-            "gr_tfsi": data["gr_LiO_TFSI"],
-            "gr_total": data["gr_LiO_total"],
-            "n_frames": meta["n_frames_used"],
-        }
-    return results
-
-
-def normalize_metric(value: float, good: float, bad: float) -> float:
-    """
-    Map value linearly: good to 1.0, bad to 0.0, clipped to [0, 1].
-
-    Parameters
-    ----------
-    value : float
-        The metric value to normalize.
-    good : float
-        Threshold value that maps to a score of 1.0.
-    bad : float
-        Threshold value that maps to a score of 0.0.
-
-    Returns
-    -------
-    float
-        Normalized score clipped to the range [0, 1].
-    """
-    if good == bad:
-        return 1.0 if value == good else 0.0
-    t = (value - bad) / (good - bad)
-    return max(0.0, min(1.0, float(t)))
-
-
-# --- Plot builders -----------------------------------------------------------
-
-
-def build_cn_bar_chart(data: dict[str, dict]) -> go.Figure:
-    """
-    Bar chart of CN_water and CN_TFSI per model with experimental reference.
-
-    Parameters
-    ----------
-    data : dict[str, dict]
-        Per-model RDF results as returned by :func:`load_rdf_results`.
-
-    Returns
-    -------
-    go.Figure
-        Plotly figure with grouped bars and experimental reference lines.
-    """
-    fig = go.Figure()
-
-    x_models = [m for m in MODELS if m in data]
-    cn_water_vals = [data[m]["cn_water"] for m in x_models]
-    cn_tfsi_vals = [data[m]["cn_tfsi"] for m in x_models]
-
-    fig.add_trace(
-        go.Bar(
-            name="Li-O<sub>water</sub>",
-            x=x_models,
-            y=cn_water_vals,
-            marker_color="steelblue",
-        )
-    )
-    fig.add_trace(
-        go.Bar(
-            name="Li-O<sub>TFSI</sub>",
-            x=x_models,
-            y=cn_tfsi_vals,
-            marker_color="coral",
-        )
-    )
-
-    # Experimental reference lines
-    fig.add_hline(
-        y=CN_EXP_WATER,
-        line_dash="dash",
-        line_color="steelblue",
-        annotation_text=f"Exp. O<sub>water</sub> ({CN_EXP_WATER})",
-        annotation_position="top right",
-    )
-    fig.add_hline(
-        y=CN_EXP_TFSI,
-        line_dash="dash",
-        line_color="coral",
-        annotation_text=f"Exp. O<sub>TFSI</sub> ({CN_EXP_TFSI})",
-        annotation_position="bottom right",
-    )
-
-    fig.update_layout(
-        title="Li⁺ Coordination Numbers (LiTFSI/H₂O, 21 m)",
-        yaxis_title="Coordination Number",
-        barmode="group",
-        legend={"x": 0.01, "y": 0.99},
-    )
-    return fig
-
-
-def build_gr_plot(data: dict[str, dict]) -> go.Figure:
-    """
-    G(r) plot: Li-O_water and Li-O_TFSI for all models.
-
-    Parameters
-    ----------
-    data : dict[str, dict]
-        Per-model RDF results as returned by :func:`load_rdf_results`.
-
-    Returns
-    -------
-    go.Figure
-        Plotly figure with g(r) curves and integration cutoff line.
-    """
-    fig = go.Figure()
-
-    colors = [
-        "#1f77b4",
-        "#ff7f0e",
-        "#2ca02c",
-        "#d62728",
-        "#9467bd",
-        "#8c564b",
-    ]
-
-    for i, model in enumerate(MODELS):
-        if model not in data:
-            continue
-        d = data[model]
-        c = colors[i % len(colors)]
-        fig.add_trace(
-            go.Scatter(
-                x=d["r"],
-                y=d["gr_water"],
-                mode="lines",
-                name=f"{model} O<sub>w</sub>",
-                line={"color": c, "dash": "solid"},
-                legendgroup=model,
-            )
-        )
-        fig.add_trace(
-            go.Scatter(
-                x=d["r"],
-                y=d["gr_tfsi"],
-                mode="lines",
-                name=f"{model} O<sub>TFSI</sub>",
-                line={"color": c, "dash": "dot"},
-                legendgroup=model,
-            )
-        )
-
-    # Mark integration cutoff
-    fig.add_vline(
-        x=R_CUT,
-        line_dash="dash",
-        line_color="gray",
-        annotation_text=f"r_cut={R_CUT} Å",
-        annotation_position="top right",
-    )
-
-    fig.update_layout(
-        title="Li-O Radial Distribution Functions (LiTFSI/H₂O, 21 m)",
-        xaxis_title="r / Å",
-        yaxis_title="g(r)",
-        xaxis_range=[1.0, 6.0],
-    )
-    return fig
-
-
-# --- Metrics table -----------------------------------------------------------
-
-
-def build_metrics_table(data: dict[str, dict]) -> dict:
-    """
-    Build metrics table JSON for ml-peg app.
-
-    Parameters
-    ----------
-    data : dict[str, dict]
-        Per-model RDF results as returned by :func:`load_rdf_results`.
-
-    Returns
-    -------
-    dict
-        Table payload with keys ``data``, ``columns``, ``tooltip_header``,
-        ``thresholds``, and ``reference``.
-    """
-    rows = []
-    for model in MODELS:
-        if model not in data:
-            continue
-        d = data[model]
-
-        score_w = normalize_metric(
-            d["err_water"],
-            DEFAULT_THRESHOLDS["CN Li-O_water Error"]["good"],
-            DEFAULT_THRESHOLDS["CN Li-O_water Error"]["bad"],
-        )
-        score_f = normalize_metric(
-            d["err_tfsi"],
-            DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["good"],
-            DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["bad"],
-        )
-
-        w_w = DEFAULT_WEIGHTS.get("CN Li-O_water Error", 1.0)
-        w_f = DEFAULT_WEIGHTS.get("CN Li-O_TFSI Error", 1.0)
-        total_w = w_w + w_f
-        overall = (score_w * w_w + score_f * w_f) / total_w if total_w > 0 else 0.0
-
-        rows.append(
-            {
-                "MLIP": model,
-                "id": MODEL_NAME_MAP.get(model, model),
-                "CN Li-O_water": round(d["cn_water"], 3),
-                "CN Li-O_TFSI": round(d["cn_tfsi"], 3),
-                "CN Li-O_water Error": round(d["err_water"], 3),
-                "CN Li-O_TFSI Error": round(d["err_tfsi"], 3),
-                "Score": round(overall, 3),
-            }
-        )
-
-    rows.sort(key=lambda r: r["Score"], reverse=True)
-
-    columns = [
-        {"name": c, "id": c}
-        for c in [
-            "MLIP",
-            "CN Li-O_water Error",
-            "CN Li-O_TFSI Error",
-            "Score",
-        ]
-    ]
-
-    return {
-        "data": rows,
-        "columns": columns,
-        "tooltip_header": DEFAULT_TOOLTIPS,
-        "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()},
-        "weights": dict(DEFAULT_WEIGHTS),
-        "model_name_map": MODEL_NAME_MAP,
-        "reference": {
-            "CN_water": CN_EXP_WATER,
-            "CN_TFSI": CN_EXP_TFSI,
-            "citation": "Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)",
-            "concentration": "~18.5 m LiTFSI/H2O",
-            "method": "neutron diffraction with 6Li/7Li + H/D isotopic substitution",
-            "r_cut_angstrom": R_CUT,
-        },
-    }
-
-
-# --- Pytest interface --------------------------------------------------------
-
-
-@pytest.fixture
-def rdf_analysis():
-    """
-    Run full RDF analysis: table + plots.
-
-    Returns
-    -------
-    dict
-        Metrics table payload written to ``rdf_metrics_table.json``.
-    """
-    data = load_rdf_results()
-    if not data:
-        pytest.skip("No RDF data found")
-
-    OUT_PATH.mkdir(parents=True, exist_ok=True)
-
-    table = build_metrics_table(data)
-    with open(OUT_PATH / "rdf_metrics_table.json", "w") as f:
-        json.dump(table, f, indent=2)
-
-    fig_cn = build_cn_bar_chart(data)
-    with open(OUT_PATH / "figure_cn_bar.json", "w") as f:
-        f.write(fig_cn.to_json())
-
-    fig_gr = build_gr_plot(data)
-    with open(OUT_PATH / "figure_gr.json", "w") as f:
-        f.write(fig_gr.to_json())
-
-    return table
-
-
-def test_rdf(rdf_analysis) -> None:
-    """
-    Run RDF benchmark -- generates table and plots.
-
-    Parameters
-    ----------
-    rdf_analysis : dict
-        Metrics table payload provided by the ``rdf_analysis`` fixture.
-    """
-    table = rdf_analysis
-    assert len(table["data"]) > 0, "No models produced RDF data"
diff --git a/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml b/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml
deleted file mode 100644
index e6a67093a..000000000
--- a/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-metrics:
-  CN Li-O_water Error:
-    good: 0.0
-    bad: 0.5
-    unit: "-"
-    tooltip: "Absolute error in Li-O_water coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)"
-    level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021)
-    weight: 1
-  CN Li-O_TFSI Error:
-    good: 0.0
-    bad: 0.5
-    unit: "-"
-    tooltip: "Absolute error in Li-O_TFSI coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)"
-    level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021)
-    weight: 1
diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py b/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py
deleted file mode 100644
index 51b45c2b4..000000000
--- a/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py
+++ /dev/null
@@ -1,398 +0,0 @@
-"""Analyse X-ray structure factor benchmark for LiTFSI/H2O electrolyte."""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-
-import numpy as np
-import plotly.graph_objects as go
-import pytest
-from scipy.interpolate import interp1d
-
-from ml_peg.analysis.utils.utils import load_metrics_config
-
-# --- Paths -------------------------------------------------------------------
-
-CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs"
-CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "xray_sf" / "outputs"
-APP_ROOT = Path(__file__).resolve().parents[3] / "app"
-OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf"
-
-# Experimental data bundled with the benchmark
-EXP_PATH = Path(__file__).resolve().parents[1] / "data" / "saxs_maginn.txt"
-
-MODELS = [
-    "matpes-r2scan",
-    "mace-mpa-0-medium",
-    "mace-omat-0-medium",
-    "mace-mp-0b3",
-    "mace-mh-1-omat",
-    "mace-mh-1-omol",
-]
-
-# Map benchmark model names → ml-peg registry names (for app summary table)
-MODEL_NAME_MAP = {
-    "matpes-r2scan": "mace-matpes-r2scan",
-    "mace-mpa-0-medium": "mace-mpa-0",
-    "mace-omat-0-medium": "mace-omat-0",
-}
-
-METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
-DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
-    METRICS_CONFIG_PATH
-)
-
-
-# --- Helpers -----------------------------------------------------------------
-
-
-def load_experimental_sq() -> tuple[np.ndarray, np.ndarray]:
-    """
-    Load experimental SAXS S(q) data.
-
-    Returns
-    -------
-    q_exp : np.ndarray
-        Scattering vector values in inverse angstroms.
-    sq_exp : np.ndarray
-        Experimental structure factor values.
-    """
-    data = np.loadtxt(EXP_PATH)
-    return data[:, 0], data[:, 1]
-
-
-def load_computed_sq(model: str) -> tuple[np.ndarray, np.ndarray] | None:
-    """
-    Load computed S(q) for a model.
-
-    Parameters
-    ----------
-    model : str
-        Name of the MLIP model.
-
-    Returns
-    -------
-    tuple of (np.ndarray, np.ndarray) or None
-        Scattering vector and structure factor arrays, or None if the
-        output file does not exist.
-    """
-    json_path = CALC_PATH / model / "xray_sq.json"
-    if not json_path.exists():
-        return None
-    with open(json_path) as f:
-        d = json.load(f)
-    return np.array(d["q"]), np.array(d["Sq"])
-
-
-def compute_r_factor(
-    q_exp: np.ndarray,
-    sq_exp: np.ndarray,
-    q_calc: np.ndarray,
-    sq_calc: np.ndarray,
-) -> float:
-    """
-    Compute R-factor: sum|S_exp - S_calc| / sum|S_exp|.
-
-    The q range used is the overlap between experimental and calculated data:
-    q_min is the minimum q of the calculated S(q), set by the simulation cell
-    size (2*pi/L), and q_max is the maximum q of the experimental data.
-
-    Parameters
-    ----------
-    q_exp : np.ndarray
-        Experimental scattering vector values.
-    sq_exp : np.ndarray
-        Experimental structure factor values.
-    q_calc : np.ndarray
-        Calculated scattering vector values.
-    sq_calc : np.ndarray
-        Calculated structure factor values.
-
-    Returns
-    -------
-    float
-        R-factor value, or NaN if no overlapping valid data exists.
-    """
-    valid_calc = ~np.isnan(sq_calc)
-    q_min_calc = float(q_calc[valid_calc].min())
-    q_max_exp = float(q_exp.max())
-
-    f = interp1d(
-        q_calc[valid_calc],
-        sq_calc[valid_calc],
-        kind="linear",
-        bounds_error=False,
-        fill_value=np.nan,
-    )
-
-    mask = (q_exp >= q_min_calc) & (q_exp <= q_max_exp)
-    sq_interp = f(q_exp[mask])
-    sq_e = sq_exp[mask]
-
-    ok = ~np.isnan(sq_interp)
-    if not ok.any():
-        return float("nan")
-
-    return float(np.sum(np.abs(sq_e[ok] - sq_interp[ok])) / np.sum(np.abs(sq_e[ok])))
-
-
-def find_first_peak(
-    q: np.ndarray, sq: np.ndarray, q_min: float = 0.5, q_max: float = 2.0
-) -> float:
-    """
-    Find position of first peak in S(q).
-
-    Parameters
-    ----------
-    q : np.ndarray
-        Scattering vector values.
-    sq : np.ndarray
-        Structure factor values.
-    q_min : float, optional
-        Lower bound of the search window in inverse angstroms (default 0.5).
-    q_max : float, optional
-        Upper bound of the search window in inverse angstroms (default 2.0).
-
-    Returns
-    -------
-    float
-        Position of the first peak, or NaN if fewer than 3 valid points
-        exist in the search window.
-    """
-    mask = (q >= q_min) & (q <= q_max) & ~np.isnan(sq)
-    if mask.sum() < 3:
-        return float("nan")
-    q_sub = q[mask]
-    sq_sub = sq[mask]
-    return float(q_sub[np.argmax(sq_sub)])
-
-
-def normalize_metric(value: float, good: float, bad: float) -> float:
-    """
-    Normalize a metric value to [0, 1] where 0 is good and 1 is bad.
-
-    Parameters
-    ----------
-    value : float
-        Raw metric value.
-    good : float
-        Threshold corresponding to the best achievable score (0).
-    bad : float
-        Threshold corresponding to the worst score (1).
-
-    Returns
-    -------
-    float
-        Normalized score clipped to [0, 1].
-    """
-    if bad == good:
-        return 0.0 if value <= good else 1.0
-    score = (value - good) / (bad - good)
-    return max(0.0, min(1.0, score))
-
-
-# --- Plot builders -----------------------------------------------------------
-
-
-def build_sq_comparison_plot(
-    q_exp: np.ndarray,
-    sq_exp: np.ndarray,
-    model_data: dict[str, tuple[np.ndarray, np.ndarray]],
-) -> go.Figure:
-    """
-    Build S(q) overlay plot comparing computed and experimental data.
-
-    Parameters
-    ----------
-    q_exp : np.ndarray
-        Experimental scattering vector values.
-    sq_exp : np.ndarray
-        Experimental structure factor values.
-    model_data : dict of str to tuple of (np.ndarray, np.ndarray)
-        Mapping from model name to (q, S(q)) arrays.
-
-    Returns
-    -------
-    go.Figure
-        Plotly figure with experimental and computed S(q) traces.
-    """
-    fig = go.Figure()
-
-    fig.add_trace(
-        go.Scatter(
-            x=q_exp.tolist(),
-            y=sq_exp.tolist(),
-            mode="lines",
-            name="Exp. (Zhang et al. 2021)",
-            line={"color": "black", "width": 2},
-        )
-    )
-
-    for model, (q, sq) in model_data.items():
-        fig.add_trace(
-            go.Scatter(
-                x=q.tolist(),
-                y=sq.tolist(),
-                mode="lines",
-                name=model,
-                opacity=0.8,
-            )
-        )
-
-    fig.update_layout(
-        title="X-ray Structure Factor S(q) — LiTFSI/H2O (21 m)",
-        xaxis_title="q / Å⁻¹",
-        yaxis_title="S(q) (Faber-Ziman)",
-        legend={"x": 0.60, "y": 0.98},
-    )
-    return fig
-
-
-def build_metrics_table(results: dict[str, dict]) -> dict:
-    """
-    Build metrics table as a JSON-serialisable dictionary.
-
-    Parameters
-    ----------
-    results : dict of str to dict
-        Per-model results containing ``r_factor``, ``peak_position_error``,
-        ``peak_calc``, and ``peak_exp`` entries.
-
-    Returns
-    -------
-    dict
-        Dictionary with ``data`` (list of row dicts), ``columns``,
-        ``tooltip_header``, and ``thresholds`` keys.
-    """
-    rows = []
-    for model in MODELS:
-        if model not in results:
-            continue
-        r = results[model]
-        r_factor = r["r_factor"]
-        peak_err = r["peak_position_error"]
-
-        score_r = normalize_metric(
-            r_factor,
-            DEFAULT_THRESHOLDS["S(q) R-factor"]["good"],
-            DEFAULT_THRESHOLDS["S(q) R-factor"]["bad"],
-        )
-        score_p = normalize_metric(
-            peak_err,
-            DEFAULT_THRESHOLDS["First Peak Position Error"]["good"],
-            DEFAULT_THRESHOLDS["First Peak Position Error"]["bad"],
-        )
-
-        w_r = DEFAULT_WEIGHTS.get("S(q) R-factor", 1.0)
-        w_p = DEFAULT_WEIGHTS.get("First Peak Position Error", 1.0)
-        total_w = w_r + w_p
-        overall = (
-            1.0 - (score_r * w_r + score_p * w_p) / total_w if total_w > 0 else 0.0
-        )
-
-        rows.append(
-            {
-                "MLIP": model,
-                "id": MODEL_NAME_MAP.get(model, model),
-                "S(q) R-factor": round(r_factor, 4),
-                "First Peak Position Error": round(peak_err, 4),
-                "First Peak (calc)": round(r["peak_calc"], 2),
-                "First Peak (exp)": round(r["peak_exp"], 2),
-                "Score": round(overall, 3),
-            }
-        )
-
-    rows.sort(key=lambda r: r["Score"], reverse=True)
-
-    columns = [
-        {"name": c, "id": c}
-        for c in [
-            "MLIP",
-            "S(q) R-factor",
-            "First Peak Position Error",
-            "Score",
-        ]
-    ]
-
-    return {
-        "data": rows,
-        "columns": columns,
-        "tooltip_header": DEFAULT_TOOLTIPS,
-        "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()},
-        "weights": dict(DEFAULT_WEIGHTS),
-        "model_name_map": MODEL_NAME_MAP,
-    }
-
-
-# --- Pytest interface --------------------------------------------------------
-
-
-@pytest.fixture
-def xray_sf_analysis():
-    """
-    Run full X-ray S(q) analysis and write outputs.
-
-    Returns
-    -------
-    dict
-        Metrics table dictionary produced by ``build_metrics_table``.
-    """
-    q_exp, sq_exp = load_experimental_sq()
-    peak_exp = find_first_peak(q_exp, sq_exp)
-
-    model_data = {}
-    results = {}
-
-    for model in MODELS:
-        data = load_computed_sq(model)
-        if data is None:
-            continue
-        q_calc, sq_calc = data
-        model_data[model] = (q_calc, sq_calc)
-
-        r_factor = compute_r_factor(
-            q_exp, sq_exp, q_calc, sq_calc
-        )  # range: [q_min_calc, q_max_exp]
-        peak_calc = find_first_peak(q_calc, sq_calc)
-        peak_error = (
-            abs(peak_calc - peak_exp) if not np.isnan(peak_calc) else float("nan")
-        )
-
-        results[model] = {
-            "r_factor": r_factor,
-            "peak_calc": peak_calc,
-            "peak_exp": peak_exp,
-            "peak_position_error": peak_error,
-        }
-
-    if not results:
-        pytest.skip("No S(q) data found")
-
-    OUT_PATH.mkdir(parents=True, exist_ok=True)
-
-    table = build_metrics_table(results)
-    with open(OUT_PATH / "xray_sf_metrics_table.json", "w") as f:
-        json.dump(table, f, indent=2)
-
-    fig = build_sq_comparison_plot(q_exp, sq_exp, model_data)
-    with open(OUT_PATH / "figure_xray_sq_comparison.json", "w") as f:
-        f.write(fig.to_json())
-
-    with open(OUT_PATH / "xray_sf_results.json", "w") as f:
-        json.dump(results, f, indent=2)
-
-    return table
-
-
-def test_xray_sf(xray_sf_analysis) -> None:
-    """
-    Run X-ray structure factor benchmark.
-
-    Parameters
-    ----------
-    xray_sf_analysis : dict
-        Metrics table fixture providing analysis results.
-    """
-    table = xray_sf_analysis
-    assert len(table["data"]) > 0, "No models produced S(q) data"
diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml b/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml
deleted file mode 100644
index 310fd8824..000000000
--- a/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-metrics:
-  S(q) R-factor:
-    good: 0.0
-    bad: 0.5
-    unit: "-"
-    tooltip: "R-factor between computed and experimental X-ray S(q): sum|S_exp - S_calc| / sum|S_exp|"
-    level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021)
-    weight: 1
-  First Peak Position Error:
-    good: 0.0
-    bad: 0.3
-    unit: "1/A"
-    tooltip: "Absolute error in position of first S(q) peak (~1.0 A-1)"
-    level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021)
-    weight: 0
diff --git a/ml_peg/app/wise_electrolytes/density/app_density.py b/ml_peg/app/wise_electrolytes/density/app_density.py
deleted file mode 100644
index 2a9055d4f..000000000
--- a/ml_peg/app/wise_electrolytes/density/app_density.py
+++ /dev/null
@@ -1,68 +0,0 @@
-"""Dash app for LiTFSI/H2O electrolyte density benchmark."""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-from dash import html
-
-from ml_peg.app.base_app import BaseApp
-from ml_peg.app.utils.build_callbacks import plot_from_table_column
-from ml_peg.app.utils.load import read_plot
-
-BENCHMARK_NAME = "wise_electrolytes_density"
-APP_ROOT = Path(__file__).resolve().parents[2]
-DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density"
-
-
-class DensityApp(BaseApp):
-    """Dash app for WiSE electrolyte density benchmark."""
-
-    def register_callbacks(self) -> None:
-        """Register interactive plot callbacks."""
-        bar_chart = read_plot(
-            DATA_PATH / "figure_density_bar.json",
-            id=f"{BENCHMARK_NAME}-bar",
-        )
-        timeseries = read_plot(
-            DATA_PATH / "figure_density_timeseries.json",
-            id=f"{BENCHMARK_NAME}-timeseries",
-        )
-
-        plot_from_table_column(
-            table_id=self.table_id,
-            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
-            column_to_plot={
-                "Density Error": bar_chart,
-                "Density Error (%)": bar_chart,
-                "Score": timeseries,
-            },
-        )
-
-
-def get_app() -> DensityApp:
-    """
-    Return configured density benchmark app.
-
-    Returns
-    -------
-    DensityApp
-        Configured app instance.
-    """
-    return DensityApp(
-        name="WiSE Density",
-        description=(
-            "NPT density of 21 m LiTFSI/H2O electrolyte at 298 K. "
-            "Experimental reference: 1.72 g/cm³ (Maginn et al., 2021)."
-        ),
-        docs_url="",
-        table_path=DATA_PATH / "density_metrics_table.json",
-        extra_components=[
-            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
-        ],
-    )
-
-
-if __name__ == "__main__":
-    app = get_app()
-    app.run(port=8060, debug=True)
diff --git a/ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py b/ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py
new file mode 100644
index 000000000..98c46e56c
--- /dev/null
+++ b/ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py
@@ -0,0 +1,91 @@
+"""Dash app for the consolidated WiSE 21 m LiTFSI/H2O electrolyte benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from dash import html
+
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import plot_from_table_column
+from ml_peg.app.utils.load import read_plot
+
+BENCHMARK_NAME = "wise_electrolytes_litfsi_h2o_21m"
+APP_ROOT = Path(__file__).resolve().parents[2]
+DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "litfsi_h2o_21m"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/"
+    "wise_electrolytes.html#litfsi-h2o-21-m"
+)
+
+
+class LitfsiH2O21mApp(BaseApp):
+    """Dash app for the consolidated WiSE LiTFSI/H2O 21 m benchmark."""
+
+    def register_callbacks(self) -> None:
+        """Register interactive plot callbacks for all metrics."""
+        density_bar = read_plot(
+            DATA_PATH / "figure_density_bar.json",
+            id=f"{BENCHMARK_NAME}-density-bar",
+        )
+        density_timeseries = read_plot(
+            DATA_PATH / "figure_density_timeseries.json",
+            id=f"{BENCHMARK_NAME}-density-timeseries",
+        )
+        cn_bar = read_plot(
+            DATA_PATH / "figure_cn_bar.json",
+            id=f"{BENCHMARK_NAME}-cn-bar",
+        )
+        gr_plot = read_plot(
+            DATA_PATH / "figure_gr.json",
+            id=f"{BENCHMARK_NAME}-gr",
+        )
+        sq_plot = read_plot(
+            DATA_PATH / "figure_xray_sq_comparison.json",
+            id=f"{BENCHMARK_NAME}-sq",
+        )
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={
+                "Density Error": density_bar,
+                "Density Error (%)": density_bar,
+                "CN Li-O_water Error": cn_bar,
+                "CN Li-O_TFSI Error": cn_bar,
+                "S(q) R-factor": sq_plot,
+                "First Peak Position Error": sq_plot,
+                "Score": gr_plot,
+            },
+        )
+
+
+def get_app() -> LitfsiH2O21mApp:
+    """
+    Return the configured WiSE LiTFSI/H2O 21 m benchmark app.
+
+    Returns
+    -------
+    LitfsiH2O21mApp
+        Configured app instance.
+    """
+    return LitfsiH2O21mApp(
+        name="WiSE LiTFSI/H2O 21 m",
+        description=(
+            "Consolidated structural and thermodynamic benchmark for the "
+            "21 m LiTFSI/H2O 'water-in-salt' electrolyte: NPT density vs "
+            "experiment (Gilbert 2017), Li-O coordination numbers from RDFs "
+            "(Watanabe 2021), and X-ray structure factor S(q) (Zhang 2021). "
+            "Trajectories produced with LAMMPS+symmetrix on Adastra (MI250X)."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "litfsi_h2o_21m_metrics_table.json",
+        extra_components=[
+            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    app = get_app()
+    app.run(port=8060, debug=True)
diff --git a/ml_peg/app/wise_electrolytes/rdf/app_rdf.py b/ml_peg/app/wise_electrolytes/rdf/app_rdf.py
deleted file mode 100644
index 8de28f8be..000000000
--- a/ml_peg/app/wise_electrolytes/rdf/app_rdf.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""Dash app for LiTFSI/H2O Li-O RDF / coordination number benchmark."""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-from dash import html
-
-from ml_peg.app.base_app import BaseApp
-from ml_peg.app.utils.build_callbacks import plot_from_table_column
-from ml_peg.app.utils.load import read_plot
-
-BENCHMARK_NAME = "wise_electrolytes_rdf"
-APP_ROOT = Path(__file__).resolve().parents[2]
-DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf"
-
-
-class RdfApp(BaseApp):
-    """Dash app for WiSE electrolyte Li-O RDF benchmark."""
-
-    def register_callbacks(self) -> None:
-        """Register interactive plot callbacks."""
-        cn_bar = read_plot(
-            DATA_PATH / "figure_cn_bar.json",
-            id=f"{BENCHMARK_NAME}-cn-bar",
-        )
-        gr_plot = read_plot(
-            DATA_PATH / "figure_gr.json",
-            id=f"{BENCHMARK_NAME}-gr",
-        )
-
-        plot_from_table_column(
-            table_id=self.table_id,
-            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
-            column_to_plot={
-                "CN Li-O_water Error": cn_bar,
-                "CN Li-O_TFSI Error": cn_bar,
-                "Score": gr_plot,
-            },
-        )
-
-
-def get_app() -> RdfApp:
-    """
-    Return configured RDF benchmark app.
-
-    Returns
-    -------
-    RdfApp
-        Configured app instance.
-    """
-    return RdfApp(
-        name="WiSE Li-O RDF",
-        description=(
-            "Li-O coordination numbers from radial distribution functions "
-            "of 21 m LiTFSI/H2O electrolyte (NVT, 298 K). "
-            "Reference: Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)."
-        ),
-        docs_url="",
-        table_path=DATA_PATH / "rdf_metrics_table.json",
-        extra_components=[
-            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
-        ],
-    )
-
-
-if __name__ == "__main__":
-    app = get_app()
-    app.run(port=8062, debug=True)
diff --git a/ml_peg/app/wise_electrolytes/wise_electrolytes.yml b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml
index dbfe8135c..5cf1c8b8a 100644
--- a/ml_peg/app/wise_electrolytes/wise_electrolytes.yml
+++ b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml
@@ -2,6 +2,4 @@ title: WiSE Electrolytes
 description: Structural and thermodynamic properties of 21 m LiTFSI/H2O "water-in-salt" electrolytes from molecular dynamics
 weight: 0
 benchmark_weights:
-  WiSE Density: 0.5
-  WiSE X-ray S(q): 0.3
-  WiSE Li-O RDF: 0.2
+  WiSE LiTFSI/H2O 21 m: 1.0
diff --git a/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py b/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py
deleted file mode 100644
index 87e0f78f7..000000000
--- a/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""Dash app for LiTFSI/H2O X-ray structure factor benchmark."""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-from dash import html
-
-from ml_peg.app.base_app import BaseApp
-from ml_peg.app.utils.build_callbacks import plot_from_table_column
-from ml_peg.app.utils.load import read_plot
-
-BENCHMARK_NAME = "wise_electrolytes_xray_sf"
-APP_ROOT = Path(__file__).resolve().parents[2]
-DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf"
-
-
-class XraySfApp(BaseApp):
-    """Dash app for WiSE electrolyte X-ray S(q) benchmark."""
-
-    def register_callbacks(self) -> None:
-        """Register interactive plot callbacks."""
-        sq_plot = read_plot(
-            DATA_PATH / "figure_xray_sq_comparison.json",
-            id=f"{BENCHMARK_NAME}-sq",
-        )
-
-        plot_from_table_column(
-            table_id=self.table_id,
-            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
-            column_to_plot={
-                "S(q) R-factor": sq_plot,
-                "First Peak Position Error": sq_plot,
-                "Score": sq_plot,
-            },
-        )
-
-
-def get_app() -> XraySfApp:
-    """
-    Return configured X-ray S(q) benchmark app.
-
-    Returns
-    -------
-    XraySfApp
-        Configured app instance.
-    """
-    return XraySfApp(
-        name="WiSE X-ray S(q)",
-        description=(
-            "X-ray structure factor S(q) of 21 m LiTFSI/H2O electrolyte from NVT MD. "
-            "Computed via dynasor with Cromer-Mann form factors "
-            "(Faber-Ziman normalization). "
-            "Experimental reference: SAXS (Maginn et al., J. Phys. Chem. B, 2021)."
-        ),
-        docs_url="",
-        table_path=DATA_PATH / "xray_sf_metrics_table.json",
-        extra_components=[
-            html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
-        ],
-    )
-
-
-if __name__ == "__main__":
-    app = get_app()
-    app.run(port=8061, debug=True)
diff --git a/ml_peg/calcs/wise_electrolytes/density/calc_density.py b/ml_peg/calcs/wise_electrolytes/density/calc_density.py
deleted file mode 100644
index 2d52a63d7..000000000
--- a/ml_peg/calcs/wise_electrolytes/density/calc_density.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""
-Extract density from pre-computed NPT simulations of LiTFSI/H2O electrolyte.
-
-This benchmark reads pre-computed LAMMPS NPT thermo logs and extracts
-equilibrium density for each MLIP model. The simulations use a p16_w42
-cell (382 atoms: 16 LiTFSI + 42 H2O) at 298 K, 1 atm.
-
-System: 21 m LiTFSI / H2O electrolyte.
-Experimental density: 1.7126 g/cm3
-(Gilbert et al., J. Chem. Eng. Data 62, 2056 (2017),
-DOI: 10.1021/acs.jced.7b00135).
-"""
-
-from __future__ import annotations
-
-import json
-import os
-from pathlib import Path
-
-import pytest
-
-# --- Configuration -----------------------------------------------------------
-
-# Path to converted data (will be replaced with S3 download for PR)
-DATA_ROOT = Path(
-    os.environ.get(
-        "ML_PEG_WISE_DENSITY_DATA_ROOT",
-        "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/density",
-    )
-)
-OUT_PATH = Path(__file__).parent / "outputs"
-
-MODELS = [
-    "matpes-r2scan",
-    "mace-mpa-0-medium",
-    "mace-omat-0-medium",
-    "mace-mp-0b3",
-    "mace-mh-1-omat",
-    "mace-mh-1-omol",
-]
-
-RHO_EXP = 1.7126  # g/cm³  (Gilbert et al., JCED 2017)
-
-
-# --- Pytest interface (ml-peg convention) ------------------------------------
-
-
-@pytest.mark.parametrize("model", MODELS)
-def test_extract_density(model: str) -> None:
-    """
-    Extract and save density data for one model.
-
-    Parameters
-    ----------
-    model : str
-        Name of the MLIP model to extract density for.
-    """
-    # Read pre-computed density data
-    json_path = DATA_ROOT / model / "density.json"
-    if not json_path.exists():
-        pytest.skip(f"No density data for {model}")
-
-    with open(json_path) as f:
-        result = json.load(f)
-
-    # Save to outputs
-    out_dir = OUT_PATH / model
-    out_dir.mkdir(parents=True, exist_ok=True)
-
-    with open(out_dir / "density.json", "w") as f:
-        json.dump(result, f, indent=2)
-
-    # Basic sanity
-    assert result["rho_mean"] > 0, f"Negative density for {model}"
-    assert abs(result["rho_error_pct"]) < 50, f"Density error > 50% for {model}"
diff --git a/ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py b/ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py
new file mode 100644
index 000000000..857669bf8
--- /dev/null
+++ b/ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py
@@ -0,0 +1,616 @@
+"""
+Consolidated WiSE 21 m LiTFSI/H2O electrolyte benchmark.
+
+Extracts three observables for each registered MLIP model:
+
+* Density from pre-computed NPT thermo logs (p16_w42 cell, 382 atoms).
+* Li-O coordination numbers from NVT trajectories via radial distribution
+  functions g(r) for Li-O_water (O bonded to H, d_OH < 1.25 A) and
+  Li-O_TFSI (O bonded to S, d_OS < 1.75 A); CN integrated to first minimum
+  R_CUT = 2.83 A from the r2SCAN AIMD reference.
+* X-ray structure factor S(q) computed via dynasor in TRAVIS Faber-Ziman
+  convention with Cromer-Mann 4-Gaussian form factors (including H) and
+  Savitzky-Golay smoothing (window=27, order=3, dq=0.02 A^-1).
+
+System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms, 27.4938 A cubic) for
+RDF and S(q); p16_w42 (382 atoms) for NPT density. Trajectories produced
+with LAMMPS + symmetrix on Adastra (MI250X); the Janus recast lives in
+``../md_reference/calc_md_reference.py``.
+
+Experimental references:
+
+* Density: 1.7126 g/cm3 (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017).
+* Li-O CN: 2.0 each for water/TFSI (Watanabe et al., JPCB 125, 7477, 2021,
+  ~18.5 m, neutron diffraction with isotopic substitution).
+* S(q): SAXS (Zhang et al., J. Phys. Chem. B 125, 4501, 2021).
+
+Source trajectories must live under ``DATA_ROOT/<registry_model_name>/``,
+where ``DATA_ROOT`` defaults to ``./data`` next to this script and is
+overridable via the ``ML_PEG_WISE_LITFSI_H2O_21M_DATA_ROOT`` environment
+variable.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+import warnings
+
+from ase.geometry import get_distances
+from ase.io import iread, read as ase_read
+import numpy as np
+import pytest
+from scipy.signal import savgol_filter
+
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+warnings.filterwarnings("ignore")
+
+# --- Configuration -----------------------------------------------------------
+
+MODELS = load_models(current_models)
+
+DATA_ROOT = Path(
+    os.environ.get(
+        "ML_PEG_WISE_LITFSI_H2O_21M_DATA_ROOT",
+        Path(__file__).parent / "data",
+    )
+)
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# --- Density references ------------------------------------------------------
+
+RHO_EXP = 1.7126  # g/cm3 (Gilbert et al., JCED 2017)
+
+# --- RDF parameters ----------------------------------------------------------
+
+R_MAX = 6.0  # A
+DR = 0.02  # A
+R_CUT_COORD = 2.83  # A — first minimum of Li-O g(r) from r2SCAN AIMD
+D_OH_CUT = 1.25  # A — O-H bond cutoff for water identification
+D_OS_CUT = 1.75  # A — O-S bond cutoff for TFSI identification
+
+# --- S(q) parameters (TRAVIS-matching) ---------------------------------------
+
+Q_MAX = 13.0  # A^-1
+Q_MIN = 0.5  # A^-1
+MAX_QPOINTS = 50000
+DQ_BIN = 0.02  # A^-1
+SAVGOL_WINDOW = 27
+SAVGOL_ORDER = 3
+
+# LAMMPS atom-type → element fallback for raw dump files
+TYPE_TO_ELEMENT = {1: "Li", 2: "C", 3: "F", 4: "S", 5: "N", 6: "O", 7: "H"}
+
+# System composition (p64_w170: 64 LiTFSI + 170 H2O)
+COMPOSITION = {"Li": 64, "C": 128, "F": 384, "S": 128, "N": 64, "O": 426, "H": 340}
+N_ATOMS = sum(COMPOSITION.values())  # 1534
+CONC = {k: v / N_ATOMS for k, v in COMPOSITION.items()}
+
+# Cromer-Mann 4-Gaussian X-ray form factor parameters (International Tables)
+TRAVIS_FF = {
+    "S": {"a": [6.905, 5.203, 1.438, 1.586],
+          "b": [1.468, 22.215, 0.254, 56.172], "c": 0.867},
+    "F": {"a": [3.539, 2.641, 1.517, 1.024],
+          "b": [10.283, 4.294, 0.262, 26.148], "c": 0.278},
+    "O": {"a": [3.049, 2.287, 1.546, 0.867],
+          "b": [13.277, 5.701, 0.324, 32.909], "c": 0.251},
+    "N": {"a": [12.213, 3.132, 2.013, 1.166],
+          "b": [0.006, 9.893, 28.997, 0.583], "c": -11.529},
+    "C": {"a": [2.310, 1.020, 1.589, 0.865],
+          "b": [20.844, 10.208, 0.569, 51.651], "c": 0.216},
+    "Li": {"a": [1.128, 0.751, 0.618, 0.465],
+           "b": [3.955, 1.052, 85.391, 168.261], "c": 0.038},
+    "H": {"a": [0.493, 0.323, 0.140, 0.041],
+          "b": [10.511, 26.126, 3.142, 57.800], "c": 0.003},
+}
+
+
+# =============================================================================
+# Density extraction
+# =============================================================================
+
+
+def _density_source_path(model_name: str) -> Path:
+    """
+    Locate the pre-computed density JSON for a model.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the MLIP model in the registry.
+
+    Returns
+    -------
+    Path
+        Expected path to ``density.json`` under ``DATA_ROOT``.
+    """
+    return DATA_ROOT / model_name / "density.json"
+
+
+# =============================================================================
+# RDF computation
+# =============================================================================
+
+
+def identify_o_types(atoms) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Return indices of O_water and O_TFSI from a single ASE Atoms frame.
+
+    Parameters
+    ----------
+    atoms : ase.Atoms
+        A single frame from the trajectory.
+
+    Returns
+    -------
+    o_water : np.ndarray
+        Indices of oxygen atoms bonded to hydrogen (water oxygens).
+    o_tfsi : np.ndarray
+        Indices of oxygen atoms bonded to sulfur (TFSI oxygens).
+    """
+    syms = np.array(atoms.get_chemical_symbols())
+    pos = atoms.get_positions()
+    cell = atoms.get_cell()
+    pbc = atoms.get_pbc()
+
+    o_idx = np.where(syms == "O")[0]
+    h_idx = np.where(syms == "H")[0]
+    s_idx = np.where(syms == "S")[0]
+
+    o_water, o_tfsi = [], []
+    for o in o_idx:
+        _, d_oh = get_distances(pos[o : o + 1], pos[h_idx], cell=cell, pbc=pbc)
+        _, d_os = get_distances(pos[o : o + 1], pos[s_idx], cell=cell, pbc=pbc)
+        if d_oh.min() < D_OH_CUT:
+            o_water.append(o)
+        elif d_os.min() < D_OS_CUT:
+            o_tfsi.append(o)
+
+    return np.array(o_water), np.array(o_tfsi)
+
+
+def compute_rdf(
+    traj_path: Path,
+    o_water_idx: np.ndarray,
+    o_tfsi_idx: np.ndarray,
+    r_max: float = R_MAX,
+    dr: float = DR,
+    skip_frames: int = 0,
+) -> dict:
+    """
+    Compute Li-O RDFs from an extxyz trajectory.
+
+    Parameters
+    ----------
+    traj_path : Path
+        Path to .extxyz trajectory file.
+    o_water_idx : np.ndarray
+        Atom indices of O_water (from first frame, fixed).
+    o_tfsi_idx : np.ndarray
+        Atom indices of O_TFSI (from first frame, fixed).
+    r_max : float
+        Maximum distance for RDF.
+    dr : float
+        Bin width.
+    skip_frames : int
+        Number of initial frames to skip (equilibration). Trajectories are
+        already pre-equilibrated (50–100 ps window).
+
+    Returns
+    -------
+    dict
+        Dictionary with keys ``r``, ``gr_LiO_total``, ``gr_LiO_water``,
+        ``gr_LiO_TFSI``, ``coord_LiO_total``, ``coord_LiO_water``,
+        ``coord_LiO_TFSI``, ``n_li``, ``n_O_water``, ``n_O_TFSI``,
+        ``n_frames_used``, and ``r_cut_coord``.
+    """
+    bins = np.arange(0, r_max + dr, dr)
+    r_centers = 0.5 * (bins[:-1] + bins[1:])
+    n_bins = len(r_centers)
+
+    hist_total = np.zeros(n_bins)
+    hist_water = np.zeros(n_bins)
+    hist_tfsi = np.zeros(n_bins)
+
+    n_frames = 0
+    n_li = None
+    volume = None
+    o_all_idx = np.concatenate([o_water_idx, o_tfsi_idx])
+
+    for frame_idx, atoms in enumerate(
+        iread(str(traj_path), format="extxyz", index=":")
+    ):
+        if frame_idx < skip_frames:
+            continue
+
+        syms = np.array(atoms.get_chemical_symbols())
+        pos = atoms.get_positions()
+        cell = atoms.get_cell()
+        pbc = atoms.get_pbc()
+
+        li_idx = np.where(syms == "Li")[0]
+        if n_li is None:
+            n_li = len(li_idx)
+        if volume is None:
+            volume = atoms.get_volume()
+
+        pos_li = pos[li_idx]
+
+        for o_set, hist in [
+            (o_all_idx, hist_total),
+            (o_water_idx, hist_water),
+            (o_tfsi_idx, hist_tfsi),
+        ]:
+            pos_o = pos[o_set]
+            _, dists = get_distances(pos_li, pos_o, cell=cell, pbc=pbc)
+            dists_flat = dists.ravel()
+            dists_flat = dists_flat[dists_flat < r_max]
+            h, _ = np.histogram(dists_flat, bins=bins)
+            hist += h
+
+        n_frames += 1
+
+    if n_frames == 0 or n_li is None:
+        raise RuntimeError(f"No frames processed from {traj_path}")
+
+    def normalize(hist, n_central, n_neighbor):
+        """
+        Normalize histogram to g(r).
+
+        Parameters
+        ----------
+        hist : np.ndarray
+            Raw pair-distance histogram.
+        n_central : int
+            Number of central atoms (Li).
+        n_neighbor : int
+            Number of neighbor atoms (O subset).
+
+        Returns
+        -------
+        np.ndarray
+            Normalized radial distribution function.
+        """
+        shell_vol = (4.0 / 3.0) * np.pi * (bins[1:] ** 3 - bins[:-1] ** 3)
+        rho = n_neighbor / volume
+        norm = n_central * n_frames * rho * shell_vol
+        return hist / norm
+
+    n_o_total = len(o_all_idx)
+    n_o_water = len(o_water_idx)
+    n_o_tfsi = len(o_tfsi_idx)
+
+    gr_total = normalize(hist_total, n_li, n_o_total)
+    gr_water = normalize(hist_water, n_li, n_o_water)
+    gr_tfsi = normalize(hist_tfsi, n_li, n_o_tfsi)
+
+    def coord_number(gr, n_neighbor):
+        """
+        Compute coordination number from g(r).
+
+        Parameters
+        ----------
+        gr : np.ndarray
+            Radial distribution function.
+        n_neighbor : int
+            Number of neighbor atoms (O subset).
+
+        Returns
+        -------
+        float
+            Integrated coordination number up to R_CUT_COORD.
+        """
+        rho = n_neighbor / volume
+        integrand = 4.0 * np.pi * rho * gr * r_centers**2 * dr
+        mask = r_centers <= R_CUT_COORD
+        return float(np.sum(integrand[mask]))
+
+    return {
+        "r": r_centers.tolist(),
+        "gr_LiO_total": gr_total.tolist(),
+        "gr_LiO_water": gr_water.tolist(),
+        "gr_LiO_TFSI": gr_tfsi.tolist(),
+        "coord_LiO_total": coord_number(gr_total, n_o_total),
+        "coord_LiO_water": coord_number(gr_water, n_o_water),
+        "coord_LiO_TFSI": coord_number(gr_tfsi, n_o_tfsi),
+        "n_li": n_li,
+        "n_O_water": n_o_water,
+        "n_O_TFSI": n_o_tfsi,
+        "n_frames_used": n_frames,
+        "r_cut_coord": R_CUT_COORD,
+    }
+
+
+# =============================================================================
+# X-ray S(q) computation
+# =============================================================================
+
+
+def compute_fq_travis(elem: str, q_arr: np.ndarray) -> np.ndarray:
+    """
+    Compute X-ray form factor f(q) using TRAVIS 4-Gaussian (Cromer-Mann) params.
+
+    Parameters
+    ----------
+    elem : str
+        Chemical element symbol (e.g. ``"Li"``, ``"O"``).
+    q_arr : np.ndarray
+        Array of q values in inverse angstroms.
+
+    Returns
+    -------
+    np.ndarray
+        Form factor values evaluated at each q.
+    """
+    ff = TRAVIS_FF[elem]
+    s2 = (q_arr / (4 * np.pi)) ** 2
+    return sum(ff["a"][i] * np.exp(-ff["b"][i] * s2) for i in range(4)) + ff["c"]
+
+
+def compute_sq_travis_style(traj_path: Path) -> dict:
+    """
+    Compute S(q) in TRAVIS Faber-Ziman convention using dynasor.
+
+    Steps:
+
+    1. Read trajectory with dynasor (ASE for .extxyz; MDAnalysis for raw dump).
+    2. Compute partial S_ab(q) on a fine spherical q-grid.
+    3. Apply TRAVIS Cromer-Mann form factors: I_xray = sum f_a * f_b * S_ab.
+    4. Spherical binning at dq = 0.02 A^-1.
+    5. Faber-Ziman normalization with Laue term: S_FZ = I/<f>^2 - <f^2>/<f>^2 + 1.
+    6. Savitzky-Golay smoothing.
+
+    Parameters
+    ----------
+    traj_path : Path
+        Path to the trajectory file (.extxyz or LAMMPS dump).
+
+    Returns
+    -------
+    dict
+        Dictionary with keys ``q``, ``Sq``, ``n_qpoints``,
+        ``n_qpoints_used``, ``cell``, ``atom_types``,
+        ``particle_counts``, and ``params``.
+    """
+    from dynasor import (
+        Trajectory,
+        compute_static_structure_factors,
+        get_spherical_qpoints,
+    )
+
+    traj_str = str(traj_path)
+    is_extxyz = traj_str.endswith(".extxyz") or traj_str.endswith(".xyz")
+
+    if is_extxyz:
+        first_frame = ase_read(traj_str, index=0)
+        symbols = first_frame.get_chemical_symbols()
+        atomic_indices = {}
+        for i, sym in enumerate(symbols):
+            atomic_indices.setdefault(sym, []).append(i)
+
+        traj = Trajectory(
+            traj_str,
+            trajectory_format="ase",
+            atomic_indices=atomic_indices,
+        )
+    else:
+        import MDAnalysis as mda  # noqa: N813
+
+        u = mda.Universe(traj_str, format="LAMMPSDUMP")
+        types = u.atoms.types
+        atomic_indices = {}
+        for t, elem in TYPE_TO_ELEMENT.items():
+            mask = types == str(t)
+            idx = np.where(mask)[0].tolist()
+            if idx:
+                atomic_indices[elem] = idx
+
+        traj = Trajectory(
+            traj_str,
+            trajectory_format="lammps_mdanalysis",
+            atomic_indices=atomic_indices,
+        )
+
+    q_points = get_spherical_qpoints(traj.cell, q_max=Q_MAX, max_points=MAX_QPOINTS)
+    sample = compute_static_structure_factors(traj, q_points)
+
+    q_norms = np.linalg.norm(sample.q_points, axis=1)
+    atom_types = list(sample.particle_counts.keys())
+
+    ff_at_q = {at: compute_fq_travis(at, q_norms) for at in atom_types}
+    i_xray = np.zeros(len(q_norms))
+    for s1, s2 in sample.pairs:
+        sq_ab = sample[f"Sq_{s1}_{s2}"].flatten()
+        i_xray += ff_at_q[s1] * ff_at_q[s2] * sq_ab
+
+    q_bins = np.arange(0.0, Q_MAX + DQ_BIN, DQ_BIN)
+    q_centers = 0.5 * (q_bins[:-1] + q_bins[1:])
+    i_xray_binned = np.full(len(q_centers), np.nan)
+    counts = np.zeros(len(q_centers), dtype=int)
+
+    for i in range(len(q_centers)):
+        mask = (q_norms >= q_bins[i]) & (q_norms < q_bins[i + 1])
+        n = mask.sum()
+        if n > 0:
+            i_xray_binned[i] = np.mean(i_xray[mask])
+            counts[i] = n
+
+    f_avg = np.zeros(len(q_centers))
+    f2_avg = np.zeros(len(q_centers))
+    for elem, c in CONC.items():
+        fq = compute_fq_travis(elem, q_centers)
+        f_avg += c * fq
+        f2_avg += c * fq**2
+    f_avg_sq = f_avg**2
+
+    sq_fz = np.where(
+        f_avg_sq > 0,
+        i_xray_binned / f_avg_sq - f2_avg / f_avg_sq + 1.0,
+        np.nan,
+    )
+
+    valid = ~np.isnan(sq_fz) & (q_centers >= 0.3) & (q_centers <= Q_MAX)
+    q_v = q_centers[valid]
+    sq_v = sq_fz[valid]
+    if len(sq_v) > SAVGOL_WINDOW:
+        sq_smooth = savgol_filter(sq_v, SAVGOL_WINDOW, SAVGOL_ORDER)
+    else:
+        sq_smooth = sq_v
+
+    rmask = (q_v >= Q_MIN) & (q_v <= 12.0)
+
+    return {
+        "q": q_v[rmask].tolist(),
+        "Sq": sq_smooth[rmask].tolist(),
+        "n_qpoints": len(q_points),
+        "n_qpoints_used": len(sample.q_points),
+        "cell": traj.cell.tolist(),
+        "atom_types": traj.atom_types,
+        "particle_counts": {k: int(v) for k, v in sample.particle_counts.items()},
+        "params": {
+            "q_max": Q_MAX,
+            "q_min": Q_MIN,
+            "dq_bin": DQ_BIN,
+            "max_qpoints": MAX_QPOINTS,
+            "savgol_window": SAVGOL_WINDOW,
+            "savgol_order": SAVGOL_ORDER,
+            "form_factors": "cromer-mann-4gaussian",
+            "normalization": "faber-ziman",
+        },
+    }
+
+
+def find_trajectory(model_name: str) -> Path | None:
+    """
+    Find NVT trajectory for a model.
+
+    Prefers the converted ``.extxyz`` and falls back to the raw LAMMPS dump.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the MLIP model.
+
+    Returns
+    -------
+    Path or None
+        Path to the trajectory file, or ``None`` if not found.
+    """
+    extxyz = DATA_ROOT / model_name / "nvt_trajectory.extxyz"
+    if extxyz.exists():
+        return extxyz
+
+    lammpstrj = DATA_ROOT / model_name / "nvt_trajectory.lammpstrj"
+    if lammpstrj.exists():
+        return lammpstrj
+
+    return None
+
+
+# =============================================================================
+# Pytest interface (ml-peg convention)
+# =============================================================================
+
+
+@pytest.mark.parametrize("model_name", MODELS)
+def test_extract_density(model_name: str) -> None:
+    """
+    Extract and save NPT density data for one model.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the MLIP model in the registry.
+    """
+    src = _density_source_path(model_name)
+    if not src.exists():
+        pytest.skip(f"No density data for {model_name} at {src}")
+
+    with open(src) as f:
+        result = json.load(f)
+
+    out_dir = OUT_PATH / model_name
+    out_dir.mkdir(parents=True, exist_ok=True)
+    with open(out_dir / "density.json", "w") as f:
+        json.dump(result, f, indent=2)
+
+    assert result["rho_mean"] > 0, f"Negative density for {model_name}"
+    assert abs(result["rho_error_pct"]) < 50, (
+        f"Density error > 50% for {model_name}"
+    )
+
+
+@pytest.mark.parametrize("model_name", MODELS)
+def test_compute_rdf(model_name: str) -> None:
+    """
+    Compute and save Li-O RDFs and coordination numbers for one model.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the MLIP model in the registry.
+    """
+    traj_path = find_trajectory(model_name)
+    if traj_path is None:
+        pytest.skip(f"No NVT trajectory for {model_name}")
+
+    first_frame = ase_read(str(traj_path), index=0, format="extxyz")
+    o_water_idx, o_tfsi_idx = identify_o_types(first_frame)
+
+    assert len(o_water_idx) > 0, f"No O_water found for {model_name}"
+    assert len(o_tfsi_idx) > 0, f"No O_TFSI found for {model_name}"
+
+    result = compute_rdf(traj_path, o_water_idx, o_tfsi_idx)
+
+    out_dir = OUT_PATH / model_name
+    out_dir.mkdir(parents=True, exist_ok=True)
+    with open(out_dir / "rdf.json", "w") as f:
+        json.dump(
+            {k: v for k, v in result.items() if not isinstance(v, list)}, f, indent=2
+        )
+    np.savez(
+        out_dir / "rdf.npz",
+        r=np.array(result["r"]),
+        gr_LiO_total=np.array(result["gr_LiO_total"]),
+        gr_LiO_water=np.array(result["gr_LiO_water"]),
+        gr_LiO_TFSI=np.array(result["gr_LiO_TFSI"]),
+    )
+
+    assert 2.0 < result["coord_LiO_total"] < 8.0, (
+        f"Unexpected Li-O_total CN={result['coord_LiO_total']:.2f} "
+        f"for {model_name}"
+    )
+
+
+@pytest.mark.parametrize("model_name", MODELS)
+def test_compute_xray_sq(model_name: str) -> None:
+    """
+    Compute and save X-ray S(q) in Faber-Ziman convention for one model.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the MLIP model in the registry.
+    """
+    traj_path = find_trajectory(model_name)
+    if traj_path is None:
+        pytest.skip(f"No NVT trajectory for {model_name}")
+
+    result = compute_sq_travis_style(traj_path)
+    result["model"] = model_name
+    result["traj_path"] = str(traj_path)
+
+    out_dir = OUT_PATH / model_name
+    out_dir.mkdir(parents=True, exist_ok=True)
+    with open(out_dir / "xray_sq.json", "w") as f:
+        json.dump(result, f, indent=2)
+    np.savez(
+        out_dir / "xray_sq.npz",
+        q=np.array(result["q"]),
+        Sq=np.array(result["Sq"]),
+    )
+
+    assert len(result["q"]) > 10, f"Too few q-points for {model_name}"
diff --git a/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
new file mode 100644
index 000000000..e8db0d638
--- /dev/null
+++ b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
@@ -0,0 +1,238 @@
+"""
+Reference MD protocol for the WiSE 21 m LiTFSI/H2O benchmark.
+
+This script reproduces, with janus-core, the same MD protocol used to
+generate the production trajectories analysed by the density, rdf, and
+xray_sf sub-benchmarks.  Production trajectories were run externally
+with LAMMPS + MACE (symmetrix/Kokkos) on Adastra (MI250X); the driving
+scripts are ``in.pipeline_cpu.lmp`` (Min -> NVT -> NPT) and
+``in.nvt_continue.lmp`` (NVT continuation) on the Adastra workdir.
+
+The protocol is recast here for:
+
+  * transparent, version-pinned documentation of the simulation settings,
+  * optional ml-peg-native replication on smaller systems or shorter windows.
+
+It is intentionally marked ``pytest.mark.skip`` in the default test run:
+at ~1500 atoms and 250+ ps of cumulative MD, a Janus/ASE run would take
+days of GPU time per registered model.  Run manually with
+
+    python calc_md_reference.py <model_name>
+
+or call :func:`run_reference_md` from your own script.
+
+Protocol summary
+----------------
+System     : 64 LiTFSI + 170 H2O  (1534 atoms, 21 m)
+Ensembles  : Min (FIRE) -> NVT 50 ps (NH) -> NPT 200 ps (NH, iso) ->
+             optional NVT continuation 50 ps (NH)
+Temperature: 298.15 K
+Pressure   : 1.01325 bar (1 atm)
+Timestep   : 0.5 fs
+TDAMP/PDAMP: 50 fs / 500 fs (100*dt / 1000*dt, Nosé-Hoover damping)
+Dump cadence: every 0.1 ps (= 200 steps)
+References : Gilbert et al., JCED 62, 2056 (2017);
+             Watanabe et al., JPCB 125, 7477 (2021).
+"""
+
+from __future__ import annotations
+
+from copy import copy
+from pathlib import Path
+from typing import Any
+
+from ase import Atoms
+from ase.io import read
+import pytest
+
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# --- Physical parameters (mirror Adastra LAMMPS in.pipeline_cpu.lmp) ---------
+
+TEMPERATURE_K = 298.15
+PRESSURE_BAR = 1.01325             # 1 atm
+TIMESTEP_FS = 0.5
+
+# 1 bar = 1e-4 GPa (Janus NPT accepts pressure in GPa)
+PRESSURE_GPA = PRESSURE_BAR * 1e-4
+
+# Nosé-Hoover damping times (match LAMMPS `fix npt`/`fix nvt` TDAMP/PDAMP)
+THERMOSTAT_TIME_FS = 50.0          # 100 * timestep
+BAROSTAT_TIME_FS = 500.0           # 1000 * timestep
+
+# Step counts (at 0.5 fs/step)
+NVT_STEPS = 100_000                # 50 ps  (warm-up / equilibration)
+NPT_STEPS = 400_000                # 200 ps (density + configurational sampling)
+NVT_CONT_STEPS = 100_000           # 50 ps  (optional continuation)
+
+# IO cadence (match LAMMPS THERMO_EVERY / DUMP_EVERY)
+STATS_EVERY = 100                  # 0.05 ps
+TRAJ_EVERY = 200                   # 0.1 ps  (501 frames in 50 ps)
+
+# Minimization (match LAMMPS `min_style fire; minimize 1.0e-6 0.2 2000 20000`)
+MIN_FMAX = 0.2                     # eV/Å
+MIN_STEPS = 2000
+
+SEED = 42
+
+INITIAL_STRUCTURE = DATA_PATH / "p64_w170_initial.xyz"
+
+
+def load_initial_structure() -> Atoms:
+    """
+    Load the 64 LiTFSI + 170 H2O cubic simulation box.
+
+    Returns
+    -------
+    Atoms
+        ASE Atoms at the experimental density (L = 27.4938 Å, 1534 atoms).
+
+    Raises
+    ------
+    FileNotFoundError
+        If the reference structure file is not present. The file is not
+        shipped with the repository; provide an equivalent p64_w170 box
+        (LAMMPS data file converted to .xyz) or point ``INITIAL_STRUCTURE``
+        at your own starting configuration.
+    """
+    if not INITIAL_STRUCTURE.exists():
+        raise FileNotFoundError(
+            f"Reference initial structure not found at {INITIAL_STRUCTURE}. "
+            "Provide an equivalent p64_w170 LAMMPS box (converted to extxyz) "
+            "or point INITIAL_STRUCTURE at your own starting configuration."
+        )
+    return read(INITIAL_STRUCTURE)
+
+
+def run_reference_md(
+    model_name: str,
+    model: Any,
+    *,
+    run_continuation: bool = False,
+) -> None:
+    """
+    Run the Min -> NVT -> NPT reference protocol for one registered model.
+
+    Parameters
+    ----------
+    model_name
+        Registry name of the MLIP model.
+    model
+        Model object returned by :func:`load_models`; must expose
+        ``default_dtype`` and ``get_calculator``.
+    run_continuation
+        If True, append a further 50 ps of NVT after the NPT block
+        (analogue of ``in.nvt_continue.lmp``). Default False.
+    """
+    # Lazy imports: janus_core pulls in torch, which may be absent in
+    # environments that only run the analysis stack.
+    from janus_core.calculations.geom_opt import GeomOpt
+    from janus_core.calculations.md import NPT, NVT_NH
+
+    model.default_dtype = "float64"
+    calc = model.get_calculator()
+
+    struct = load_initial_structure()
+    struct.calc = copy(calc)
+
+    model_out = OUT_PATH / model_name
+    model_out.mkdir(parents=True, exist_ok=True)
+
+    # -- Minimization (FIRE, loose tolerance) ---------------------------------
+    GeomOpt(
+        struct=struct,
+        fmax=MIN_FMAX,
+        optimizer="FIRE",
+        steps=MIN_STEPS,
+        write_traj=False,
+        file_prefix=model_out / "minimize",
+    ).run()
+
+    # -- NVT equilibration, 50 ps ---------------------------------------------
+    NVT_NH(
+        struct=struct,
+        temp=TEMPERATURE_K,
+        timestep=TIMESTEP_FS,
+        steps=NVT_STEPS,
+        thermostat_time=THERMOSTAT_TIME_FS,
+        stats_every=STATS_EVERY,
+        traj_every=TRAJ_EVERY,
+        seed=SEED,
+        file_prefix=model_out / "nvt_equil",
+    ).run()
+
+    # -- NPT production, 200 ps (isotropic Nosé-Hoover; density sampling) -----
+    NPT(
+        struct=struct,
+        temp=TEMPERATURE_K,
+        pressure=PRESSURE_GPA,
+        timestep=TIMESTEP_FS,
+        steps=NPT_STEPS,
+        thermostat_time=THERMOSTAT_TIME_FS,
+        barostat_time=BAROSTAT_TIME_FS,
+        stats_every=STATS_EVERY,
+        traj_every=TRAJ_EVERY,
+        seed=SEED,
+        file_prefix=model_out / "npt_prod",
+    ).run()
+
+    # -- Optional NVT continuation, 50 ps -------------------------------------
+    if run_continuation:
+        NVT_NH(
+            struct=struct,
+            temp=TEMPERATURE_K,
+            timestep=TIMESTEP_FS,
+            steps=NVT_CONT_STEPS,
+            thermostat_time=THERMOSTAT_TIME_FS,
+            stats_every=STATS_EVERY,
+            traj_every=TRAJ_EVERY,
+            seed=SEED,
+            file_prefix=model_out / "nvt_continue",
+        ).run()
+
+
+@pytest.mark.skip(
+    reason=(
+        "Reference protocol only — production MD is run externally with "
+        "LAMMPS+symmetrix on Adastra (see module docstring). Running this in "
+        "pytest would take days of GPU per model."
+    )
+)
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_md_reference(mlip: tuple[str, Any]) -> None:
+    """
+    Smoke test for the reference Janus MD protocol.
+
+    Parameters
+    ----------
+    mlip
+        ``(model_name, model)`` pair from the ml-peg registry.
+    """
+    model_name, model = mlip
+    run_reference_md(model_name, model)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Run the WiSE reference MD protocol for one registered model."
+    )
+    parser.add_argument("model", help=f"one of: {sorted(MODELS)}")
+    parser.add_argument(
+        "--continuation",
+        action="store_true",
+        help="append the optional 50 ps NVT continuation after NPT.",
+    )
+    args = parser.parse_args()
+
+    if args.model not in MODELS:
+        parser.error(f"unknown model '{args.model}'. Registered: {sorted(MODELS)}")
+
+    run_reference_md(args.model, MODELS[args.model], run_continuation=args.continuation)
diff --git a/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py b/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py
deleted file mode 100644
index 7bc8de2ab..000000000
--- a/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py
+++ /dev/null
@@ -1,313 +0,0 @@
-"""
-Compute Li-O radial distribution functions for LiTFSI/H2O electrolyte.
-
-Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model
-and computes g(r) for Li-O_total, Li-O_water (O bonded to H, d_OH < 1.25 A),
-and Li-O_TFSI (O bonded to S, d_OS < 1.75 A), plus coordination numbers by
-integration of the first peak.
-
-System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms).
-Experimental reference for coordination numbers:
-Watanabe et al., J. Phys. Chem. B 125, 7477 (2021), DOI: 10.1021/acs.jpcb.1c04693.
-Neutron diffraction with 6Li/7Li + H/D isotopic substitution, ~18.5 m LiTFSI/H2O.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-from pathlib import Path
-
-from ase.geometry import get_distances
-from ase.io import iread
-import numpy as np
-import pytest
-
-# --- Configuration -----------------------------------------------------------
-
-DATA_ROOT = Path(
-    os.environ.get(
-        "ML_PEG_WISE_XRAY_DATA_ROOT",
-        "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf",
-    )
-)
-OUT_PATH = Path(__file__).parent / "outputs"
-
-MODELS = [
-    "matpes-r2scan",
-    "mace-mpa-0-medium",
-    "mace-omat-0-medium",
-    "mace-mp-0b3",
-    "mace-mh-1-omat",
-    "mace-mh-1-omol",
-]
-
-# RDF parameters
-R_MAX = 6.0  # Å — covers first + second shell
-DR = 0.02  # Å — bin width
-R_CUT_COORD = 2.83  # Å — first minimum of Li-O g(r) from r2SCAN AIMD reference
-
-# O-type identification cutoffs (applied on first frame only)
-D_OH_CUT = 1.25  # Å — O-H bond in water
-D_OS_CUT = 1.75  # Å — O-S bond in TFSI
-
-
-# --- Helpers -----------------------------------------------------------------
-
-
-def identify_o_types(atoms) -> tuple[np.ndarray, np.ndarray]:
-    """
-    Return indices of O_water and O_TFSI from a single ASE Atoms frame.
-
-    Parameters
-    ----------
-    atoms : ase.Atoms
-        A single frame from the trajectory.
-
-    Returns
-    -------
-    o_water : np.ndarray
-        Indices of oxygen atoms bonded to hydrogen (water oxygens).
-    o_tfsi : np.ndarray
-        Indices of oxygen atoms bonded to sulfur (TFSI oxygens).
-    """
-    syms = np.array(atoms.get_chemical_symbols())
-    pos = atoms.get_positions()
-    cell = atoms.get_cell()
-    pbc = atoms.get_pbc()
-
-    o_idx = np.where(syms == "O")[0]
-    h_idx = np.where(syms == "H")[0]
-    s_idx = np.where(syms == "S")[0]
-
-    o_water, o_tfsi = [], []
-    for o in o_idx:
-        _, d_oh = get_distances(pos[o : o + 1], pos[h_idx], cell=cell, pbc=pbc)
-        _, d_os = get_distances(pos[o : o + 1], pos[s_idx], cell=cell, pbc=pbc)
-        if d_oh.min() < D_OH_CUT:
-            o_water.append(o)
-        elif d_os.min() < D_OS_CUT:
-            o_tfsi.append(o)
-
-    return np.array(o_water), np.array(o_tfsi)
-
-
-def compute_rdf(
-    traj_path: Path,
-    o_water_idx: np.ndarray,
-    o_tfsi_idx: np.ndarray,
-    r_max: float = R_MAX,
-    dr: float = DR,
-    skip_frames: int = 0,  # trajectories are pre-equilibrated (50-100 ps window)
-) -> dict:
-    """
-    Compute Li-O RDFs from an extxyz trajectory.
-
-    Parameters
-    ----------
-    traj_path : Path
-        Path to .extxyz trajectory file.
-    o_water_idx : np.ndarray
-        Atom indices of O_water (from first frame, fixed).
-    o_tfsi_idx : np.ndarray
-        Atom indices of O_TFSI (from first frame, fixed).
-    r_max : float
-        Maximum distance for RDF.
-    dr : float
-        Bin width.
-    skip_frames : int
-        Number of initial frames to skip (equilibration).
-
-    Returns
-    -------
-    dict
-        Dictionary with keys ``r``, ``gr_LiO_total``, ``gr_LiO_water``,
-        ``gr_LiO_TFSI``, ``coord_LiO_total``, ``coord_LiO_water``,
-        ``coord_LiO_TFSI``, ``n_li``, ``n_O_water``, ``n_O_TFSI``,
-        ``n_frames_used``, and ``r_cut_coord``.
-    """
-    bins = np.arange(0, r_max + dr, dr)
-    r_centers = 0.5 * (bins[:-1] + bins[1:])
-    n_bins = len(r_centers)
-
-    hist_total = np.zeros(n_bins)
-    hist_water = np.zeros(n_bins)
-    hist_tfsi = np.zeros(n_bins)
-
-    n_frames = 0
-    n_li = None
-    volume = None
-    o_all_idx = np.concatenate([o_water_idx, o_tfsi_idx])
-
-    for frame_idx, atoms in enumerate(
-        iread(str(traj_path), format="extxyz", index=":")
-    ):
-        if frame_idx < skip_frames:
-            continue
-
-        syms = np.array(atoms.get_chemical_symbols())
-        pos = atoms.get_positions()
-        cell = atoms.get_cell()
-        pbc = atoms.get_pbc()
-
-        li_idx = np.where(syms == "Li")[0]
-        if n_li is None:
-            n_li = len(li_idx)
-        if volume is None:
-            volume = atoms.get_volume()
-
-        pos_li = pos[li_idx]
-
-        for o_set, hist in [
-            (o_all_idx, hist_total),
-            (o_water_idx, hist_water),
-            (o_tfsi_idx, hist_tfsi),
-        ]:
-            pos_o = pos[o_set]
-            _, dists = get_distances(pos_li, pos_o, cell=cell, pbc=pbc)
-            dists_flat = dists.ravel()
-            dists_flat = dists_flat[dists_flat < r_max]
-            h, _ = np.histogram(dists_flat, bins=bins)
-            hist += h
-
-        n_frames += 1
-
-    if n_frames == 0 or n_li is None:
-        raise RuntimeError(f"No frames processed from {traj_path}")
-
-    # Normalize to g(r)
-    def normalize(hist, n_central, n_neighbor):
-        """
-        Normalize histogram to g(r).
-
-        Parameters
-        ----------
-        hist : np.ndarray
-            Raw pair-distance histogram.
-        n_central : int
-            Number of central atoms (Li).
-        n_neighbor : int
-            Number of neighbor atoms (O subset).
-
-        Returns
-        -------
-        np.ndarray
-            Normalized radial distribution function.
-        """
-        shell_vol = (4.0 / 3.0) * np.pi * (bins[1:] ** 3 - bins[:-1] ** 3)
-        rho = n_neighbor / volume
-        norm = n_central * n_frames * rho * shell_vol
-        return hist / norm
-
-    n_o_total = len(o_all_idx)
-    n_o_water = len(o_water_idx)
-    n_o_tfsi = len(o_tfsi_idx)
-
-    gr_total = normalize(hist_total, n_li, n_o_total)
-    gr_water = normalize(hist_water, n_li, n_o_water)
-    gr_tfsi = normalize(hist_tfsi, n_li, n_o_tfsi)
-
-    # Coordination numbers: integrate 4π ρ g(r) r² dr from 0 to R_CUT_COORD
-    def coord_number(gr, n_neighbor):
-        """
-        Compute coordination number from g(r).
-
-        Parameters
-        ----------
-        gr : np.ndarray
-            Radial distribution function.
-        n_neighbor : int
-            Number of neighbor atoms (O subset).
-
-        Returns
-        -------
-        float
-            Integrated coordination number up to R_CUT_COORD.
-        """
-        rho = n_neighbor / volume
-        integrand = 4.0 * np.pi * rho * gr * r_centers**2 * dr
-        mask = r_centers <= R_CUT_COORD
-        return float(np.sum(integrand[mask]))
-
-    return {
-        "r": r_centers.tolist(),
-        "gr_LiO_total": gr_total.tolist(),
-        "gr_LiO_water": gr_water.tolist(),
-        "gr_LiO_TFSI": gr_tfsi.tolist(),
-        "coord_LiO_total": coord_number(gr_total, n_o_total),
-        "coord_LiO_water": coord_number(gr_water, n_o_water),
-        "coord_LiO_TFSI": coord_number(gr_tfsi, n_o_tfsi),
-        "n_li": n_li,
-        "n_O_water": n_o_water,
-        "n_O_TFSI": n_o_tfsi,
-        "n_frames_used": n_frames,
-        "r_cut_coord": R_CUT_COORD,
-    }
-
-
-def find_trajectory(model: str) -> Path | None:
-    """
-    Find NVT extxyz trajectory for a model.
-
-    Parameters
-    ----------
-    model : str
-        Name of the MLIP model.
-
-    Returns
-    -------
-    Path or None
-        Path to the trajectory file, or None if not found.
-    """
-    p = DATA_ROOT / model / "nvt_trajectory.extxyz"
-    return p if p.exists() else None
-
-
-# --- Pytest interface --------------------------------------------------------
-
-
-@pytest.mark.parametrize("model", MODELS)
-def test_compute_rdf(model: str) -> None:
-    """
-    Compute and save Li-O RDFs and coordination numbers for one model.
-
-    Parameters
-    ----------
-    model : str
-        Name of the MLIP model to compute RDFs for.
-    """
-    traj_path = find_trajectory(model)
-    if traj_path is None:
-        pytest.skip(f"No NVT trajectory for {model}")
-
-    # Identify O types from first frame (indices fixed throughout NVT)
-    from ase.io import read as ase_read
-
-    first_frame = ase_read(str(traj_path), index=0, format="extxyz")
-    o_water_idx, o_tfsi_idx = identify_o_types(first_frame)
-
-    assert len(o_water_idx) > 0, f"No O_water found for {model}"
-    assert len(o_tfsi_idx) > 0, f"No O_TFSI found for {model}"
-
-    result = compute_rdf(traj_path, o_water_idx, o_tfsi_idx)
-
-    out_dir = OUT_PATH / model
-    out_dir.mkdir(parents=True, exist_ok=True)
-
-    with open(out_dir / "rdf.json", "w") as f:
-        json.dump(
-            {k: v for k, v in result.items() if not isinstance(v, list)}, f, indent=2
-        )
-
-    np.savez(
-        out_dir / "rdf.npz",
-        r=np.array(result["r"]),
-        gr_LiO_total=np.array(result["gr_LiO_total"]),
-        gr_LiO_water=np.array(result["gr_LiO_water"]),
-        gr_LiO_TFSI=np.array(result["gr_LiO_TFSI"]),
-    )
-
-    # Sanity checks
-    assert 2.0 < result["coord_LiO_total"] < 8.0, (
-        f"Unexpected Li-O_total CN={result['coord_LiO_total']:.2f} for {model}"
-    )
diff --git a/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py b/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py
deleted file mode 100644
index c4e173ffd..000000000
--- a/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py
+++ /dev/null
@@ -1,345 +0,0 @@
-"""
-Compute X-ray structure factor S(q) for LiTFSI/H2O electrolyte.
-
-Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model
-and computes S(q) via dynasor using TRAVIS-matching settings: Cromer-Mann
-4-Gaussian form factors (including H), Faber-Ziman normalization with Laue
-monotonic term subtraction, and Savitzky-Golay smoothing
-(window=27, order=3, dq=0.02 A^-1).
-
-System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms).
-Experimental reference: SAXS, Zhang, Lewis, Mars, Wan et al.,
-J. Phys. Chem. B 125, 4501 (2021), DOI: 10.1021/acs.jpcb.1c02189.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-from pathlib import Path
-import warnings
-
-import numpy as np
-import pytest
-from scipy.signal import savgol_filter
-
-warnings.filterwarnings("ignore")
-
-# --- Configuration -----------------------------------------------------------
-
-# Path to converted .extxyz trajectories (will be S3 download for PR)
-DATA_ROOT = Path(
-    os.environ.get(
-        "ML_PEG_WISE_XRAY_DATA_ROOT",
-        "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf",
-    )
-)
-OUT_PATH = Path(__file__).parent / "outputs"
-
-MODELS = [
-    "matpes-r2scan",
-    "mace-mpa-0-medium",
-    "mace-omat-0-medium",
-    "mace-mp-0b3",
-    "mace-mh-1-omat",
-    "mace-mh-1-omol",
-]
-
-# S(q) computation parameters (TRAVIS-matching)
-Q_MAX = 13.0  # Å⁻¹
-Q_MIN = 0.5  # Å⁻¹
-MAX_QPOINTS = 50000
-DQ_BIN = 0.02  # Å⁻¹
-SAVGOL_WINDOW = 27  # optimal for matching TRAVIS
-SAVGOL_ORDER = 3
-
-# LAMMPS type → element (for fallback)
-TYPE_TO_ELEMENT = {1: "Li", 2: "C", 3: "F", 4: "S", 5: "N", 6: "O", 7: "H"}
-
-# System composition (p64_w170: 64 LiTFSI + 170 H2O)
-COMPOSITION = {"Li": 64, "C": 128, "F": 384, "S": 128, "N": 64, "O": 426, "H": 340}
-N_ATOMS = sum(COMPOSITION.values())  # 1534
-CONC = {k: v / N_ATOMS for k, v in COMPOSITION.items()}
-
-# TRAVIS Cromer-Mann 4-Gaussian form factors (International Tables)
-TRAVIS_FF = {
-    "S": {
-        "a": [6.905, 5.203, 1.438, 1.586],
-        "b": [1.468, 22.215, 0.254, 56.172],
-        "c": 0.867,
-    },
-    "F": {
-        "a": [3.539, 2.641, 1.517, 1.024],
-        "b": [10.283, 4.294, 0.262, 26.148],
-        "c": 0.278,
-    },
-    "O": {
-        "a": [3.049, 2.287, 1.546, 0.867],
-        "b": [13.277, 5.701, 0.324, 32.909],
-        "c": 0.251,
-    },
-    "N": {
-        "a": [12.213, 3.132, 2.013, 1.166],
-        "b": [0.006, 9.893, 28.997, 0.583],
-        "c": -11.529,
-    },
-    "C": {
-        "a": [2.310, 1.020, 1.589, 0.865],
-        "b": [20.844, 10.208, 0.569, 51.651],
-        "c": 0.216,
-    },
-    "Li": {
-        "a": [1.128, 0.751, 0.618, 0.465],
-        "b": [3.955, 1.052, 85.391, 168.261],
-        "c": 0.038,
-    },
-    "H": {
-        "a": [0.493, 0.323, 0.140, 0.041],
-        "b": [10.511, 26.126, 3.142, 57.800],
-        "c": 0.003,
-    },
-}
-
-
-# --- Form factor helpers -----------------------------------------------------
-
-
-def compute_fq_travis(elem: str, q_arr: np.ndarray) -> np.ndarray:
-    """
-    Compute X-ray form factor f(q) using TRAVIS 4-Gaussian (Cromer-Mann) params.
-
-    Parameters
-    ----------
-    elem : str
-        Chemical element symbol (e.g. ``"Li"``, ``"O"``).
-    q_arr : np.ndarray
-        Array of q values in inverse angstroms.
-
-    Returns
-    -------
-    np.ndarray
-        Form factor values evaluated at each q.
-    """
-    ff = TRAVIS_FF[elem]
-    s2 = (q_arr / (4 * np.pi)) ** 2
-    return sum(ff["a"][i] * np.exp(-ff["b"][i] * s2) for i in range(4)) + ff["c"]
-
-
-# --- S(q) computation -------------------------------------------------------
-
-
-def compute_sq_travis_style(traj_path: Path) -> dict:
-    """
-    Compute S(q) in TRAVIS Faber-Ziman convention using dynasor.
-
-    Steps:
-
-    1. Read trajectory with dynasor (LAMMPS dump via MDAnalysis).
-    2. Compute partial S_ab(q) on a fine spherical q-grid.
-    3. Apply TRAVIS Cromer-Mann form factors: I_xray = sum f_a * f_b * S_ab.
-    4. Spherical binning at dq = 0.02 A^-1.
-    5. Faber-Ziman normalization with Laue term: S_FZ = I/<f>^2 - <f^2>/<f>^2 + 1.
-    6. Savitzky-Golay smoothing.
-
-    Parameters
-    ----------
-    traj_path : Path
-        Path to the trajectory file (.extxyz or LAMMPS dump).
-
-    Returns
-    -------
-    dict
-        Dictionary with keys ``q``, ``Sq``, ``n_qpoints``,
-        ``n_qpoints_used``, ``cell``, ``atom_types``,
-        ``particle_counts``, and ``params``.
-    """
-    from dynasor import (
-        Trajectory,
-        compute_static_structure_factors,
-        get_spherical_qpoints,
-    )
-
-    traj_str = str(traj_path)
-    is_extxyz = traj_str.endswith(".extxyz") or traj_str.endswith(".xyz")
-
-    if is_extxyz:
-        # For .extxyz: dynasor reads it directly, atomic_indices from element symbols
-        from ase.io import read as ase_read
-
-        first_frame = ase_read(traj_str, index=0)
-        symbols = first_frame.get_chemical_symbols()
-        atomic_indices = {}
-        for i, sym in enumerate(symbols):
-            atomic_indices.setdefault(sym, []).append(i)
-
-        traj = Trajectory(
-            traj_str,
-            trajectory_format="ase",
-            atomic_indices=atomic_indices,
-        )
-    else:
-        # For LAMMPS dump: use MDAnalysis backend
-        import MDAnalysis as mda  # noqa: N813
-
-        u = mda.Universe(traj_str, format="LAMMPSDUMP")
-        types = u.atoms.types
-        atomic_indices = {}
-        for t, elem in TYPE_TO_ELEMENT.items():
-            mask = types == str(t)
-            idx = np.where(mask)[0].tolist()
-            if idx:
-                atomic_indices[elem] = idx
-
-        traj = Trajectory(
-            traj_str,
-            trajectory_format="lammps_mdanalysis",
-            atomic_indices=atomic_indices,
-        )
-
-    q_points = get_spherical_qpoints(traj.cell, q_max=Q_MAX, max_points=MAX_QPOINTS)
-    sample = compute_static_structure_factors(traj, q_points)
-
-    q_norms = np.linalg.norm(sample.q_points, axis=1)
-    atom_types = list(sample.particle_counts.keys())
-
-    # Manual X-ray weighting with TRAVIS form factors (including H)
-    ff_at_q = {at: compute_fq_travis(at, q_norms) for at in atom_types}
-    i_xray = np.zeros(len(q_norms))
-    for s1, s2 in sample.pairs:
-        sq_ab = sample[f"Sq_{s1}_{s2}"].flatten()
-        i_xray += ff_at_q[s1] * ff_at_q[s2] * sq_ab
-
-    # Spherical binning
-    q_bins = np.arange(0.0, Q_MAX + DQ_BIN, DQ_BIN)
-    q_centers = 0.5 * (q_bins[:-1] + q_bins[1:])
-    i_xray_binned = np.full(len(q_centers), np.nan)
-    counts = np.zeros(len(q_centers), dtype=int)
-
-    for i in range(len(q_centers)):
-        mask = (q_norms >= q_bins[i]) & (q_norms < q_bins[i + 1])
-        n = mask.sum()
-        if n > 0:
-            i_xray_binned[i] = np.mean(i_xray[mask])
-            counts[i] = n
-
-    # Faber-Ziman normalization with Laue subtraction
-    f_avg = np.zeros(len(q_centers))
-    f2_avg = np.zeros(len(q_centers))
-    for elem, c in CONC.items():
-        fq = compute_fq_travis(elem, q_centers)
-        f_avg += c * fq
-        f2_avg += c * fq**2
-    f_avg_sq = f_avg**2
-
-    sq_fz = np.where(
-        f_avg_sq > 0,
-        i_xray_binned / f_avg_sq - f2_avg / f_avg_sq + 1.0,
-        np.nan,
-    )
-
-    # Savitzky-Golay smoothing
-    valid = ~np.isnan(sq_fz) & (q_centers >= 0.3) & (q_centers <= Q_MAX)
-    q_v = q_centers[valid]
-    sq_v = sq_fz[valid]
-    if len(sq_v) > SAVGOL_WINDOW:
-        sq_smooth = savgol_filter(sq_v, SAVGOL_WINDOW, SAVGOL_ORDER)
-    else:
-        sq_smooth = sq_v
-
-    # Restrict to useful range
-    rmask = (q_v >= Q_MIN) & (q_v <= 12.0)
-
-    return {
-        "q": q_v[rmask].tolist(),
-        "Sq": sq_smooth[rmask].tolist(),
-        "n_qpoints": len(q_points),
-        "n_qpoints_used": len(sample.q_points),
-        "cell": traj.cell.tolist(),
-        "atom_types": traj.atom_types,
-        "particle_counts": {k: int(v) for k, v in sample.particle_counts.items()},
-        "params": {
-            "q_max": Q_MAX,
-            "q_min": Q_MIN,
-            "dq_bin": DQ_BIN,
-            "max_qpoints": MAX_QPOINTS,
-            "savgol_window": SAVGOL_WINDOW,
-            "savgol_order": SAVGOL_ORDER,
-            "form_factors": "cromer-mann-4gaussian",
-            "normalization": "faber-ziman",
-        },
-    }
-
-
-# --- Trajectory finding ------------------------------------------------------
-
-
-def find_trajectory(model: str) -> Path | None:
-    """
-    Find NVT trajectory for a model.
-
-    Prefers converted .extxyz, falls back to LAMMPS dump.
-
-    Parameters
-    ----------
-    model : str
-        Name of the MLIP model.
-
-    Returns
-    -------
-    Path or None
-        Path to the trajectory file, or None if not found.
-    """
-    # Check for converted .extxyz first
-    extxyz = DATA_ROOT / model / "nvt_trajectory.extxyz"
-    if extxyz.exists():
-        return extxyz
-
-    # Fall back to original LAMMPS dump
-    runs_root = Path("/lus/work/CT9/cin1387/lbrugnoli/prove/runs")
-    model_cell = runs_root / model / "p64_w170"
-    for d in sorted(model_cell.glob("nvt_*"), reverse=True):
-        traj = d / "nvt_long_trajectory.lammpstrj"
-        if traj.exists():
-            return traj
-    for d in sorted(model_cell.glob("pipeline_*"), reverse=True):
-        traj = d / "nvt_trajectory.lammpstrj"
-        if traj.exists():
-            return traj
-
-    return None
-
-
-# --- Pytest interface (ml-peg convention) ------------------------------------
-
-
-@pytest.mark.parametrize("model", MODELS)
-def test_compute_xray_sq(model: str) -> None:
-    """
-    Compute and save X-ray S(q) in Faber-Ziman convention for one model.
-
-    Parameters
-    ----------
-    model : str
-        Name of the MLIP model to compute S(q) for.
-    """
-    traj_path = find_trajectory(model)
-    if traj_path is None:
-        pytest.skip(f"No NVT trajectory for {model}")
-
-    result = compute_sq_travis_style(traj_path)
-    result["model"] = model
-    result["traj_path"] = str(traj_path)
-
-    out_dir = OUT_PATH / model
-    out_dir.mkdir(parents=True, exist_ok=True)
-
-    with open(out_dir / "xray_sq.json", "w") as f:
-        json.dump(result, f, indent=2)
-
-    np.savez(
-        out_dir / "xray_sq.npz",
-        q=np.array(result["q"]),
-        Sq=np.array(result["Sq"]),
-    )
-
-    assert len(result["q"]) > 10, f"Too few q-points for {model}"

From ff0db5c34c7fd136dabbd2ea6674f2bdece15536 Mon Sep 17 00:00:00 2001
From: luca <luca@Luke-EBook.localdomain>
Date: Tue, 28 Apr 2026 11:40:36 +0200
Subject: [PATCH 3/3] Use NPT_MTK and very_slow marker in WiSE MD reference

- Switch NPT (Melchionna) -> NPT_MTK (Martyna-Tobias-Klein) so the
  janus reference matches the LAMMPS fix npt formulation used in
  production. Pass thermostat_chain=3 and barostat_chain=3 explicitly,
  matching the LAMMPS default chain length.
- Replace pytest.mark.skip with pytest.mark.very_slow per Joseph's
  review on PR #445: the reference protocol is now opt-in via
  --run-very-slow rather than unconditionally skipped, so it can be
  exercised when validating new models.
---
 .../md_reference/calc_md_reference.py         | 43 ++++++++++---------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
index e8db0d638..27f7fb5d5 100644
--- a/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
+++ b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py
@@ -2,20 +2,22 @@
 Reference MD protocol for the WiSE 21 m LiTFSI/H2O benchmark.
 
 This script reproduces, with janus-core, the same MD protocol used to
-generate the production trajectories analysed by the density, rdf, and
-xray_sf sub-benchmarks.  Production trajectories were run externally
+generate the production trajectories analysed by the consolidated
+litfsi_h2o_21m benchmark. Production trajectories were run externally
 with LAMMPS + MACE (symmetrix/Kokkos) on Adastra (MI250X); the driving
 scripts are ``in.pipeline_cpu.lmp`` (Min -> NVT -> NPT) and
 ``in.nvt_continue.lmp`` (NVT continuation) on the Adastra workdir.
 
-The protocol is recast here for:
+Integrators are matched to LAMMPS where janus-core exposes a choice:
 
-  * transparent, version-pinned documentation of the simulation settings,
-  * optional ml-peg-native replication on smaller systems or shorter windows.
+  * NVT: Nosé-Hoover chain (``NVT_NH``) -- equivalent to LAMMPS ``fix nvt``.
+  * NPT: Martyna-Tobias-Klein chain (``NPT_MTK``) -- the same formulation
+    used by LAMMPS ``fix npt``. (``NPT`` in janus is Melchionna and would
+    *not* match.)
 
-It is intentionally marked ``pytest.mark.skip`` in the default test run:
-at ~1500 atoms and 250+ ps of cumulative MD, a Janus/ASE run would take
-days of GPU time per registered model.  Run manually with
+The protocol is intentionally marked ``pytest.mark.very_slow``: at ~1500
+atoms and 250+ ps of cumulative MD, an ASE/Janus run takes a day of GPU
+time per registered model. Run manually with
 
     python calc_md_reference.py <model_name>
 
@@ -24,12 +26,14 @@
 Protocol summary
 ----------------
 System     : 64 LiTFSI + 170 H2O  (1534 atoms, 21 m)
-Ensembles  : Min (FIRE) -> NVT 50 ps (NH) -> NPT 200 ps (NH, iso) ->
-             optional NVT continuation 50 ps (NH)
+Ensembles  : Min (FIRE) -> NVT 50 ps (NH chain) ->
+             NPT 200 ps (MTK chain, iso) ->
+             optional NVT continuation 50 ps (NH chain)
 Temperature: 298.15 K
 Pressure   : 1.01325 bar (1 atm)
 Timestep   : 0.5 fs
 TDAMP/PDAMP: 50 fs / 500 fs (100*dt / 1000*dt, Nosé-Hoover damping)
+NH chains  : length 3 for both thermostat and barostat (LAMMPS default)
 Dump cadence: every 0.1 ps (= 200 steps)
 References : Gilbert et al., JCED 62, 2056 (2017);
              Watanabe et al., JPCB 125, 7477 (2021).
@@ -133,7 +137,7 @@ def run_reference_md(
     # Lazy imports: janus_core pulls in torch, which may be absent in
     # environments that only run the analysis stack.
     from janus_core.calculations.geom_opt import GeomOpt
-    from janus_core.calculations.md import NPT, NVT_NH
+    from janus_core.calculations.md import NPT_MTK, NVT_NH
 
     model.default_dtype = "float64"
     calc = model.get_calculator()
@@ -167,8 +171,11 @@ def run_reference_md(
         file_prefix=model_out / "nvt_equil",
     ).run()
 
-    # -- NPT production, 200 ps (isotropic Nosé-Hoover; density sampling) -----
-    NPT(
+    # -- NPT production, 200 ps (isotropic MTK chain; density + structure) ----
+    # NPT_MTK uses the Martyna-Tobias-Klein chain integrator -- the same
+    # formulation as LAMMPS `fix npt iso`. Chain length 3 is the LAMMPS
+    # default for both the thermostat and barostat sub-chains.
+    NPT_MTK(
         struct=struct,
         temp=TEMPERATURE_K,
         pressure=PRESSURE_GPA,
@@ -176,6 +183,8 @@ def run_reference_md(
         steps=NPT_STEPS,
         thermostat_time=THERMOSTAT_TIME_FS,
         barostat_time=BAROSTAT_TIME_FS,
+        thermostat_chain=3,
+        barostat_chain=3,
         stats_every=STATS_EVERY,
         traj_every=TRAJ_EVERY,
         seed=SEED,
@@ -197,13 +206,7 @@ def run_reference_md(
         ).run()
 
 
-@pytest.mark.skip(
-    reason=(
-        "Reference protocol only — production MD is run externally with "
-        "LAMMPS+symmetrix on Adastra (see module docstring). Running this in "
-        "pytest would take days of GPU per model."
-    )
-)
+@pytest.mark.very_slow
 @pytest.mark.parametrize("mlip", MODELS.items())
 def test_md_reference(mlip: tuple[str, Any]) -> None:
     """