From ef5aeccf34b7996cc38f956a30c476bdb3b34aef Mon Sep 17 00:00:00 2001 From: LucaBrugnoli Date: Fri, 3 Apr 2026 11:20:33 +0200 Subject: [PATCH 1/3] Add WiSE electrolyte benchmark (density, X-ray S(q), Li-O RDF) Three sub-benchmarks for 21 m LiTFSI/H2O with 6 MLIP models (matpes-r2scan, mace-mpa-0-medium, mace-omat-0-medium, mace-mp-0b3, mace-mh-1-omat, mace-mh-1-omol): - NPT density vs Gilbert et al. JCED 2017 - X-ray structure factor S(q) vs SAXS experiment - Li-O RDF coordination numbers vs Watanabe et al. JPCB 2021 --- .../wise_electrolytes/data/saxs_maginn.txt | 301 ++++++++++++ .../density/analyse_density.py | 298 ++++++++++++ .../wise_electrolytes/density/metrics.yml | 15 + .../wise_electrolytes/rdf/analyse_rdf.py | 434 ++++++++++++++++++ .../wise_electrolytes/rdf/metrics.yml | 15 + .../xray_sf/analyse_xray_sf.py | 398 ++++++++++++++++ .../wise_electrolytes/xray_sf/metrics.yml | 15 + .../wise_electrolytes/density/app_density.py | 68 +++ ml_peg/app/wise_electrolytes/rdf/app_rdf.py | 69 +++ .../wise_electrolytes/wise_electrolytes.yml | 7 + .../wise_electrolytes/xray_sf/app_xray_sf.py | 66 +++ .../wise_electrolytes/density/calc_density.py | 75 +++ .../calcs/wise_electrolytes/rdf/calc_rdf.py | 313 +++++++++++++ .../wise_electrolytes/xray_sf/calc_xray_sf.py | 345 ++++++++++++++ 14 files changed, 2419 insertions(+) create mode 100644 ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt create mode 100644 ml_peg/analysis/wise_electrolytes/density/analyse_density.py create mode 100644 ml_peg/analysis/wise_electrolytes/density/metrics.yml create mode 100644 ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py create mode 100644 ml_peg/analysis/wise_electrolytes/rdf/metrics.yml create mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py create mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml create mode 100644 ml_peg/app/wise_electrolytes/density/app_density.py create mode 100644 ml_peg/app/wise_electrolytes/rdf/app_rdf.py create mode 100644 ml_peg/app/wise_electrolytes/wise_electrolytes.yml create mode 100644 ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py create mode 100644 ml_peg/calcs/wise_electrolytes/density/calc_density.py create mode 100644 ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py create mode 100644 ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py diff --git a/ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt b/ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt new file mode 100644 index 000000000..08d66b2cf --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/data/saxs_maginn.txt @@ -0,0 +1,301 @@ +# 10.1021/acs.jpcb.1c02189 +# q / A-1 S(q) +0.20849 -0.14530 +0.27792 -0.13506 +0.34735 -0.12067 +0.41678 -0.10336 +0.45728 -0.08727 +0.49200 -0.06776 +0.52671 -0.04801 +0.55178 -0.02997 +0.57493 -0.00949 +0.59036 0.01014 +0.61598 0.03412 +0.64243 0.06049 +0.65400 0.08160 +0.66557 0.10150 +0.67714 0.12052 +0.68293 0.13778 +0.69797 0.15855 +0.70800 0.17996 +0.72012 0.20988 +0.73170 0.24081 +0.74465 0.27042 +0.75484 0.30330 +0.73500 0.28700 +0.76641 0.34761 +0.76586 0.32650 +0.77798 0.38731 +0.77550 0.36819 +0.79286 0.42909 +0.78708 0.40842 +0.80113 0.46380 +0.78129 0.44792 +0.81022 0.50741 +0.81215 0.48840 +0.82427 0.56746 +0.82526 0.54096 +0.81601 0.52692 +0.83584 0.62473 +0.83336 0.60080 +0.81601 0.59129 +0.84741 0.67948 +0.84493 0.65675 +0.82758 0.64688 +0.85899 0.73424 +0.85651 0.71161 +0.83915 0.70247 +0.87001 0.79805 +0.86808 0.77562 +0.86808 0.75989 +0.87965 0.86242 +0.88312 0.83911 +0.87386 0.82146 +0.89370 0.93738 +0.89122 0.91350 +0.88544 0.89753 +0.88544 0.88241 +0.90527 0.99757 +0.89701 0.97555 +0.89701 0.96141 +0.91436 1.05943 +0.90858 1.03504 +0.90858 1.01993 +0.92842 1.12024 +0.92015 1.09454 +0.92015 1.07991 +0.93999 1.17479 +0.93172 1.15208 +0.92015 1.14135 +0.95156 1.21659 +0.94329 1.19840 +0.96258 1.25522 +0.95487 1.23790 +0.97470 1.28555 +0.98727 1.31134 +1.01272 1.33234 +1.05323 1.30934 +1.04744 1.33153 +1.06595 1.28501 +1.07637 1.26497 +1.08794 1.24156 +1.10067 1.21596 +1.11108 1.19365 +1.12150 1.17119 +1.13307 1.14808 +1.14580 1.12380 +1.15853 1.10039 +1.17010 1.07815 +1.18630 1.05913 +1.19980 1.03724 +1.23259 1.01286 +1.27226 0.99067 +1.34252 0.98092 +1.41195 0.97409 +1.46237 0.95577 +1.49874 0.93595 +1.51417 0.92045 +1.53577 0.90319 +1.54965 0.88446 +1.57395 0.86291 +1.59710 0.83755 +1.62024 0.81219 +1.64338 0.78586 +1.66653 0.76026 +1.69380 0.73737 +1.71667 0.71637 +1.75332 0.69589 +1.79299 0.67865 +1.86325 0.66980 +1.93846 0.67614 +1.99748 0.69413 +2.04261 0.71832 +2.07154 0.73685 +2.09468 0.75441 +2.12554 0.77245 +2.14868 0.79293 +2.17183 0.81439 +2.19497 0.83584 +2.21040 0.85462 +2.23354 0.87558 +2.26247 0.90216 +2.27983 0.92593 +2.30297 0.94776 +2.33190 0.97567 +2.35697 1.00091 +2.37587 1.01876 +2.39555 1.03748 +2.42448 1.05796 +2.46498 1.08027 +2.52284 1.09893 +2.59227 1.09722 +2.64665 1.08137 +2.68484 1.06363 +2.72534 1.04480 +2.76502 1.02682 +2.79477 1.01056 +2.82949 0.99506 +2.86420 0.97750 +2.89892 0.95995 +2.93859 0.94156 +2.96835 0.92630 +3.00885 0.90893 +3.05513 0.88918 +3.10060 0.87154 +3.15019 0.85357 +3.19400 0.83804 +3.25764 0.82126 +3.29236 0.80890 +3.34443 0.79342 +3.40807 0.77854 +3.47172 0.76148 +3.54115 0.74636 +3.61058 0.73295 +3.68001 0.72198 +3.74944 0.71491 +3.81887 0.71076 +3.88830 0.71101 +3.95773 0.71539 +4.02716 0.72612 +4.09080 0.74051 +4.14866 0.75709 +4.19660 0.77541 +4.23958 0.79547 +4.27132 0.81512 +4.30901 0.83455 +4.34075 0.85491 +4.37431 0.87607 +4.40787 0.89792 +4.43217 0.91752 +4.46303 0.93459 +4.48617 0.95214 +4.50932 0.97019 +4.53246 0.98921 +4.55560 1.00822 +4.57875 1.02724 +4.60189 1.04724 +4.62503 1.06698 +4.64818 1.08747 +4.66708 1.10595 +4.69446 1.12623 +4.71761 1.14476 +4.74075 1.16305 +4.76389 1.18231 +4.78511 1.20279 +4.80825 1.22298 +4.83718 1.24193 +4.86611 1.26180 +4.90083 1.28374 +4.93554 1.30447 +4.97604 1.32440 +5.02150 1.34407 +5.08019 1.36420 +5.14962 1.37444 +5.21905 1.37225 +5.28269 1.36225 +5.34055 1.34558 +5.38105 1.32586 +5.42155 1.30788 +5.45627 1.29057 +5.49098 1.27301 +5.52570 1.25546 +5.56041 1.23717 +5.59513 1.21742 +5.62985 1.19694 +5.66456 1.17719 +5.69928 1.15671 +5.73399 1.13647 +5.76871 1.11721 +5.80342 1.09795 +5.83814 1.07991 +5.87285 1.06235 +5.90757 1.04650 +5.94724 1.02975 +5.99353 1.01199 +6.06379 0.99359 +6.13322 0.97799 +6.20265 0.96604 +6.27208 0.96043 +6.34151 0.95726 +6.41094 0.95775 +6.48037 0.96043 +6.54980 0.96507 +6.61923 0.97043 +6.68866 0.97458 +6.75809 0.97897 +6.82752 0.98140 +6.89695 0.98287 +6.96638 0.98213 +7.03581 0.98067 +7.10524 0.97823 +7.17467 0.97531 +7.24410 0.97238 +7.31353 0.96872 +7.38296 0.96580 +7.45239 0.96287 +7.52182 0.96043 +7.59125 0.95775 +7.66068 0.95629 +7.73011 0.95434 +7.79954 0.95288 +7.86898 0.95214 +7.93841 0.95117 +8.00784 0.95214 +8.07727 0.95263 +8.14670 0.95385 +8.21613 0.95653 +8.28556 0.96019 +8.35499 0.96458 +8.42442 0.97019 +8.49385 0.97604 +8.56328 0.98238 +8.63271 0.98896 +8.70214 0.99530 +8.77157 1.00262 +8.84100 1.00944 +8.91043 1.01603 +8.97986 1.02432 +9.04929 1.03261 +9.11872 1.04163 +9.18815 1.05162 +9.25758 1.06284 +9.32701 1.07527 +9.39644 1.08747 +9.46587 1.10088 +9.53530 1.11404 +9.60473 1.12745 +9.67417 1.14038 +9.74360 1.15305 +9.81303 1.16329 +9.88246 1.17158 +9.95189 1.17646 +10.02132 1.18061 +10.09075 1.17987 +10.16018 1.17670 +10.22961 1.16890 +10.29904 1.15744 +10.36847 1.14184 +10.43211 1.12584 +10.48997 1.10917 +10.54204 1.08887 +10.60238 1.06946 +10.65583 1.05065 +10.70488 1.03288 +10.76046 1.01389 +10.81629 0.99652 +10.87184 0.98043 +10.92970 0.96346 +10.99334 0.94776 +11.06277 0.93240 +11.13220 0.92069 +11.20163 0.91265 +11.27106 0.90557 +11.34049 0.90314 +11.40992 0.90289 +11.47936 0.90411 +11.54879 0.90704 +11.61822 0.91167 +11.68765 0.91703 +11.75708 0.92362 +11.82651 0.92996 +11.89594 0.93678 diff --git a/ml_peg/analysis/wise_electrolytes/density/analyse_density.py b/ml_peg/analysis/wise_electrolytes/density/analyse_density.py new file mode 100644 index 000000000..f2410484f --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/density/analyse_density.py @@ -0,0 +1,298 @@ +"""Analyse density benchmark for LiTFSI/H2O electrolyte.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import plotly.graph_objects as go +import pytest + +from ml_peg.analysis.utils.utils import load_metrics_config + +# --- Paths ------------------------------------------------------------------- + +CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs" +CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "density" / "outputs" +APP_ROOT = Path(__file__).resolve().parents[3] / "app" +OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density" + +MODELS = [ + "matpes-r2scan", + "mace-mpa-0-medium", + "mace-omat-0-medium", + "mace-mp-0b3", + "mace-mh-1-omat", + "mace-mh-1-omol", +] + +RHO_EXP = 1.7126 # g/cm³ (Gilbert et al., JCED 2017, DOI: 10.1021/acs.jced.7b00135) + +# Map benchmark model names → ml-peg registry names (for app summary table) +MODEL_NAME_MAP = { + "matpes-r2scan": "mace-matpes-r2scan", + "mace-mpa-0-medium": "mace-mpa-0", + "mace-omat-0-medium": "mace-omat-0", +} + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +# --- Helpers ----------------------------------------------------------------- + + +def load_density_results() -> dict[str, dict]: + """ + Load density.json for each model. + + Returns + ------- + dict[str, dict] + Per-model density results keyed by model name. + """ + results = {} + for model in MODELS: + json_path = CALC_PATH / model / "density.json" + if json_path.exists(): + with open(json_path) as f: + results[model] = json.load(f) + return results + + +def normalize_metric(value: float, good: float, bad: float) -> float: + """ + Normalize metric to [0, 1] where 0=good, 1=bad. + + Parameters + ---------- + value : float + Raw metric value. + good : float + Value corresponding to score 0. + bad : float + Value corresponding to score 1. + + Returns + ------- + float + Normalized score clipped to [0, 1]. + """ + if bad == good: + return 0.0 if value <= good else 1.0 + score = (value - good) / (bad - good) + return max(0.0, min(1.0, score)) + + +# --- Plot builders ----------------------------------------------------------- + + +def build_density_bar_chart(data: dict[str, dict]) -> go.Figure: + """ + Build bar chart of density per model. + + Parameters + ---------- + data : dict[str, dict] + Per-model density results. + + Returns + ------- + go.Figure + Plotly bar chart figure. + """ + fig = go.Figure() + for model in MODELS: + if model not in data: + continue + fig.add_trace( + go.Bar( + x=[model], + y=[data[model]["rho_mean"]], + error_y={"type": "data", "array": [data[model]["rho_std"]]}, + name=model, + ) + ) + + fig.add_hline( + y=RHO_EXP, + line_dash="dash", + line_color="red", + annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3, Gilbert 2017)", + ) + fig.update_layout( + title="Electrolyte Density (LiTFSI/H2O, 21 m)", + yaxis_title="Density / g cm⁻³", + showlegend=False, + ) + return fig + + +def build_density_timeseries(data: dict[str, dict]) -> go.Figure: + """ + Build density vs time plot for all models. + + Parameters + ---------- + data : dict[str, dict] + Per-model density results. + + Returns + ------- + go.Figure + Plotly timeseries figure. + """ + fig = go.Figure() + for model in MODELS: + if model not in data: + continue + d = data[model] + fig.add_trace( + go.Scatter( + x=d["time_full"], + y=d["density_full"], + mode="lines", + name=model, + opacity=0.8, + ) + ) + + fig.add_hline( + y=RHO_EXP, + line_dash="dash", + line_color="black", + annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3)", + ) + fig.update_layout( + title="NPT Density vs Time (LiTFSI/H2O)", + xaxis_title="Time / ps", + yaxis_title="Density / g cm⁻³", + ) + return fig + + +# --- Metrics table ----------------------------------------------------------- + + +def build_metrics_table(data: dict[str, dict]) -> dict: + """ + Build metrics table JSON compatible with ml-peg app format. + + Parameters + ---------- + data : dict[str, dict] + Per-model density results. + + Returns + ------- + dict + Table with data, columns, tooltips, and thresholds. + """ + rows = [] + for model in MODELS: + if model not in data: + continue + d = data[model] + abs_err = d["rho_abs_error"] + pct_err = abs(d["rho_error_pct"]) + + score_abs = normalize_metric( + abs_err, + DEFAULT_THRESHOLDS["Density Error"]["good"], + DEFAULT_THRESHOLDS["Density Error"]["bad"], + ) + score_pct = normalize_metric( + pct_err, + DEFAULT_THRESHOLDS["Density Error (%)"]["good"], + DEFAULT_THRESHOLDS["Density Error (%)"]["bad"], + ) + + w_abs = DEFAULT_WEIGHTS.get("Density Error", 1.0) + w_pct = DEFAULT_WEIGHTS.get("Density Error (%)", 1.0) + total_w = w_abs + w_pct + overall = ( + 1.0 - (score_abs * w_abs + score_pct * w_pct) / total_w + if total_w > 0 + else 0.0 + ) + + rows.append( + { + "MLIP": model, + "id": MODEL_NAME_MAP.get(model, model), + "Density (g/cm3)": round(d["rho_mean"], 4), + "Density Error": round(abs_err, 4), + "Density Error (%)": round(pct_err, 2), + "Score": round(overall, 3), + } + ) + + rows.sort(key=lambda r: r["Score"], reverse=True) + + columns = [ + {"name": c, "id": c} + for c in [ + "MLIP", + "Density Error", + "Density Error (%)", + "Score", + ] + ] + + return { + "data": rows, + "columns": columns, + "tooltip_header": DEFAULT_TOOLTIPS, + "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()}, + "weights": dict(DEFAULT_WEIGHTS), + "model_name_map": MODEL_NAME_MAP, + } + + +# --- Pytest interface -------------------------------------------------------- + + +@pytest.fixture +def density_analysis(): + """ + Run full density analysis: table + plots. + + Returns + ------- + dict + Metrics table dictionary. + """ + data = load_density_results() + if not data: + pytest.skip("No density data found") + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + table = build_metrics_table(data) + with open(OUT_PATH / "density_metrics_table.json", "w") as f: + json.dump(table, f, indent=2) + + fig_bar = build_density_bar_chart(data) + with open(OUT_PATH / "figure_density_bar.json", "w") as f: + f.write(fig_bar.to_json()) + + fig_ts = build_density_timeseries(data) + with open(OUT_PATH / "figure_density_timeseries.json", "w") as f: + f.write(fig_ts.to_json()) + + return table + + +def test_density(density_analysis) -> None: + """ + Run density benchmark — generates table and plots. + + Parameters + ---------- + density_analysis : dict + Fixture providing the metrics table. + """ + table = density_analysis + assert len(table["data"]) > 0, "No models produced density data" diff --git a/ml_peg/analysis/wise_electrolytes/density/metrics.yml b/ml_peg/analysis/wise_electrolytes/density/metrics.yml new file mode 100644 index 000000000..f4b2bc014 --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/density/metrics.yml @@ -0,0 +1,15 @@ +metrics: + Density Error: + good: 0.0 + bad: 0.17 + unit: "g/cm3" + tooltip: "Absolute error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)" + level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017) + weight: 1 + Density Error (%): + good: 0.0 + bad: 10.0 + unit: "%" + tooltip: "Percent error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)" + level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017) + weight: 0 diff --git a/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py b/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py new file mode 100644 index 000000000..8df3acd2a --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py @@ -0,0 +1,434 @@ +"""Analyse Li-O RDF benchmark for LiTFSI/H2O electrolyte.""" +# Reads rdf.json and rdf.npz files from calc outputs and generates +# scoring tables and g(r) plots for the ml-peg dashboard. +# +# Reference coordination numbers (Watanabe et al., J. Phys. Chem. B 125, 7477, 2021): +# CN(Li-O_water) = 2.0 } ~18.5 m LiTFSI/H2O, +# CN(Li-O_TFSI) = 2.0 } neutron diffraction, 6Li/7Li + H/D substitution +# +# Integration cutoff: 2.83 Å (first minimum of Li-O_total g(r) from r2SCAN AIMD). +# Trajectories: NVT 50 ps (dt=0.1 ps/frame, 501 frames, equilibrated 50-100 ps window). +# Box length: 27.4938 Å (cubic, p64_w170 cell: 64 LiTFSI + 170 H2O = 1534 atoms). + +from __future__ import annotations + +import json +from pathlib import Path + +import numpy as np +import plotly.graph_objects as go +import pytest + +from ml_peg.analysis.utils.utils import load_metrics_config + +# --- Paths ------------------------------------------------------------------- + +CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs" +CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "rdf" / "outputs" +APP_ROOT = Path(__file__).resolve().parents[3] / "app" +OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf" + +MODELS = [ + "matpes-r2scan", + "mace-mpa-0-medium", + "mace-omat-0-medium", + "mace-mp-0b3", + "mace-mh-1-omat", + "mace-mh-1-omol", +] + +# --- Physical constants and reference values --------------------------------- + +# Box geometry (same for all models: p64_w170 NVT cell) +L_BOX = 27.4938 # Å (cubic NVT cell at experimental density) +V_BOX = L_BOX**3 # ų + +# Integration cutoff: first minimum of Li-O_total g(r) +R_CUT = 2.83 # Å +DR = 0.02 # Å (bin width used in calc_rdf.py) + +# Experimental reference: Watanabe et al., JPCB 2021, ~18.5 m LiTFSI/H2O +CN_EXP_WATER = 2.0 +CN_EXP_TFSI = 2.0 + +# Map benchmark model names → ml-peg registry names (for app summary table) +MODEL_NAME_MAP = { + "matpes-r2scan": "mace-matpes-r2scan", + "mace-mpa-0-medium": "mace-mpa-0", + "mace-omat-0-medium": "mace-omat-0", +} + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +# --- Helpers ----------------------------------------------------------------- + + +def compute_cn_from_gr( + gr: np.ndarray, + r: np.ndarray, + n_neighbor: int, + volume: float, + r_cut: float, + dr: float, +) -> float: + """ + Integrate g(r) to coordination number: CN = 4pi rho int_0^r_cut g(r) r^2 dr. + + Parameters + ---------- + gr : np.ndarray + Radial distribution function values. + r : np.ndarray + Radial distance values in angstrom. + n_neighbor : int + Number of neighbor atoms of the relevant species in the simulation box. + volume : float + Volume of the simulation box in angstrom^3. + r_cut : float + Integration cutoff distance in angstrom. + dr : float + Bin width in angstrom. + + Returns + ------- + float + Coordination number obtained by integrating g(r) up to ``r_cut``. + """ + rho = n_neighbor / volume + mask = r <= r_cut + return float(np.sum(4 * np.pi * rho * gr[mask] * r[mask] ** 2 * dr)) + + +def load_rdf_results() -> dict[str, dict]: + """ + Load rdf.json and rdf.npz for each model, recompute CN at r_cut=2.83 A. + + Returns + ------- + dict[str, dict] + Mapping from model name to a dict containing coordination numbers, + errors, radial distances, g(r) arrays, and frame count. + """ + results = {} + for model in MODELS: + json_path = CALC_PATH / model / "rdf.json" + npz_path = CALC_PATH / model / "rdf.npz" + if not json_path.exists() or not npz_path.exists(): + continue + + with open(json_path) as f: + meta = json.load(f) + + data = np.load(npz_path) + r = data["r"] + + n_ow = meta["n_O_water"] + n_of = meta["n_O_TFSI"] + + cn_w = compute_cn_from_gr(data["gr_LiO_water"], r, n_ow, V_BOX, R_CUT, DR) + cn_f = compute_cn_from_gr(data["gr_LiO_TFSI"], r, n_of, V_BOX, R_CUT, DR) + + results[model] = { + "cn_water": cn_w, + "cn_tfsi": cn_f, + "cn_total": cn_w + cn_f, + "err_water": abs(cn_w - CN_EXP_WATER), + "err_tfsi": abs(cn_f - CN_EXP_TFSI), + "r": r, + "gr_water": data["gr_LiO_water"], + "gr_tfsi": data["gr_LiO_TFSI"], + "gr_total": data["gr_LiO_total"], + "n_frames": meta["n_frames_used"], + } + return results + + +def normalize_metric(value: float, good: float, bad: float) -> float: + """ + Map value linearly: good to 1.0, bad to 0.0, clipped to [0, 1]. + + Parameters + ---------- + value : float + The metric value to normalize. + good : float + Threshold value that maps to a score of 1.0. + bad : float + Threshold value that maps to a score of 0.0. + + Returns + ------- + float + Normalized score clipped to the range [0, 1]. + """ + if good == bad: + return 1.0 if value == good else 0.0 + t = (value - bad) / (good - bad) + return max(0.0, min(1.0, float(t))) + + +# --- Plot builders ----------------------------------------------------------- + + +def build_cn_bar_chart(data: dict[str, dict]) -> go.Figure: + """ + Bar chart of CN_water and CN_TFSI per model with experimental reference. + + Parameters + ---------- + data : dict[str, dict] + Per-model RDF results as returned by :func:`load_rdf_results`. + + Returns + ------- + go.Figure + Plotly figure with grouped bars and experimental reference lines. + """ + fig = go.Figure() + + x_models = [m for m in MODELS if m in data] + cn_water_vals = [data[m]["cn_water"] for m in x_models] + cn_tfsi_vals = [data[m]["cn_tfsi"] for m in x_models] + + fig.add_trace( + go.Bar( + name="Li-Owater", + x=x_models, + y=cn_water_vals, + marker_color="steelblue", + ) + ) + fig.add_trace( + go.Bar( + name="Li-OTFSI", + x=x_models, + y=cn_tfsi_vals, + marker_color="coral", + ) + ) + + # Experimental reference lines + fig.add_hline( + y=CN_EXP_WATER, + line_dash="dash", + line_color="steelblue", + annotation_text=f"Exp. Owater ({CN_EXP_WATER})", + annotation_position="top right", + ) + fig.add_hline( + y=CN_EXP_TFSI, + line_dash="dash", + line_color="coral", + annotation_text=f"Exp. OTFSI ({CN_EXP_TFSI})", + annotation_position="bottom right", + ) + + fig.update_layout( + title="Li⁺ Coordination Numbers (LiTFSI/H₂O, 21 m)", + yaxis_title="Coordination Number", + barmode="group", + legend={"x": 0.01, "y": 0.99}, + ) + return fig + + +def build_gr_plot(data: dict[str, dict]) -> go.Figure: + """ + G(r) plot: Li-O_water and Li-O_TFSI for all models. + + Parameters + ---------- + data : dict[str, dict] + Per-model RDF results as returned by :func:`load_rdf_results`. + + Returns + ------- + go.Figure + Plotly figure with g(r) curves and integration cutoff line. + """ + fig = go.Figure() + + colors = [ + "#1f77b4", + "#ff7f0e", + "#2ca02c", + "#d62728", + "#9467bd", + "#8c564b", + ] + + for i, model in enumerate(MODELS): + if model not in data: + continue + d = data[model] + c = colors[i % len(colors)] + fig.add_trace( + go.Scatter( + x=d["r"], + y=d["gr_water"], + mode="lines", + name=f"{model} Ow", + line={"color": c, "dash": "solid"}, + legendgroup=model, + ) + ) + fig.add_trace( + go.Scatter( + x=d["r"], + y=d["gr_tfsi"], + mode="lines", + name=f"{model} OTFSI", + line={"color": c, "dash": "dot"}, + legendgroup=model, + ) + ) + + # Mark integration cutoff + fig.add_vline( + x=R_CUT, + line_dash="dash", + line_color="gray", + annotation_text=f"r_cut={R_CUT} Å", + annotation_position="top right", + ) + + fig.update_layout( + title="Li-O Radial Distribution Functions (LiTFSI/H₂O, 21 m)", + xaxis_title="r / Å", + yaxis_title="g(r)", + xaxis_range=[1.0, 6.0], + ) + return fig + + +# --- Metrics table ----------------------------------------------------------- + + +def build_metrics_table(data: dict[str, dict]) -> dict: + """ + Build metrics table JSON for ml-peg app. + + Parameters + ---------- + data : dict[str, dict] + Per-model RDF results as returned by :func:`load_rdf_results`. + + Returns + ------- + dict + Table payload with keys ``data``, ``columns``, ``tooltip_header``, + ``thresholds``, and ``reference``. + """ + rows = [] + for model in MODELS: + if model not in data: + continue + d = data[model] + + score_w = normalize_metric( + d["err_water"], + DEFAULT_THRESHOLDS["CN Li-O_water Error"]["good"], + DEFAULT_THRESHOLDS["CN Li-O_water Error"]["bad"], + ) + score_f = normalize_metric( + d["err_tfsi"], + DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["good"], + DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["bad"], + ) + + w_w = DEFAULT_WEIGHTS.get("CN Li-O_water Error", 1.0) + w_f = DEFAULT_WEIGHTS.get("CN Li-O_TFSI Error", 1.0) + total_w = w_w + w_f + overall = (score_w * w_w + score_f * w_f) / total_w if total_w > 0 else 0.0 + + rows.append( + { + "MLIP": model, + "id": MODEL_NAME_MAP.get(model, model), + "CN Li-O_water": round(d["cn_water"], 3), + "CN Li-O_TFSI": round(d["cn_tfsi"], 3), + "CN Li-O_water Error": round(d["err_water"], 3), + "CN Li-O_TFSI Error": round(d["err_tfsi"], 3), + "Score": round(overall, 3), + } + ) + + rows.sort(key=lambda r: r["Score"], reverse=True) + + columns = [ + {"name": c, "id": c} + for c in [ + "MLIP", + "CN Li-O_water Error", + "CN Li-O_TFSI Error", + "Score", + ] + ] + + return { + "data": rows, + "columns": columns, + "tooltip_header": DEFAULT_TOOLTIPS, + "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()}, + "weights": dict(DEFAULT_WEIGHTS), + "model_name_map": MODEL_NAME_MAP, + "reference": { + "CN_water": CN_EXP_WATER, + "CN_TFSI": CN_EXP_TFSI, + "citation": "Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)", + "concentration": "~18.5 m LiTFSI/H2O", + "method": "neutron diffraction with 6Li/7Li + H/D isotopic substitution", + "r_cut_angstrom": R_CUT, + }, + } + + +# --- Pytest interface -------------------------------------------------------- + + +@pytest.fixture +def rdf_analysis(): + """ + Run full RDF analysis: table + plots. + + Returns + ------- + dict + Metrics table payload written to ``rdf_metrics_table.json``. + """ + data = load_rdf_results() + if not data: + pytest.skip("No RDF data found") + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + table = build_metrics_table(data) + with open(OUT_PATH / "rdf_metrics_table.json", "w") as f: + json.dump(table, f, indent=2) + + fig_cn = build_cn_bar_chart(data) + with open(OUT_PATH / "figure_cn_bar.json", "w") as f: + f.write(fig_cn.to_json()) + + fig_gr = build_gr_plot(data) + with open(OUT_PATH / "figure_gr.json", "w") as f: + f.write(fig_gr.to_json()) + + return table + + +def test_rdf(rdf_analysis) -> None: + """ + Run RDF benchmark -- generates table and plots. + + Parameters + ---------- + rdf_analysis : dict + Metrics table payload provided by the ``rdf_analysis`` fixture. + """ + table = rdf_analysis + assert len(table["data"]) > 0, "No models produced RDF data" diff --git a/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml b/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml new file mode 100644 index 000000000..e6a67093a --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml @@ -0,0 +1,15 @@ +metrics: + CN Li-O_water Error: + good: 0.0 + bad: 0.5 + unit: "-" + tooltip: "Absolute error in Li-O_water coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)" + level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021) + weight: 1 + CN Li-O_TFSI Error: + good: 0.0 + bad: 0.5 + unit: "-" + tooltip: "Absolute error in Li-O_TFSI coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)" + level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021) + weight: 1 diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py b/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py new file mode 100644 index 000000000..51b45c2b4 --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py @@ -0,0 +1,398 @@ +"""Analyse X-ray structure factor benchmark for LiTFSI/H2O electrolyte.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import numpy as np +import plotly.graph_objects as go +import pytest +from scipy.interpolate import interp1d + +from ml_peg.analysis.utils.utils import load_metrics_config + +# --- Paths ------------------------------------------------------------------- + +CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs" +CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "xray_sf" / "outputs" +APP_ROOT = Path(__file__).resolve().parents[3] / "app" +OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf" + +# Experimental data bundled with the benchmark +EXP_PATH = Path(__file__).resolve().parents[1] / "data" / "saxs_maginn.txt" + +MODELS = [ + "matpes-r2scan", + "mace-mpa-0-medium", + "mace-omat-0-medium", + "mace-mp-0b3", + "mace-mh-1-omat", + "mace-mh-1-omol", +] + +# Map benchmark model names → ml-peg registry names (for app summary table) +MODEL_NAME_MAP = { + "matpes-r2scan": "mace-matpes-r2scan", + "mace-mpa-0-medium": "mace-mpa-0", + "mace-omat-0-medium": "mace-omat-0", +} + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +# --- Helpers ----------------------------------------------------------------- + + +def load_experimental_sq() -> tuple[np.ndarray, np.ndarray]: + """ + Load experimental SAXS S(q) data. + + Returns + ------- + q_exp : np.ndarray + Scattering vector values in inverse angstroms. + sq_exp : np.ndarray + Experimental structure factor values. + """ + data = np.loadtxt(EXP_PATH) + return data[:, 0], data[:, 1] + + +def load_computed_sq(model: str) -> tuple[np.ndarray, np.ndarray] | None: + """ + Load computed S(q) for a model. + + Parameters + ---------- + model : str + Name of the MLIP model. + + Returns + ------- + tuple of (np.ndarray, np.ndarray) or None + Scattering vector and structure factor arrays, or None if the + output file does not exist. + """ + json_path = CALC_PATH / model / "xray_sq.json" + if not json_path.exists(): + return None + with open(json_path) as f: + d = json.load(f) + return np.array(d["q"]), np.array(d["Sq"]) + + +def compute_r_factor( + q_exp: np.ndarray, + sq_exp: np.ndarray, + q_calc: np.ndarray, + sq_calc: np.ndarray, +) -> float: + """ + Compute R-factor: sum|S_exp - S_calc| / sum|S_exp|. + + The q range used is the overlap between experimental and calculated data: + q_min is the minimum q of the calculated S(q), set by the simulation cell + size (2*pi/L), and q_max is the maximum q of the experimental data. + + Parameters + ---------- + q_exp : np.ndarray + Experimental scattering vector values. + sq_exp : np.ndarray + Experimental structure factor values. + q_calc : np.ndarray + Calculated scattering vector values. + sq_calc : np.ndarray + Calculated structure factor values. + + Returns + ------- + float + R-factor value, or NaN if no overlapping valid data exists. + """ + valid_calc = ~np.isnan(sq_calc) + q_min_calc = float(q_calc[valid_calc].min()) + q_max_exp = float(q_exp.max()) + + f = interp1d( + q_calc[valid_calc], + sq_calc[valid_calc], + kind="linear", + bounds_error=False, + fill_value=np.nan, + ) + + mask = (q_exp >= q_min_calc) & (q_exp <= q_max_exp) + sq_interp = f(q_exp[mask]) + sq_e = sq_exp[mask] + + ok = ~np.isnan(sq_interp) + if not ok.any(): + return float("nan") + + return float(np.sum(np.abs(sq_e[ok] - sq_interp[ok])) / np.sum(np.abs(sq_e[ok]))) + + +def find_first_peak( + q: np.ndarray, sq: np.ndarray, q_min: float = 0.5, q_max: float = 2.0 +) -> float: + """ + Find position of first peak in S(q). + + Parameters + ---------- + q : np.ndarray + Scattering vector values. + sq : np.ndarray + Structure factor values. + q_min : float, optional + Lower bound of the search window in inverse angstroms (default 0.5). + q_max : float, optional + Upper bound of the search window in inverse angstroms (default 2.0). + + Returns + ------- + float + Position of the first peak, or NaN if fewer than 3 valid points + exist in the search window. + """ + mask = (q >= q_min) & (q <= q_max) & ~np.isnan(sq) + if mask.sum() < 3: + return float("nan") + q_sub = q[mask] + sq_sub = sq[mask] + return float(q_sub[np.argmax(sq_sub)]) + + +def normalize_metric(value: float, good: float, bad: float) -> float: + """ + Normalize a metric value to [0, 1] where 0 is good and 1 is bad. + + Parameters + ---------- + value : float + Raw metric value. + good : float + Threshold corresponding to the best achievable score (0). + bad : float + Threshold corresponding to the worst score (1). + + Returns + ------- + float + Normalized score clipped to [0, 1]. + """ + if bad == good: + return 0.0 if value <= good else 1.0 + score = (value - good) / (bad - good) + return max(0.0, min(1.0, score)) + + +# --- Plot builders ----------------------------------------------------------- + + +def build_sq_comparison_plot( + q_exp: np.ndarray, + sq_exp: np.ndarray, + model_data: dict[str, tuple[np.ndarray, np.ndarray]], +) -> go.Figure: + """ + Build S(q) overlay plot comparing computed and experimental data. + + Parameters + ---------- + q_exp : np.ndarray + Experimental scattering vector values. + sq_exp : np.ndarray + Experimental structure factor values. + model_data : dict of str to tuple of (np.ndarray, np.ndarray) + Mapping from model name to (q, S(q)) arrays. + + Returns + ------- + go.Figure + Plotly figure with experimental and computed S(q) traces. + """ + fig = go.Figure() + + fig.add_trace( + go.Scatter( + x=q_exp.tolist(), + y=sq_exp.tolist(), + mode="lines", + name="Exp. (Zhang et al. 2021)", + line={"color": "black", "width": 2}, + ) + ) + + for model, (q, sq) in model_data.items(): + fig.add_trace( + go.Scatter( + x=q.tolist(), + y=sq.tolist(), + mode="lines", + name=model, + opacity=0.8, + ) + ) + + fig.update_layout( + title="X-ray Structure Factor S(q) — LiTFSI/H2O (21 m)", + xaxis_title="q / Å⁻¹", + yaxis_title="S(q) (Faber-Ziman)", + legend={"x": 0.60, "y": 0.98}, + ) + return fig + + +def build_metrics_table(results: dict[str, dict]) -> dict: + """ + Build metrics table as a JSON-serialisable dictionary. + + Parameters + ---------- + results : dict of str to dict + Per-model results containing ``r_factor``, ``peak_position_error``, + ``peak_calc``, and ``peak_exp`` entries. + + Returns + ------- + dict + Dictionary with ``data`` (list of row dicts), ``columns``, + ``tooltip_header``, and ``thresholds`` keys. + """ + rows = [] + for model in MODELS: + if model not in results: + continue + r = results[model] + r_factor = r["r_factor"] + peak_err = r["peak_position_error"] + + score_r = normalize_metric( + r_factor, + DEFAULT_THRESHOLDS["S(q) R-factor"]["good"], + DEFAULT_THRESHOLDS["S(q) R-factor"]["bad"], + ) + score_p = normalize_metric( + peak_err, + DEFAULT_THRESHOLDS["First Peak Position Error"]["good"], + DEFAULT_THRESHOLDS["First Peak Position Error"]["bad"], + ) + + w_r = DEFAULT_WEIGHTS.get("S(q) R-factor", 1.0) + w_p = DEFAULT_WEIGHTS.get("First Peak Position Error", 1.0) + total_w = w_r + w_p + overall = ( + 1.0 - (score_r * w_r + score_p * w_p) / total_w if total_w > 0 else 0.0 + ) + + rows.append( + { + "MLIP": model, + "id": MODEL_NAME_MAP.get(model, model), + "S(q) R-factor": round(r_factor, 4), + "First Peak Position Error": round(peak_err, 4), + "First Peak (calc)": round(r["peak_calc"], 2), + "First Peak (exp)": round(r["peak_exp"], 2), + "Score": round(overall, 3), + } + ) + + rows.sort(key=lambda r: r["Score"], reverse=True) + + columns = [ + {"name": c, "id": c} + for c in [ + "MLIP", + "S(q) R-factor", + "First Peak Position Error", + "Score", + ] + ] + + return { + "data": rows, + "columns": columns, + "tooltip_header": DEFAULT_TOOLTIPS, + "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()}, + "weights": dict(DEFAULT_WEIGHTS), + "model_name_map": MODEL_NAME_MAP, + } + + +# --- Pytest interface -------------------------------------------------------- + + +@pytest.fixture +def xray_sf_analysis(): + """ + Run full X-ray S(q) analysis and write outputs. + + Returns + ------- + dict + Metrics table dictionary produced by ``build_metrics_table``. + """ + q_exp, sq_exp = load_experimental_sq() + peak_exp = find_first_peak(q_exp, sq_exp) + + model_data = {} + results = {} + + for model in MODELS: + data = load_computed_sq(model) + if data is None: + continue + q_calc, sq_calc = data + model_data[model] = (q_calc, sq_calc) + + r_factor = compute_r_factor( + q_exp, sq_exp, q_calc, sq_calc + ) # range: [q_min_calc, q_max_exp] + peak_calc = find_first_peak(q_calc, sq_calc) + peak_error = ( + abs(peak_calc - peak_exp) if not np.isnan(peak_calc) else float("nan") + ) + + results[model] = { + "r_factor": r_factor, + "peak_calc": peak_calc, + "peak_exp": peak_exp, + "peak_position_error": peak_error, + } + + if not results: + pytest.skip("No S(q) data found") + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + table = build_metrics_table(results) + with open(OUT_PATH / "xray_sf_metrics_table.json", "w") as f: + json.dump(table, f, indent=2) + + fig = build_sq_comparison_plot(q_exp, sq_exp, model_data) + with open(OUT_PATH / "figure_xray_sq_comparison.json", "w") as f: + f.write(fig.to_json()) + + with open(OUT_PATH / "xray_sf_results.json", "w") as f: + json.dump(results, f, indent=2) + + return table + + +def test_xray_sf(xray_sf_analysis) -> None: + """ + Run X-ray structure factor benchmark. + + Parameters + ---------- + xray_sf_analysis : dict + Metrics table fixture providing analysis results. + """ + table = xray_sf_analysis + assert len(table["data"]) > 0, "No models produced S(q) data" diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml b/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml new file mode 100644 index 000000000..310fd8824 --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml @@ -0,0 +1,15 @@ +metrics: + S(q) R-factor: + good: 0.0 + bad: 0.5 + unit: "-" + tooltip: "R-factor between computed and experimental X-ray S(q): sum|S_exp - S_calc| / sum|S_exp|" + level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021) + weight: 1 + First Peak Position Error: + good: 0.0 + bad: 0.3 + unit: "1/A" + tooltip: "Absolute error in position of first S(q) peak (~1.0 A-1)" + level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021) + weight: 0 diff --git a/ml_peg/app/wise_electrolytes/density/app_density.py b/ml_peg/app/wise_electrolytes/density/app_density.py new file mode 100644 index 000000000..2a9055d4f --- /dev/null +++ b/ml_peg/app/wise_electrolytes/density/app_density.py @@ -0,0 +1,68 @@ +"""Dash app for LiTFSI/H2O electrolyte density benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from dash import html + +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "wise_electrolytes_density" +APP_ROOT = Path(__file__).resolve().parents[2] +DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density" + + +class DensityApp(BaseApp): + """Dash app for WiSE electrolyte density benchmark.""" + + def register_callbacks(self) -> None: + """Register interactive plot callbacks.""" + bar_chart = read_plot( + DATA_PATH / "figure_density_bar.json", + id=f"{BENCHMARK_NAME}-bar", + ) + timeseries = read_plot( + DATA_PATH / "figure_density_timeseries.json", + id=f"{BENCHMARK_NAME}-timeseries", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "Density Error": bar_chart, + "Density Error (%)": bar_chart, + "Score": timeseries, + }, + ) + + +def get_app() -> DensityApp: + """ + Return configured density benchmark app. + + Returns + ------- + DensityApp + Configured app instance. + """ + return DensityApp( + name="WiSE Density", + description=( + "NPT density of 21 m LiTFSI/H2O electrolyte at 298 K. " + "Experimental reference: 1.72 g/cm³ (Maginn et al., 2021)." + ), + docs_url="", + table_path=DATA_PATH / "density_metrics_table.json", + extra_components=[ + html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + app = get_app() + app.run(port=8060, debug=True) diff --git a/ml_peg/app/wise_electrolytes/rdf/app_rdf.py b/ml_peg/app/wise_electrolytes/rdf/app_rdf.py new file mode 100644 index 000000000..8de28f8be --- /dev/null +++ b/ml_peg/app/wise_electrolytes/rdf/app_rdf.py @@ -0,0 +1,69 @@ +"""Dash app for LiTFSI/H2O Li-O RDF / coordination number benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from dash import html + +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "wise_electrolytes_rdf" +APP_ROOT = Path(__file__).resolve().parents[2] +DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf" + + +class RdfApp(BaseApp): + """Dash app for WiSE electrolyte Li-O RDF benchmark.""" + + def register_callbacks(self) -> None: + """Register interactive plot callbacks.""" + cn_bar = read_plot( + DATA_PATH / "figure_cn_bar.json", + id=f"{BENCHMARK_NAME}-cn-bar", + ) + gr_plot = read_plot( + DATA_PATH / "figure_gr.json", + id=f"{BENCHMARK_NAME}-gr", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "CN Li-O_water Error": cn_bar, + "CN Li-O_TFSI Error": cn_bar, + "Score": gr_plot, + }, + ) + + +def get_app() -> RdfApp: + """ + Return configured RDF benchmark app. + + Returns + ------- + RdfApp + Configured app instance. + """ + return RdfApp( + name="WiSE Li-O RDF", + description=( + "Li-O coordination numbers from radial distribution functions " + "of 21 m LiTFSI/H2O electrolyte (NVT, 298 K). " + "Reference: Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)." + ), + docs_url="", + table_path=DATA_PATH / "rdf_metrics_table.json", + extra_components=[ + html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + app = get_app() + app.run(port=8062, debug=True) diff --git a/ml_peg/app/wise_electrolytes/wise_electrolytes.yml b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml new file mode 100644 index 000000000..dbfe8135c --- /dev/null +++ b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml @@ -0,0 +1,7 @@ +title: WiSE Electrolytes +description: Structural and thermodynamic properties of 21 m LiTFSI/H2O "water-in-salt" electrolytes from molecular dynamics +weight: 0 +benchmark_weights: + WiSE Density: 0.5 + WiSE X-ray S(q): 0.3 + WiSE Li-O RDF: 0.2 diff --git a/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py b/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py new file mode 100644 index 000000000..87e0f78f7 --- /dev/null +++ b/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py @@ -0,0 +1,66 @@ +"""Dash app for LiTFSI/H2O X-ray structure factor benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from dash import html + +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "wise_electrolytes_xray_sf" +APP_ROOT = Path(__file__).resolve().parents[2] +DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf" + + +class XraySfApp(BaseApp): + """Dash app for WiSE electrolyte X-ray S(q) benchmark.""" + + def register_callbacks(self) -> None: + """Register interactive plot callbacks.""" + sq_plot = read_plot( + DATA_PATH / "figure_xray_sq_comparison.json", + id=f"{BENCHMARK_NAME}-sq", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "S(q) R-factor": sq_plot, + "First Peak Position Error": sq_plot, + "Score": sq_plot, + }, + ) + + +def get_app() -> XraySfApp: + """ + Return configured X-ray S(q) benchmark app. + + Returns + ------- + XraySfApp + Configured app instance. + """ + return XraySfApp( + name="WiSE X-ray S(q)", + description=( + "X-ray structure factor S(q) of 21 m LiTFSI/H2O electrolyte from NVT MD. " + "Computed via dynasor with Cromer-Mann form factors " + "(Faber-Ziman normalization). " + "Experimental reference: SAXS (Maginn et al., J. Phys. Chem. B, 2021)." + ), + docs_url="", + table_path=DATA_PATH / "xray_sf_metrics_table.json", + extra_components=[ + html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + app = get_app() + app.run(port=8061, debug=True) diff --git a/ml_peg/calcs/wise_electrolytes/density/calc_density.py b/ml_peg/calcs/wise_electrolytes/density/calc_density.py new file mode 100644 index 000000000..2d52a63d7 --- /dev/null +++ b/ml_peg/calcs/wise_electrolytes/density/calc_density.py @@ -0,0 +1,75 @@ +""" +Extract density from pre-computed NPT simulations of LiTFSI/H2O electrolyte. + +This benchmark reads pre-computed LAMMPS NPT thermo logs and extracts +equilibrium density for each MLIP model. The simulations use a p16_w42 +cell (382 atoms: 16 LiTFSI + 42 H2O) at 298 K, 1 atm. + +System: 21 m LiTFSI / H2O electrolyte. +Experimental density: 1.7126 g/cm3 +(Gilbert et al., J. Chem. Eng. Data 62, 2056 (2017), +DOI: 10.1021/acs.jced.7b00135). +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path + +import pytest + +# --- Configuration ----------------------------------------------------------- + +# Path to converted data (will be replaced with S3 download for PR) +DATA_ROOT = Path( + os.environ.get( + "ML_PEG_WISE_DENSITY_DATA_ROOT", + "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/density", + ) +) +OUT_PATH = Path(__file__).parent / "outputs" + +MODELS = [ + "matpes-r2scan", + "mace-mpa-0-medium", + "mace-omat-0-medium", + "mace-mp-0b3", + "mace-mh-1-omat", + "mace-mh-1-omol", +] + +RHO_EXP = 1.7126 # g/cm³ (Gilbert et al., JCED 2017) + + +# --- Pytest interface (ml-peg convention) ------------------------------------ + + +@pytest.mark.parametrize("model", MODELS) +def test_extract_density(model: str) -> None: + """ + Extract and save density data for one model. + + Parameters + ---------- + model : str + Name of the MLIP model to extract density for. + """ + # Read pre-computed density data + json_path = DATA_ROOT / model / "density.json" + if not json_path.exists(): + pytest.skip(f"No density data for {model}") + + with open(json_path) as f: + result = json.load(f) + + # Save to outputs + out_dir = OUT_PATH / model + out_dir.mkdir(parents=True, exist_ok=True) + + with open(out_dir / "density.json", "w") as f: + json.dump(result, f, indent=2) + + # Basic sanity + assert result["rho_mean"] > 0, f"Negative density for {model}" + assert abs(result["rho_error_pct"]) < 50, f"Density error > 50% for {model}" diff --git a/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py b/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py new file mode 100644 index 000000000..7bc8de2ab --- /dev/null +++ b/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py @@ -0,0 +1,313 @@ +""" +Compute Li-O radial distribution functions for LiTFSI/H2O electrolyte. + +Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model +and computes g(r) for Li-O_total, Li-O_water (O bonded to H, d_OH < 1.25 A), +and Li-O_TFSI (O bonded to S, d_OS < 1.75 A), plus coordination numbers by +integration of the first peak. + +System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms). +Experimental reference for coordination numbers: +Watanabe et al., J. Phys. Chem. B 125, 7477 (2021), DOI: 10.1021/acs.jpcb.1c04693. +Neutron diffraction with 6Li/7Li + H/D isotopic substitution, ~18.5 m LiTFSI/H2O. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path + +from ase.geometry import get_distances +from ase.io import iread +import numpy as np +import pytest + +# --- Configuration ----------------------------------------------------------- + +DATA_ROOT = Path( + os.environ.get( + "ML_PEG_WISE_XRAY_DATA_ROOT", + "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf", + ) +) +OUT_PATH = Path(__file__).parent / "outputs" + +MODELS = [ + "matpes-r2scan", + "mace-mpa-0-medium", + "mace-omat-0-medium", + "mace-mp-0b3", + "mace-mh-1-omat", + "mace-mh-1-omol", +] + +# RDF parameters +R_MAX = 6.0 # Å — covers first + second shell +DR = 0.02 # Å — bin width +R_CUT_COORD = 2.83 # Å — first minimum of Li-O g(r) from r2SCAN AIMD reference + +# O-type identification cutoffs (applied on first frame only) +D_OH_CUT = 1.25 # Å — O-H bond in water +D_OS_CUT = 1.75 # Å — O-S bond in TFSI + + +# --- Helpers ----------------------------------------------------------------- + + +def identify_o_types(atoms) -> tuple[np.ndarray, np.ndarray]: + """ + Return indices of O_water and O_TFSI from a single ASE Atoms frame. + + Parameters + ---------- + atoms : ase.Atoms + A single frame from the trajectory. + + Returns + ------- + o_water : np.ndarray + Indices of oxygen atoms bonded to hydrogen (water oxygens). + o_tfsi : np.ndarray + Indices of oxygen atoms bonded to sulfur (TFSI oxygens). + """ + syms = np.array(atoms.get_chemical_symbols()) + pos = atoms.get_positions() + cell = atoms.get_cell() + pbc = atoms.get_pbc() + + o_idx = np.where(syms == "O")[0] + h_idx = np.where(syms == "H")[0] + s_idx = np.where(syms == "S")[0] + + o_water, o_tfsi = [], [] + for o in o_idx: + _, d_oh = get_distances(pos[o : o + 1], pos[h_idx], cell=cell, pbc=pbc) + _, d_os = get_distances(pos[o : o + 1], pos[s_idx], cell=cell, pbc=pbc) + if d_oh.min() < D_OH_CUT: + o_water.append(o) + elif d_os.min() < D_OS_CUT: + o_tfsi.append(o) + + return np.array(o_water), np.array(o_tfsi) + + +def compute_rdf( + traj_path: Path, + o_water_idx: np.ndarray, + o_tfsi_idx: np.ndarray, + r_max: float = R_MAX, + dr: float = DR, + skip_frames: int = 0, # trajectories are pre-equilibrated (50-100 ps window) +) -> dict: + """ + Compute Li-O RDFs from an extxyz trajectory. + + Parameters + ---------- + traj_path : Path + Path to .extxyz trajectory file. + o_water_idx : np.ndarray + Atom indices of O_water (from first frame, fixed). + o_tfsi_idx : np.ndarray + Atom indices of O_TFSI (from first frame, fixed). + r_max : float + Maximum distance for RDF. + dr : float + Bin width. + skip_frames : int + Number of initial frames to skip (equilibration). + + Returns + ------- + dict + Dictionary with keys ``r``, ``gr_LiO_total``, ``gr_LiO_water``, + ``gr_LiO_TFSI``, ``coord_LiO_total``, ``coord_LiO_water``, + ``coord_LiO_TFSI``, ``n_li``, ``n_O_water``, ``n_O_TFSI``, + ``n_frames_used``, and ``r_cut_coord``. + """ + bins = np.arange(0, r_max + dr, dr) + r_centers = 0.5 * (bins[:-1] + bins[1:]) + n_bins = len(r_centers) + + hist_total = np.zeros(n_bins) + hist_water = np.zeros(n_bins) + hist_tfsi = np.zeros(n_bins) + + n_frames = 0 + n_li = None + volume = None + o_all_idx = np.concatenate([o_water_idx, o_tfsi_idx]) + + for frame_idx, atoms in enumerate( + iread(str(traj_path), format="extxyz", index=":") + ): + if frame_idx < skip_frames: + continue + + syms = np.array(atoms.get_chemical_symbols()) + pos = atoms.get_positions() + cell = atoms.get_cell() + pbc = atoms.get_pbc() + + li_idx = np.where(syms == "Li")[0] + if n_li is None: + n_li = len(li_idx) + if volume is None: + volume = atoms.get_volume() + + pos_li = pos[li_idx] + + for o_set, hist in [ + (o_all_idx, hist_total), + (o_water_idx, hist_water), + (o_tfsi_idx, hist_tfsi), + ]: + pos_o = pos[o_set] + _, dists = get_distances(pos_li, pos_o, cell=cell, pbc=pbc) + dists_flat = dists.ravel() + dists_flat = dists_flat[dists_flat < r_max] + h, _ = np.histogram(dists_flat, bins=bins) + hist += h + + n_frames += 1 + + if n_frames == 0 or n_li is None: + raise RuntimeError(f"No frames processed from {traj_path}") + + # Normalize to g(r) + def normalize(hist, n_central, n_neighbor): + """ + Normalize histogram to g(r). + + Parameters + ---------- + hist : np.ndarray + Raw pair-distance histogram. + n_central : int + Number of central atoms (Li). + n_neighbor : int + Number of neighbor atoms (O subset). + + Returns + ------- + np.ndarray + Normalized radial distribution function. + """ + shell_vol = (4.0 / 3.0) * np.pi * (bins[1:] ** 3 - bins[:-1] ** 3) + rho = n_neighbor / volume + norm = n_central * n_frames * rho * shell_vol + return hist / norm + + n_o_total = len(o_all_idx) + n_o_water = len(o_water_idx) + n_o_tfsi = len(o_tfsi_idx) + + gr_total = normalize(hist_total, n_li, n_o_total) + gr_water = normalize(hist_water, n_li, n_o_water) + gr_tfsi = normalize(hist_tfsi, n_li, n_o_tfsi) + + # Coordination numbers: integrate 4π ρ g(r) r² dr from 0 to R_CUT_COORD + def coord_number(gr, n_neighbor): + """ + Compute coordination number from g(r). + + Parameters + ---------- + gr : np.ndarray + Radial distribution function. + n_neighbor : int + Number of neighbor atoms (O subset). + + Returns + ------- + float + Integrated coordination number up to R_CUT_COORD. + """ + rho = n_neighbor / volume + integrand = 4.0 * np.pi * rho * gr * r_centers**2 * dr + mask = r_centers <= R_CUT_COORD + return float(np.sum(integrand[mask])) + + return { + "r": r_centers.tolist(), + "gr_LiO_total": gr_total.tolist(), + "gr_LiO_water": gr_water.tolist(), + "gr_LiO_TFSI": gr_tfsi.tolist(), + "coord_LiO_total": coord_number(gr_total, n_o_total), + "coord_LiO_water": coord_number(gr_water, n_o_water), + "coord_LiO_TFSI": coord_number(gr_tfsi, n_o_tfsi), + "n_li": n_li, + "n_O_water": n_o_water, + "n_O_TFSI": n_o_tfsi, + "n_frames_used": n_frames, + "r_cut_coord": R_CUT_COORD, + } + + +def find_trajectory(model: str) -> Path | None: + """ + Find NVT extxyz trajectory for a model. + + Parameters + ---------- + model : str + Name of the MLIP model. + + Returns + ------- + Path or None + Path to the trajectory file, or None if not found. + """ + p = DATA_ROOT / model / "nvt_trajectory.extxyz" + return p if p.exists() else None + + +# --- Pytest interface -------------------------------------------------------- + + +@pytest.mark.parametrize("model", MODELS) +def test_compute_rdf(model: str) -> None: + """ + Compute and save Li-O RDFs and coordination numbers for one model. + + Parameters + ---------- + model : str + Name of the MLIP model to compute RDFs for. + """ + traj_path = find_trajectory(model) + if traj_path is None: + pytest.skip(f"No NVT trajectory for {model}") + + # Identify O types from first frame (indices fixed throughout NVT) + from ase.io import read as ase_read + + first_frame = ase_read(str(traj_path), index=0, format="extxyz") + o_water_idx, o_tfsi_idx = identify_o_types(first_frame) + + assert len(o_water_idx) > 0, f"No O_water found for {model}" + assert len(o_tfsi_idx) > 0, f"No O_TFSI found for {model}" + + result = compute_rdf(traj_path, o_water_idx, o_tfsi_idx) + + out_dir = OUT_PATH / model + out_dir.mkdir(parents=True, exist_ok=True) + + with open(out_dir / "rdf.json", "w") as f: + json.dump( + {k: v for k, v in result.items() if not isinstance(v, list)}, f, indent=2 + ) + + np.savez( + out_dir / "rdf.npz", + r=np.array(result["r"]), + gr_LiO_total=np.array(result["gr_LiO_total"]), + gr_LiO_water=np.array(result["gr_LiO_water"]), + gr_LiO_TFSI=np.array(result["gr_LiO_TFSI"]), + ) + + # Sanity checks + assert 2.0 < result["coord_LiO_total"] < 8.0, ( + f"Unexpected Li-O_total CN={result['coord_LiO_total']:.2f} for {model}" + ) diff --git a/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py b/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py new file mode 100644 index 000000000..c4e173ffd --- /dev/null +++ b/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py @@ -0,0 +1,345 @@ +""" +Compute X-ray structure factor S(q) for LiTFSI/H2O electrolyte. + +Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model +and computes S(q) via dynasor using TRAVIS-matching settings: Cromer-Mann +4-Gaussian form factors (including H), Faber-Ziman normalization with Laue +monotonic term subtraction, and Savitzky-Golay smoothing +(window=27, order=3, dq=0.02 A^-1). + +System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms). +Experimental reference: SAXS, Zhang, Lewis, Mars, Wan et al., +J. Phys. Chem. B 125, 4501 (2021), DOI: 10.1021/acs.jpcb.1c02189. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +import warnings + +import numpy as np +import pytest +from scipy.signal import savgol_filter + +warnings.filterwarnings("ignore") + +# --- Configuration ----------------------------------------------------------- + +# Path to converted .extxyz trajectories (will be S3 download for PR) +DATA_ROOT = Path( + os.environ.get( + "ML_PEG_WISE_XRAY_DATA_ROOT", + "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf", + ) +) +OUT_PATH = Path(__file__).parent / "outputs" + +MODELS = [ + "matpes-r2scan", + "mace-mpa-0-medium", + "mace-omat-0-medium", + "mace-mp-0b3", + "mace-mh-1-omat", + "mace-mh-1-omol", +] + +# S(q) computation parameters (TRAVIS-matching) +Q_MAX = 13.0 # Å⁻¹ +Q_MIN = 0.5 # Å⁻¹ +MAX_QPOINTS = 50000 +DQ_BIN = 0.02 # Å⁻¹ +SAVGOL_WINDOW = 27 # optimal for matching TRAVIS +SAVGOL_ORDER = 3 + +# LAMMPS type → element (for fallback) +TYPE_TO_ELEMENT = {1: "Li", 2: "C", 3: "F", 4: "S", 5: "N", 6: "O", 7: "H"} + +# System composition (p64_w170: 64 LiTFSI + 170 H2O) +COMPOSITION = {"Li": 64, "C": 128, "F": 384, "S": 128, "N": 64, "O": 426, "H": 340} +N_ATOMS = sum(COMPOSITION.values()) # 1534 +CONC = {k: v / N_ATOMS for k, v in COMPOSITION.items()} + +# TRAVIS Cromer-Mann 4-Gaussian form factors (International Tables) +TRAVIS_FF = { + "S": { + "a": [6.905, 5.203, 1.438, 1.586], + "b": [1.468, 22.215, 0.254, 56.172], + "c": 0.867, + }, + "F": { + "a": [3.539, 2.641, 1.517, 1.024], + "b": [10.283, 4.294, 0.262, 26.148], + "c": 0.278, + }, + "O": { + "a": [3.049, 2.287, 1.546, 0.867], + "b": [13.277, 5.701, 0.324, 32.909], + "c": 0.251, + }, + "N": { + "a": [12.213, 3.132, 2.013, 1.166], + "b": [0.006, 9.893, 28.997, 0.583], + "c": -11.529, + }, + "C": { + "a": [2.310, 1.020, 1.589, 0.865], + "b": [20.844, 10.208, 0.569, 51.651], + "c": 0.216, + }, + "Li": { + "a": [1.128, 0.751, 0.618, 0.465], + "b": [3.955, 1.052, 85.391, 168.261], + "c": 0.038, + }, + "H": { + "a": [0.493, 0.323, 0.140, 0.041], + "b": [10.511, 26.126, 3.142, 57.800], + "c": 0.003, + }, +} + + +# --- Form factor helpers ----------------------------------------------------- + + +def compute_fq_travis(elem: str, q_arr: np.ndarray) -> np.ndarray: + """ + Compute X-ray form factor f(q) using TRAVIS 4-Gaussian (Cromer-Mann) params. + + Parameters + ---------- + elem : str + Chemical element symbol (e.g. ``"Li"``, ``"O"``). + q_arr : np.ndarray + Array of q values in inverse angstroms. + + Returns + ------- + np.ndarray + Form factor values evaluated at each q. + """ + ff = TRAVIS_FF[elem] + s2 = (q_arr / (4 * np.pi)) ** 2 + return sum(ff["a"][i] * np.exp(-ff["b"][i] * s2) for i in range(4)) + ff["c"] + + +# --- S(q) computation ------------------------------------------------------- + + +def compute_sq_travis_style(traj_path: Path) -> dict: + """ + Compute S(q) in TRAVIS Faber-Ziman convention using dynasor. + + Steps: + + 1. Read trajectory with dynasor (LAMMPS dump via MDAnalysis). + 2. Compute partial S_ab(q) on a fine spherical q-grid. + 3. Apply TRAVIS Cromer-Mann form factors: I_xray = sum f_a * f_b * S_ab. + 4. Spherical binning at dq = 0.02 A^-1. + 5. Faber-Ziman normalization with Laue term: S_FZ = I/^2 - /^2 + 1. + 6. Savitzky-Golay smoothing. + + Parameters + ---------- + traj_path : Path + Path to the trajectory file (.extxyz or LAMMPS dump). + + Returns + ------- + dict + Dictionary with keys ``q``, ``Sq``, ``n_qpoints``, + ``n_qpoints_used``, ``cell``, ``atom_types``, + ``particle_counts``, and ``params``. + """ + from dynasor import ( + Trajectory, + compute_static_structure_factors, + get_spherical_qpoints, + ) + + traj_str = str(traj_path) + is_extxyz = traj_str.endswith(".extxyz") or traj_str.endswith(".xyz") + + if is_extxyz: + # For .extxyz: dynasor reads it directly, atomic_indices from element symbols + from ase.io import read as ase_read + + first_frame = ase_read(traj_str, index=0) + symbols = first_frame.get_chemical_symbols() + atomic_indices = {} + for i, sym in enumerate(symbols): + atomic_indices.setdefault(sym, []).append(i) + + traj = Trajectory( + traj_str, + trajectory_format="ase", + atomic_indices=atomic_indices, + ) + else: + # For LAMMPS dump: use MDAnalysis backend + import MDAnalysis as mda # noqa: N813 + + u = mda.Universe(traj_str, format="LAMMPSDUMP") + types = u.atoms.types + atomic_indices = {} + for t, elem in TYPE_TO_ELEMENT.items(): + mask = types == str(t) + idx = np.where(mask)[0].tolist() + if idx: + atomic_indices[elem] = idx + + traj = Trajectory( + traj_str, + trajectory_format="lammps_mdanalysis", + atomic_indices=atomic_indices, + ) + + q_points = get_spherical_qpoints(traj.cell, q_max=Q_MAX, max_points=MAX_QPOINTS) + sample = compute_static_structure_factors(traj, q_points) + + q_norms = np.linalg.norm(sample.q_points, axis=1) + atom_types = list(sample.particle_counts.keys()) + + # Manual X-ray weighting with TRAVIS form factors (including H) + ff_at_q = {at: compute_fq_travis(at, q_norms) for at in atom_types} + i_xray = np.zeros(len(q_norms)) + for s1, s2 in sample.pairs: + sq_ab = sample[f"Sq_{s1}_{s2}"].flatten() + i_xray += ff_at_q[s1] * ff_at_q[s2] * sq_ab + + # Spherical binning + q_bins = np.arange(0.0, Q_MAX + DQ_BIN, DQ_BIN) + q_centers = 0.5 * (q_bins[:-1] + q_bins[1:]) + i_xray_binned = np.full(len(q_centers), np.nan) + counts = np.zeros(len(q_centers), dtype=int) + + for i in range(len(q_centers)): + mask = (q_norms >= q_bins[i]) & (q_norms < q_bins[i + 1]) + n = mask.sum() + if n > 0: + i_xray_binned[i] = np.mean(i_xray[mask]) + counts[i] = n + + # Faber-Ziman normalization with Laue subtraction + f_avg = np.zeros(len(q_centers)) + f2_avg = np.zeros(len(q_centers)) + for elem, c in CONC.items(): + fq = compute_fq_travis(elem, q_centers) + f_avg += c * fq + f2_avg += c * fq**2 + f_avg_sq = f_avg**2 + + sq_fz = np.where( + f_avg_sq > 0, + i_xray_binned / f_avg_sq - f2_avg / f_avg_sq + 1.0, + np.nan, + ) + + # Savitzky-Golay smoothing + valid = ~np.isnan(sq_fz) & (q_centers >= 0.3) & (q_centers <= Q_MAX) + q_v = q_centers[valid] + sq_v = sq_fz[valid] + if len(sq_v) > SAVGOL_WINDOW: + sq_smooth = savgol_filter(sq_v, SAVGOL_WINDOW, SAVGOL_ORDER) + else: + sq_smooth = sq_v + + # Restrict to useful range + rmask = (q_v >= Q_MIN) & (q_v <= 12.0) + + return { + "q": q_v[rmask].tolist(), + "Sq": sq_smooth[rmask].tolist(), + "n_qpoints": len(q_points), + "n_qpoints_used": len(sample.q_points), + "cell": traj.cell.tolist(), + "atom_types": traj.atom_types, + "particle_counts": {k: int(v) for k, v in sample.particle_counts.items()}, + "params": { + "q_max": Q_MAX, + "q_min": Q_MIN, + "dq_bin": DQ_BIN, + "max_qpoints": MAX_QPOINTS, + "savgol_window": SAVGOL_WINDOW, + "savgol_order": SAVGOL_ORDER, + "form_factors": "cromer-mann-4gaussian", + "normalization": "faber-ziman", + }, + } + + +# --- Trajectory finding ------------------------------------------------------ + + +def find_trajectory(model: str) -> Path | None: + """ + Find NVT trajectory for a model. + + Prefers converted .extxyz, falls back to LAMMPS dump. + + Parameters + ---------- + model : str + Name of the MLIP model. + + Returns + ------- + Path or None + Path to the trajectory file, or None if not found. + """ + # Check for converted .extxyz first + extxyz = DATA_ROOT / model / "nvt_trajectory.extxyz" + if extxyz.exists(): + return extxyz + + # Fall back to original LAMMPS dump + runs_root = Path("/lus/work/CT9/cin1387/lbrugnoli/prove/runs") + model_cell = runs_root / model / "p64_w170" + for d in sorted(model_cell.glob("nvt_*"), reverse=True): + traj = d / "nvt_long_trajectory.lammpstrj" + if traj.exists(): + return traj + for d in sorted(model_cell.glob("pipeline_*"), reverse=True): + traj = d / "nvt_trajectory.lammpstrj" + if traj.exists(): + return traj + + return None + + +# --- Pytest interface (ml-peg convention) ------------------------------------ + + +@pytest.mark.parametrize("model", MODELS) +def test_compute_xray_sq(model: str) -> None: + """ + Compute and save X-ray S(q) in Faber-Ziman convention for one model. + + Parameters + ---------- + model : str + Name of the MLIP model to compute S(q) for. + """ + traj_path = find_trajectory(model) + if traj_path is None: + pytest.skip(f"No NVT trajectory for {model}") + + result = compute_sq_travis_style(traj_path) + result["model"] = model + result["traj_path"] = str(traj_path) + + out_dir = OUT_PATH / model + out_dir.mkdir(parents=True, exist_ok=True) + + with open(out_dir / "xray_sq.json", "w") as f: + json.dump(result, f, indent=2) + + np.savez( + out_dir / "xray_sq.npz", + q=np.array(result["q"]), + Sq=np.array(result["Sq"]), + ) + + assert len(result["q"]) > 10, f"Too few q-points for {model}" From 8387cec3391a6e2ae92f537b9475d89449a18b00 Mon Sep 17 00:00:00 2001 From: luca Date: Sun, 26 Apr 2026 17:07:01 +0200 Subject: [PATCH 2/3] Consolidate WiSE benchmark per PR #445 review - Merge density / rdf / xray_sf into a single litfsi_h2o_21m benchmark under ml_peg/{calcs,analysis,app}/wise_electrolytes/litfsi_h2o_21m/. - Adopt the standard ml-peg patterns in the analysis and calc scripts: load_models(current_models) for model discovery, @build_table for the metrics table, and metrics.yml for thresholds/tooltips/weights. - Add a Janus recast of the LAMMPS+symmetrix Adastra production protocol in ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py (pytest-skipped reference; documents the exact MD parameters). - Add the docs page docs/source/user_guide/benchmarks/wise_electrolytes.rst (and toctree entry) and wire the app docs_url to it. - Update the parent wise_electrolytes.yml to a single benchmark weight. --- docs/source/user_guide/benchmarks/index.rst | 1 + .../benchmarks/wise_electrolytes.rst | 87 +++ .../density/analyse_density.py | 298 -------- .../wise_electrolytes/density/metrics.yml | 15 - .../litfsi_h2o_21m/analyse_litfsi_h2o_21m.py | 721 ++++++++++++++++++ .../litfsi_h2o_21m/metrics.yml | 43 ++ .../wise_electrolytes/rdf/analyse_rdf.py | 434 ----------- .../wise_electrolytes/rdf/metrics.yml | 15 - .../xray_sf/analyse_xray_sf.py | 398 ---------- .../wise_electrolytes/xray_sf/metrics.yml | 15 - .../wise_electrolytes/density/app_density.py | 68 -- .../litfsi_h2o_21m/app_litfsi_h2o_21m.py | 91 +++ ml_peg/app/wise_electrolytes/rdf/app_rdf.py | 69 -- .../wise_electrolytes/wise_electrolytes.yml | 4 +- .../wise_electrolytes/xray_sf/app_xray_sf.py | 66 -- .../wise_electrolytes/density/calc_density.py | 75 -- .../litfsi_h2o_21m/calc_litfsi_h2o_21m.py | 616 +++++++++++++++ .../md_reference/calc_md_reference.py | 238 ++++++ .../calcs/wise_electrolytes/rdf/calc_rdf.py | 313 -------- .../wise_electrolytes/xray_sf/calc_xray_sf.py | 345 --------- 20 files changed, 1798 insertions(+), 2114 deletions(-) create mode 100644 docs/source/user_guide/benchmarks/wise_electrolytes.rst delete mode 100644 ml_peg/analysis/wise_electrolytes/density/analyse_density.py delete mode 100644 ml_peg/analysis/wise_electrolytes/density/metrics.yml create mode 100644 ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py create mode 100644 ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml delete mode 100644 ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py delete mode 100644 ml_peg/analysis/wise_electrolytes/rdf/metrics.yml delete mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py delete mode 100644 ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml delete mode 100644 ml_peg/app/wise_electrolytes/density/app_density.py create mode 100644 ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py delete mode 100644 ml_peg/app/wise_electrolytes/rdf/app_rdf.py delete mode 100644 ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py delete mode 100644 ml_peg/calcs/wise_electrolytes/density/calc_density.py create mode 100644 ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py create mode 100644 ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py delete mode 100644 ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py delete mode 100644 ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py diff --git a/docs/source/user_guide/benchmarks/index.rst b/docs/source/user_guide/benchmarks/index.rst index 8e907ce78..1dbd28e8c 100644 --- a/docs/source/user_guide/benchmarks/index.rst +++ b/docs/source/user_guide/benchmarks/index.rst @@ -17,3 +17,4 @@ Benchmarks tm_complexes conformers molecular_dynamics + wise_electrolytes diff --git a/docs/source/user_guide/benchmarks/wise_electrolytes.rst b/docs/source/user_guide/benchmarks/wise_electrolytes.rst new file mode 100644 index 000000000..ebee388de --- /dev/null +++ b/docs/source/user_guide/benchmarks/wise_electrolytes.rst @@ -0,0 +1,87 @@ +================== +WiSE electrolytes +================== + +LiTFSI/H2O 21 m +=============== + +Summary +------- + +Performance in predicting structural and thermodynamic properties of the 21 molal +LiTFSI/H2O "water-in-salt" electrolyte (WiSE). The benchmark consolidates three +experimental observables for the same chemistry: NPT equilibrium density, Li-O +coordination from the radial distribution function, and the X-ray static +structure factor S(q). + +Two simulation cells are used: a small cell (16 LiTFSI + 42 H2O, 382 atoms) for +the NPT density, and a large cubic cell (64 LiTFSI + 170 H2O, 1534 atoms, +27.4938 A) for the NVT trajectories that feed the RDF and S(q). + +Metrics +------- + +1. Density error + + Absolute error in NPT density vs the experimental value + (1.7126 g/cm3, Gilbert et al., *J. Chem. Eng. Data* 62, 2056 (2017)). + The density is the average over the production NPT trajectory at + 298.15 K and 1 atm. + +2. Li-O_water coordination number error + + Absolute error in the Li-O_water coordination number, computed by + integrating the Li-O_water radial distribution function up to the first + minimum (R_cut = 2.83 A, from r2SCAN AIMD reference). Water oxygens are + identified from the first frame as those bonded to a hydrogen + (d_OH < 1.25 A). Reference value: 2.0 (Watanabe et al., + *J. Phys. Chem. B* 125, 7477 (2021), neutron diffraction with isotopic + substitution at ~18.5 m). + +3. Li-O_TFSI coordination number error + + Same definition as above, restricted to TFSI oxygens (those bonded to a + sulfur, d_OS < 1.75 A). Reference value: 2.0 (same source). + +4. S(q) R-factor + + R-factor between the computed and experimental X-ray structure factor, + ``sum(|S_exp - S_calc|) / sum(|S_exp|)``. S(q) is computed via dynasor + in the Faber-Ziman convention with Cromer-Mann 4-Gaussian form factors + (including hydrogen) and Savitzky-Golay smoothing + (window = 27, order = 3, dq = 0.02 A^-1). Reference: SAXS data of + Zhang et al., *J. Phys. Chem. B* 125, 4501 (2021). + +Computational cost +------------------ + +High: production trajectories require a GPU MD code (e.g. LAMMPS + symmetrix +or Janus + MACE) and several hours per model on a single MI250X / A100 GPU. +Re-extracting metrics from pre-computed trajectories is cheap (seconds per +model). + +Data availability +----------------- + +Input structures: + +* p64_w170 cubic cell (1534 atoms, 27.4938 A) for NVT trajectories. +* p16_w42 cell (382 atoms) for NPT density. + +Production trajectories were generated with LAMMPS + symmetrix on Adastra +(MI250X) using a MACE foundation model. The reference Janus recast of the +same protocol (Min -> NVT 50 ps -> NPT 200 ps, dt = 0.5 fs, T = 298.15 K, +P = 1.01325 bar, NH thermostat tau = 50 fs, NH barostat tau = 500 fs) lives +in ``ml_peg/calcs/wise_electrolytes/md_reference/``. + +Reference data: + +* Density: Gilbert et al., *J. Chem. Eng. Data* 62, 2056 (2017), + DOI: 10.1021/acs.jced.7b00135. +* Li-O coordination numbers: Watanabe et al., *J. Phys. Chem. B* 125, 7477 + (2021), DOI: 10.1021/acs.jpcb.1c04693. +* X-ray S(q): Zhang et al., *J. Phys. Chem. B* 125, 4501 (2021), + DOI: 10.1021/acs.jpcb.1c02189. + +Further details on the simulation protocol and MLIP assessment for this +system: L. Brugnoli, arXiv:2603.22099 (2026). diff --git a/ml_peg/analysis/wise_electrolytes/density/analyse_density.py b/ml_peg/analysis/wise_electrolytes/density/analyse_density.py deleted file mode 100644 index f2410484f..000000000 --- a/ml_peg/analysis/wise_electrolytes/density/analyse_density.py +++ /dev/null @@ -1,298 +0,0 @@ -"""Analyse density benchmark for LiTFSI/H2O electrolyte.""" - -from __future__ import annotations - -import json -from pathlib import Path - -import plotly.graph_objects as go -import pytest - -from ml_peg.analysis.utils.utils import load_metrics_config - -# --- Paths ------------------------------------------------------------------- - -CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs" -CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "density" / "outputs" -APP_ROOT = Path(__file__).resolve().parents[3] / "app" -OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density" - -MODELS = [ - "matpes-r2scan", - "mace-mpa-0-medium", - "mace-omat-0-medium", - "mace-mp-0b3", - "mace-mh-1-omat", - "mace-mh-1-omol", -] - -RHO_EXP = 1.7126 # g/cm³ (Gilbert et al., JCED 2017, DOI: 10.1021/acs.jced.7b00135) - -# Map benchmark model names → ml-peg registry names (for app summary table) -MODEL_NAME_MAP = { - "matpes-r2scan": "mace-matpes-r2scan", - "mace-mpa-0-medium": "mace-mpa-0", - "mace-omat-0-medium": "mace-omat-0", -} - -METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") -DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( - METRICS_CONFIG_PATH -) - - -# --- Helpers ----------------------------------------------------------------- - - -def load_density_results() -> dict[str, dict]: - """ - Load density.json for each model. - - Returns - ------- - dict[str, dict] - Per-model density results keyed by model name. - """ - results = {} - for model in MODELS: - json_path = CALC_PATH / model / "density.json" - if json_path.exists(): - with open(json_path) as f: - results[model] = json.load(f) - return results - - -def normalize_metric(value: float, good: float, bad: float) -> float: - """ - Normalize metric to [0, 1] where 0=good, 1=bad. - - Parameters - ---------- - value : float - Raw metric value. - good : float - Value corresponding to score 0. - bad : float - Value corresponding to score 1. - - Returns - ------- - float - Normalized score clipped to [0, 1]. - """ - if bad == good: - return 0.0 if value <= good else 1.0 - score = (value - good) / (bad - good) - return max(0.0, min(1.0, score)) - - -# --- Plot builders ----------------------------------------------------------- - - -def build_density_bar_chart(data: dict[str, dict]) -> go.Figure: - """ - Build bar chart of density per model. - - Parameters - ---------- - data : dict[str, dict] - Per-model density results. - - Returns - ------- - go.Figure - Plotly bar chart figure. - """ - fig = go.Figure() - for model in MODELS: - if model not in data: - continue - fig.add_trace( - go.Bar( - x=[model], - y=[data[model]["rho_mean"]], - error_y={"type": "data", "array": [data[model]["rho_std"]]}, - name=model, - ) - ) - - fig.add_hline( - y=RHO_EXP, - line_dash="dash", - line_color="red", - annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3, Gilbert 2017)", - ) - fig.update_layout( - title="Electrolyte Density (LiTFSI/H2O, 21 m)", - yaxis_title="Density / g cm⁻³", - showlegend=False, - ) - return fig - - -def build_density_timeseries(data: dict[str, dict]) -> go.Figure: - """ - Build density vs time plot for all models. - - Parameters - ---------- - data : dict[str, dict] - Per-model density results. - - Returns - ------- - go.Figure - Plotly timeseries figure. - """ - fig = go.Figure() - for model in MODELS: - if model not in data: - continue - d = data[model] - fig.add_trace( - go.Scatter( - x=d["time_full"], - y=d["density_full"], - mode="lines", - name=model, - opacity=0.8, - ) - ) - - fig.add_hline( - y=RHO_EXP, - line_dash="dash", - line_color="black", - annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3)", - ) - fig.update_layout( - title="NPT Density vs Time (LiTFSI/H2O)", - xaxis_title="Time / ps", - yaxis_title="Density / g cm⁻³", - ) - return fig - - -# --- Metrics table ----------------------------------------------------------- - - -def build_metrics_table(data: dict[str, dict]) -> dict: - """ - Build metrics table JSON compatible with ml-peg app format. - - Parameters - ---------- - data : dict[str, dict] - Per-model density results. - - Returns - ------- - dict - Table with data, columns, tooltips, and thresholds. - """ - rows = [] - for model in MODELS: - if model not in data: - continue - d = data[model] - abs_err = d["rho_abs_error"] - pct_err = abs(d["rho_error_pct"]) - - score_abs = normalize_metric( - abs_err, - DEFAULT_THRESHOLDS["Density Error"]["good"], - DEFAULT_THRESHOLDS["Density Error"]["bad"], - ) - score_pct = normalize_metric( - pct_err, - DEFAULT_THRESHOLDS["Density Error (%)"]["good"], - DEFAULT_THRESHOLDS["Density Error (%)"]["bad"], - ) - - w_abs = DEFAULT_WEIGHTS.get("Density Error", 1.0) - w_pct = DEFAULT_WEIGHTS.get("Density Error (%)", 1.0) - total_w = w_abs + w_pct - overall = ( - 1.0 - (score_abs * w_abs + score_pct * w_pct) / total_w - if total_w > 0 - else 0.0 - ) - - rows.append( - { - "MLIP": model, - "id": MODEL_NAME_MAP.get(model, model), - "Density (g/cm3)": round(d["rho_mean"], 4), - "Density Error": round(abs_err, 4), - "Density Error (%)": round(pct_err, 2), - "Score": round(overall, 3), - } - ) - - rows.sort(key=lambda r: r["Score"], reverse=True) - - columns = [ - {"name": c, "id": c} - for c in [ - "MLIP", - "Density Error", - "Density Error (%)", - "Score", - ] - ] - - return { - "data": rows, - "columns": columns, - "tooltip_header": DEFAULT_TOOLTIPS, - "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()}, - "weights": dict(DEFAULT_WEIGHTS), - "model_name_map": MODEL_NAME_MAP, - } - - -# --- Pytest interface -------------------------------------------------------- - - -@pytest.fixture -def density_analysis(): - """ - Run full density analysis: table + plots. - - Returns - ------- - dict - Metrics table dictionary. - """ - data = load_density_results() - if not data: - pytest.skip("No density data found") - - OUT_PATH.mkdir(parents=True, exist_ok=True) - - table = build_metrics_table(data) - with open(OUT_PATH / "density_metrics_table.json", "w") as f: - json.dump(table, f, indent=2) - - fig_bar = build_density_bar_chart(data) - with open(OUT_PATH / "figure_density_bar.json", "w") as f: - f.write(fig_bar.to_json()) - - fig_ts = build_density_timeseries(data) - with open(OUT_PATH / "figure_density_timeseries.json", "w") as f: - f.write(fig_ts.to_json()) - - return table - - -def test_density(density_analysis) -> None: - """ - Run density benchmark — generates table and plots. - - Parameters - ---------- - density_analysis : dict - Fixture providing the metrics table. - """ - table = density_analysis - assert len(table["data"]) > 0, "No models produced density data" diff --git a/ml_peg/analysis/wise_electrolytes/density/metrics.yml b/ml_peg/analysis/wise_electrolytes/density/metrics.yml deleted file mode 100644 index f4b2bc014..000000000 --- a/ml_peg/analysis/wise_electrolytes/density/metrics.yml +++ /dev/null @@ -1,15 +0,0 @@ -metrics: - Density Error: - good: 0.0 - bad: 0.17 - unit: "g/cm3" - tooltip: "Absolute error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)" - level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017) - weight: 1 - Density Error (%): - good: 0.0 - bad: 10.0 - unit: "%" - tooltip: "Percent error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)" - level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017) - weight: 0 diff --git a/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py new file mode 100644 index 000000000..d71a48cee --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/analyse_litfsi_h2o_21m.py @@ -0,0 +1,721 @@ +"""Analyse the WiSE 21 m LiTFSI/H2O electrolyte benchmark.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import numpy as np +import plotly.graph_objects as go +import pytest +from scipy.interpolate import interp1d + +from ml_peg.analysis.utils.decorators import build_table +from ml_peg.analysis.utils.utils import load_metrics_config +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +# --- Paths ------------------------------------------------------------------- + +CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "litfsi_h2o_21m" / "outputs" +OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "litfsi_h2o_21m" +EXP_SAXS_PATH = ( + Path(__file__).resolve().parents[1] / "data" / "saxs_maginn.txt" +) + +MODELS = load_models(current_models) + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + +# --- Physical constants and reference values --------------------------------- + +# Box geometry (same for all models: p64_w170 NVT cell) +L_BOX = 27.4938 # Å (cubic NVT cell at experimental density) +V_BOX = L_BOX**3 # ų + +# Integration cutoff: first minimum of Li-O_total g(r) (r2SCAN AIMD) +R_CUT = 2.83 # Å +DR = 0.02 # Å (bin width used in calc_rdf.py) + +# Experimental references +RHO_EXP = 1.7126 # g/cm³ — Gilbert et al., JCED 62, 2056 (2017) +CN_EXP_WATER = 2.0 # Watanabe et al., JPCB 125, 7477 (2021) +CN_EXP_TFSI = 2.0 + + +# ============================================================================= +# Density helpers and figures +# ============================================================================= + + +def load_density_results() -> dict[str, dict]: + """ + Load density.json for each registered model that has calc output. + + Returns + ------- + dict[str, dict] + Per-model density results keyed by registry model name. + """ + results = {} + for model in MODELS: + json_path = CALC_PATH / model / "density.json" + if json_path.exists(): + with open(json_path) as f: + results[model] = json.load(f) + return results + + +def build_density_bar_chart(data: dict[str, dict]) -> go.Figure: + """ + Build bar chart of density per model. + + Parameters + ---------- + data : dict[str, dict] + Per-model density results. + + Returns + ------- + go.Figure + Plotly bar chart figure. + """ + fig = go.Figure() + for model in MODELS: + if model not in data: + continue + fig.add_trace( + go.Bar( + x=[model], + y=[data[model]["rho_mean"]], + error_y={"type": "data", "array": [data[model]["rho_std"]]}, + name=model, + ) + ) + + fig.add_hline( + y=RHO_EXP, + line_dash="dash", + line_color="red", + annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3, Gilbert 2017)", + ) + fig.update_layout( + title="Electrolyte Density (LiTFSI/H2O, 21 m)", + yaxis_title="Density / g cm⁻³", + showlegend=False, + ) + return fig + + +def build_density_timeseries(data: dict[str, dict]) -> go.Figure: + """ + Build density vs time plot for all models. + + Parameters + ---------- + data : dict[str, dict] + Per-model density results. + + Returns + ------- + go.Figure + Plotly timeseries figure. + """ + fig = go.Figure() + for model in MODELS: + if model not in data: + continue + d = data[model] + fig.add_trace( + go.Scatter( + x=d["time_full"], + y=d["density_full"], + mode="lines", + name=model, + opacity=0.8, + ) + ) + + fig.add_hline( + y=RHO_EXP, + line_dash="dash", + line_color="black", + annotation_text=f"Exp. ({RHO_EXP} g/cm\u00b3)", + ) + fig.update_layout( + title="NPT Density vs Time (LiTFSI/H2O)", + xaxis_title="Time / ps", + yaxis_title="Density / g cm⁻³", + ) + return fig + + +# ============================================================================= +# RDF / coordination number helpers and figures +# ============================================================================= + + +def compute_cn_from_gr( + gr: np.ndarray, + r: np.ndarray, + n_neighbor: int, + volume: float, + r_cut: float, + dr: float, +) -> float: + """ + Integrate g(r) to coordination number: CN = 4pi rho int_0^r_cut g(r) r^2 dr. + + Parameters + ---------- + gr : np.ndarray + Radial distribution function values. + r : np.ndarray + Radial distance values in angstrom. + n_neighbor : int + Number of neighbor atoms of the relevant species in the simulation box. + volume : float + Volume of the simulation box in angstrom^3. + r_cut : float + Integration cutoff distance in angstrom. + dr : float + Bin width in angstrom. + + Returns + ------- + float + Coordination number obtained by integrating g(r) up to ``r_cut``. + """ + rho = n_neighbor / volume + mask = r <= r_cut + return float(np.sum(4 * np.pi * rho * gr[mask] * r[mask] ** 2 * dr)) + + +def load_rdf_results() -> dict[str, dict]: + """ + Load rdf.json and rdf.npz for each registered model that has calc output. + + Returns + ------- + dict[str, dict] + Mapping from registry model name to a dict containing coordination + numbers, errors, radial distances, g(r) arrays, and frame count. + """ + results = {} + for model in MODELS: + json_path = CALC_PATH / model / "rdf.json" + npz_path = CALC_PATH / model / "rdf.npz" + if not json_path.exists() or not npz_path.exists(): + continue + + with open(json_path) as f: + meta = json.load(f) + + data = np.load(npz_path) + r = data["r"] + + n_ow = meta["n_O_water"] + n_of = meta["n_O_TFSI"] + + cn_w = compute_cn_from_gr(data["gr_LiO_water"], r, n_ow, V_BOX, R_CUT, DR) + cn_f = compute_cn_from_gr(data["gr_LiO_TFSI"], r, n_of, V_BOX, R_CUT, DR) + + results[model] = { + "cn_water": cn_w, + "cn_tfsi": cn_f, + "cn_total": cn_w + cn_f, + "err_water": abs(cn_w - CN_EXP_WATER), + "err_tfsi": abs(cn_f - CN_EXP_TFSI), + "r": r, + "gr_water": data["gr_LiO_water"], + "gr_tfsi": data["gr_LiO_TFSI"], + "gr_total": data["gr_LiO_total"], + "n_frames": meta["n_frames_used"], + } + return results + + +def build_cn_bar_chart(data: dict[str, dict]) -> go.Figure: + """ + Bar chart of CN_water and CN_TFSI per model with experimental reference. + + Parameters + ---------- + data : dict[str, dict] + Per-model RDF results as returned by :func:`load_rdf_results`. + + Returns + ------- + go.Figure + Plotly figure with grouped bars and experimental reference lines. + """ + fig = go.Figure() + + x_models = [m for m in MODELS if m in data] + cn_water_vals = [data[m]["cn_water"] for m in x_models] + cn_tfsi_vals = [data[m]["cn_tfsi"] for m in x_models] + + fig.add_trace( + go.Bar( + name="Li-Owater", + x=x_models, + y=cn_water_vals, + marker_color="steelblue", + ) + ) + fig.add_trace( + go.Bar( + name="Li-OTFSI", + x=x_models, + y=cn_tfsi_vals, + marker_color="coral", + ) + ) + + fig.add_hline( + y=CN_EXP_WATER, + line_dash="dash", + line_color="steelblue", + annotation_text=f"Exp. Owater ({CN_EXP_WATER})", + annotation_position="top right", + ) + fig.add_hline( + y=CN_EXP_TFSI, + line_dash="dash", + line_color="coral", + annotation_text=f"Exp. OTFSI ({CN_EXP_TFSI})", + annotation_position="bottom right", + ) + + fig.update_layout( + title="Li⁺ Coordination Numbers (LiTFSI/H₂O, 21 m)", + yaxis_title="Coordination Number", + barmode="group", + legend={"x": 0.01, "y": 0.99}, + ) + return fig + + +def build_gr_plot(data: dict[str, dict]) -> go.Figure: + """ + G(r) plot: Li-O_water and Li-O_TFSI for all models. + + Parameters + ---------- + data : dict[str, dict] + Per-model RDF results as returned by :func:`load_rdf_results`. + + Returns + ------- + go.Figure + Plotly figure with g(r) curves and integration cutoff line. + """ + fig = go.Figure() + + colors = [ + "#1f77b4", + "#ff7f0e", + "#2ca02c", + "#d62728", + "#9467bd", + "#8c564b", + "#e377c2", + "#7f7f7f", + ] + + for i, model in enumerate(MODELS): + if model not in data: + continue + d = data[model] + c = colors[i % len(colors)] + fig.add_trace( + go.Scatter( + x=d["r"], + y=d["gr_water"], + mode="lines", + name=f"{model} Ow", + line={"color": c, "dash": "solid"}, + legendgroup=model, + ) + ) + fig.add_trace( + go.Scatter( + x=d["r"], + y=d["gr_tfsi"], + mode="lines", + name=f"{model} OTFSI", + line={"color": c, "dash": "dot"}, + legendgroup=model, + ) + ) + + fig.add_vline( + x=R_CUT, + line_dash="dash", + line_color="gray", + annotation_text=f"r_cut={R_CUT} Å", + annotation_position="top right", + ) + + fig.update_layout( + title="Li-O Radial Distribution Functions (LiTFSI/H₂O, 21 m)", + xaxis_title="r / Å", + yaxis_title="g(r)", + xaxis_range=[1.0, 6.0], + ) + return fig + + +# ============================================================================= +# X-ray S(q) helpers and figures +# ============================================================================= + + +def load_experimental_sq() -> tuple[np.ndarray, np.ndarray]: + """ + Load experimental SAXS S(q) data. + + Returns + ------- + q_exp : np.ndarray + Scattering vector values in inverse angstroms. + sq_exp : np.ndarray + Experimental structure factor values. + """ + data = np.loadtxt(EXP_SAXS_PATH) + return data[:, 0], data[:, 1] + + +def load_computed_sq(model: str) -> tuple[np.ndarray, np.ndarray] | None: + """ + Load computed S(q) for a model. + + Parameters + ---------- + model : str + Registry name of the MLIP model. + + Returns + ------- + tuple of (np.ndarray, np.ndarray) or None + Scattering vector and structure factor arrays, or None if the + output file does not exist. + """ + json_path = CALC_PATH / model / "xray_sq.json" + if not json_path.exists(): + return None + with open(json_path) as f: + d = json.load(f) + return np.array(d["q"]), np.array(d["Sq"]) + + +def compute_r_factor( + q_exp: np.ndarray, + sq_exp: np.ndarray, + q_calc: np.ndarray, + sq_calc: np.ndarray, +) -> float: + """ + Compute R-factor: sum|S_exp - S_calc| / sum|S_exp|. + + Parameters + ---------- + q_exp : np.ndarray + Experimental scattering vector values. + sq_exp : np.ndarray + Experimental structure factor values. + q_calc : np.ndarray + Calculated scattering vector values. + sq_calc : np.ndarray + Calculated structure factor values. + + Returns + ------- + float + R-factor value, or NaN if no overlapping valid data exists. + """ + valid_calc = ~np.isnan(sq_calc) + q_min_calc = float(q_calc[valid_calc].min()) + q_max_exp = float(q_exp.max()) + + f = interp1d( + q_calc[valid_calc], + sq_calc[valid_calc], + kind="linear", + bounds_error=False, + fill_value=np.nan, + ) + + mask = (q_exp >= q_min_calc) & (q_exp <= q_max_exp) + sq_interp = f(q_exp[mask]) + sq_e = sq_exp[mask] + + ok = ~np.isnan(sq_interp) + if not ok.any(): + return float("nan") + + return float(np.sum(np.abs(sq_e[ok] - sq_interp[ok])) / np.sum(np.abs(sq_e[ok]))) + + +def find_first_peak( + q: np.ndarray, sq: np.ndarray, q_min: float = 0.5, q_max: float = 2.0 +) -> float: + """ + Find position of first peak in S(q). + + Parameters + ---------- + q : np.ndarray + Scattering vector values. + sq : np.ndarray + Structure factor values. + q_min : float, optional + Lower bound of the search window in inverse angstroms (default 0.5). + q_max : float, optional + Upper bound of the search window in inverse angstroms (default 2.0). + + Returns + ------- + float + Position of the first peak, or NaN if fewer than 3 valid points + exist in the search window. + """ + mask = (q >= q_min) & (q <= q_max) & ~np.isnan(sq) + if mask.sum() < 3: + return float("nan") + q_sub = q[mask] + sq_sub = sq[mask] + return float(q_sub[np.argmax(sq_sub)]) + + +def build_sq_comparison_plot( + q_exp: np.ndarray, + sq_exp: np.ndarray, + model_data: dict[str, tuple[np.ndarray, np.ndarray]], +) -> go.Figure: + """ + Build S(q) overlay plot comparing computed and experimental data. + + Parameters + ---------- + q_exp : np.ndarray + Experimental scattering vector values. + sq_exp : np.ndarray + Experimental structure factor values. + model_data : dict of str to tuple of (np.ndarray, np.ndarray) + Mapping from model name to (q, S(q)) arrays. + + Returns + ------- + go.Figure + Plotly figure with experimental and computed S(q) traces. + """ + fig = go.Figure() + + fig.add_trace( + go.Scatter( + x=q_exp.tolist(), + y=sq_exp.tolist(), + mode="lines", + name="Exp. (Zhang et al. 2021)", + line={"color": "black", "width": 2}, + ) + ) + + for model, (q, sq) in model_data.items(): + fig.add_trace( + go.Scatter( + x=q.tolist(), + y=sq.tolist(), + mode="lines", + name=model, + opacity=0.8, + ) + ) + + fig.update_layout( + title="X-ray Structure Factor S(q) — LiTFSI/H2O (21 m)", + xaxis_title="q / Å⁻¹", + yaxis_title="S(q) (Faber-Ziman)", + legend={"x": 0.60, "y": 0.98}, + ) + return fig + + +# ============================================================================= +# Pytest fixtures +# ============================================================================= + + +@pytest.fixture +def density_results() -> dict[str, dict]: + """ + Load density results and write density plot JSONs. + + Returns + ------- + dict[str, dict] + Per-model density results keyed by registry model name. + """ + data = load_density_results() + if not data: + pytest.skip("No density data found") + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + fig_bar = build_density_bar_chart(data) + with open(OUT_PATH / "figure_density_bar.json", "w") as f: + f.write(fig_bar.to_json()) + + fig_ts = build_density_timeseries(data) + with open(OUT_PATH / "figure_density_timeseries.json", "w") as f: + f.write(fig_ts.to_json()) + + return data + + +@pytest.fixture +def rdf_results() -> dict[str, dict]: + """ + Load RDF results and write RDF plot JSONs. + + Returns + ------- + dict[str, dict] + Per-model RDF results keyed by registry model name. + """ + data = load_rdf_results() + if not data: + pytest.skip("No RDF data found") + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + fig_cn = build_cn_bar_chart(data) + with open(OUT_PATH / "figure_cn_bar.json", "w") as f: + f.write(fig_cn.to_json()) + + fig_gr = build_gr_plot(data) + with open(OUT_PATH / "figure_gr.json", "w") as f: + f.write(fig_gr.to_json()) + + return data + + +@pytest.fixture +def xray_sf_results() -> dict[str, dict]: + """ + Compute per-model R-factor and peak-position errors and write S(q) plot. + + Returns + ------- + dict[str, dict] + Per-model results keyed by registry model name, containing + ``r_factor``, ``peak_calc``, ``peak_exp``, ``peak_position_error``. + """ + q_exp, sq_exp = load_experimental_sq() + peak_exp = find_first_peak(q_exp, sq_exp) + + model_data: dict[str, tuple[np.ndarray, np.ndarray]] = {} + results: dict[str, dict] = {} + + for model in MODELS: + data = load_computed_sq(model) + if data is None: + continue + q_calc, sq_calc = data + model_data[model] = (q_calc, sq_calc) + + r_factor = compute_r_factor(q_exp, sq_exp, q_calc, sq_calc) + peak_calc = find_first_peak(q_calc, sq_calc) + peak_error = ( + abs(peak_calc - peak_exp) if not np.isnan(peak_calc) else float("nan") + ) + + results[model] = { + "r_factor": r_factor, + "peak_calc": peak_calc, + "peak_exp": peak_exp, + "peak_position_error": peak_error, + } + + if not results: + pytest.skip("No S(q) data found") + + OUT_PATH.mkdir(parents=True, exist_ok=True) + + fig = build_sq_comparison_plot(q_exp, sq_exp, model_data) + with open(OUT_PATH / "figure_xray_sq_comparison.json", "w") as f: + f.write(fig.to_json()) + + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "litfsi_h2o_21m_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + weights=DEFAULT_WEIGHTS, +) +def metrics( + density_results: dict[str, dict], + rdf_results: dict[str, dict], + xray_sf_results: dict[str, dict], +) -> dict[str, dict]: + """ + Build per-metric dicts consumed by the ``build_table`` decorator. + + Parameters + ---------- + density_results + Per-model density results. + rdf_results + Per-model RDF / coordination-number results. + xray_sf_results + Per-model R-factor and peak-position results. + + Returns + ------- + dict[str, dict] + Metric names mapped to ``{model_name: value}`` dicts. The metric + order matches ``metrics.yml`` (Density Error, Density Error (%), + CN Li-O_water Error, CN Li-O_TFSI Error, S(q) R-factor, + First Peak Position Error). + """ + return { + "Density Error": { + model: d["rho_abs_error"] for model, d in density_results.items() + }, + "Density Error (%)": { + model: abs(d["rho_error_pct"]) for model, d in density_results.items() + }, + "CN Li-O_water Error": { + model: d["err_water"] for model, d in rdf_results.items() + }, + "CN Li-O_TFSI Error": { + model: d["err_tfsi"] for model, d in rdf_results.items() + }, + "S(q) R-factor": { + model: r["r_factor"] for model, r in xray_sf_results.items() + }, + "First Peak Position Error": { + model: r["peak_position_error"] for model, r in xray_sf_results.items() + }, + } + + +def test_litfsi_h2o_21m(metrics: dict[str, dict]) -> None: + """ + Run the consolidated WiSE LiTFSI/H2O benchmark. + + Parameters + ---------- + metrics + Per-metric values for all models. + """ + assert metrics, "No models produced data for the WiSE LiTFSI/H2O benchmark" diff --git a/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml new file mode 100644 index 000000000..0f41d821a --- /dev/null +++ b/ml_peg/analysis/wise_electrolytes/litfsi_h2o_21m/metrics.yml @@ -0,0 +1,43 @@ +metrics: + Density Error: + good: 0.0 + bad: 0.17 + unit: "g/cm3" + tooltip: "Absolute error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)" + level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017) + weight: 1 + Density Error (%): + good: 0.0 + bad: 10.0 + unit: "%" + tooltip: "Percent error in NPT density vs experimental (1.7126 g/cm3, Gilbert et al. JCED 2017)" + level_of_theory: Experimental (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017) + weight: 0 + CN Li-O_water Error: + good: 0.0 + bad: 0.5 + unit: "-" + tooltip: "Absolute error in Li-O_water coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)" + level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021) + weight: 1 + CN Li-O_TFSI Error: + good: 0.0 + bad: 0.5 + unit: "-" + tooltip: "Absolute error in Li-O_TFSI coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)" + level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021) + weight: 1 + S(q) R-factor: + good: 0.0 + bad: 0.5 + unit: "-" + tooltip: "R-factor between computed and experimental X-ray S(q): sum|S_exp - S_calc| / sum|S_exp|" + level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021) + weight: 1 + First Peak Position Error: + good: 0.0 + bad: 0.3 + unit: "1/A" + tooltip: "Absolute error in position of first S(q) peak (~1.0 A-1)" + level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021) + weight: 0 diff --git a/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py b/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py deleted file mode 100644 index 8df3acd2a..000000000 --- a/ml_peg/analysis/wise_electrolytes/rdf/analyse_rdf.py +++ /dev/null @@ -1,434 +0,0 @@ -"""Analyse Li-O RDF benchmark for LiTFSI/H2O electrolyte.""" -# Reads rdf.json and rdf.npz files from calc outputs and generates -# scoring tables and g(r) plots for the ml-peg dashboard. -# -# Reference coordination numbers (Watanabe et al., J. Phys. Chem. B 125, 7477, 2021): -# CN(Li-O_water) = 2.0 } ~18.5 m LiTFSI/H2O, -# CN(Li-O_TFSI) = 2.0 } neutron diffraction, 6Li/7Li + H/D substitution -# -# Integration cutoff: 2.83 Å (first minimum of Li-O_total g(r) from r2SCAN AIMD). -# Trajectories: NVT 50 ps (dt=0.1 ps/frame, 501 frames, equilibrated 50-100 ps window). -# Box length: 27.4938 Å (cubic, p64_w170 cell: 64 LiTFSI + 170 H2O = 1534 atoms). - -from __future__ import annotations - -import json -from pathlib import Path - -import numpy as np -import plotly.graph_objects as go -import pytest - -from ml_peg.analysis.utils.utils import load_metrics_config - -# --- Paths ------------------------------------------------------------------- - -CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs" -CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "rdf" / "outputs" -APP_ROOT = Path(__file__).resolve().parents[3] / "app" -OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf" - -MODELS = [ - "matpes-r2scan", - "mace-mpa-0-medium", - "mace-omat-0-medium", - "mace-mp-0b3", - "mace-mh-1-omat", - "mace-mh-1-omol", -] - -# --- Physical constants and reference values --------------------------------- - -# Box geometry (same for all models: p64_w170 NVT cell) -L_BOX = 27.4938 # Å (cubic NVT cell at experimental density) -V_BOX = L_BOX**3 # ų - -# Integration cutoff: first minimum of Li-O_total g(r) -R_CUT = 2.83 # Å -DR = 0.02 # Å (bin width used in calc_rdf.py) - -# Experimental reference: Watanabe et al., JPCB 2021, ~18.5 m LiTFSI/H2O -CN_EXP_WATER = 2.0 -CN_EXP_TFSI = 2.0 - -# Map benchmark model names → ml-peg registry names (for app summary table) -MODEL_NAME_MAP = { - "matpes-r2scan": "mace-matpes-r2scan", - "mace-mpa-0-medium": "mace-mpa-0", - "mace-omat-0-medium": "mace-omat-0", -} - -METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") -DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( - METRICS_CONFIG_PATH -) - - -# --- Helpers ----------------------------------------------------------------- - - -def compute_cn_from_gr( - gr: np.ndarray, - r: np.ndarray, - n_neighbor: int, - volume: float, - r_cut: float, - dr: float, -) -> float: - """ - Integrate g(r) to coordination number: CN = 4pi rho int_0^r_cut g(r) r^2 dr. - - Parameters - ---------- - gr : np.ndarray - Radial distribution function values. - r : np.ndarray - Radial distance values in angstrom. - n_neighbor : int - Number of neighbor atoms of the relevant species in the simulation box. - volume : float - Volume of the simulation box in angstrom^3. - r_cut : float - Integration cutoff distance in angstrom. - dr : float - Bin width in angstrom. - - Returns - ------- - float - Coordination number obtained by integrating g(r) up to ``r_cut``. - """ - rho = n_neighbor / volume - mask = r <= r_cut - return float(np.sum(4 * np.pi * rho * gr[mask] * r[mask] ** 2 * dr)) - - -def load_rdf_results() -> dict[str, dict]: - """ - Load rdf.json and rdf.npz for each model, recompute CN at r_cut=2.83 A. - - Returns - ------- - dict[str, dict] - Mapping from model name to a dict containing coordination numbers, - errors, radial distances, g(r) arrays, and frame count. - """ - results = {} - for model in MODELS: - json_path = CALC_PATH / model / "rdf.json" - npz_path = CALC_PATH / model / "rdf.npz" - if not json_path.exists() or not npz_path.exists(): - continue - - with open(json_path) as f: - meta = json.load(f) - - data = np.load(npz_path) - r = data["r"] - - n_ow = meta["n_O_water"] - n_of = meta["n_O_TFSI"] - - cn_w = compute_cn_from_gr(data["gr_LiO_water"], r, n_ow, V_BOX, R_CUT, DR) - cn_f = compute_cn_from_gr(data["gr_LiO_TFSI"], r, n_of, V_BOX, R_CUT, DR) - - results[model] = { - "cn_water": cn_w, - "cn_tfsi": cn_f, - "cn_total": cn_w + cn_f, - "err_water": abs(cn_w - CN_EXP_WATER), - "err_tfsi": abs(cn_f - CN_EXP_TFSI), - "r": r, - "gr_water": data["gr_LiO_water"], - "gr_tfsi": data["gr_LiO_TFSI"], - "gr_total": data["gr_LiO_total"], - "n_frames": meta["n_frames_used"], - } - return results - - -def normalize_metric(value: float, good: float, bad: float) -> float: - """ - Map value linearly: good to 1.0, bad to 0.0, clipped to [0, 1]. - - Parameters - ---------- - value : float - The metric value to normalize. - good : float - Threshold value that maps to a score of 1.0. - bad : float - Threshold value that maps to a score of 0.0. - - Returns - ------- - float - Normalized score clipped to the range [0, 1]. - """ - if good == bad: - return 1.0 if value == good else 0.0 - t = (value - bad) / (good - bad) - return max(0.0, min(1.0, float(t))) - - -# --- Plot builders ----------------------------------------------------------- - - -def build_cn_bar_chart(data: dict[str, dict]) -> go.Figure: - """ - Bar chart of CN_water and CN_TFSI per model with experimental reference. - - Parameters - ---------- - data : dict[str, dict] - Per-model RDF results as returned by :func:`load_rdf_results`. - - Returns - ------- - go.Figure - Plotly figure with grouped bars and experimental reference lines. - """ - fig = go.Figure() - - x_models = [m for m in MODELS if m in data] - cn_water_vals = [data[m]["cn_water"] for m in x_models] - cn_tfsi_vals = [data[m]["cn_tfsi"] for m in x_models] - - fig.add_trace( - go.Bar( - name="Li-Owater", - x=x_models, - y=cn_water_vals, - marker_color="steelblue", - ) - ) - fig.add_trace( - go.Bar( - name="Li-OTFSI", - x=x_models, - y=cn_tfsi_vals, - marker_color="coral", - ) - ) - - # Experimental reference lines - fig.add_hline( - y=CN_EXP_WATER, - line_dash="dash", - line_color="steelblue", - annotation_text=f"Exp. Owater ({CN_EXP_WATER})", - annotation_position="top right", - ) - fig.add_hline( - y=CN_EXP_TFSI, - line_dash="dash", - line_color="coral", - annotation_text=f"Exp. OTFSI ({CN_EXP_TFSI})", - annotation_position="bottom right", - ) - - fig.update_layout( - title="Li⁺ Coordination Numbers (LiTFSI/H₂O, 21 m)", - yaxis_title="Coordination Number", - barmode="group", - legend={"x": 0.01, "y": 0.99}, - ) - return fig - - -def build_gr_plot(data: dict[str, dict]) -> go.Figure: - """ - G(r) plot: Li-O_water and Li-O_TFSI for all models. - - Parameters - ---------- - data : dict[str, dict] - Per-model RDF results as returned by :func:`load_rdf_results`. - - Returns - ------- - go.Figure - Plotly figure with g(r) curves and integration cutoff line. - """ - fig = go.Figure() - - colors = [ - "#1f77b4", - "#ff7f0e", - "#2ca02c", - "#d62728", - "#9467bd", - "#8c564b", - ] - - for i, model in enumerate(MODELS): - if model not in data: - continue - d = data[model] - c = colors[i % len(colors)] - fig.add_trace( - go.Scatter( - x=d["r"], - y=d["gr_water"], - mode="lines", - name=f"{model} Ow", - line={"color": c, "dash": "solid"}, - legendgroup=model, - ) - ) - fig.add_trace( - go.Scatter( - x=d["r"], - y=d["gr_tfsi"], - mode="lines", - name=f"{model} OTFSI", - line={"color": c, "dash": "dot"}, - legendgroup=model, - ) - ) - - # Mark integration cutoff - fig.add_vline( - x=R_CUT, - line_dash="dash", - line_color="gray", - annotation_text=f"r_cut={R_CUT} Å", - annotation_position="top right", - ) - - fig.update_layout( - title="Li-O Radial Distribution Functions (LiTFSI/H₂O, 21 m)", - xaxis_title="r / Å", - yaxis_title="g(r)", - xaxis_range=[1.0, 6.0], - ) - return fig - - -# --- Metrics table ----------------------------------------------------------- - - -def build_metrics_table(data: dict[str, dict]) -> dict: - """ - Build metrics table JSON for ml-peg app. - - Parameters - ---------- - data : dict[str, dict] - Per-model RDF results as returned by :func:`load_rdf_results`. - - Returns - ------- - dict - Table payload with keys ``data``, ``columns``, ``tooltip_header``, - ``thresholds``, and ``reference``. - """ - rows = [] - for model in MODELS: - if model not in data: - continue - d = data[model] - - score_w = normalize_metric( - d["err_water"], - DEFAULT_THRESHOLDS["CN Li-O_water Error"]["good"], - DEFAULT_THRESHOLDS["CN Li-O_water Error"]["bad"], - ) - score_f = normalize_metric( - d["err_tfsi"], - DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["good"], - DEFAULT_THRESHOLDS["CN Li-O_TFSI Error"]["bad"], - ) - - w_w = DEFAULT_WEIGHTS.get("CN Li-O_water Error", 1.0) - w_f = DEFAULT_WEIGHTS.get("CN Li-O_TFSI Error", 1.0) - total_w = w_w + w_f - overall = (score_w * w_w + score_f * w_f) / total_w if total_w > 0 else 0.0 - - rows.append( - { - "MLIP": model, - "id": MODEL_NAME_MAP.get(model, model), - "CN Li-O_water": round(d["cn_water"], 3), - "CN Li-O_TFSI": round(d["cn_tfsi"], 3), - "CN Li-O_water Error": round(d["err_water"], 3), - "CN Li-O_TFSI Error": round(d["err_tfsi"], 3), - "Score": round(overall, 3), - } - ) - - rows.sort(key=lambda r: r["Score"], reverse=True) - - columns = [ - {"name": c, "id": c} - for c in [ - "MLIP", - "CN Li-O_water Error", - "CN Li-O_TFSI Error", - "Score", - ] - ] - - return { - "data": rows, - "columns": columns, - "tooltip_header": DEFAULT_TOOLTIPS, - "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()}, - "weights": dict(DEFAULT_WEIGHTS), - "model_name_map": MODEL_NAME_MAP, - "reference": { - "CN_water": CN_EXP_WATER, - "CN_TFSI": CN_EXP_TFSI, - "citation": "Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)", - "concentration": "~18.5 m LiTFSI/H2O", - "method": "neutron diffraction with 6Li/7Li + H/D isotopic substitution", - "r_cut_angstrom": R_CUT, - }, - } - - -# --- Pytest interface -------------------------------------------------------- - - -@pytest.fixture -def rdf_analysis(): - """ - Run full RDF analysis: table + plots. - - Returns - ------- - dict - Metrics table payload written to ``rdf_metrics_table.json``. - """ - data = load_rdf_results() - if not data: - pytest.skip("No RDF data found") - - OUT_PATH.mkdir(parents=True, exist_ok=True) - - table = build_metrics_table(data) - with open(OUT_PATH / "rdf_metrics_table.json", "w") as f: - json.dump(table, f, indent=2) - - fig_cn = build_cn_bar_chart(data) - with open(OUT_PATH / "figure_cn_bar.json", "w") as f: - f.write(fig_cn.to_json()) - - fig_gr = build_gr_plot(data) - with open(OUT_PATH / "figure_gr.json", "w") as f: - f.write(fig_gr.to_json()) - - return table - - -def test_rdf(rdf_analysis) -> None: - """ - Run RDF benchmark -- generates table and plots. - - Parameters - ---------- - rdf_analysis : dict - Metrics table payload provided by the ``rdf_analysis`` fixture. - """ - table = rdf_analysis - assert len(table["data"]) > 0, "No models produced RDF data" diff --git a/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml b/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml deleted file mode 100644 index e6a67093a..000000000 --- a/ml_peg/analysis/wise_electrolytes/rdf/metrics.yml +++ /dev/null @@ -1,15 +0,0 @@ -metrics: - CN Li-O_water Error: - good: 0.0 - bad: 0.5 - unit: "-" - tooltip: "Absolute error in Li-O_water coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)" - level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021) - weight: 1 - CN Li-O_TFSI Error: - good: 0.0 - bad: 0.5 - unit: "-" - tooltip: "Absolute error in Li-O_TFSI coordination number vs experimental reference (CN=2.0, Watanabe et al. J. Phys. Chem. B 2021, ~18.5 m, neutron diffraction with isotopic substitution)" - level_of_theory: Experimental (neutron diffraction, Watanabe et al. JPCB 2021) - weight: 1 diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py b/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py deleted file mode 100644 index 51b45c2b4..000000000 --- a/ml_peg/analysis/wise_electrolytes/xray_sf/analyse_xray_sf.py +++ /dev/null @@ -1,398 +0,0 @@ -"""Analyse X-ray structure factor benchmark for LiTFSI/H2O electrolyte.""" - -from __future__ import annotations - -import json -from pathlib import Path - -import numpy as np -import plotly.graph_objects as go -import pytest -from scipy.interpolate import interp1d - -from ml_peg.analysis.utils.utils import load_metrics_config - -# --- Paths ------------------------------------------------------------------- - -CALCS_ROOT = Path(__file__).resolve().parents[3] / "calcs" -CALC_PATH = CALCS_ROOT / "wise_electrolytes" / "xray_sf" / "outputs" -APP_ROOT = Path(__file__).resolve().parents[3] / "app" -OUT_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf" - -# Experimental data bundled with the benchmark -EXP_PATH = Path(__file__).resolve().parents[1] / "data" / "saxs_maginn.txt" - -MODELS = [ - "matpes-r2scan", - "mace-mpa-0-medium", - "mace-omat-0-medium", - "mace-mp-0b3", - "mace-mh-1-omat", - "mace-mh-1-omol", -] - -# Map benchmark model names → ml-peg registry names (for app summary table) -MODEL_NAME_MAP = { - "matpes-r2scan": "mace-matpes-r2scan", - "mace-mpa-0-medium": "mace-mpa-0", - "mace-omat-0-medium": "mace-omat-0", -} - -METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") -DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( - METRICS_CONFIG_PATH -) - - -# --- Helpers ----------------------------------------------------------------- - - -def load_experimental_sq() -> tuple[np.ndarray, np.ndarray]: - """ - Load experimental SAXS S(q) data. - - Returns - ------- - q_exp : np.ndarray - Scattering vector values in inverse angstroms. - sq_exp : np.ndarray - Experimental structure factor values. - """ - data = np.loadtxt(EXP_PATH) - return data[:, 0], data[:, 1] - - -def load_computed_sq(model: str) -> tuple[np.ndarray, np.ndarray] | None: - """ - Load computed S(q) for a model. - - Parameters - ---------- - model : str - Name of the MLIP model. - - Returns - ------- - tuple of (np.ndarray, np.ndarray) or None - Scattering vector and structure factor arrays, or None if the - output file does not exist. - """ - json_path = CALC_PATH / model / "xray_sq.json" - if not json_path.exists(): - return None - with open(json_path) as f: - d = json.load(f) - return np.array(d["q"]), np.array(d["Sq"]) - - -def compute_r_factor( - q_exp: np.ndarray, - sq_exp: np.ndarray, - q_calc: np.ndarray, - sq_calc: np.ndarray, -) -> float: - """ - Compute R-factor: sum|S_exp - S_calc| / sum|S_exp|. - - The q range used is the overlap between experimental and calculated data: - q_min is the minimum q of the calculated S(q), set by the simulation cell - size (2*pi/L), and q_max is the maximum q of the experimental data. - - Parameters - ---------- - q_exp : np.ndarray - Experimental scattering vector values. - sq_exp : np.ndarray - Experimental structure factor values. - q_calc : np.ndarray - Calculated scattering vector values. - sq_calc : np.ndarray - Calculated structure factor values. - - Returns - ------- - float - R-factor value, or NaN if no overlapping valid data exists. - """ - valid_calc = ~np.isnan(sq_calc) - q_min_calc = float(q_calc[valid_calc].min()) - q_max_exp = float(q_exp.max()) - - f = interp1d( - q_calc[valid_calc], - sq_calc[valid_calc], - kind="linear", - bounds_error=False, - fill_value=np.nan, - ) - - mask = (q_exp >= q_min_calc) & (q_exp <= q_max_exp) - sq_interp = f(q_exp[mask]) - sq_e = sq_exp[mask] - - ok = ~np.isnan(sq_interp) - if not ok.any(): - return float("nan") - - return float(np.sum(np.abs(sq_e[ok] - sq_interp[ok])) / np.sum(np.abs(sq_e[ok]))) - - -def find_first_peak( - q: np.ndarray, sq: np.ndarray, q_min: float = 0.5, q_max: float = 2.0 -) -> float: - """ - Find position of first peak in S(q). - - Parameters - ---------- - q : np.ndarray - Scattering vector values. - sq : np.ndarray - Structure factor values. - q_min : float, optional - Lower bound of the search window in inverse angstroms (default 0.5). - q_max : float, optional - Upper bound of the search window in inverse angstroms (default 2.0). - - Returns - ------- - float - Position of the first peak, or NaN if fewer than 3 valid points - exist in the search window. - """ - mask = (q >= q_min) & (q <= q_max) & ~np.isnan(sq) - if mask.sum() < 3: - return float("nan") - q_sub = q[mask] - sq_sub = sq[mask] - return float(q_sub[np.argmax(sq_sub)]) - - -def normalize_metric(value: float, good: float, bad: float) -> float: - """ - Normalize a metric value to [0, 1] where 0 is good and 1 is bad. - - Parameters - ---------- - value : float - Raw metric value. - good : float - Threshold corresponding to the best achievable score (0). - bad : float - Threshold corresponding to the worst score (1). - - Returns - ------- - float - Normalized score clipped to [0, 1]. - """ - if bad == good: - return 0.0 if value <= good else 1.0 - score = (value - good) / (bad - good) - return max(0.0, min(1.0, score)) - - -# --- Plot builders ----------------------------------------------------------- - - -def build_sq_comparison_plot( - q_exp: np.ndarray, - sq_exp: np.ndarray, - model_data: dict[str, tuple[np.ndarray, np.ndarray]], -) -> go.Figure: - """ - Build S(q) overlay plot comparing computed and experimental data. - - Parameters - ---------- - q_exp : np.ndarray - Experimental scattering vector values. - sq_exp : np.ndarray - Experimental structure factor values. - model_data : dict of str to tuple of (np.ndarray, np.ndarray) - Mapping from model name to (q, S(q)) arrays. - - Returns - ------- - go.Figure - Plotly figure with experimental and computed S(q) traces. - """ - fig = go.Figure() - - fig.add_trace( - go.Scatter( - x=q_exp.tolist(), - y=sq_exp.tolist(), - mode="lines", - name="Exp. (Zhang et al. 2021)", - line={"color": "black", "width": 2}, - ) - ) - - for model, (q, sq) in model_data.items(): - fig.add_trace( - go.Scatter( - x=q.tolist(), - y=sq.tolist(), - mode="lines", - name=model, - opacity=0.8, - ) - ) - - fig.update_layout( - title="X-ray Structure Factor S(q) — LiTFSI/H2O (21 m)", - xaxis_title="q / Å⁻¹", - yaxis_title="S(q) (Faber-Ziman)", - legend={"x": 0.60, "y": 0.98}, - ) - return fig - - -def build_metrics_table(results: dict[str, dict]) -> dict: - """ - Build metrics table as a JSON-serialisable dictionary. - - Parameters - ---------- - results : dict of str to dict - Per-model results containing ``r_factor``, ``peak_position_error``, - ``peak_calc``, and ``peak_exp`` entries. - - Returns - ------- - dict - Dictionary with ``data`` (list of row dicts), ``columns``, - ``tooltip_header``, and ``thresholds`` keys. - """ - rows = [] - for model in MODELS: - if model not in results: - continue - r = results[model] - r_factor = r["r_factor"] - peak_err = r["peak_position_error"] - - score_r = normalize_metric( - r_factor, - DEFAULT_THRESHOLDS["S(q) R-factor"]["good"], - DEFAULT_THRESHOLDS["S(q) R-factor"]["bad"], - ) - score_p = normalize_metric( - peak_err, - DEFAULT_THRESHOLDS["First Peak Position Error"]["good"], - DEFAULT_THRESHOLDS["First Peak Position Error"]["bad"], - ) - - w_r = DEFAULT_WEIGHTS.get("S(q) R-factor", 1.0) - w_p = DEFAULT_WEIGHTS.get("First Peak Position Error", 1.0) - total_w = w_r + w_p - overall = ( - 1.0 - (score_r * w_r + score_p * w_p) / total_w if total_w > 0 else 0.0 - ) - - rows.append( - { - "MLIP": model, - "id": MODEL_NAME_MAP.get(model, model), - "S(q) R-factor": round(r_factor, 4), - "First Peak Position Error": round(peak_err, 4), - "First Peak (calc)": round(r["peak_calc"], 2), - "First Peak (exp)": round(r["peak_exp"], 2), - "Score": round(overall, 3), - } - ) - - rows.sort(key=lambda r: r["Score"], reverse=True) - - columns = [ - {"name": c, "id": c} - for c in [ - "MLIP", - "S(q) R-factor", - "First Peak Position Error", - "Score", - ] - ] - - return { - "data": rows, - "columns": columns, - "tooltip_header": DEFAULT_TOOLTIPS, - "thresholds": {k: dict(v) for k, v in DEFAULT_THRESHOLDS.items()}, - "weights": dict(DEFAULT_WEIGHTS), - "model_name_map": MODEL_NAME_MAP, - } - - -# --- Pytest interface -------------------------------------------------------- - - -@pytest.fixture -def xray_sf_analysis(): - """ - Run full X-ray S(q) analysis and write outputs. - - Returns - ------- - dict - Metrics table dictionary produced by ``build_metrics_table``. - """ - q_exp, sq_exp = load_experimental_sq() - peak_exp = find_first_peak(q_exp, sq_exp) - - model_data = {} - results = {} - - for model in MODELS: - data = load_computed_sq(model) - if data is None: - continue - q_calc, sq_calc = data - model_data[model] = (q_calc, sq_calc) - - r_factor = compute_r_factor( - q_exp, sq_exp, q_calc, sq_calc - ) # range: [q_min_calc, q_max_exp] - peak_calc = find_first_peak(q_calc, sq_calc) - peak_error = ( - abs(peak_calc - peak_exp) if not np.isnan(peak_calc) else float("nan") - ) - - results[model] = { - "r_factor": r_factor, - "peak_calc": peak_calc, - "peak_exp": peak_exp, - "peak_position_error": peak_error, - } - - if not results: - pytest.skip("No S(q) data found") - - OUT_PATH.mkdir(parents=True, exist_ok=True) - - table = build_metrics_table(results) - with open(OUT_PATH / "xray_sf_metrics_table.json", "w") as f: - json.dump(table, f, indent=2) - - fig = build_sq_comparison_plot(q_exp, sq_exp, model_data) - with open(OUT_PATH / "figure_xray_sq_comparison.json", "w") as f: - f.write(fig.to_json()) - - with open(OUT_PATH / "xray_sf_results.json", "w") as f: - json.dump(results, f, indent=2) - - return table - - -def test_xray_sf(xray_sf_analysis) -> None: - """ - Run X-ray structure factor benchmark. - - Parameters - ---------- - xray_sf_analysis : dict - Metrics table fixture providing analysis results. - """ - table = xray_sf_analysis - assert len(table["data"]) > 0, "No models produced S(q) data" diff --git a/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml b/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml deleted file mode 100644 index 310fd8824..000000000 --- a/ml_peg/analysis/wise_electrolytes/xray_sf/metrics.yml +++ /dev/null @@ -1,15 +0,0 @@ -metrics: - S(q) R-factor: - good: 0.0 - bad: 0.5 - unit: "-" - tooltip: "R-factor between computed and experimental X-ray S(q): sum|S_exp - S_calc| / sum|S_exp|" - level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021) - weight: 1 - First Peak Position Error: - good: 0.0 - bad: 0.3 - unit: "1/A" - tooltip: "Absolute error in position of first S(q) peak (~1.0 A-1)" - level_of_theory: Experimental (SAXS, Zhang et al., J. Phys. Chem. B 125, 4501, 2021) - weight: 0 diff --git a/ml_peg/app/wise_electrolytes/density/app_density.py b/ml_peg/app/wise_electrolytes/density/app_density.py deleted file mode 100644 index 2a9055d4f..000000000 --- a/ml_peg/app/wise_electrolytes/density/app_density.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Dash app for LiTFSI/H2O electrolyte density benchmark.""" - -from __future__ import annotations - -from pathlib import Path - -from dash import html - -from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_column -from ml_peg.app.utils.load import read_plot - -BENCHMARK_NAME = "wise_electrolytes_density" -APP_ROOT = Path(__file__).resolve().parents[2] -DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "density" - - -class DensityApp(BaseApp): - """Dash app for WiSE electrolyte density benchmark.""" - - def register_callbacks(self) -> None: - """Register interactive plot callbacks.""" - bar_chart = read_plot( - DATA_PATH / "figure_density_bar.json", - id=f"{BENCHMARK_NAME}-bar", - ) - timeseries = read_plot( - DATA_PATH / "figure_density_timeseries.json", - id=f"{BENCHMARK_NAME}-timeseries", - ) - - plot_from_table_column( - table_id=self.table_id, - plot_id=f"{BENCHMARK_NAME}-figure-placeholder", - column_to_plot={ - "Density Error": bar_chart, - "Density Error (%)": bar_chart, - "Score": timeseries, - }, - ) - - -def get_app() -> DensityApp: - """ - Return configured density benchmark app. - - Returns - ------- - DensityApp - Configured app instance. - """ - return DensityApp( - name="WiSE Density", - description=( - "NPT density of 21 m LiTFSI/H2O electrolyte at 298 K. " - "Experimental reference: 1.72 g/cm³ (Maginn et al., 2021)." - ), - docs_url="", - table_path=DATA_PATH / "density_metrics_table.json", - extra_components=[ - html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), - ], - ) - - -if __name__ == "__main__": - app = get_app() - app.run(port=8060, debug=True) diff --git a/ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py b/ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py new file mode 100644 index 000000000..98c46e56c --- /dev/null +++ b/ml_peg/app/wise_electrolytes/litfsi_h2o_21m/app_litfsi_h2o_21m.py @@ -0,0 +1,91 @@ +"""Dash app for the consolidated WiSE 21 m LiTFSI/H2O electrolyte benchmark.""" + +from __future__ import annotations + +from pathlib import Path + +from dash import html + +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import plot_from_table_column +from ml_peg.app.utils.load import read_plot + +BENCHMARK_NAME = "wise_electrolytes_litfsi_h2o_21m" +APP_ROOT = Path(__file__).resolve().parents[2] +DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "litfsi_h2o_21m" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/" + "wise_electrolytes.html#litfsi-h2o-21-m" +) + + +class LitfsiH2O21mApp(BaseApp): + """Dash app for the consolidated WiSE LiTFSI/H2O 21 m benchmark.""" + + def register_callbacks(self) -> None: + """Register interactive plot callbacks for all metrics.""" + density_bar = read_plot( + DATA_PATH / "figure_density_bar.json", + id=f"{BENCHMARK_NAME}-density-bar", + ) + density_timeseries = read_plot( + DATA_PATH / "figure_density_timeseries.json", + id=f"{BENCHMARK_NAME}-density-timeseries", + ) + cn_bar = read_plot( + DATA_PATH / "figure_cn_bar.json", + id=f"{BENCHMARK_NAME}-cn-bar", + ) + gr_plot = read_plot( + DATA_PATH / "figure_gr.json", + id=f"{BENCHMARK_NAME}-gr", + ) + sq_plot = read_plot( + DATA_PATH / "figure_xray_sq_comparison.json", + id=f"{BENCHMARK_NAME}-sq", + ) + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={ + "Density Error": density_bar, + "Density Error (%)": density_bar, + "CN Li-O_water Error": cn_bar, + "CN Li-O_TFSI Error": cn_bar, + "S(q) R-factor": sq_plot, + "First Peak Position Error": sq_plot, + "Score": gr_plot, + }, + ) + + +def get_app() -> LitfsiH2O21mApp: + """ + Return the configured WiSE LiTFSI/H2O 21 m benchmark app. + + Returns + ------- + LitfsiH2O21mApp + Configured app instance. + """ + return LitfsiH2O21mApp( + name="WiSE LiTFSI/H2O 21 m", + description=( + "Consolidated structural and thermodynamic benchmark for the " + "21 m LiTFSI/H2O 'water-in-salt' electrolyte: NPT density vs " + "experiment (Gilbert 2017), Li-O coordination numbers from RDFs " + "(Watanabe 2021), and X-ray structure factor S(q) (Zhang 2021). " + "Trajectories produced with LAMMPS+symmetrix on Adastra (MI250X)." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "litfsi_h2o_21m_metrics_table.json", + extra_components=[ + html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + ], + ) + + +if __name__ == "__main__": + app = get_app() + app.run(port=8060, debug=True) diff --git a/ml_peg/app/wise_electrolytes/rdf/app_rdf.py b/ml_peg/app/wise_electrolytes/rdf/app_rdf.py deleted file mode 100644 index 8de28f8be..000000000 --- a/ml_peg/app/wise_electrolytes/rdf/app_rdf.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Dash app for LiTFSI/H2O Li-O RDF / coordination number benchmark.""" - -from __future__ import annotations - -from pathlib import Path - -from dash import html - -from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_column -from ml_peg.app.utils.load import read_plot - -BENCHMARK_NAME = "wise_electrolytes_rdf" -APP_ROOT = Path(__file__).resolve().parents[2] -DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "rdf" - - -class RdfApp(BaseApp): - """Dash app for WiSE electrolyte Li-O RDF benchmark.""" - - def register_callbacks(self) -> None: - """Register interactive plot callbacks.""" - cn_bar = read_plot( - DATA_PATH / "figure_cn_bar.json", - id=f"{BENCHMARK_NAME}-cn-bar", - ) - gr_plot = read_plot( - DATA_PATH / "figure_gr.json", - id=f"{BENCHMARK_NAME}-gr", - ) - - plot_from_table_column( - table_id=self.table_id, - plot_id=f"{BENCHMARK_NAME}-figure-placeholder", - column_to_plot={ - "CN Li-O_water Error": cn_bar, - "CN Li-O_TFSI Error": cn_bar, - "Score": gr_plot, - }, - ) - - -def get_app() -> RdfApp: - """ - Return configured RDF benchmark app. - - Returns - ------- - RdfApp - Configured app instance. - """ - return RdfApp( - name="WiSE Li-O RDF", - description=( - "Li-O coordination numbers from radial distribution functions " - "of 21 m LiTFSI/H2O electrolyte (NVT, 298 K). " - "Reference: Watanabe et al., J. Phys. Chem. B 125, 7477 (2021)." - ), - docs_url="", - table_path=DATA_PATH / "rdf_metrics_table.json", - extra_components=[ - html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), - ], - ) - - -if __name__ == "__main__": - app = get_app() - app.run(port=8062, debug=True) diff --git a/ml_peg/app/wise_electrolytes/wise_electrolytes.yml b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml index dbfe8135c..5cf1c8b8a 100644 --- a/ml_peg/app/wise_electrolytes/wise_electrolytes.yml +++ b/ml_peg/app/wise_electrolytes/wise_electrolytes.yml @@ -2,6 +2,4 @@ title: WiSE Electrolytes description: Structural and thermodynamic properties of 21 m LiTFSI/H2O "water-in-salt" electrolytes from molecular dynamics weight: 0 benchmark_weights: - WiSE Density: 0.5 - WiSE X-ray S(q): 0.3 - WiSE Li-O RDF: 0.2 + WiSE LiTFSI/H2O 21 m: 1.0 diff --git a/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py b/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py deleted file mode 100644 index 87e0f78f7..000000000 --- a/ml_peg/app/wise_electrolytes/xray_sf/app_xray_sf.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Dash app for LiTFSI/H2O X-ray structure factor benchmark.""" - -from __future__ import annotations - -from pathlib import Path - -from dash import html - -from ml_peg.app.base_app import BaseApp -from ml_peg.app.utils.build_callbacks import plot_from_table_column -from ml_peg.app.utils.load import read_plot - -BENCHMARK_NAME = "wise_electrolytes_xray_sf" -APP_ROOT = Path(__file__).resolve().parents[2] -DATA_PATH = APP_ROOT / "data" / "wise_electrolytes" / "xray_sf" - - -class XraySfApp(BaseApp): - """Dash app for WiSE electrolyte X-ray S(q) benchmark.""" - - def register_callbacks(self) -> None: - """Register interactive plot callbacks.""" - sq_plot = read_plot( - DATA_PATH / "figure_xray_sq_comparison.json", - id=f"{BENCHMARK_NAME}-sq", - ) - - plot_from_table_column( - table_id=self.table_id, - plot_id=f"{BENCHMARK_NAME}-figure-placeholder", - column_to_plot={ - "S(q) R-factor": sq_plot, - "First Peak Position Error": sq_plot, - "Score": sq_plot, - }, - ) - - -def get_app() -> XraySfApp: - """ - Return configured X-ray S(q) benchmark app. - - Returns - ------- - XraySfApp - Configured app instance. - """ - return XraySfApp( - name="WiSE X-ray S(q)", - description=( - "X-ray structure factor S(q) of 21 m LiTFSI/H2O electrolyte from NVT MD. " - "Computed via dynasor with Cromer-Mann form factors " - "(Faber-Ziman normalization). " - "Experimental reference: SAXS (Maginn et al., J. Phys. Chem. B, 2021)." - ), - docs_url="", - table_path=DATA_PATH / "xray_sf_metrics_table.json", - extra_components=[ - html.Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), - ], - ) - - -if __name__ == "__main__": - app = get_app() - app.run(port=8061, debug=True) diff --git a/ml_peg/calcs/wise_electrolytes/density/calc_density.py b/ml_peg/calcs/wise_electrolytes/density/calc_density.py deleted file mode 100644 index 2d52a63d7..000000000 --- a/ml_peg/calcs/wise_electrolytes/density/calc_density.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Extract density from pre-computed NPT simulations of LiTFSI/H2O electrolyte. - -This benchmark reads pre-computed LAMMPS NPT thermo logs and extracts -equilibrium density for each MLIP model. The simulations use a p16_w42 -cell (382 atoms: 16 LiTFSI + 42 H2O) at 298 K, 1 atm. - -System: 21 m LiTFSI / H2O electrolyte. -Experimental density: 1.7126 g/cm3 -(Gilbert et al., J. Chem. Eng. Data 62, 2056 (2017), -DOI: 10.1021/acs.jced.7b00135). -""" - -from __future__ import annotations - -import json -import os -from pathlib import Path - -import pytest - -# --- Configuration ----------------------------------------------------------- - -# Path to converted data (will be replaced with S3 download for PR) -DATA_ROOT = Path( - os.environ.get( - "ML_PEG_WISE_DENSITY_DATA_ROOT", - "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/density", - ) -) -OUT_PATH = Path(__file__).parent / "outputs" - -MODELS = [ - "matpes-r2scan", - "mace-mpa-0-medium", - "mace-omat-0-medium", - "mace-mp-0b3", - "mace-mh-1-omat", - "mace-mh-1-omol", -] - -RHO_EXP = 1.7126 # g/cm³ (Gilbert et al., JCED 2017) - - -# --- Pytest interface (ml-peg convention) ------------------------------------ - - -@pytest.mark.parametrize("model", MODELS) -def test_extract_density(model: str) -> None: - """ - Extract and save density data for one model. - - Parameters - ---------- - model : str - Name of the MLIP model to extract density for. - """ - # Read pre-computed density data - json_path = DATA_ROOT / model / "density.json" - if not json_path.exists(): - pytest.skip(f"No density data for {model}") - - with open(json_path) as f: - result = json.load(f) - - # Save to outputs - out_dir = OUT_PATH / model - out_dir.mkdir(parents=True, exist_ok=True) - - with open(out_dir / "density.json", "w") as f: - json.dump(result, f, indent=2) - - # Basic sanity - assert result["rho_mean"] > 0, f"Negative density for {model}" - assert abs(result["rho_error_pct"]) < 50, f"Density error > 50% for {model}" diff --git a/ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py b/ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py new file mode 100644 index 000000000..857669bf8 --- /dev/null +++ b/ml_peg/calcs/wise_electrolytes/litfsi_h2o_21m/calc_litfsi_h2o_21m.py @@ -0,0 +1,616 @@ +""" +Consolidated WiSE 21 m LiTFSI/H2O electrolyte benchmark. + +Extracts three observables for each registered MLIP model: + +* Density from pre-computed NPT thermo logs (p16_w42 cell, 382 atoms). +* Li-O coordination numbers from NVT trajectories via radial distribution + functions g(r) for Li-O_water (O bonded to H, d_OH < 1.25 A) and + Li-O_TFSI (O bonded to S, d_OS < 1.75 A); CN integrated to first minimum + R_CUT = 2.83 A from the r2SCAN AIMD reference. +* X-ray structure factor S(q) computed via dynasor in TRAVIS Faber-Ziman + convention with Cromer-Mann 4-Gaussian form factors (including H) and + Savitzky-Golay smoothing (window=27, order=3, dq=0.02 A^-1). + +System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms, 27.4938 A cubic) for +RDF and S(q); p16_w42 (382 atoms) for NPT density. Trajectories produced +with LAMMPS + symmetrix on Adastra (MI250X); the Janus recast lives in +``../md_reference/calc_md_reference.py``. + +Experimental references: + +* Density: 1.7126 g/cm3 (Gilbert et al., J. Chem. Eng. Data 62, 2056, 2017). +* Li-O CN: 2.0 each for water/TFSI (Watanabe et al., JPCB 125, 7477, 2021, + ~18.5 m, neutron diffraction with isotopic substitution). +* S(q): SAXS (Zhang et al., J. Phys. Chem. B 125, 4501, 2021). + +Source trajectories must live under ``DATA_ROOT//``, +where ``DATA_ROOT`` defaults to ``./data`` next to this script and is +overridable via the ``ML_PEG_WISE_LITFSI_H2O_21M_DATA_ROOT`` environment +variable. +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +import warnings + +from ase.geometry import get_distances +from ase.io import iread, read as ase_read +import numpy as np +import pytest +from scipy.signal import savgol_filter + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +warnings.filterwarnings("ignore") + +# --- Configuration ----------------------------------------------------------- + +MODELS = load_models(current_models) + +DATA_ROOT = Path( + os.environ.get( + "ML_PEG_WISE_LITFSI_H2O_21M_DATA_ROOT", + Path(__file__).parent / "data", + ) +) +OUT_PATH = Path(__file__).parent / "outputs" + +# --- Density references ------------------------------------------------------ + +RHO_EXP = 1.7126 # g/cm3 (Gilbert et al., JCED 2017) + +# --- RDF parameters ---------------------------------------------------------- + +R_MAX = 6.0 # A +DR = 0.02 # A +R_CUT_COORD = 2.83 # A — first minimum of Li-O g(r) from r2SCAN AIMD +D_OH_CUT = 1.25 # A — O-H bond cutoff for water identification +D_OS_CUT = 1.75 # A — O-S bond cutoff for TFSI identification + +# --- S(q) parameters (TRAVIS-matching) --------------------------------------- + +Q_MAX = 13.0 # A^-1 +Q_MIN = 0.5 # A^-1 +MAX_QPOINTS = 50000 +DQ_BIN = 0.02 # A^-1 +SAVGOL_WINDOW = 27 +SAVGOL_ORDER = 3 + +# LAMMPS atom-type → element fallback for raw dump files +TYPE_TO_ELEMENT = {1: "Li", 2: "C", 3: "F", 4: "S", 5: "N", 6: "O", 7: "H"} + +# System composition (p64_w170: 64 LiTFSI + 170 H2O) +COMPOSITION = {"Li": 64, "C": 128, "F": 384, "S": 128, "N": 64, "O": 426, "H": 340} +N_ATOMS = sum(COMPOSITION.values()) # 1534 +CONC = {k: v / N_ATOMS for k, v in COMPOSITION.items()} + +# Cromer-Mann 4-Gaussian X-ray form factor parameters (International Tables) +TRAVIS_FF = { + "S": {"a": [6.905, 5.203, 1.438, 1.586], + "b": [1.468, 22.215, 0.254, 56.172], "c": 0.867}, + "F": {"a": [3.539, 2.641, 1.517, 1.024], + "b": [10.283, 4.294, 0.262, 26.148], "c": 0.278}, + "O": {"a": [3.049, 2.287, 1.546, 0.867], + "b": [13.277, 5.701, 0.324, 32.909], "c": 0.251}, + "N": {"a": [12.213, 3.132, 2.013, 1.166], + "b": [0.006, 9.893, 28.997, 0.583], "c": -11.529}, + "C": {"a": [2.310, 1.020, 1.589, 0.865], + "b": [20.844, 10.208, 0.569, 51.651], "c": 0.216}, + "Li": {"a": [1.128, 0.751, 0.618, 0.465], + "b": [3.955, 1.052, 85.391, 168.261], "c": 0.038}, + "H": {"a": [0.493, 0.323, 0.140, 0.041], + "b": [10.511, 26.126, 3.142, 57.800], "c": 0.003}, +} + + +# ============================================================================= +# Density extraction +# ============================================================================= + + +def _density_source_path(model_name: str) -> Path: + """ + Locate the pre-computed density JSON for a model. + + Parameters + ---------- + model_name : str + Name of the MLIP model in the registry. + + Returns + ------- + Path + Expected path to ``density.json`` under ``DATA_ROOT``. + """ + return DATA_ROOT / model_name / "density.json" + + +# ============================================================================= +# RDF computation +# ============================================================================= + + +def identify_o_types(atoms) -> tuple[np.ndarray, np.ndarray]: + """ + Return indices of O_water and O_TFSI from a single ASE Atoms frame. + + Parameters + ---------- + atoms : ase.Atoms + A single frame from the trajectory. + + Returns + ------- + o_water : np.ndarray + Indices of oxygen atoms bonded to hydrogen (water oxygens). + o_tfsi : np.ndarray + Indices of oxygen atoms bonded to sulfur (TFSI oxygens). + """ + syms = np.array(atoms.get_chemical_symbols()) + pos = atoms.get_positions() + cell = atoms.get_cell() + pbc = atoms.get_pbc() + + o_idx = np.where(syms == "O")[0] + h_idx = np.where(syms == "H")[0] + s_idx = np.where(syms == "S")[0] + + o_water, o_tfsi = [], [] + for o in o_idx: + _, d_oh = get_distances(pos[o : o + 1], pos[h_idx], cell=cell, pbc=pbc) + _, d_os = get_distances(pos[o : o + 1], pos[s_idx], cell=cell, pbc=pbc) + if d_oh.min() < D_OH_CUT: + o_water.append(o) + elif d_os.min() < D_OS_CUT: + o_tfsi.append(o) + + return np.array(o_water), np.array(o_tfsi) + + +def compute_rdf( + traj_path: Path, + o_water_idx: np.ndarray, + o_tfsi_idx: np.ndarray, + r_max: float = R_MAX, + dr: float = DR, + skip_frames: int = 0, +) -> dict: + """ + Compute Li-O RDFs from an extxyz trajectory. + + Parameters + ---------- + traj_path : Path + Path to .extxyz trajectory file. + o_water_idx : np.ndarray + Atom indices of O_water (from first frame, fixed). + o_tfsi_idx : np.ndarray + Atom indices of O_TFSI (from first frame, fixed). + r_max : float + Maximum distance for RDF. + dr : float + Bin width. + skip_frames : int + Number of initial frames to skip (equilibration). Trajectories are + already pre-equilibrated (50–100 ps window). + + Returns + ------- + dict + Dictionary with keys ``r``, ``gr_LiO_total``, ``gr_LiO_water``, + ``gr_LiO_TFSI``, ``coord_LiO_total``, ``coord_LiO_water``, + ``coord_LiO_TFSI``, ``n_li``, ``n_O_water``, ``n_O_TFSI``, + ``n_frames_used``, and ``r_cut_coord``. + """ + bins = np.arange(0, r_max + dr, dr) + r_centers = 0.5 * (bins[:-1] + bins[1:]) + n_bins = len(r_centers) + + hist_total = np.zeros(n_bins) + hist_water = np.zeros(n_bins) + hist_tfsi = np.zeros(n_bins) + + n_frames = 0 + n_li = None + volume = None + o_all_idx = np.concatenate([o_water_idx, o_tfsi_idx]) + + for frame_idx, atoms in enumerate( + iread(str(traj_path), format="extxyz", index=":") + ): + if frame_idx < skip_frames: + continue + + syms = np.array(atoms.get_chemical_symbols()) + pos = atoms.get_positions() + cell = atoms.get_cell() + pbc = atoms.get_pbc() + + li_idx = np.where(syms == "Li")[0] + if n_li is None: + n_li = len(li_idx) + if volume is None: + volume = atoms.get_volume() + + pos_li = pos[li_idx] + + for o_set, hist in [ + (o_all_idx, hist_total), + (o_water_idx, hist_water), + (o_tfsi_idx, hist_tfsi), + ]: + pos_o = pos[o_set] + _, dists = get_distances(pos_li, pos_o, cell=cell, pbc=pbc) + dists_flat = dists.ravel() + dists_flat = dists_flat[dists_flat < r_max] + h, _ = np.histogram(dists_flat, bins=bins) + hist += h + + n_frames += 1 + + if n_frames == 0 or n_li is None: + raise RuntimeError(f"No frames processed from {traj_path}") + + def normalize(hist, n_central, n_neighbor): + """ + Normalize histogram to g(r). + + Parameters + ---------- + hist : np.ndarray + Raw pair-distance histogram. + n_central : int + Number of central atoms (Li). + n_neighbor : int + Number of neighbor atoms (O subset). + + Returns + ------- + np.ndarray + Normalized radial distribution function. + """ + shell_vol = (4.0 / 3.0) * np.pi * (bins[1:] ** 3 - bins[:-1] ** 3) + rho = n_neighbor / volume + norm = n_central * n_frames * rho * shell_vol + return hist / norm + + n_o_total = len(o_all_idx) + n_o_water = len(o_water_idx) + n_o_tfsi = len(o_tfsi_idx) + + gr_total = normalize(hist_total, n_li, n_o_total) + gr_water = normalize(hist_water, n_li, n_o_water) + gr_tfsi = normalize(hist_tfsi, n_li, n_o_tfsi) + + def coord_number(gr, n_neighbor): + """ + Compute coordination number from g(r). + + Parameters + ---------- + gr : np.ndarray + Radial distribution function. + n_neighbor : int + Number of neighbor atoms (O subset). + + Returns + ------- + float + Integrated coordination number up to R_CUT_COORD. + """ + rho = n_neighbor / volume + integrand = 4.0 * np.pi * rho * gr * r_centers**2 * dr + mask = r_centers <= R_CUT_COORD + return float(np.sum(integrand[mask])) + + return { + "r": r_centers.tolist(), + "gr_LiO_total": gr_total.tolist(), + "gr_LiO_water": gr_water.tolist(), + "gr_LiO_TFSI": gr_tfsi.tolist(), + "coord_LiO_total": coord_number(gr_total, n_o_total), + "coord_LiO_water": coord_number(gr_water, n_o_water), + "coord_LiO_TFSI": coord_number(gr_tfsi, n_o_tfsi), + "n_li": n_li, + "n_O_water": n_o_water, + "n_O_TFSI": n_o_tfsi, + "n_frames_used": n_frames, + "r_cut_coord": R_CUT_COORD, + } + + +# ============================================================================= +# X-ray S(q) computation +# ============================================================================= + + +def compute_fq_travis(elem: str, q_arr: np.ndarray) -> np.ndarray: + """ + Compute X-ray form factor f(q) using TRAVIS 4-Gaussian (Cromer-Mann) params. + + Parameters + ---------- + elem : str + Chemical element symbol (e.g. ``"Li"``, ``"O"``). + q_arr : np.ndarray + Array of q values in inverse angstroms. + + Returns + ------- + np.ndarray + Form factor values evaluated at each q. + """ + ff = TRAVIS_FF[elem] + s2 = (q_arr / (4 * np.pi)) ** 2 + return sum(ff["a"][i] * np.exp(-ff["b"][i] * s2) for i in range(4)) + ff["c"] + + +def compute_sq_travis_style(traj_path: Path) -> dict: + """ + Compute S(q) in TRAVIS Faber-Ziman convention using dynasor. + + Steps: + + 1. Read trajectory with dynasor (ASE for .extxyz; MDAnalysis for raw dump). + 2. Compute partial S_ab(q) on a fine spherical q-grid. + 3. Apply TRAVIS Cromer-Mann form factors: I_xray = sum f_a * f_b * S_ab. + 4. Spherical binning at dq = 0.02 A^-1. + 5. Faber-Ziman normalization with Laue term: S_FZ = I/^2 - /^2 + 1. + 6. Savitzky-Golay smoothing. + + Parameters + ---------- + traj_path : Path + Path to the trajectory file (.extxyz or LAMMPS dump). + + Returns + ------- + dict + Dictionary with keys ``q``, ``Sq``, ``n_qpoints``, + ``n_qpoints_used``, ``cell``, ``atom_types``, + ``particle_counts``, and ``params``. + """ + from dynasor import ( + Trajectory, + compute_static_structure_factors, + get_spherical_qpoints, + ) + + traj_str = str(traj_path) + is_extxyz = traj_str.endswith(".extxyz") or traj_str.endswith(".xyz") + + if is_extxyz: + first_frame = ase_read(traj_str, index=0) + symbols = first_frame.get_chemical_symbols() + atomic_indices = {} + for i, sym in enumerate(symbols): + atomic_indices.setdefault(sym, []).append(i) + + traj = Trajectory( + traj_str, + trajectory_format="ase", + atomic_indices=atomic_indices, + ) + else: + import MDAnalysis as mda # noqa: N813 + + u = mda.Universe(traj_str, format="LAMMPSDUMP") + types = u.atoms.types + atomic_indices = {} + for t, elem in TYPE_TO_ELEMENT.items(): + mask = types == str(t) + idx = np.where(mask)[0].tolist() + if idx: + atomic_indices[elem] = idx + + traj = Trajectory( + traj_str, + trajectory_format="lammps_mdanalysis", + atomic_indices=atomic_indices, + ) + + q_points = get_spherical_qpoints(traj.cell, q_max=Q_MAX, max_points=MAX_QPOINTS) + sample = compute_static_structure_factors(traj, q_points) + + q_norms = np.linalg.norm(sample.q_points, axis=1) + atom_types = list(sample.particle_counts.keys()) + + ff_at_q = {at: compute_fq_travis(at, q_norms) for at in atom_types} + i_xray = np.zeros(len(q_norms)) + for s1, s2 in sample.pairs: + sq_ab = sample[f"Sq_{s1}_{s2}"].flatten() + i_xray += ff_at_q[s1] * ff_at_q[s2] * sq_ab + + q_bins = np.arange(0.0, Q_MAX + DQ_BIN, DQ_BIN) + q_centers = 0.5 * (q_bins[:-1] + q_bins[1:]) + i_xray_binned = np.full(len(q_centers), np.nan) + counts = np.zeros(len(q_centers), dtype=int) + + for i in range(len(q_centers)): + mask = (q_norms >= q_bins[i]) & (q_norms < q_bins[i + 1]) + n = mask.sum() + if n > 0: + i_xray_binned[i] = np.mean(i_xray[mask]) + counts[i] = n + + f_avg = np.zeros(len(q_centers)) + f2_avg = np.zeros(len(q_centers)) + for elem, c in CONC.items(): + fq = compute_fq_travis(elem, q_centers) + f_avg += c * fq + f2_avg += c * fq**2 + f_avg_sq = f_avg**2 + + sq_fz = np.where( + f_avg_sq > 0, + i_xray_binned / f_avg_sq - f2_avg / f_avg_sq + 1.0, + np.nan, + ) + + valid = ~np.isnan(sq_fz) & (q_centers >= 0.3) & (q_centers <= Q_MAX) + q_v = q_centers[valid] + sq_v = sq_fz[valid] + if len(sq_v) > SAVGOL_WINDOW: + sq_smooth = savgol_filter(sq_v, SAVGOL_WINDOW, SAVGOL_ORDER) + else: + sq_smooth = sq_v + + rmask = (q_v >= Q_MIN) & (q_v <= 12.0) + + return { + "q": q_v[rmask].tolist(), + "Sq": sq_smooth[rmask].tolist(), + "n_qpoints": len(q_points), + "n_qpoints_used": len(sample.q_points), + "cell": traj.cell.tolist(), + "atom_types": traj.atom_types, + "particle_counts": {k: int(v) for k, v in sample.particle_counts.items()}, + "params": { + "q_max": Q_MAX, + "q_min": Q_MIN, + "dq_bin": DQ_BIN, + "max_qpoints": MAX_QPOINTS, + "savgol_window": SAVGOL_WINDOW, + "savgol_order": SAVGOL_ORDER, + "form_factors": "cromer-mann-4gaussian", + "normalization": "faber-ziman", + }, + } + + +def find_trajectory(model_name: str) -> Path | None: + """ + Find NVT trajectory for a model. + + Prefers the converted ``.extxyz`` and falls back to the raw LAMMPS dump. + + Parameters + ---------- + model_name : str + Name of the MLIP model. + + Returns + ------- + Path or None + Path to the trajectory file, or ``None`` if not found. + """ + extxyz = DATA_ROOT / model_name / "nvt_trajectory.extxyz" + if extxyz.exists(): + return extxyz + + lammpstrj = DATA_ROOT / model_name / "nvt_trajectory.lammpstrj" + if lammpstrj.exists(): + return lammpstrj + + return None + + +# ============================================================================= +# Pytest interface (ml-peg convention) +# ============================================================================= + + +@pytest.mark.parametrize("model_name", MODELS) +def test_extract_density(model_name: str) -> None: + """ + Extract and save NPT density data for one model. + + Parameters + ---------- + model_name : str + Name of the MLIP model in the registry. + """ + src = _density_source_path(model_name) + if not src.exists(): + pytest.skip(f"No density data for {model_name} at {src}") + + with open(src) as f: + result = json.load(f) + + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + with open(out_dir / "density.json", "w") as f: + json.dump(result, f, indent=2) + + assert result["rho_mean"] > 0, f"Negative density for {model_name}" + assert abs(result["rho_error_pct"]) < 50, ( + f"Density error > 50% for {model_name}" + ) + + +@pytest.mark.parametrize("model_name", MODELS) +def test_compute_rdf(model_name: str) -> None: + """ + Compute and save Li-O RDFs and coordination numbers for one model. + + Parameters + ---------- + model_name : str + Name of the MLIP model in the registry. + """ + traj_path = find_trajectory(model_name) + if traj_path is None: + pytest.skip(f"No NVT trajectory for {model_name}") + + first_frame = ase_read(str(traj_path), index=0, format="extxyz") + o_water_idx, o_tfsi_idx = identify_o_types(first_frame) + + assert len(o_water_idx) > 0, f"No O_water found for {model_name}" + assert len(o_tfsi_idx) > 0, f"No O_TFSI found for {model_name}" + + result = compute_rdf(traj_path, o_water_idx, o_tfsi_idx) + + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + with open(out_dir / "rdf.json", "w") as f: + json.dump( + {k: v for k, v in result.items() if not isinstance(v, list)}, f, indent=2 + ) + np.savez( + out_dir / "rdf.npz", + r=np.array(result["r"]), + gr_LiO_total=np.array(result["gr_LiO_total"]), + gr_LiO_water=np.array(result["gr_LiO_water"]), + gr_LiO_TFSI=np.array(result["gr_LiO_TFSI"]), + ) + + assert 2.0 < result["coord_LiO_total"] < 8.0, ( + f"Unexpected Li-O_total CN={result['coord_LiO_total']:.2f} " + f"for {model_name}" + ) + + +@pytest.mark.parametrize("model_name", MODELS) +def test_compute_xray_sq(model_name: str) -> None: + """ + Compute and save X-ray S(q) in Faber-Ziman convention for one model. + + Parameters + ---------- + model_name : str + Name of the MLIP model in the registry. + """ + traj_path = find_trajectory(model_name) + if traj_path is None: + pytest.skip(f"No NVT trajectory for {model_name}") + + result = compute_sq_travis_style(traj_path) + result["model"] = model_name + result["traj_path"] = str(traj_path) + + out_dir = OUT_PATH / model_name + out_dir.mkdir(parents=True, exist_ok=True) + with open(out_dir / "xray_sq.json", "w") as f: + json.dump(result, f, indent=2) + np.savez( + out_dir / "xray_sq.npz", + q=np.array(result["q"]), + Sq=np.array(result["Sq"]), + ) + + assert len(result["q"]) > 10, f"Too few q-points for {model_name}" diff --git a/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py new file mode 100644 index 000000000..e8db0d638 --- /dev/null +++ b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py @@ -0,0 +1,238 @@ +""" +Reference MD protocol for the WiSE 21 m LiTFSI/H2O benchmark. + +This script reproduces, with janus-core, the same MD protocol used to +generate the production trajectories analysed by the density, rdf, and +xray_sf sub-benchmarks. Production trajectories were run externally +with LAMMPS + MACE (symmetrix/Kokkos) on Adastra (MI250X); the driving +scripts are ``in.pipeline_cpu.lmp`` (Min -> NVT -> NPT) and +``in.nvt_continue.lmp`` (NVT continuation) on the Adastra workdir. + +The protocol is recast here for: + + * transparent, version-pinned documentation of the simulation settings, + * optional ml-peg-native replication on smaller systems or shorter windows. + +It is intentionally marked ``pytest.mark.skip`` in the default test run: +at ~1500 atoms and 250+ ps of cumulative MD, a Janus/ASE run would take +days of GPU time per registered model. Run manually with + + python calc_md_reference.py + +or call :func:`run_reference_md` from your own script. + +Protocol summary +---------------- +System : 64 LiTFSI + 170 H2O (1534 atoms, 21 m) +Ensembles : Min (FIRE) -> NVT 50 ps (NH) -> NPT 200 ps (NH, iso) -> + optional NVT continuation 50 ps (NH) +Temperature: 298.15 K +Pressure : 1.01325 bar (1 atm) +Timestep : 0.5 fs +TDAMP/PDAMP: 50 fs / 500 fs (100*dt / 1000*dt, Nosé-Hoover damping) +Dump cadence: every 0.1 ps (= 200 steps) +References : Gilbert et al., JCED 62, 2056 (2017); + Watanabe et al., JPCB 125, 7477 (2021). +""" + +from __future__ import annotations + +from copy import copy +from pathlib import Path +from typing import Any + +from ase import Atoms +from ase.io import read +import pytest + +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +DATA_PATH = Path(__file__).parent / "data" +OUT_PATH = Path(__file__).parent / "outputs" + +# --- Physical parameters (mirror Adastra LAMMPS in.pipeline_cpu.lmp) --------- + +TEMPERATURE_K = 298.15 +PRESSURE_BAR = 1.01325 # 1 atm +TIMESTEP_FS = 0.5 + +# 1 bar = 1e-4 GPa (Janus NPT accepts pressure in GPa) +PRESSURE_GPA = PRESSURE_BAR * 1e-4 + +# Nosé-Hoover damping times (match LAMMPS `fix npt`/`fix nvt` TDAMP/PDAMP) +THERMOSTAT_TIME_FS = 50.0 # 100 * timestep +BAROSTAT_TIME_FS = 500.0 # 1000 * timestep + +# Step counts (at 0.5 fs/step) +NVT_STEPS = 100_000 # 50 ps (warm-up / equilibration) +NPT_STEPS = 400_000 # 200 ps (density + configurational sampling) +NVT_CONT_STEPS = 100_000 # 50 ps (optional continuation) + +# IO cadence (match LAMMPS THERMO_EVERY / DUMP_EVERY) +STATS_EVERY = 100 # 0.05 ps +TRAJ_EVERY = 200 # 0.1 ps (501 frames in 50 ps) + +# Minimization (match LAMMPS `min_style fire; minimize 1.0e-6 0.2 2000 20000`) +MIN_FMAX = 0.2 # eV/Å +MIN_STEPS = 2000 + +SEED = 42 + +INITIAL_STRUCTURE = DATA_PATH / "p64_w170_initial.xyz" + + +def load_initial_structure() -> Atoms: + """ + Load the 64 LiTFSI + 170 H2O cubic simulation box. + + Returns + ------- + Atoms + ASE Atoms at the experimental density (L = 27.4938 Å, 1534 atoms). + + Raises + ------ + FileNotFoundError + If the reference structure file is not present. The file is not + shipped with the repository; provide an equivalent p64_w170 box + (LAMMPS data file converted to .xyz) or point ``INITIAL_STRUCTURE`` + at your own starting configuration. + """ + if not INITIAL_STRUCTURE.exists(): + raise FileNotFoundError( + f"Reference initial structure not found at {INITIAL_STRUCTURE}. " + "Provide an equivalent p64_w170 LAMMPS box (converted to extxyz) " + "or point INITIAL_STRUCTURE at your own starting configuration." + ) + return read(INITIAL_STRUCTURE) + + +def run_reference_md( + model_name: str, + model: Any, + *, + run_continuation: bool = False, +) -> None: + """ + Run the Min -> NVT -> NPT reference protocol for one registered model. + + Parameters + ---------- + model_name + Registry name of the MLIP model. + model + Model object returned by :func:`load_models`; must expose + ``default_dtype`` and ``get_calculator``. + run_continuation + If True, append a further 50 ps of NVT after the NPT block + (analogue of ``in.nvt_continue.lmp``). Default False. + """ + # Lazy imports: janus_core pulls in torch, which may be absent in + # environments that only run the analysis stack. + from janus_core.calculations.geom_opt import GeomOpt + from janus_core.calculations.md import NPT, NVT_NH + + model.default_dtype = "float64" + calc = model.get_calculator() + + struct = load_initial_structure() + struct.calc = copy(calc) + + model_out = OUT_PATH / model_name + model_out.mkdir(parents=True, exist_ok=True) + + # -- Minimization (FIRE, loose tolerance) --------------------------------- + GeomOpt( + struct=struct, + fmax=MIN_FMAX, + optimizer="FIRE", + steps=MIN_STEPS, + write_traj=False, + file_prefix=model_out / "minimize", + ).run() + + # -- NVT equilibration, 50 ps --------------------------------------------- + NVT_NH( + struct=struct, + temp=TEMPERATURE_K, + timestep=TIMESTEP_FS, + steps=NVT_STEPS, + thermostat_time=THERMOSTAT_TIME_FS, + stats_every=STATS_EVERY, + traj_every=TRAJ_EVERY, + seed=SEED, + file_prefix=model_out / "nvt_equil", + ).run() + + # -- NPT production, 200 ps (isotropic Nosé-Hoover; density sampling) ----- + NPT( + struct=struct, + temp=TEMPERATURE_K, + pressure=PRESSURE_GPA, + timestep=TIMESTEP_FS, + steps=NPT_STEPS, + thermostat_time=THERMOSTAT_TIME_FS, + barostat_time=BAROSTAT_TIME_FS, + stats_every=STATS_EVERY, + traj_every=TRAJ_EVERY, + seed=SEED, + file_prefix=model_out / "npt_prod", + ).run() + + # -- Optional NVT continuation, 50 ps ------------------------------------- + if run_continuation: + NVT_NH( + struct=struct, + temp=TEMPERATURE_K, + timestep=TIMESTEP_FS, + steps=NVT_CONT_STEPS, + thermostat_time=THERMOSTAT_TIME_FS, + stats_every=STATS_EVERY, + traj_every=TRAJ_EVERY, + seed=SEED, + file_prefix=model_out / "nvt_continue", + ).run() + + +@pytest.mark.skip( + reason=( + "Reference protocol only — production MD is run externally with " + "LAMMPS+symmetrix on Adastra (see module docstring). Running this in " + "pytest would take days of GPU per model." + ) +) +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_md_reference(mlip: tuple[str, Any]) -> None: + """ + Smoke test for the reference Janus MD protocol. + + Parameters + ---------- + mlip + ``(model_name, model)`` pair from the ml-peg registry. + """ + model_name, model = mlip + run_reference_md(model_name, model) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Run the WiSE reference MD protocol for one registered model." + ) + parser.add_argument("model", help=f"one of: {sorted(MODELS)}") + parser.add_argument( + "--continuation", + action="store_true", + help="append the optional 50 ps NVT continuation after NPT.", + ) + args = parser.parse_args() + + if args.model not in MODELS: + parser.error(f"unknown model '{args.model}'. Registered: {sorted(MODELS)}") + + run_reference_md(args.model, MODELS[args.model], run_continuation=args.continuation) diff --git a/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py b/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py deleted file mode 100644 index 7bc8de2ab..000000000 --- a/ml_peg/calcs/wise_electrolytes/rdf/calc_rdf.py +++ /dev/null @@ -1,313 +0,0 @@ -""" -Compute Li-O radial distribution functions for LiTFSI/H2O electrolyte. - -Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model -and computes g(r) for Li-O_total, Li-O_water (O bonded to H, d_OH < 1.25 A), -and Li-O_TFSI (O bonded to S, d_OS < 1.75 A), plus coordination numbers by -integration of the first peak. - -System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms). -Experimental reference for coordination numbers: -Watanabe et al., J. Phys. Chem. B 125, 7477 (2021), DOI: 10.1021/acs.jpcb.1c04693. -Neutron diffraction with 6Li/7Li + H/D isotopic substitution, ~18.5 m LiTFSI/H2O. -""" - -from __future__ import annotations - -import json -import os -from pathlib import Path - -from ase.geometry import get_distances -from ase.io import iread -import numpy as np -import pytest - -# --- Configuration ----------------------------------------------------------- - -DATA_ROOT = Path( - os.environ.get( - "ML_PEG_WISE_XRAY_DATA_ROOT", - "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf", - ) -) -OUT_PATH = Path(__file__).parent / "outputs" - -MODELS = [ - "matpes-r2scan", - "mace-mpa-0-medium", - "mace-omat-0-medium", - "mace-mp-0b3", - "mace-mh-1-omat", - "mace-mh-1-omol", -] - -# RDF parameters -R_MAX = 6.0 # Å — covers first + second shell -DR = 0.02 # Å — bin width -R_CUT_COORD = 2.83 # Å — first minimum of Li-O g(r) from r2SCAN AIMD reference - -# O-type identification cutoffs (applied on first frame only) -D_OH_CUT = 1.25 # Å — O-H bond in water -D_OS_CUT = 1.75 # Å — O-S bond in TFSI - - -# --- Helpers ----------------------------------------------------------------- - - -def identify_o_types(atoms) -> tuple[np.ndarray, np.ndarray]: - """ - Return indices of O_water and O_TFSI from a single ASE Atoms frame. - - Parameters - ---------- - atoms : ase.Atoms - A single frame from the trajectory. - - Returns - ------- - o_water : np.ndarray - Indices of oxygen atoms bonded to hydrogen (water oxygens). - o_tfsi : np.ndarray - Indices of oxygen atoms bonded to sulfur (TFSI oxygens). - """ - syms = np.array(atoms.get_chemical_symbols()) - pos = atoms.get_positions() - cell = atoms.get_cell() - pbc = atoms.get_pbc() - - o_idx = np.where(syms == "O")[0] - h_idx = np.where(syms == "H")[0] - s_idx = np.where(syms == "S")[0] - - o_water, o_tfsi = [], [] - for o in o_idx: - _, d_oh = get_distances(pos[o : o + 1], pos[h_idx], cell=cell, pbc=pbc) - _, d_os = get_distances(pos[o : o + 1], pos[s_idx], cell=cell, pbc=pbc) - if d_oh.min() < D_OH_CUT: - o_water.append(o) - elif d_os.min() < D_OS_CUT: - o_tfsi.append(o) - - return np.array(o_water), np.array(o_tfsi) - - -def compute_rdf( - traj_path: Path, - o_water_idx: np.ndarray, - o_tfsi_idx: np.ndarray, - r_max: float = R_MAX, - dr: float = DR, - skip_frames: int = 0, # trajectories are pre-equilibrated (50-100 ps window) -) -> dict: - """ - Compute Li-O RDFs from an extxyz trajectory. - - Parameters - ---------- - traj_path : Path - Path to .extxyz trajectory file. - o_water_idx : np.ndarray - Atom indices of O_water (from first frame, fixed). - o_tfsi_idx : np.ndarray - Atom indices of O_TFSI (from first frame, fixed). - r_max : float - Maximum distance for RDF. - dr : float - Bin width. - skip_frames : int - Number of initial frames to skip (equilibration). - - Returns - ------- - dict - Dictionary with keys ``r``, ``gr_LiO_total``, ``gr_LiO_water``, - ``gr_LiO_TFSI``, ``coord_LiO_total``, ``coord_LiO_water``, - ``coord_LiO_TFSI``, ``n_li``, ``n_O_water``, ``n_O_TFSI``, - ``n_frames_used``, and ``r_cut_coord``. - """ - bins = np.arange(0, r_max + dr, dr) - r_centers = 0.5 * (bins[:-1] + bins[1:]) - n_bins = len(r_centers) - - hist_total = np.zeros(n_bins) - hist_water = np.zeros(n_bins) - hist_tfsi = np.zeros(n_bins) - - n_frames = 0 - n_li = None - volume = None - o_all_idx = np.concatenate([o_water_idx, o_tfsi_idx]) - - for frame_idx, atoms in enumerate( - iread(str(traj_path), format="extxyz", index=":") - ): - if frame_idx < skip_frames: - continue - - syms = np.array(atoms.get_chemical_symbols()) - pos = atoms.get_positions() - cell = atoms.get_cell() - pbc = atoms.get_pbc() - - li_idx = np.where(syms == "Li")[0] - if n_li is None: - n_li = len(li_idx) - if volume is None: - volume = atoms.get_volume() - - pos_li = pos[li_idx] - - for o_set, hist in [ - (o_all_idx, hist_total), - (o_water_idx, hist_water), - (o_tfsi_idx, hist_tfsi), - ]: - pos_o = pos[o_set] - _, dists = get_distances(pos_li, pos_o, cell=cell, pbc=pbc) - dists_flat = dists.ravel() - dists_flat = dists_flat[dists_flat < r_max] - h, _ = np.histogram(dists_flat, bins=bins) - hist += h - - n_frames += 1 - - if n_frames == 0 or n_li is None: - raise RuntimeError(f"No frames processed from {traj_path}") - - # Normalize to g(r) - def normalize(hist, n_central, n_neighbor): - """ - Normalize histogram to g(r). - - Parameters - ---------- - hist : np.ndarray - Raw pair-distance histogram. - n_central : int - Number of central atoms (Li). - n_neighbor : int - Number of neighbor atoms (O subset). - - Returns - ------- - np.ndarray - Normalized radial distribution function. - """ - shell_vol = (4.0 / 3.0) * np.pi * (bins[1:] ** 3 - bins[:-1] ** 3) - rho = n_neighbor / volume - norm = n_central * n_frames * rho * shell_vol - return hist / norm - - n_o_total = len(o_all_idx) - n_o_water = len(o_water_idx) - n_o_tfsi = len(o_tfsi_idx) - - gr_total = normalize(hist_total, n_li, n_o_total) - gr_water = normalize(hist_water, n_li, n_o_water) - gr_tfsi = normalize(hist_tfsi, n_li, n_o_tfsi) - - # Coordination numbers: integrate 4π ρ g(r) r² dr from 0 to R_CUT_COORD - def coord_number(gr, n_neighbor): - """ - Compute coordination number from g(r). - - Parameters - ---------- - gr : np.ndarray - Radial distribution function. - n_neighbor : int - Number of neighbor atoms (O subset). - - Returns - ------- - float - Integrated coordination number up to R_CUT_COORD. - """ - rho = n_neighbor / volume - integrand = 4.0 * np.pi * rho * gr * r_centers**2 * dr - mask = r_centers <= R_CUT_COORD - return float(np.sum(integrand[mask])) - - return { - "r": r_centers.tolist(), - "gr_LiO_total": gr_total.tolist(), - "gr_LiO_water": gr_water.tolist(), - "gr_LiO_TFSI": gr_tfsi.tolist(), - "coord_LiO_total": coord_number(gr_total, n_o_total), - "coord_LiO_water": coord_number(gr_water, n_o_water), - "coord_LiO_TFSI": coord_number(gr_tfsi, n_o_tfsi), - "n_li": n_li, - "n_O_water": n_o_water, - "n_O_TFSI": n_o_tfsi, - "n_frames_used": n_frames, - "r_cut_coord": R_CUT_COORD, - } - - -def find_trajectory(model: str) -> Path | None: - """ - Find NVT extxyz trajectory for a model. - - Parameters - ---------- - model : str - Name of the MLIP model. - - Returns - ------- - Path or None - Path to the trajectory file, or None if not found. - """ - p = DATA_ROOT / model / "nvt_trajectory.extxyz" - return p if p.exists() else None - - -# --- Pytest interface -------------------------------------------------------- - - -@pytest.mark.parametrize("model", MODELS) -def test_compute_rdf(model: str) -> None: - """ - Compute and save Li-O RDFs and coordination numbers for one model. - - Parameters - ---------- - model : str - Name of the MLIP model to compute RDFs for. - """ - traj_path = find_trajectory(model) - if traj_path is None: - pytest.skip(f"No NVT trajectory for {model}") - - # Identify O types from first frame (indices fixed throughout NVT) - from ase.io import read as ase_read - - first_frame = ase_read(str(traj_path), index=0, format="extxyz") - o_water_idx, o_tfsi_idx = identify_o_types(first_frame) - - assert len(o_water_idx) > 0, f"No O_water found for {model}" - assert len(o_tfsi_idx) > 0, f"No O_TFSI found for {model}" - - result = compute_rdf(traj_path, o_water_idx, o_tfsi_idx) - - out_dir = OUT_PATH / model - out_dir.mkdir(parents=True, exist_ok=True) - - with open(out_dir / "rdf.json", "w") as f: - json.dump( - {k: v for k, v in result.items() if not isinstance(v, list)}, f, indent=2 - ) - - np.savez( - out_dir / "rdf.npz", - r=np.array(result["r"]), - gr_LiO_total=np.array(result["gr_LiO_total"]), - gr_LiO_water=np.array(result["gr_LiO_water"]), - gr_LiO_TFSI=np.array(result["gr_LiO_TFSI"]), - ) - - # Sanity checks - assert 2.0 < result["coord_LiO_total"] < 8.0, ( - f"Unexpected Li-O_total CN={result['coord_LiO_total']:.2f} for {model}" - ) diff --git a/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py b/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py deleted file mode 100644 index c4e173ffd..000000000 --- a/ml_peg/calcs/wise_electrolytes/xray_sf/calc_xray_sf.py +++ /dev/null @@ -1,345 +0,0 @@ -""" -Compute X-ray structure factor S(q) for LiTFSI/H2O electrolyte. - -Reads pre-computed NVT trajectories (.extxyz, 100 ps) for each MLIP model -and computes S(q) via dynasor using TRAVIS-matching settings: Cromer-Mann -4-Gaussian form factors (including H), Faber-Ziman normalization with Laue -monotonic term subtraction, and Savitzky-Golay smoothing -(window=27, order=3, dq=0.02 A^-1). - -System: 21 m LiTFSI / H2O, p64_w170 cell (1534 atoms). -Experimental reference: SAXS, Zhang, Lewis, Mars, Wan et al., -J. Phys. Chem. B 125, 4501 (2021), DOI: 10.1021/acs.jpcb.1c02189. -""" - -from __future__ import annotations - -import json -import os -from pathlib import Path -import warnings - -import numpy as np -import pytest -from scipy.signal import savgol_filter - -warnings.filterwarnings("ignore") - -# --- Configuration ----------------------------------------------------------- - -# Path to converted .extxyz trajectories (will be S3 download for PR) -DATA_ROOT = Path( - os.environ.get( - "ML_PEG_WISE_XRAY_DATA_ROOT", - "/lus/work/CT9/cin1387/lbrugnoli/prove/ml_peg_benchmark/data/wise_electrolytes/xray_sf", - ) -) -OUT_PATH = Path(__file__).parent / "outputs" - -MODELS = [ - "matpes-r2scan", - "mace-mpa-0-medium", - "mace-omat-0-medium", - "mace-mp-0b3", - "mace-mh-1-omat", - "mace-mh-1-omol", -] - -# S(q) computation parameters (TRAVIS-matching) -Q_MAX = 13.0 # Å⁻¹ -Q_MIN = 0.5 # Å⁻¹ -MAX_QPOINTS = 50000 -DQ_BIN = 0.02 # Å⁻¹ -SAVGOL_WINDOW = 27 # optimal for matching TRAVIS -SAVGOL_ORDER = 3 - -# LAMMPS type → element (for fallback) -TYPE_TO_ELEMENT = {1: "Li", 2: "C", 3: "F", 4: "S", 5: "N", 6: "O", 7: "H"} - -# System composition (p64_w170: 64 LiTFSI + 170 H2O) -COMPOSITION = {"Li": 64, "C": 128, "F": 384, "S": 128, "N": 64, "O": 426, "H": 340} -N_ATOMS = sum(COMPOSITION.values()) # 1534 -CONC = {k: v / N_ATOMS for k, v in COMPOSITION.items()} - -# TRAVIS Cromer-Mann 4-Gaussian form factors (International Tables) -TRAVIS_FF = { - "S": { - "a": [6.905, 5.203, 1.438, 1.586], - "b": [1.468, 22.215, 0.254, 56.172], - "c": 0.867, - }, - "F": { - "a": [3.539, 2.641, 1.517, 1.024], - "b": [10.283, 4.294, 0.262, 26.148], - "c": 0.278, - }, - "O": { - "a": [3.049, 2.287, 1.546, 0.867], - "b": [13.277, 5.701, 0.324, 32.909], - "c": 0.251, - }, - "N": { - "a": [12.213, 3.132, 2.013, 1.166], - "b": [0.006, 9.893, 28.997, 0.583], - "c": -11.529, - }, - "C": { - "a": [2.310, 1.020, 1.589, 0.865], - "b": [20.844, 10.208, 0.569, 51.651], - "c": 0.216, - }, - "Li": { - "a": [1.128, 0.751, 0.618, 0.465], - "b": [3.955, 1.052, 85.391, 168.261], - "c": 0.038, - }, - "H": { - "a": [0.493, 0.323, 0.140, 0.041], - "b": [10.511, 26.126, 3.142, 57.800], - "c": 0.003, - }, -} - - -# --- Form factor helpers ----------------------------------------------------- - - -def compute_fq_travis(elem: str, q_arr: np.ndarray) -> np.ndarray: - """ - Compute X-ray form factor f(q) using TRAVIS 4-Gaussian (Cromer-Mann) params. - - Parameters - ---------- - elem : str - Chemical element symbol (e.g. ``"Li"``, ``"O"``). - q_arr : np.ndarray - Array of q values in inverse angstroms. - - Returns - ------- - np.ndarray - Form factor values evaluated at each q. - """ - ff = TRAVIS_FF[elem] - s2 = (q_arr / (4 * np.pi)) ** 2 - return sum(ff["a"][i] * np.exp(-ff["b"][i] * s2) for i in range(4)) + ff["c"] - - -# --- S(q) computation ------------------------------------------------------- - - -def compute_sq_travis_style(traj_path: Path) -> dict: - """ - Compute S(q) in TRAVIS Faber-Ziman convention using dynasor. - - Steps: - - 1. Read trajectory with dynasor (LAMMPS dump via MDAnalysis). - 2. Compute partial S_ab(q) on a fine spherical q-grid. - 3. Apply TRAVIS Cromer-Mann form factors: I_xray = sum f_a * f_b * S_ab. - 4. Spherical binning at dq = 0.02 A^-1. - 5. Faber-Ziman normalization with Laue term: S_FZ = I/^2 - /^2 + 1. - 6. Savitzky-Golay smoothing. - - Parameters - ---------- - traj_path : Path - Path to the trajectory file (.extxyz or LAMMPS dump). - - Returns - ------- - dict - Dictionary with keys ``q``, ``Sq``, ``n_qpoints``, - ``n_qpoints_used``, ``cell``, ``atom_types``, - ``particle_counts``, and ``params``. - """ - from dynasor import ( - Trajectory, - compute_static_structure_factors, - get_spherical_qpoints, - ) - - traj_str = str(traj_path) - is_extxyz = traj_str.endswith(".extxyz") or traj_str.endswith(".xyz") - - if is_extxyz: - # For .extxyz: dynasor reads it directly, atomic_indices from element symbols - from ase.io import read as ase_read - - first_frame = ase_read(traj_str, index=0) - symbols = first_frame.get_chemical_symbols() - atomic_indices = {} - for i, sym in enumerate(symbols): - atomic_indices.setdefault(sym, []).append(i) - - traj = Trajectory( - traj_str, - trajectory_format="ase", - atomic_indices=atomic_indices, - ) - else: - # For LAMMPS dump: use MDAnalysis backend - import MDAnalysis as mda # noqa: N813 - - u = mda.Universe(traj_str, format="LAMMPSDUMP") - types = u.atoms.types - atomic_indices = {} - for t, elem in TYPE_TO_ELEMENT.items(): - mask = types == str(t) - idx = np.where(mask)[0].tolist() - if idx: - atomic_indices[elem] = idx - - traj = Trajectory( - traj_str, - trajectory_format="lammps_mdanalysis", - atomic_indices=atomic_indices, - ) - - q_points = get_spherical_qpoints(traj.cell, q_max=Q_MAX, max_points=MAX_QPOINTS) - sample = compute_static_structure_factors(traj, q_points) - - q_norms = np.linalg.norm(sample.q_points, axis=1) - atom_types = list(sample.particle_counts.keys()) - - # Manual X-ray weighting with TRAVIS form factors (including H) - ff_at_q = {at: compute_fq_travis(at, q_norms) for at in atom_types} - i_xray = np.zeros(len(q_norms)) - for s1, s2 in sample.pairs: - sq_ab = sample[f"Sq_{s1}_{s2}"].flatten() - i_xray += ff_at_q[s1] * ff_at_q[s2] * sq_ab - - # Spherical binning - q_bins = np.arange(0.0, Q_MAX + DQ_BIN, DQ_BIN) - q_centers = 0.5 * (q_bins[:-1] + q_bins[1:]) - i_xray_binned = np.full(len(q_centers), np.nan) - counts = np.zeros(len(q_centers), dtype=int) - - for i in range(len(q_centers)): - mask = (q_norms >= q_bins[i]) & (q_norms < q_bins[i + 1]) - n = mask.sum() - if n > 0: - i_xray_binned[i] = np.mean(i_xray[mask]) - counts[i] = n - - # Faber-Ziman normalization with Laue subtraction - f_avg = np.zeros(len(q_centers)) - f2_avg = np.zeros(len(q_centers)) - for elem, c in CONC.items(): - fq = compute_fq_travis(elem, q_centers) - f_avg += c * fq - f2_avg += c * fq**2 - f_avg_sq = f_avg**2 - - sq_fz = np.where( - f_avg_sq > 0, - i_xray_binned / f_avg_sq - f2_avg / f_avg_sq + 1.0, - np.nan, - ) - - # Savitzky-Golay smoothing - valid = ~np.isnan(sq_fz) & (q_centers >= 0.3) & (q_centers <= Q_MAX) - q_v = q_centers[valid] - sq_v = sq_fz[valid] - if len(sq_v) > SAVGOL_WINDOW: - sq_smooth = savgol_filter(sq_v, SAVGOL_WINDOW, SAVGOL_ORDER) - else: - sq_smooth = sq_v - - # Restrict to useful range - rmask = (q_v >= Q_MIN) & (q_v <= 12.0) - - return { - "q": q_v[rmask].tolist(), - "Sq": sq_smooth[rmask].tolist(), - "n_qpoints": len(q_points), - "n_qpoints_used": len(sample.q_points), - "cell": traj.cell.tolist(), - "atom_types": traj.atom_types, - "particle_counts": {k: int(v) for k, v in sample.particle_counts.items()}, - "params": { - "q_max": Q_MAX, - "q_min": Q_MIN, - "dq_bin": DQ_BIN, - "max_qpoints": MAX_QPOINTS, - "savgol_window": SAVGOL_WINDOW, - "savgol_order": SAVGOL_ORDER, - "form_factors": "cromer-mann-4gaussian", - "normalization": "faber-ziman", - }, - } - - -# --- Trajectory finding ------------------------------------------------------ - - -def find_trajectory(model: str) -> Path | None: - """ - Find NVT trajectory for a model. - - Prefers converted .extxyz, falls back to LAMMPS dump. - - Parameters - ---------- - model : str - Name of the MLIP model. - - Returns - ------- - Path or None - Path to the trajectory file, or None if not found. - """ - # Check for converted .extxyz first - extxyz = DATA_ROOT / model / "nvt_trajectory.extxyz" - if extxyz.exists(): - return extxyz - - # Fall back to original LAMMPS dump - runs_root = Path("/lus/work/CT9/cin1387/lbrugnoli/prove/runs") - model_cell = runs_root / model / "p64_w170" - for d in sorted(model_cell.glob("nvt_*"), reverse=True): - traj = d / "nvt_long_trajectory.lammpstrj" - if traj.exists(): - return traj - for d in sorted(model_cell.glob("pipeline_*"), reverse=True): - traj = d / "nvt_trajectory.lammpstrj" - if traj.exists(): - return traj - - return None - - -# --- Pytest interface (ml-peg convention) ------------------------------------ - - -@pytest.mark.parametrize("model", MODELS) -def test_compute_xray_sq(model: str) -> None: - """ - Compute and save X-ray S(q) in Faber-Ziman convention for one model. - - Parameters - ---------- - model : str - Name of the MLIP model to compute S(q) for. - """ - traj_path = find_trajectory(model) - if traj_path is None: - pytest.skip(f"No NVT trajectory for {model}") - - result = compute_sq_travis_style(traj_path) - result["model"] = model - result["traj_path"] = str(traj_path) - - out_dir = OUT_PATH / model - out_dir.mkdir(parents=True, exist_ok=True) - - with open(out_dir / "xray_sq.json", "w") as f: - json.dump(result, f, indent=2) - - np.savez( - out_dir / "xray_sq.npz", - q=np.array(result["q"]), - Sq=np.array(result["Sq"]), - ) - - assert len(result["q"]) > 10, f"Too few q-points for {model}" From ff0db5c34c7fd136dabbd2ea6674f2bdece15536 Mon Sep 17 00:00:00 2001 From: luca Date: Tue, 28 Apr 2026 11:40:36 +0200 Subject: [PATCH 3/3] Use NPT_MTK and very_slow marker in WiSE MD reference - Switch NPT (Melchionna) -> NPT_MTK (Martyna-Tobias-Klein) so the janus reference matches the LAMMPS fix npt formulation used in production. Pass thermostat_chain=3 and barostat_chain=3 explicitly, matching the LAMMPS default chain length. - Replace pytest.mark.skip with pytest.mark.very_slow per Joseph's review on PR #445: the reference protocol is now opt-in via --run-very-slow rather than unconditionally skipped, so it can be exercised when validating new models. --- .../md_reference/calc_md_reference.py | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py index e8db0d638..27f7fb5d5 100644 --- a/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py +++ b/ml_peg/calcs/wise_electrolytes/md_reference/calc_md_reference.py @@ -2,20 +2,22 @@ Reference MD protocol for the WiSE 21 m LiTFSI/H2O benchmark. This script reproduces, with janus-core, the same MD protocol used to -generate the production trajectories analysed by the density, rdf, and -xray_sf sub-benchmarks. Production trajectories were run externally +generate the production trajectories analysed by the consolidated +litfsi_h2o_21m benchmark. Production trajectories were run externally with LAMMPS + MACE (symmetrix/Kokkos) on Adastra (MI250X); the driving scripts are ``in.pipeline_cpu.lmp`` (Min -> NVT -> NPT) and ``in.nvt_continue.lmp`` (NVT continuation) on the Adastra workdir. -The protocol is recast here for: +Integrators are matched to LAMMPS where janus-core exposes a choice: - * transparent, version-pinned documentation of the simulation settings, - * optional ml-peg-native replication on smaller systems or shorter windows. + * NVT: Nosé-Hoover chain (``NVT_NH``) -- equivalent to LAMMPS ``fix nvt``. + * NPT: Martyna-Tobias-Klein chain (``NPT_MTK``) -- the same formulation + used by LAMMPS ``fix npt``. (``NPT`` in janus is Melchionna and would + *not* match.) -It is intentionally marked ``pytest.mark.skip`` in the default test run: -at ~1500 atoms and 250+ ps of cumulative MD, a Janus/ASE run would take -days of GPU time per registered model. Run manually with +The protocol is intentionally marked ``pytest.mark.very_slow``: at ~1500 +atoms and 250+ ps of cumulative MD, an ASE/Janus run takes a day of GPU +time per registered model. Run manually with python calc_md_reference.py @@ -24,12 +26,14 @@ Protocol summary ---------------- System : 64 LiTFSI + 170 H2O (1534 atoms, 21 m) -Ensembles : Min (FIRE) -> NVT 50 ps (NH) -> NPT 200 ps (NH, iso) -> - optional NVT continuation 50 ps (NH) +Ensembles : Min (FIRE) -> NVT 50 ps (NH chain) -> + NPT 200 ps (MTK chain, iso) -> + optional NVT continuation 50 ps (NH chain) Temperature: 298.15 K Pressure : 1.01325 bar (1 atm) Timestep : 0.5 fs TDAMP/PDAMP: 50 fs / 500 fs (100*dt / 1000*dt, Nosé-Hoover damping) +NH chains : length 3 for both thermostat and barostat (LAMMPS default) Dump cadence: every 0.1 ps (= 200 steps) References : Gilbert et al., JCED 62, 2056 (2017); Watanabe et al., JPCB 125, 7477 (2021). @@ -133,7 +137,7 @@ def run_reference_md( # Lazy imports: janus_core pulls in torch, which may be absent in # environments that only run the analysis stack. from janus_core.calculations.geom_opt import GeomOpt - from janus_core.calculations.md import NPT, NVT_NH + from janus_core.calculations.md import NPT_MTK, NVT_NH model.default_dtype = "float64" calc = model.get_calculator() @@ -167,8 +171,11 @@ def run_reference_md( file_prefix=model_out / "nvt_equil", ).run() - # -- NPT production, 200 ps (isotropic Nosé-Hoover; density sampling) ----- - NPT( + # -- NPT production, 200 ps (isotropic MTK chain; density + structure) ---- + # NPT_MTK uses the Martyna-Tobias-Klein chain integrator -- the same + # formulation as LAMMPS `fix npt iso`. Chain length 3 is the LAMMPS + # default for both the thermostat and barostat sub-chains. + NPT_MTK( struct=struct, temp=TEMPERATURE_K, pressure=PRESSURE_GPA, @@ -176,6 +183,8 @@ def run_reference_md( steps=NPT_STEPS, thermostat_time=THERMOSTAT_TIME_FS, barostat_time=BAROSTAT_TIME_FS, + thermostat_chain=3, + barostat_chain=3, stats_every=STATS_EVERY, traj_every=TRAJ_EVERY, seed=SEED, @@ -197,13 +206,7 @@ def run_reference_md( ).run() -@pytest.mark.skip( - reason=( - "Reference protocol only — production MD is run externally with " - "LAMMPS+symmetrix on Adastra (see module docstring). Running this in " - "pytest would take days of GPU per model." - ) -) +@pytest.mark.very_slow @pytest.mark.parametrize("mlip", MODELS.items()) def test_md_reference(mlip: tuple[str, Any]) -> None: """