From 3e7f6d89388d7f36a66fd96489d29b478794681d Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Fri, 27 Feb 2026 14:12:39 +0100 Subject: [PATCH 1/5] Refactor of universe creation and applying transformations --- src/openfe_analysis/rmsd.py | 92 ++------------------------ src/openfe_analysis/tests/test_rmsd.py | 39 ++++++----- 2 files changed, 27 insertions(+), 104 deletions(-) diff --git a/src/openfe_analysis/rmsd.py b/src/openfe_analysis/rmsd.py index 5591da4..b83e8a2 100644 --- a/src/openfe_analysis/rmsd.py +++ b/src/openfe_analysis/rmsd.py @@ -7,94 +7,9 @@ import numpy as np from MDAnalysis.analysis import rms from MDAnalysis.analysis.base import AnalysisBase -from MDAnalysis.transformations import unwrap from .reader import FEReader -from .transformations import Aligner, ClosestImageShift, NoJump - - -def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe: - """ - Construct an MDAnalysis Universe from a MultiState NetCDF trajectory - and apply standard analysis transformations. - - The Universe is created using the custom ``FEReader`` to extract a - single state from a multistate simulation. - - Parameters - ---------- - top : pathlib.Path or Topology - Path to a topology file (e.g. PDB) or an already-loaded MDAnalysis - topology object. - trj : nc.Dataset - Open NetCDF dataset produced by - ``openmmtools.multistate.MultiStateReporter``. - state : int - Thermodynamic state index to extract from the multistate trajectory. - - Returns - ------- - MDAnalysis.Universe - A Universe with trajectory transformations applied. - - Notes - ----- - Identifies two AtomGroups: - - protein, defined as having standard amino acid names, then filtered - down to CA - - ligand, defined as resname UNK - - Depending on whether a protein is present, a sequence of trajectory - transformations is applied: - - If a protein is present: - - Unwraps protein and ligand atom to be made whole - - Shifts protein chains and the ligand to the image closest to the first - protein chain (:class:`ClosestImageShift`) - - Aligns the entire system to minimise the protein RMSD (:class:`Aligner`) - - If only a ligand is present: - - Prevents the ligand from jumping between periodic images - - Aligns the ligand to minimize its RMSD - """ - u = mda.Universe( - top, - trj, - index=state, - index_method="state", - format=FEReader, - ) - prot = u.select_atoms("protein and name CA") - ligand = u.select_atoms("resname UNK") - - if prot: - # Unwrap all atoms - unwrap_tr = unwrap(prot + ligand) - - # Shift chains + ligand - chains = [seg.atoms for seg in prot.segments] - shift = ClosestImageShift(chains[0], [*chains[1:], ligand]) - - align = Aligner(prot) - - u.trajectory.add_transformations( - unwrap_tr, - shift, - align, - ) - else: - # if there's no protein - # - make the ligand not jump periodic images between frames - # - align the ligand to minimise its RMSD - nope = NoJump(ligand) - align = Aligner(ligand) - - u.trajectory.add_transformations( - nope, - align, - ) - - return u +from .utils.universe_transformations import apply_transformations, create_universe class Protein2DRMSD(AnalysisBase): @@ -279,11 +194,12 @@ def gather_rms_data( for i in range(n_lambda): # cheeky, but we can read the PDB topology once and reuse per universe # this then only hits the PDB file once for all replicas - u = make_Universe(u_top._topology, ds, state=i) - + u = create_universe(u_top._topology, ds, i) prot = u.select_atoms("protein and name CA") ligand = u.select_atoms("resname UNK") + apply_transformations(u, prot, ligand) + if prot: prot_rmsd = RMSDAnalysis(prot).run(step=skip) output["protein_RMSD"].append(prot_rmsd.results.rmsd) diff --git a/src/openfe_analysis/tests/test_rmsd.py b/src/openfe_analysis/tests/test_rmsd.py index a9e8b09..372f913 100644 --- a/src/openfe_analysis/tests/test_rmsd.py +++ b/src/openfe_analysis/tests/test_rmsd.py @@ -10,8 +10,9 @@ from numpy.testing import assert_allclose from openfe_analysis.reader import FEReader -from openfe_analysis.rmsd import gather_rms_data, make_Universe +from openfe_analysis.rmsd import gather_rms_data from openfe_analysis.transformations import Aligner +from openfe_analysis.utils import universe_transformations @pytest.fixture @@ -22,11 +23,12 @@ def mda_universe(hybrid_system_skipped_pdb, simulation_skipped_nc): Guarantees: - NetCDF file is opened exactly once """ - u = make_Universe( - hybrid_system_skipped_pdb, - simulation_skipped_nc, - state=0, + u = universe_transformations.create_universe( + hybrid_system_skipped_pdb, simulation_skipped_nc, 0 ) + protein = u.select_atoms("protein and name CA") + ligand = u.select_atoms("resname UNK") + universe_transformations.apply_transformations(u, protein, ligand) yield u u.trajectory.close() @@ -108,13 +110,10 @@ def test_gather_rms_data_regression_skippednc(simulation_skipped_nc, hybrid_syst def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_pdb): - u = mda.Universe( - hybrid_system_skipped_pdb, - simulation_skipped_nc, - index=0, - format=FEReader, + u = universe_transformations.create_universe( + hybrid_system_skipped_pdb, simulation_skipped_nc, 0 ) - prot = u.select_atoms("protein") + prot = u.select_atoms("protein and name CA") # Do other transformations, but no shifting unwrap_tr = unwrap(prot) for frag in prot.fragments: @@ -132,8 +131,13 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p u.trajectory.close() # RMSD with shifting - u2 = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0) - prot2 = u2.select_atoms("protein") + u2 = universe_transformations.create_universe( + hybrid_system_skipped_pdb, simulation_skipped_nc, 0 + ) + prot2 = u2.select_atoms("protein and name CA") + + universe_transformations.apply_transformations(u2, protein=prot2) + R2 = rms.RMSD(prot2) R2.run() rmsd_shift = R2.rmsd[:, 2] @@ -142,9 +146,12 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_skipped_pdb): - u = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0) + u = universe_transformations.create_universe( + hybrid_system_skipped_pdb, simulation_skipped_nc, 0 + ) + protein = u.select_atoms("protein and name CA") + universe_transformations.apply_transformations(u, protein) - protein = u.select_atoms("protein") chain = protein.segments[0].atoms rgs = [] @@ -158,7 +165,7 @@ def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_sk def test_rmsd_reference_is_first_frame(mda_universe): u = mda_universe - prot = u.select_atoms("protein") + prot = u.select_atoms("protein and name CA") _ = next(iter(u.trajectory)) # SAFE ref = prot.positions.copy() From 5e65b34bcf7064bbcdb480363331230d0b49303a Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Fri, 27 Feb 2026 14:13:50 +0100 Subject: [PATCH 2/5] Add new file --- .../utils/universe_transformations.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 src/openfe_analysis/utils/universe_transformations.py diff --git a/src/openfe_analysis/utils/universe_transformations.py b/src/openfe_analysis/utils/universe_transformations.py new file mode 100644 index 0000000..0821665 --- /dev/null +++ b/src/openfe_analysis/utils/universe_transformations.py @@ -0,0 +1,88 @@ +import pathlib +from typing import Optional + +import MDAnalysis as mda +import netCDF4 as nc +from MDAnalysis.transformations import unwrap + +from ..reader import FEReader +from ..transformations import Aligner, ClosestImageShift, NoJump + + +def create_universe(top, trj, state): + return mda.Universe( + top, + trj, + index=state, + index_method="state", + format=FEReader, + ) + + +def apply_transformations( + u: mda.Universe, + protein: Optional[mda.AtomGroup] = None, + ligand: Optional[mda.AtomGroup] = None, +): + """ + Apply a collection of transformations to a Universe. + + Parameters + ---------- + u: Universe + The Universe the transformations are applied to + protein: Optional[AtomGroup] + The AtomGroup of the protein + ligand: Optional[AtomGroup] + The AtomGroup of the ligand + + Notes + ----- + Depending on whether a protein is present, a sequence of trajectory + transformations is applied: + + If a protein is present: + - Unwraps protein and ligand atom to be made whole + - Shifts protein chains and the ligand to the image closest to the first + protein chain (:class:`ClosestImageShift`) + - Aligns the entire system to minimise the protein RMSD (:class:`Aligner`) + + If only a ligand is present: + - Prevents the ligand from jumping between periodic images + - Aligns the ligand to minimize its RMSD + """ + has_protein = protein is not None and protein.n_atoms > 0 + has_ligand = ligand is not None and ligand.n_atoms > 0 + + if has_protein: + group = protein + if has_ligand: + group = protein + ligand + # Unwrap all atoms + unwrap_tr = unwrap(group) + + # Shift chains + ligand + chains = [seg.atoms for seg in protein.segments] + shift_targets = [*chains[1:]] + if has_ligand: + shift_targets.append(ligand) + shift = ClosestImageShift(chains[0], shift_targets) + + align = Aligner(protein) + + u.trajectory.add_transformations( + unwrap_tr, + shift, + align, + ) + elif has_ligand: + # if there's no protein + # - make the ligand not jump periodic images between frames + # - align the ligand to minimise its RMSD + nope = NoJump(ligand) + align = Aligner(ligand) + + u.trajectory.add_transformations( + nope, + align, + ) From 8a8b20acb377b2999a4c0f42e51f8734ad92dc22 Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Fri, 27 Feb 2026 14:25:29 +0100 Subject: [PATCH 3/5] more updates --- src/openfe_analysis/reader.py | 11 ++++++++ src/openfe_analysis/rmsd.py | 6 ++--- src/openfe_analysis/tests/test_rmsd.py | 26 +++++++------------ ...formations.py => apply_transformations.py} | 13 ---------- 4 files changed, 23 insertions(+), 33 deletions(-) rename src/openfe_analysis/utils/{universe_transformations.py => apply_transformations.py} (90%) diff --git a/src/openfe_analysis/reader.py b/src/openfe_analysis/reader.py index cf7aaea..3b1f816 100644 --- a/src/openfe_analysis/reader.py +++ b/src/openfe_analysis/reader.py @@ -1,6 +1,7 @@ import pathlib from typing import Literal, Optional +import MDAnalysis as mda import netCDF4 as nc import numpy as np import yaml @@ -11,6 +12,16 @@ from openfe_analysis.utils.multistate import _determine_position_indices +def _create_universe_single_state(top, trj, state): + return mda.Universe( + top, + trj, + index=state, + index_method="state", + format=FEReader, + ) + + def _determine_iteration_dt(dataset) -> float: """ Determine the time increment between successive iterations diff --git a/src/openfe_analysis/rmsd.py b/src/openfe_analysis/rmsd.py index b83e8a2..ac37d54 100644 --- a/src/openfe_analysis/rmsd.py +++ b/src/openfe_analysis/rmsd.py @@ -8,8 +8,8 @@ from MDAnalysis.analysis import rms from MDAnalysis.analysis.base import AnalysisBase -from .reader import FEReader -from .utils.universe_transformations import apply_transformations, create_universe +from .reader import _create_universe_single_state +from .utils.apply_transformations import apply_transformations class Protein2DRMSD(AnalysisBase): @@ -194,7 +194,7 @@ def gather_rms_data( for i in range(n_lambda): # cheeky, but we can read the PDB topology once and reuse per universe # this then only hits the PDB file once for all replicas - u = create_universe(u_top._topology, ds, i) + u = _create_universe_single_state(u_top._topology, ds, i) prot = u.select_atoms("protein and name CA") ligand = u.select_atoms("resname UNK") diff --git a/src/openfe_analysis/tests/test_rmsd.py b/src/openfe_analysis/tests/test_rmsd.py index 372f913..b22cfd0 100644 --- a/src/openfe_analysis/tests/test_rmsd.py +++ b/src/openfe_analysis/tests/test_rmsd.py @@ -9,10 +9,10 @@ from MDAnalysis.transformations import unwrap from numpy.testing import assert_allclose -from openfe_analysis.reader import FEReader +from openfe_analysis.reader import _create_universe_single_state from openfe_analysis.rmsd import gather_rms_data from openfe_analysis.transformations import Aligner -from openfe_analysis.utils import universe_transformations +from openfe_analysis.utils import apply_transformations @pytest.fixture @@ -23,12 +23,10 @@ def mda_universe(hybrid_system_skipped_pdb, simulation_skipped_nc): Guarantees: - NetCDF file is opened exactly once """ - u = universe_transformations.create_universe( - hybrid_system_skipped_pdb, simulation_skipped_nc, 0 - ) + u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) protein = u.select_atoms("protein and name CA") ligand = u.select_atoms("resname UNK") - universe_transformations.apply_transformations(u, protein, ligand) + apply_transformations.apply_transformations(u, protein, ligand) yield u u.trajectory.close() @@ -110,9 +108,7 @@ def test_gather_rms_data_regression_skippednc(simulation_skipped_nc, hybrid_syst def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_pdb): - u = universe_transformations.create_universe( - hybrid_system_skipped_pdb, simulation_skipped_nc, 0 - ) + u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) prot = u.select_atoms("protein and name CA") # Do other transformations, but no shifting unwrap_tr = unwrap(prot) @@ -131,12 +127,10 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p u.trajectory.close() # RMSD with shifting - u2 = universe_transformations.create_universe( - hybrid_system_skipped_pdb, simulation_skipped_nc, 0 - ) + u2 = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) prot2 = u2.select_atoms("protein and name CA") - universe_transformations.apply_transformations(u2, protein=prot2) + apply_transformations.apply_transformations(u2, protein=prot2) R2 = rms.RMSD(prot2) R2.run() @@ -146,11 +140,9 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_skipped_pdb): - u = universe_transformations.create_universe( - hybrid_system_skipped_pdb, simulation_skipped_nc, 0 - ) + u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) protein = u.select_atoms("protein and name CA") - universe_transformations.apply_transformations(u, protein) + apply_transformations.apply_transformations(u, protein) chain = protein.segments[0].atoms diff --git a/src/openfe_analysis/utils/universe_transformations.py b/src/openfe_analysis/utils/apply_transformations.py similarity index 90% rename from src/openfe_analysis/utils/universe_transformations.py rename to src/openfe_analysis/utils/apply_transformations.py index 0821665..a5aad0e 100644 --- a/src/openfe_analysis/utils/universe_transformations.py +++ b/src/openfe_analysis/utils/apply_transformations.py @@ -1,24 +1,11 @@ -import pathlib from typing import Optional import MDAnalysis as mda -import netCDF4 as nc from MDAnalysis.transformations import unwrap -from ..reader import FEReader from ..transformations import Aligner, ClosestImageShift, NoJump -def create_universe(top, trj, state): - return mda.Universe( - top, - trj, - index=state, - index_method="state", - format=FEReader, - ) - - def apply_transformations( u: mda.Universe, protein: Optional[mda.AtomGroup] = None, From a73f096fd124d01564aec9b9be8cef2dc28d2107 Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Mon, 13 Apr 2026 13:57:14 +0200 Subject: [PATCH 4/5] Split complex and ligand only treatment into separate functions --- .../utils/apply_transformations.py | 72 +++++++++++-------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/src/openfe_analysis/utils/apply_transformations.py b/src/openfe_analysis/utils/apply_transformations.py index a5aad0e..9eb01cc 100644 --- a/src/openfe_analysis/utils/apply_transformations.py +++ b/src/openfe_analysis/utils/apply_transformations.py @@ -42,34 +42,46 @@ def apply_transformations( has_ligand = ligand is not None and ligand.n_atoms > 0 if has_protein: - group = protein - if has_ligand: - group = protein + ligand - # Unwrap all atoms - unwrap_tr = unwrap(group) - - # Shift chains + ligand - chains = [seg.atoms for seg in protein.segments] - shift_targets = [*chains[1:]] - if has_ligand: - shift_targets.append(ligand) - shift = ClosestImageShift(chains[0], shift_targets) - - align = Aligner(protein) - - u.trajectory.add_transformations( - unwrap_tr, - shift, - align, - ) + lig = ligand if has_ligand else None + transforms = _apply_transformations_complex(protein, lig) elif has_ligand: - # if there's no protein - # - make the ligand not jump periodic images between frames - # - align the ligand to minimise its RMSD - nope = NoJump(ligand) - align = Aligner(ligand) - - u.trajectory.add_transformations( - nope, - align, - ) + transforms = _apply_transformations_ligand_only(ligand) + else: + return + + u.trajectory.add_transformations(*transforms) + + +def _apply_transformations_complex(protein, ligand=None): + """ + Build transformations for systems containing a protein + and optionally a ligand. + """ + transforms = [] + # 1. Make molecules whole (protein + optional ligand) + group = protein if ligand is None else protein + ligand + transforms.append(unwrap(group)) + + # 2. Closest image shift for protein chains + ligand (if present) + chains = [seg.atoms for seg in protein.segments] + shift_targets = chains[1:] + if ligand is not None: + shift_targets.append(ligand) + transforms.append(ClosestImageShift(chains[0], shift_targets)) + + # 3. Align on protein backbone/atoms + transforms.append(Aligner(protein)) + + return transforms + + +def _apply_transformations_ligand_only(ligand): + """ + Build transformations for ligand-only systems. + - make the ligand not jump periodic images between frames + - align the ligand to minimize its RMSD + """ + return [ + NoJump(ligand), + Aligner(ligand), + ] From 2f7a441fdf821381c9c8c87d3c482473f2433cd8 Mon Sep 17 00:00:00 2001 From: hannahbaumann Date: Mon, 13 Apr 2026 14:53:42 +0200 Subject: [PATCH 5/5] Fix docstring --- src/openfe_analysis/utils/apply_transformations.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/openfe_analysis/utils/apply_transformations.py b/src/openfe_analysis/utils/apply_transformations.py index 9eb01cc..112fe99 100644 --- a/src/openfe_analysis/utils/apply_transformations.py +++ b/src/openfe_analysis/utils/apply_transformations.py @@ -29,12 +29,14 @@ def apply_transformations( transformations is applied: If a protein is present: + - Unwraps protein and ligand atom to be made whole - Shifts protein chains and the ligand to the image closest to the first protein chain (:class:`ClosestImageShift`) - Aligns the entire system to minimise the protein RMSD (:class:`Aligner`) If only a ligand is present: + - Prevents the ligand from jumping between periodic images - Aligns the ligand to minimize its RMSD """