diff --git a/src/openfe_analysis/reader.py b/src/openfe_analysis/reader.py index cf7aaea..3b1f816 100644 --- a/src/openfe_analysis/reader.py +++ b/src/openfe_analysis/reader.py @@ -1,6 +1,7 @@ import pathlib from typing import Literal, Optional +import MDAnalysis as mda import netCDF4 as nc import numpy as np import yaml @@ -11,6 +12,16 @@ from openfe_analysis.utils.multistate import _determine_position_indices +def _create_universe_single_state(top, trj, state): + return mda.Universe( + top, + trj, + index=state, + index_method="state", + format=FEReader, + ) + + def _determine_iteration_dt(dataset) -> float: """ Determine the time increment between successive iterations diff --git a/src/openfe_analysis/rmsd.py b/src/openfe_analysis/rmsd.py index 23c2680..80509bf 100644 --- a/src/openfe_analysis/rmsd.py +++ b/src/openfe_analysis/rmsd.py @@ -7,96 +7,9 @@ import numpy as np from MDAnalysis.analysis import diffusionmap, rms from MDAnalysis.analysis.base import AnalysisBase -from MDAnalysis.transformations import unwrap -from .reader import FEReader -from .transformations import Aligner, ClosestImageShift, NoJump - - -def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe: - """ - Construct an MDAnalysis Universe from a MultiState NetCDF trajectory - and apply standard analysis transformations. - - The Universe is created using the custom ``FEReader`` to extract a - single state from a multistate simulation. - - Parameters - ---------- - top : pathlib.Path or Topology - Path to a topology file (e.g. PDB) or an already-loaded MDAnalysis - topology object. - trj : nc.Dataset - Open NetCDF dataset produced by - ``openmmtools.multistate.MultiStateReporter``. - state : int - Thermodynamic state index to extract from the multistate trajectory. - - Returns - ------- - MDAnalysis.Universe - A Universe with trajectory transformations applied. - - Notes - ----- - Identifies two AtomGroups: - - - Protein, defined as having standard amino acid names, then filtered down to CA - - Ligand, defined as "resname UNK" - - Depending on whether a protein is present, a sequence of trajectory - transformations is applied: - - If a protein is present: - - - Unwraps protein and ligand atom to be made whole - - Shifts protein chains and the ligand to the image closest to the first - protein chain (:class:`ClosestImageShift`) - - Aligns the entire system to minimise the protein RMSD (:class:`Aligner`) - - If only a ligand is present: - - - Prevents the ligand from jumping between periodic images - - Aligns the ligand to minimize its RMSD - """ - u = mda.Universe( - top, - trj, - index=state, - index_method="state", - format=FEReader, - ) - prot = u.select_atoms("protein and name CA") - ligand = u.select_atoms("resname UNK") - - if prot: - # Unwrap all atoms - unwrap_tr = unwrap(prot + ligand) - - # Shift chains + ligand - chains = [seg.atoms for seg in prot.segments] - shift = ClosestImageShift(chains[0], [*chains[1:], ligand]) - - align = Aligner(prot) - - u.trajectory.add_transformations( - unwrap_tr, - shift, - align, - ) - else: - # if there's no protein - # - make the ligand not jump periodic images between frames - # - align the ligand to minimise its RMSD - nope = NoJump(ligand) - align = Aligner(ligand) - - u.trajectory.add_transformations( - nope, - align, - ) - - return u +from .reader import _create_universe_single_state +from .utils.apply_transformations import apply_transformations class Protein2DRMSD(AnalysisBase): @@ -298,11 +211,13 @@ def gather_rms_data( for state_idx in range(n_lambda): # cheeky, but we can read the PDB topology once and reuse per universe # this then only hits the PDB file once for all replicas - u = make_Universe(u_top._topology, ds, state=state_idx) + u = _create_universe_single_state(u_top._topology, ds, state_idx) prot = u.select_atoms("protein and name CA") ligand = u.select_atoms("resname UNK") + apply_transformations(u, prot, ligand) + if prot: prot_rmsd = RMSDAnalysis(prot).run(step=skip) output["protein_RMSD"].append(prot_rmsd.results.rmsd) diff --git a/src/openfe_analysis/tests/test_rmsd.py b/src/openfe_analysis/tests/test_rmsd.py index a9e8b09..b22cfd0 100644 --- a/src/openfe_analysis/tests/test_rmsd.py +++ b/src/openfe_analysis/tests/test_rmsd.py @@ -9,9 +9,10 @@ from MDAnalysis.transformations import unwrap from numpy.testing import assert_allclose -from openfe_analysis.reader import FEReader -from openfe_analysis.rmsd import gather_rms_data, make_Universe +from openfe_analysis.reader import _create_universe_single_state +from openfe_analysis.rmsd import gather_rms_data from openfe_analysis.transformations import Aligner +from openfe_analysis.utils import apply_transformations @pytest.fixture @@ -22,11 +23,10 @@ def mda_universe(hybrid_system_skipped_pdb, simulation_skipped_nc): Guarantees: - NetCDF file is opened exactly once """ - u = make_Universe( - hybrid_system_skipped_pdb, - simulation_skipped_nc, - state=0, - ) + u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) + protein = u.select_atoms("protein and name CA") + ligand = u.select_atoms("resname UNK") + apply_transformations.apply_transformations(u, protein, ligand) yield u u.trajectory.close() @@ -108,13 +108,8 @@ def test_gather_rms_data_regression_skippednc(simulation_skipped_nc, hybrid_syst def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_pdb): - u = mda.Universe( - hybrid_system_skipped_pdb, - simulation_skipped_nc, - index=0, - format=FEReader, - ) - prot = u.select_atoms("protein") + u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) + prot = u.select_atoms("protein and name CA") # Do other transformations, but no shifting unwrap_tr = unwrap(prot) for frag in prot.fragments: @@ -132,8 +127,11 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p u.trajectory.close() # RMSD with shifting - u2 = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0) - prot2 = u2.select_atoms("protein") + u2 = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) + prot2 = u2.select_atoms("protein and name CA") + + apply_transformations.apply_transformations(u2, protein=prot2) + R2 = rms.RMSD(prot2) R2.run() rmsd_shift = R2.rmsd[:, 2] @@ -142,9 +140,10 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_skipped_pdb): - u = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0) + u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0) + protein = u.select_atoms("protein and name CA") + apply_transformations.apply_transformations(u, protein) - protein = u.select_atoms("protein") chain = protein.segments[0].atoms rgs = [] @@ -158,7 +157,7 @@ def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_sk def test_rmsd_reference_is_first_frame(mda_universe): u = mda_universe - prot = u.select_atoms("protein") + prot = u.select_atoms("protein and name CA") _ = next(iter(u.trajectory)) # SAFE ref = prot.positions.copy() diff --git a/src/openfe_analysis/utils/apply_transformations.py b/src/openfe_analysis/utils/apply_transformations.py new file mode 100644 index 0000000..112fe99 --- /dev/null +++ b/src/openfe_analysis/utils/apply_transformations.py @@ -0,0 +1,89 @@ +from typing import Optional + +import MDAnalysis as mda +from MDAnalysis.transformations import unwrap + +from ..transformations import Aligner, ClosestImageShift, NoJump + + +def apply_transformations( + u: mda.Universe, + protein: Optional[mda.AtomGroup] = None, + ligand: Optional[mda.AtomGroup] = None, +): + """ + Apply a collection of transformations to a Universe. + + Parameters + ---------- + u: Universe + The Universe the transformations are applied to + protein: Optional[AtomGroup] + The AtomGroup of the protein + ligand: Optional[AtomGroup] + The AtomGroup of the ligand + + Notes + ----- + Depending on whether a protein is present, a sequence of trajectory + transformations is applied: + + If a protein is present: + + - Unwraps protein and ligand atom to be made whole + - Shifts protein chains and the ligand to the image closest to the first + protein chain (:class:`ClosestImageShift`) + - Aligns the entire system to minimise the protein RMSD (:class:`Aligner`) + + If only a ligand is present: + + - Prevents the ligand from jumping between periodic images + - Aligns the ligand to minimize its RMSD + """ + has_protein = protein is not None and protein.n_atoms > 0 + has_ligand = ligand is not None and ligand.n_atoms > 0 + + if has_protein: + lig = ligand if has_ligand else None + transforms = _apply_transformations_complex(protein, lig) + elif has_ligand: + transforms = _apply_transformations_ligand_only(ligand) + else: + return + + u.trajectory.add_transformations(*transforms) + + +def _apply_transformations_complex(protein, ligand=None): + """ + Build transformations for systems containing a protein + and optionally a ligand. + """ + transforms = [] + # 1. Make molecules whole (protein + optional ligand) + group = protein if ligand is None else protein + ligand + transforms.append(unwrap(group)) + + # 2. Closest image shift for protein chains + ligand (if present) + chains = [seg.atoms for seg in protein.segments] + shift_targets = chains[1:] + if ligand is not None: + shift_targets.append(ligand) + transforms.append(ClosestImageShift(chains[0], shift_targets)) + + # 3. Align on protein backbone/atoms + transforms.append(Aligner(protein)) + + return transforms + + +def _apply_transformations_ligand_only(ligand): + """ + Build transformations for ligand-only systems. + - make the ligand not jump periodic images between frames + - align the ligand to minimize its RMSD + """ + return [ + NoJump(ligand), + Aligner(ligand), + ]