Skip to content
Open
11 changes: 11 additions & 0 deletions src/openfe_analysis/reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pathlib
from typing import Literal, Optional

import MDAnalysis as mda
import netCDF4 as nc
import numpy as np
import yaml
Expand All @@ -11,6 +12,16 @@
from openfe_analysis.utils.multistate import _determine_position_indices


def _create_universe_single_state(top, trj, state):
return mda.Universe(
top,
trj,
index=state,
index_method="state",
format=FEReader,
)


def _determine_iteration_dt(dataset) -> float:
"""
Determine the time increment between successive iterations
Expand Down
95 changes: 5 additions & 90 deletions src/openfe_analysis/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,96 +7,9 @@
import numpy as np
from MDAnalysis.analysis import diffusionmap, rms
from MDAnalysis.analysis.base import AnalysisBase
from MDAnalysis.transformations import unwrap

from .reader import FEReader
from .transformations import Aligner, ClosestImageShift, NoJump


def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
"""
Construct an MDAnalysis Universe from a MultiState NetCDF trajectory
and apply standard analysis transformations.

The Universe is created using the custom ``FEReader`` to extract a
single state from a multistate simulation.

Parameters
----------
top : pathlib.Path or Topology
Path to a topology file (e.g. PDB) or an already-loaded MDAnalysis
topology object.
trj : nc.Dataset
Open NetCDF dataset produced by
``openmmtools.multistate.MultiStateReporter``.
state : int
Thermodynamic state index to extract from the multistate trajectory.

Returns
-------
MDAnalysis.Universe
A Universe with trajectory transformations applied.

Notes
-----
Identifies two AtomGroups:

- Protein, defined as having standard amino acid names, then filtered down to CA
- Ligand, defined as "resname UNK"

Depending on whether a protein is present, a sequence of trajectory
transformations is applied:

If a protein is present:

- Unwraps protein and ligand atom to be made whole
- Shifts protein chains and the ligand to the image closest to the first
protein chain (:class:`ClosestImageShift`)
- Aligns the entire system to minimise the protein RMSD (:class:`Aligner`)

If only a ligand is present:

- Prevents the ligand from jumping between periodic images
- Aligns the ligand to minimize its RMSD
"""
u = mda.Universe(
top,
trj,
index=state,
index_method="state",
format=FEReader,
)
prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

if prot:
# Unwrap all atoms
unwrap_tr = unwrap(prot + ligand)

# Shift chains + ligand
chains = [seg.atoms for seg in prot.segments]
shift = ClosestImageShift(chains[0], [*chains[1:], ligand])

align = Aligner(prot)

u.trajectory.add_transformations(
unwrap_tr,
shift,
align,
)
else:
# if there's no protein
# - make the ligand not jump periodic images between frames
# - align the ligand to minimise its RMSD
nope = NoJump(ligand)
align = Aligner(ligand)

u.trajectory.add_transformations(
nope,
align,
)

return u
from .reader import _create_universe_single_state
from .utils.apply_transformations import apply_transformations


class Protein2DRMSD(AnalysisBase):
Expand Down Expand Up @@ -298,11 +211,13 @@ def gather_rms_data(
for state_idx in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=state_idx)
u = _create_universe_single_state(u_top._topology, ds, state_idx)

prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

apply_transformations(u, prot, ligand)

if prot:
prot_rmsd = RMSDAnalysis(prot).run(step=skip)
output["protein_RMSD"].append(prot_rmsd.results.rmsd)
Expand Down
37 changes: 18 additions & 19 deletions src/openfe_analysis/tests/test_rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from MDAnalysis.transformations import unwrap
from numpy.testing import assert_allclose

from openfe_analysis.reader import FEReader
from openfe_analysis.rmsd import gather_rms_data, make_Universe
from openfe_analysis.reader import _create_universe_single_state
from openfe_analysis.rmsd import gather_rms_data
from openfe_analysis.transformations import Aligner
from openfe_analysis.utils import apply_transformations


@pytest.fixture
Expand All @@ -22,11 +23,10 @@ def mda_universe(hybrid_system_skipped_pdb, simulation_skipped_nc):
Guarantees:
- NetCDF file is opened exactly once
"""
u = make_Universe(
hybrid_system_skipped_pdb,
simulation_skipped_nc,
state=0,
)
u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
protein = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")
apply_transformations.apply_transformations(u, protein, ligand)
yield u
u.trajectory.close()

Expand Down Expand Up @@ -108,13 +108,8 @@ def test_gather_rms_data_regression_skippednc(simulation_skipped_nc, hybrid_syst


def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_pdb):
u = mda.Universe(
hybrid_system_skipped_pdb,
simulation_skipped_nc,
index=0,
format=FEReader,
)
prot = u.select_atoms("protein")
u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
prot = u.select_atoms("protein and name CA")
# Do other transformations, but no shifting
unwrap_tr = unwrap(prot)
for frag in prot.fragments:
Expand All @@ -132,8 +127,11 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p
u.trajectory.close()

# RMSD with shifting
u2 = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0)
prot2 = u2.select_atoms("protein")
u2 = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
prot2 = u2.select_atoms("protein and name CA")

apply_transformations.apply_transformations(u2, protein=prot2)

R2 = rms.RMSD(prot2)
R2.run()
rmsd_shift = R2.rmsd[:, 2]
Expand All @@ -142,9 +140,10 @@ def test_multichain_rmsd_shifting(simulation_skipped_nc, hybrid_system_skipped_p


def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_skipped_pdb):
u = make_Universe(hybrid_system_skipped_pdb, simulation_skipped_nc, state=0)
u = _create_universe_single_state(hybrid_system_skipped_pdb, simulation_skipped_nc, 0)
protein = u.select_atoms("protein and name CA")
apply_transformations.apply_transformations(u, protein)

protein = u.select_atoms("protein")
chain = protein.segments[0].atoms

rgs = []
Expand All @@ -158,7 +157,7 @@ def test_chain_radius_of_gyration_stable(simulation_skipped_nc, hybrid_system_sk

def test_rmsd_reference_is_first_frame(mda_universe):
u = mda_universe
prot = u.select_atoms("protein")
prot = u.select_atoms("protein and name CA")

_ = next(iter(u.trajectory)) # SAFE
ref = prot.positions.copy()
Expand Down
89 changes: 89 additions & 0 deletions src/openfe_analysis/utils/apply_transformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Optional

import MDAnalysis as mda
from MDAnalysis.transformations import unwrap

from ..transformations import Aligner, ClosestImageShift, NoJump


def apply_transformations(
Comment thread
hannahbaumann marked this conversation as resolved.
u: mda.Universe,
protein: Optional[mda.AtomGroup] = None,
ligand: Optional[mda.AtomGroup] = None,
):
"""
Apply a collection of transformations to a Universe.

Parameters
----------
u: Universe
The Universe the transformations are applied to
protein: Optional[AtomGroup]
The AtomGroup of the protein
ligand: Optional[AtomGroup]
The AtomGroup of the ligand

Notes
-----
Depending on whether a protein is present, a sequence of trajectory
transformations is applied:

If a protein is present:

- Unwraps protein and ligand atom to be made whole
- Shifts protein chains and the ligand to the image closest to the first
protein chain (:class:`ClosestImageShift`)
- Aligns the entire system to minimise the protein RMSD (:class:`Aligner`)

If only a ligand is present:

- Prevents the ligand from jumping between periodic images
- Aligns the ligand to minimize its RMSD
"""
has_protein = protein is not None and protein.n_atoms > 0
has_ligand = ligand is not None and ligand.n_atoms > 0

if has_protein:
lig = ligand if has_ligand else None
transforms = _apply_transformations_complex(protein, lig)
elif has_ligand:
transforms = _apply_transformations_ligand_only(ligand)
else:
return

u.trajectory.add_transformations(*transforms)


def _apply_transformations_complex(protein, ligand=None):
"""
Build transformations for systems containing a protein
and optionally a ligand.
"""
transforms = []
# 1. Make molecules whole (protein + optional ligand)
group = protein if ligand is None else protein + ligand
transforms.append(unwrap(group))

# 2. Closest image shift for protein chains + ligand (if present)
chains = [seg.atoms for seg in protein.segments]
shift_targets = chains[1:]
if ligand is not None:
shift_targets.append(ligand)
transforms.append(ClosestImageShift(chains[0], shift_targets))

# 3. Align on protein backbone/atoms
transforms.append(Aligner(protein))

return transforms


def _apply_transformations_ligand_only(ligand):
"""
Build transformations for ligand-only systems.
- make the ligand not jump periodic images between frames
- align the ligand to minimize its RMSD
"""
return [
NoJump(ligand),
Aligner(ligand),
]