-
Notifications
You must be signed in to change notification settings - Fork 1
Refactor RMSD analyses into MDAnalysis AnaysisBase classes
#90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
2977c01
4a960a0
afa829a
df017da
d824ab6
71a11d3
5cabcf4
e16b2e3
616bf42
90ed39e
7d6e49f
4bf598f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,11 +5,9 @@ | |
| import MDAnalysis as mda | ||
| import netCDF4 as nc | ||
| import numpy as np | ||
| import tqdm | ||
| from MDAnalysis.analysis import rms | ||
| from MDAnalysis.lib.mdamath import make_whole | ||
| from MDAnalysis.analysis.base import AnalysisBase | ||
| from MDAnalysis.transformations import unwrap | ||
| from numpy import typing as npt | ||
|
|
||
| from .reader import FEReader | ||
| from .transformations import Aligner, ClosestImageShift, NoJump | ||
|
|
@@ -99,6 +97,139 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Univers | |
| return u | ||
|
|
||
|
|
||
| class ProteinRMSD(AnalysisBase): | ||
| """Calculate the 1D protein RMSD""" | ||
|
|
||
| def __init__(self, atomgroup, **kwargs): | ||
| super(ProteinRMSD, self).__init__(atomgroup.universe.trajectory, **kwargs) | ||
|
|
||
| self._ag = atomgroup | ||
|
|
||
| def _prepare(self): | ||
| """ | ||
| Set the first frame as the reference | ||
| """ | ||
| self.results.rmsd = [] | ||
| self._reference = self._ag.positions | ||
|
|
||
| def _single_frame(self): | ||
| rmsd = rms.rmsd( | ||
| self._ag.positions, | ||
| self._reference, | ||
| center=False, | ||
| superposition=False, | ||
| ) | ||
| self.results.rmsd.append(rmsd) | ||
|
|
||
| def _conclude(self): | ||
| self.results.rmsd = np.asarray(self.results.rmsd) | ||
|
|
||
|
|
||
| class Protein2DRMSD(AnalysisBase): | ||
| """ | ||
| Flattened 2D RMSD matrix | ||
|
|
||
| For all unique frame pairs ``(i, j)`` with ``i < j``, this function | ||
| computes the RMSD between atomic coordinates after optimal alignment. | ||
| """ | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you explicitly define |
||
| def __init__(self, atomgroup, weights=None, **kwargs): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here and everywhere else, add typing where possible? |
||
| """ | ||
| Parameters | ||
| ---------- | ||
| atomgroup: AtomGroup | ||
| Protein atoms (e.g. CA selection) | ||
| weights: np.ndarray, optional | ||
| Per-atom weights to use in the RMSD calculation. If ``None``, | ||
| all atoms are weighted equally. | ||
| """ | ||
| super(Protein2DRMSD, self).__init__(atomgroup.universe.trajectory, **kwargs) | ||
|
|
||
| self._weights = weights | ||
| self._ag = atomgroup | ||
|
|
||
| def _prepare(self): | ||
| self._coords = [] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you pre-allocate numpy arrays here instead? |
||
| self.results.rmsd2d = [] | ||
|
|
||
| def _single_frame(self): | ||
| self._coords.append(self._ag.positions) | ||
|
|
||
| def _conclude(self): | ||
| positions = np.asarray(self._coords) | ||
| nframes, _, _ = positions.shape | ||
|
|
||
| output = [] | ||
| for i, j in itertools.combinations(range(nframes), 2): | ||
| posi, posj = positions[i], positions[j] | ||
| rmsd = rms.rmsd( | ||
| posi, | ||
| posj, | ||
| self._weights, | ||
| center=True, | ||
| superposition=True, | ||
| ) | ||
| output.append(rmsd) | ||
|
|
||
| self.results.rmsd2d = np.asarray(output) | ||
|
|
||
|
|
||
| class LigandRMSD(AnalysisBase): | ||
| """ | ||
| 1D RMSD time series for a ligand AtomGroup. | ||
| """ | ||
|
|
||
| def __init__(self, atomgroup, **kwargs): | ||
| super(LigandRMSD, self).__init__(atomgroup.universe.trajectory, **kwargs) | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please remember to add. |
||
| self._ag = atomgroup | ||
|
|
||
| def _prepare(self): | ||
| self.results.rmsd = [] | ||
| self._reference = self._ag.positions | ||
| self._weights = self._ag.masses / np.mean(self._ag.masses) | ||
|
|
||
| def _single_frame(self): | ||
| rmsd = rms.rmsd( | ||
| self._ag.positions, | ||
| self._reference, | ||
| self._weights, | ||
| center=False, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you document why this is |
||
| superposition=False, | ||
| ) | ||
| self.results.rmsd.append(rmsd) | ||
|
|
||
| def _conclude(self): | ||
| self.results.rmsd = np.asarray(self.results.rmsd) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2 initial thoughts:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jthorton : There's a difference between the RMSD class in MDAnalysis in what we've been doing, and I'm not sure yet what we want. In MDAnalysis, by default they are doing a superposition (rotational and translational), while we didn't do that. Now I'm not sure, what are we actually interested in, esp. for the ligand RMSD. Should this be the RMSD of the internal conformation of the ligand, or the RMSD of the ligand in the binding pocket/ligand pose? I think in the solvent we would definitely be more interested in the internal conformation of the ligand. In the binding pocket, I'm not sure. We also calculate the ligand COM displacement as a measure of stability in the pocket, but I'm not sure what we would want for the RMSD. What do you think? |
||
|
|
||
|
|
||
| class LigandCOMDrift(AnalysisBase): | ||
| """ | ||
| Ligand center-of-mass displacement from initial position. | ||
| """ | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As above re: parallelizable |
||
| def __init__(self, atomgroup, **kwargs): | ||
| super(LigandCOMDrift, self).__init__(atomgroup.universe.trajectory, **kwargs) | ||
|
|
||
| self._ag = atomgroup | ||
|
|
||
| def _prepare(self): | ||
| self.results.com_drift = [] | ||
| self._initial_com = self._ag.center_of_mass() | ||
|
|
||
| def _single_frame(self): | ||
| # distance between start and current ligand position | ||
| # ignores PBC, but we've already centered the traj | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why ignore PBC? Could you not just pass the Please document this in the docstring. |
||
| drift = mda.lib.distances.calc_bonds( | ||
| self._ag.center_of_mass(), | ||
| self._initial_com, | ||
| ) | ||
| self.results.com_drift.append(drift) | ||
|
|
||
| def _conclude(self): | ||
| self.results.com_drift = np.asarray(self.results.com_drift) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: it may be more ever so slightly efficient to just pre-allocate the array ahead of time in |
||
|
|
||
|
|
||
| def gather_rms_data( | ||
| pdb_topology: pathlib.Path, dataset: pathlib.Path, skip: Optional[int] = None | ||
| ) -> dict[str, list[float]]: | ||
|
|
@@ -159,8 +290,6 @@ def gather_rms_data( | |
| # max against 1 to avoid skip=0 case | ||
| skip = max(n_frames // 500, 1) | ||
|
|
||
| pb = tqdm.tqdm(total=int(n_frames / skip) * n_lambda) | ||
|
|
||
| u_top = mda.Universe(pdb_topology) | ||
|
|
||
| for i in range(n_lambda): | ||
|
|
@@ -171,93 +300,18 @@ def gather_rms_data( | |
| prot = u.select_atoms("protein and name CA") | ||
| ligand = u.select_atoms("resname UNK") | ||
|
|
||
| # save coordinates for 2D RMSD matrix | ||
| # TODO: Some smart guard to avoid allocating a silly amount of memory? | ||
| prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32) | ||
|
|
||
| prot_start = prot.positions | ||
| ligand_start = ligand.positions | ||
| ligand_initial_com = ligand.center_of_mass() | ||
| ligand_weights = ligand.masses / np.mean(ligand.masses) | ||
|
|
||
| this_protein_rmsd = [] | ||
| this_ligand_rmsd = [] | ||
| this_ligand_wander = [] | ||
|
|
||
| for ts_i, ts in enumerate(u.trajectory[::skip]): | ||
| pb.update() | ||
|
|
||
| if prot: | ||
| prot2d[ts_i, :, :] = prot.positions | ||
| this_protein_rmsd.append( | ||
| rms.rmsd( | ||
| prot.positions, | ||
| prot_start, | ||
| None, # prot_weights, | ||
| center=False, | ||
| superposition=False, | ||
| ) | ||
| ) | ||
| if ligand: | ||
| this_ligand_rmsd.append( | ||
| rms.rmsd( | ||
| ligand.positions, | ||
| ligand_start, | ||
| ligand_weights, | ||
| center=False, | ||
| superposition=False, | ||
| ) | ||
| ) | ||
| this_ligand_wander.append( | ||
| # distance between start and current ligand position | ||
| # ignores PBC, but we've already centered the traj | ||
| mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com) | ||
| ) | ||
|
|
||
| if prot: | ||
| # can ignore weights here as it's all Ca | ||
| rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights) | ||
| output["protein_RMSD"].append(this_protein_rmsd) | ||
| output["protein_2D_RMSD"].append(rmsd2d) | ||
| if ligand: | ||
| output["ligand_RMSD"].append(this_ligand_rmsd) | ||
| output["ligand_wander"].append(this_ligand_wander) | ||
|
|
||
| output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| def twoD_RMSD(positions, w: Optional[npt.NDArray]) -> list[float]: | ||
| """ | ||
| Compute a flattened 2D RMSD matrix from a trajectory. | ||
|
|
||
| For all unique frame pairs ``(i, j)`` with ``i < j``, this function | ||
| computes the RMSD between atomic coordinates after optimal alignment. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| positions : np.ndarray | ||
| Atomic coordinates for all frames in the trajectory. | ||
| w : np.ndarray, optional | ||
| Per-atom weights to use in the RMSD calculation. If ``None``, | ||
| all atoms are weighted equally. | ||
| prot_rmsd = ProteinRMSD(prot).run(step=skip) | ||
| output["protein_RMSD"].append(prot_rmsd.results.rmsd) | ||
| prot_rmsd2d = Protein2DRMSD(prot).run(step=skip) | ||
| output["protein_2D_RMSD"].append(prot_rmsd2d.results.rmsd2d) | ||
|
|
||
| Returns | ||
| ------- | ||
| list of float | ||
| Flattened list of RMSD values corresponding to all frame pairs | ||
| ``(i, j)`` with ``i < j``. | ||
| """ | ||
| nframes, _, _ = positions.shape | ||
|
|
||
| output = [] | ||
|
|
||
| for i, j in itertools.combinations(range(nframes), 2): | ||
| posi, posj = positions[i], positions[j] | ||
|
|
||
| rmsd = rms.rmsd(posi, posj, w, center=True, superposition=True) | ||
| if ligand: | ||
| lig_rmsd = LigandRMSD(ligand).run(step=skip) | ||
| output["ligand_RMSD"].append(lig_rmsd.results.rmsd) | ||
| lig_com_drift = LigandCOMDrift(ligand).run(step=skip) | ||
| output["ligand_wander"].append(lig_com_drift.results.com_drift) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is historical, so it doesn't have to be here, but can we please renamed this to |
||
|
|
||
| output.append(rmsd) | ||
| output["time(ps)"] = np.arange(len(u.trajectory))[::skip] * u.trajectory.dt | ||
|
|
||
| return output | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you maybe expand this to mention you're doing a center of geometry fit as well as a rotational and translational superposition usingg QCP?