Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/notebooks/studies/study_01_biomarker_csf.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@
"print(adata_25pc.obsm[\"X_pca_obs\"].shape)\n",
"\n",
"print(\"\\nPCA loadings: adata.varm['PCs'] with shape (n_var x n_comps):\")\n",
"print(adata_25pc.varm[\"PCs_obs\"].shape)\n",
"print(adata_25pc.varm[\"PCs_pca_obs\"].shape)\n",
"\n",
"print(\"\\nRatio of explained variance: uns['pca']['variance_ratio'] with shape (n_comps,):\")\n",
"print(adata_25pc.uns[\"variance_pca_obs\"][\"variance_ratio\"])\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/notebooks/tutorials/tutorial_01_basic_workflow.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@
"print(adata_25pc.obsm[\"X_pca_obs\"].shape)\n",
"\n",
"print(\"\\nPCA loadings: adata.varm['PCs'] with shape (n_var x n_comps):\")\n",
"print(adata_25pc.varm[\"PCs_obs\"].shape)\n",
"print(adata_25pc.varm[\"PCs_pca_obs\"].shape)\n",
"\n",
"print(\"\\nRatio of explained variance: uns['pca']['variance_ratio'] with shape (n_comps,):\")\n",
"print(adata_25pc.uns[\"variance_pca_obs\"][\"variance_ratio\"])\n",
Expand Down
212 changes: 173 additions & 39 deletions docs/notebooks/tutorials/tutorial_03_basic_PCA_workflow.ipynb

Large diffs are not rendered by default.

50 changes: 41 additions & 9 deletions src/alphapepttools/pl/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import logging
from collections import Counter
from collections.abc import Callable
from typing import Any
from typing import Any, Literal

import anndata as ad
import matplotlib as mpl
Expand Down Expand Up @@ -1688,6 +1688,7 @@ def plot_pca(
color_column: str | None = None,
dim_space: str = "obs",
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
label: bool = False, # noqa: FBT001, FBT002
label_column: str | None = None,
ax: plt.Axes | None = None,
Expand Down Expand Up @@ -1728,6 +1729,9 @@ def plot_pca(
embeddings_name
Custom embeddings name if non-default name was used in the PCA function.
If None, uses default naming convention ("X_pca_obs" or "X_pca_var").
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.
label
Whether to add text labels to points in the scatter plot.
label_column
Expand Down Expand Up @@ -1805,7 +1809,11 @@ def plot_pca(
scatter_kwargs = scatter_kwargs or {}

adata_pca = extract_pca_anndata(
data, dim_space=dim_space, embeddings_name=embeddings_name, expression_columns=color_map_column
data,
dim_space=dim_space,
embeddings_name=embeddings_name,
expression_columns=color_map_column,
method=method,
)

# get the explained variance ratio for the dimensions (for axis labels)
Expand Down Expand Up @@ -1867,6 +1875,7 @@ def scree_plot(
dim_space: str = "obs",
color: str = "blue",
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
scatter_kwargs: dict | None = None,
) -> None:
"""Scree plot showing explained variance for each principal component.
Expand All @@ -1892,6 +1901,9 @@ def scree_plot(
embeddings_name
Custom embeddings name if non-default name was used in the PCA function.
If None, uses default naming convention.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.
scatter_kwargs
Additional keyword arguments passed to matplotlib scatter (e.g., s, alpha).

Expand Down Expand Up @@ -1931,7 +1943,7 @@ def scree_plot(
scatter_kwargs = scatter_kwargs or {}

# create the dataframe for plotting, X = pcs, y = explained variance
values = prepare_scree_data_to_plot(adata, n_pcs, dim_space, embeddings_name)
values = prepare_scree_data_to_plot(adata, n_pcs, dim_space, embeddings_name, method=method)

cls.scatter(
data=values,
Expand All @@ -1953,6 +1965,7 @@ def plot_pca_loadings(
ax: plt.Axes,
dim_space: str = "obs",
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
dim: int = 1,
nfeatures: int = 20,
scatter_kwargs: dict | None = None,
Expand All @@ -1977,6 +1990,9 @@ def plot_pca_loadings(
embeddings_name
Custom embeddings name if non-default name was used in the PCA function.
If None, uses default naming convention.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.
dim
Principal component number to show loadings for (1-indexed, so 1 = PC1, 2 = PC2, etc.).
nfeatures
Expand Down Expand Up @@ -2035,6 +2051,7 @@ def plot_pca_loadings(
data=data,
dim_space=dim_space,
embeddings_name=embeddings_name,
method=method,
dim=dim,
nfeatures=nfeatures,
)
Expand All @@ -2060,6 +2077,7 @@ def plot_pca_loadings_2d(
ax: plt.Axes,
dim_space: str = "obs",
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
pc_x: int = 1,
pc_y: int = 2,
nfeatures: int = 20,
Expand All @@ -2082,19 +2100,28 @@ def plot_pca_loadings_2d(
ax
Matplotlib axes object to plot on.
dim_space
The dimension space used in PCA. Can be either "obs" (default) for sample projection or "var" for feature projection. By default "obs".
The dimension space used in PCA. Can be either "obs" (default) for sample projection
or "var" for feature projection. By default "obs".
embeddings_name
The custom embeddings name used in PCA. If None, uses default naming convention. By default None.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.
pc_x
The PC principal component index to plot on the x axis, by default 1. Corresponds to the principal component order, the first principal is 1 (1-indexed, i.e. the first PC is 1, not 0).
The principal component index to plot on the x axis, by default 1.
Corresponds to the principal component order, the first principal is 1 (1-indexed,
i.e. the first PC is 1, not 0).
pc_y
The principal component index to plot on the y axis, by default 2. Corresponds to the principal component order, the first principal is 1 (1-indexed, i.e. the first PC is 1, not 0).
The principal component index to plot on the y axis, by default 2.
Corresponds to the principal component order, the first principal is 1 (1-indexed,
i.e. the first PC is 1, not 0).
nfeatures
The number of top absolute loadings features to label from each component, by default 20
add_labels
Whether to add feature labels of the top `nfeatures` loadings. By default `True`.
add_lines
If True, draw lines connecting the origin (0,0) to the points representing the top `nfeatures` loadings. Default is `False`.
If True, draw lines connecting the origin (0,0) to the points representing the top `nfeatures` loadings.
Default is `False`.
scatter_kwargs
Additional keyword arguments for the matplotlib scatter function. By default None.

Expand Down Expand Up @@ -2129,10 +2156,15 @@ def plot_pca_loadings_2d(
scatter_kwargs = scatter_kwargs or {}

# Generate the correct loadings key name
loadings_key = f"PCs_{dim_space}" if embeddings_name is None else embeddings_name

loadings_df = prepare_pca_2d_loadings_data_to_plot(
data=data, loadings_name=loadings_key, pc_x=pc_x, pc_y=pc_y, nfeatures=nfeatures, dim_space=dim_space
data=data,
embeddings_name=embeddings_name,
method=method,
pc_x=pc_x,
pc_y=pc_y,
nfeatures=nfeatures,
dim_space=dim_space,
)

# plot the loadings of all features (used in PCA) first
Expand Down
14 changes: 7 additions & 7 deletions src/alphapepttools/tl/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ def pca(
If provided, this will be used as the key under which to store the PCA results in
`adata.obsm`, `adata.varm`, and `adata.uns` (see Returns).
If None, the default keys will be used:
- For `dim_space='obs'`: `X_pca_obs` for PC coordinates, `PCs_obs` for the feature loadings, `variance_pca_obs` for the variance.
- For `dim_space='var'`: `X_pca_var` for PC corrdinates, `PCs_var` for the sample loadings, `variance_pca_var` for the variance.
- For `dim_space='obs'`: `X_pca_obs` for PC coordinates, `PCs_pca_obs` for the feature loadings, `variance_pca_obs` for the variance.
- For `dim_space='var'`: `X_pca_var` for PC coordinates, `PCs_pca_var` for the sample loadings, `variance_pca_var` for the variance.
If provided, the keys will be `embeddings_name` for all three data frames.
n_comps
Number of principal components to compute. Defaults to 50, or 1 - minimum
Expand All @@ -247,7 +247,7 @@ def pca(
for `dim_space='obs'` (sample projection):
`.obsm['X_pca_obs' | embeddings_name]` : :class:`~scipy.sparse.csr_matrix` | :class:`~scipy.sparse.csc_matrix` | :class:`~numpy.ndarray` (shape `(adata.n_obs, n_comps)`)
PCA representation of data.
`.varm['PCs_obs' | embeddings_name]` : :class:`~numpy.ndarray` (shape `(adata.n_vars, n_comps)`)
`.varm['PCs_pca_obs' | embeddings_name]` : :class:`~numpy.ndarray` (shape `(adata.n_vars, n_comps)`)
The principal components containing the loadings.
`.uns['variance_pca_obs' | embeddings_name]['variance_ratio']` : :class:`~numpy.ndarray` (shape `(n_comps,)`)
Ratio of explained variance.
Expand All @@ -258,7 +258,7 @@ def pca(
for `dim_space='var'` (feature projection):
`.varm['X_pca_var' | embeddings_name]` : :class:`~scipy.sparse.csr_matrix` | :class:`~scipy.sparse.csc_matrix` | :class:`~numpy.ndarray` (shape `(adata.n_obs, n_comps)`)
PCA representation of data.
`.obsm['PCs_var' | embeddings_name]` : :class:`~numpy.ndarray` (shape `(adata.n_vars, n_comps)`)
`.obsm['PCs_pca_var' | embeddings_name]` : :class:`~numpy.ndarray` (shape `(adata.n_vars, n_comps)`)
The principal components containing the loadings.
`.uns['variance_pca_var' | embeddings_name]['variance_ratio']` : :class:`~numpy.ndarray` (shape `(n_comps,)`)
Ratio of explained variance.
Expand Down Expand Up @@ -304,15 +304,15 @@ def pca(

# The PCA results are now stored in the AnnData object:
# adata.varm['X_pca_var'] - PCA coordinates for each protein (5 x 2)
# adata.obsm['PCs_var'] - Sample loadings (5 x 2)
# adata.obsm['PCs_pca_var'] - Sample loadings (5 x 2)
# adata.uns['variance_pca_var'] - Variance explained by each PC

# To get the PCA embedding of proteins in the reduced space:
protein_pca_coords = adata.varm["X_pca_var"]
# First 4 proteins have coordinates, P5 has NaN (not used in PCA)

# To project samples into the PC space:
sample_loadings = adata.obsm["PCs_var"]
sample_loadings = adata.obsm["PCs_pca_var"]

# To see variance explained by each component:
variance_ratio = adata.uns["variance_pca_var"]["variance_ratio"]
Expand All @@ -337,7 +337,7 @@ def pca(
embeddings_name=embeddings_name,
meta_data_mask_column_name=meta_data_mask_column_name,
default_coords_prefix="X_pca",
default_loadings_prefix="PCs",
default_loadings_prefix="PCs_pca",
default_uns_prefix="variance_pca",
)

Expand Down
56 changes: 41 additions & 15 deletions src/alphapepttools/tl/plot_data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import logging
from typing import Literal

import anndata as ad
import numpy as np
Expand Down Expand Up @@ -125,7 +126,7 @@ def _validate_pca_loadings_plot_inputs(
adata
The AnnData object containing PCA loadings data.
loadings_name
The key that stores PCA feature loadings (e.g., "PCs").
The key that stores PCA feature loadings (e.g., "PCs_pca").
dim
The principal component index (1-based) to extract loadings for.
dim2
Expand Down Expand Up @@ -206,18 +207,22 @@ def extract_pca_anndata(
adata: ad.AnnData,
dim_space: str = "obs",
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
expression_columns: list[str] | None = None,
) -> ad.AnnData:
"""Extract PCA data required for PCA plotting from an AnnData object.
"""Extract PCA/BPCA data required for plotting from an AnnData object.

Parameters
----------
adata
AnnData object containing PCA results.
AnnData object containing PCA/BPCA results.
dim_space
Either "obs" or "var", indicating the PCA projection space.
Either "obs" or "var", indicating the PCA/BPCA projection space.
embeddings_name
Custom embeddings name or None to use the default naming scheme.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.
expression_columns
List of `var_names` to include as additional numerical column(s) in
the returned AnnData's `.obs` for coloring PCA plots by expression.
Expand Down Expand Up @@ -322,8 +327,8 @@ def extract_pca_anndata(

"""
# Resolve PCA keys
pca_coors_key = f"X_pca_{dim_space}" if embeddings_name is None else embeddings_name
pca_var_key = f"variance_pca_{dim_space}" if embeddings_name is None else embeddings_name
pca_coors_key = f"X_{method}_{dim_space}" if embeddings_name is None else embeddings_name
pca_var_key = f"variance_{method}_{dim_space}" if embeddings_name is None else embeddings_name

# Validate inputs
_validate_pca_plot_input(adata, pca_coors_key, pca_var_key, dim_space)
Expand Down Expand Up @@ -359,7 +364,11 @@ def extract_pca_anndata(


def prepare_scree_data_to_plot(
adata: ad.AnnData, n_pcs: int, dim_space: str, embeddings_name: str | None = None
adata: ad.AnnData,
n_pcs: int,
dim_space: str,
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
) -> pd.DataFrame:
"""Prepare scree plot data from AnnData object.

Expand All @@ -373,6 +382,9 @@ def prepare_scree_data_to_plot(
The dimension space used in PCA. Can be either "obs" or "var".
embeddings_name
Custom embeddings name or None for default.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.

Returns
-------
Expand Down Expand Up @@ -421,7 +433,7 @@ def prepare_scree_data_to_plot(

"""
# Generate the correct variance key name
variance_key = f"variance_pca_{dim_space}" if embeddings_name is None else embeddings_name
variance_key = f"variance_{method}_{dim_space}" if embeddings_name is None else embeddings_name

# Input checks
_validate_scree_plot_input(adata, n_pcs, dim_space, variance_key)
Expand All @@ -446,6 +458,7 @@ def prepare_pca_1d_loadings_data_to_plot(
dim: int,
nfeatures: int,
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
) -> pd.DataFrame:
"""Prepare the gene loadings (1d) of a PC for plotting.

Expand All @@ -461,6 +474,9 @@ def prepare_pca_1d_loadings_data_to_plot(
The number of top absolute loadings features to plot.
embeddings_name
The custom embeddings name used in PCA. If None, uses default naming convention.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.

Returns
-------
Expand Down Expand Up @@ -515,7 +531,7 @@ def prepare_pca_1d_loadings_data_to_plot(

"""
# Generate the correct loadings key name
loadings_key = f"PCs_{dim_space}" if embeddings_name is None else embeddings_name
loadings_key = f"PCs_{method}_{dim_space}" if embeddings_name is None else embeddings_name

# Determine which attribute to use for loadings based on dim_space
loadings_attr = "varm" if dim_space == "obs" else "obsm"
Expand Down Expand Up @@ -545,7 +561,13 @@ def prepare_pca_1d_loadings_data_to_plot(


def prepare_pca_2d_loadings_data_to_plot(
data: ad.AnnData, loadings_name: str, pc_x: int, pc_y: int, nfeatures: int, dim_space: str
data: ad.AnnData,
pc_x: int,
pc_y: int,
nfeatures: int,
dim_space: str,
embeddings_name: str | None = None,
method: Literal["pca", "bpca"] = "pca",
) -> pd.DataFrame:
"""Prepare a DataFrame with PCA feature loadings for the 2D plotting.

Expand All @@ -557,8 +579,11 @@ def prepare_pca_2d_loadings_data_to_plot(
----------
data
The AnnData object containing PCA results.
loadings_name
The key where PCA loadings are stored.
embeddings_name
The custom embeddings name used in PCA. If None, uses default naming convention.
method
The method used for dimensionality reduction. Options are "pca" or "bpca" with "pca" as the default.
This is used to construct the default keys if `embeddings_name` is None.
pc_x
The first principal component index (1-based) to extract loadings for.
pc_y
Expand Down Expand Up @@ -608,7 +633,6 @@ def prepare_pca_2d_loadings_data_to_plot(
# Get loadings for PC1 vs PC2 with top 2 features highlighted
loadings_2d = at.tl.prepare_pca_2d_loadings_data_to_plot(
adata,
loadings_name="PCs_obs", # Default loadings key
pc_x=1, # PC1
pc_y=2, # PC2
nfeatures=2, # Top 2 features per PC
Expand All @@ -624,16 +648,18 @@ def prepare_pca_2d_loadings_data_to_plot(
# - is_top: Boolean flag for top features in either dimension

"""
loadings_key = f"PCs_{method}_{dim_space}" if embeddings_name is None else embeddings_name

_validate_pca_loadings_plot_inputs(
adata=data, loadings_name=loadings_name, dim=pc_x, dim2=pc_y, nfeatures=nfeatures, dim_space=dim_space
adata=data, loadings_name=loadings_key, dim=pc_x, dim2=pc_y, nfeatures=nfeatures, dim_space=dim_space
)

dim1_z = pc_x - 1 # convert to 0-based index
dim2_z = pc_y - 1 # convert to 0-based index

# Determine which attribute to use based on dim_space
loadings_attr = "varm" if dim_space == "obs" else "obsm"
orig_loadings = getattr(data, loadings_attr)[loadings_name]
orig_loadings = getattr(data, loadings_attr)[loadings_key]

loadings = pd.DataFrame(
{
Expand Down
Loading
Loading