diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 3bf8bd7..9c1d7b1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -37,6 +37,7 @@ jobs: outputs: changed-apps: ${{ steps.changed-apps.outputs.changed_files }} changed-bases: ${{ steps.changed-bases.outputs.changed_files }} + changed-bases-runtime: ${{ steps.filter-runtime.outputs.bases }} steps: - name: Get Changed Apps id: changed-apps @@ -54,10 +55,39 @@ jobs: include_only_directories: true max_depth: 1 + - name: Checkout (for runtime filter) + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + - name: Filter Bases With Runtime Variant + id: filter-runtime + env: + CHANGED_BASES: ${{ steps.changed-bases.outputs.changed_files }} + DISPATCH_TYPE: ${{ github.event_name == 'workflow_dispatch' && inputs.type || '' }} + DISPATCH_IMAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.image || '' }} + shell: bash + run: | + set -euo pipefail + if [[ "$DISPATCH_TYPE" == "base" ]]; then + candidates=$(jq -nc --arg b "$DISPATCH_IMAGE" '[$b]') + else + candidates="${CHANGED_BASES:-[]}" + fi + bases='[]' + for b in $(echo "$candidates" | jq -r '.[]'); do + if [[ -f "base/$b/Dockerfile.runtime" ]]; then + bases=$(echo "$bases" | jq --arg b "$b" '. + [$b]') + fi + done + echo "Runtime-variant bases: $bases" + echo "bases=$bases" >> "$GITHUB_OUTPUT" + # Build base images: devel first, then runtime (runtime pulls from published devel) - # Runtime variant - depends on devel being published first + # Runtime variant - depends on devel being published first. + # Only runs for bases that have a Dockerfile.runtime; devel-only bases are skipped. build-bases: - if: ${{ always() && !failure() && !cancelled() && (needs.prepare.outputs.changed-bases != '[]' || (github.event_name == 'workflow_dispatch' && inputs.type == 'base')) }} + if: ${{ always() && !failure() && !cancelled() && needs.prepare.outputs.changed-bases-runtime != '[]' }} name: Build Base ${{ matrix.base }} needs: ["prepare", "build-bases-devel"] uses: ./.github/workflows/image-builder.yaml @@ -70,7 +100,7 @@ jobs: secrets: inherit strategy: matrix: - base: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.image)) || fromJSON(needs.prepare.outputs.changed-bases) }} + base: ${{ fromJSON(needs.prepare.outputs.changed-bases-runtime) }} fail-fast: false with: image: ${{ matrix.base }} diff --git a/base/pytorch/.dockerignore b/base/pytorch/.dockerignore new file mode 100644 index 0000000..1bb765c --- /dev/null +++ b/base/pytorch/.dockerignore @@ -0,0 +1,5 @@ +# Ignore everything except the files we explicitly need in the build context. +* + +!Dockerfile +!docker-bake.hcl diff --git a/base/pytorch/Dockerfile b/base/pytorch/Dockerfile new file mode 100644 index 0000000..ce6d47d --- /dev/null +++ b/base/pytorch/Dockerfile @@ -0,0 +1,80 @@ +# CUDA 13.0 + PyTorch 2.11 + Python 3.13 base image. +# +# Slim PyTorch foundation. uv-managed Python; venv at /opt/venv. +# Devel variant only — provides nvcc, NVRTC, CUPTI, cuDNN headers for +# downstream apps that compile CUDA extensions. 
+# +# Build locally: +# docker buildx bake image-devel-local +# +# Tags: pytorch:cuda13.0-torch2.11-devel, pytorch:devel, pytorch:latest + +ARG CUDA_VERSION="13.0.3" +ARG CUDA_DISTRO="ubuntu24.04" +ARG UV_VERSION="0.11.8" + +FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv + +FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-${CUDA_DISTRO} + +ARG PYTHON_VERSION="3.13" +ARG TORCH_VERSION="2.11.0" +ARG TORCHVISION_VERSION="0.26.0" +ARG TORCHAUDIO_VERSION="2.11.0" +ARG XFORMERS_VERSION="0.0.35" +ARG TRITON_VERSION="3.6.0" +ARG TORCH_INDEX="https://download.pytorch.org/whl/cu130" + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + CUDA_HOME=/usr/local/cuda \ + CPATH=/usr/local/cuda/include \ + TORCH_CUDA_ARCH_LIST="12.0" \ + UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + UV_HTTP_TIMEOUT=300 \ + UV_PYTHON_INSTALL_DIR=/opt/python \ + UV_PYTHON_PREFERENCE=only-managed \ + VIRTUAL_ENV=/opt/venv \ + PATH=/usr/local/cuda/bin:/opt/venv/bin:${PATH} + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential ninja-build git curl ca-certificates tini && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +COPY --from=uv /uv /uvx /usr/local/bin/ + +RUN uv python install ${PYTHON_VERSION} && \ + uv venv /opt/venv --python ${PYTHON_VERSION} + +# Single resolve across all packages — internally consistent pins. +# `--index-strategy unsafe-best-match` makes resolution deterministic across +# the cu130 index + PyPI mix. +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install \ + --index-url ${TORCH_INDEX} \ + --extra-index-url https://pypi.org/simple \ + --index-strategy unsafe-best-match \ + torch==${TORCH_VERSION} \ + torchvision==${TORCHVISION_VERSION} \ + torchaudio==${TORCHAUDIO_VERSION} \ + xformers==${XFORMERS_VERSION} \ + triton==${TRITON_VERSION} \ + accelerate numpy safetensors nvidia-ml-py \ + sympy packaging pybind11 ninja psutil wheel + +# Constraints file for downstream apps to inherit pins (torch ecosystem +# plus the nvidia-* transitive deps and the cuda-toolkit umbrella). +RUN uv pip freeze | grep -E \ + "^(torch|torchvision|torchaudio|xformers|triton|numpy|safetensors|accelerate|nvidia-|cuda-toolkit)==" \ + > /constraints.txt && \ + echo "Constraints:" && cat /constraints.txt + +# Build-time validation. torch.cuda.is_available() requires --gpus all and is +# intentionally not checked here — only that torch was built against CUDA and +# that xformers' py3-none wheel imports against torch's C++ ABI on cp313. 
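+# A GPU-backed check can still be run manually after the build (not at build
+# time), assuming a host with the NVIDIA container toolkit:
+#   docker run --rm --gpus all pytorch:latest \
+#     python -c "import torch; assert torch.cuda.is_available()"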
+RUN python -c "import torch; print(f'PyTorch {torch.__version__} CUDA {torch.version.cuda}'); assert torch.version.cuda is not None" && \ + python -c "import xformers; print(f'xformers {xformers.__version__}')" + +ENTRYPOINT ["/usr/bin/tini", "--"] diff --git a/base/pytorch/docker-bake.hcl b/base/pytorch/docker-bake.hcl new file mode 100644 index 0000000..eb3b09a --- /dev/null +++ b/base/pytorch/docker-bake.hcl @@ -0,0 +1,41 @@ +target "docker-metadata-action" {} + +variable "APP" { + default = "pytorch" +} + +variable "VERSION" { + // Format: cuda{CUDA_VERSION}-torch{TORCH_VERSION} + default = "cuda13.0-torch2.11" +} + +variable "SOURCE" { + default = "https://github.com/arsac/containers" +} + +variable "VENDOR" { + default = "arsac" +} + +group "default" { + targets = ["image-devel-local"] +} + +target "image-devel" { + inherits = ["docker-metadata-action"] + dockerfile = "Dockerfile" + labels = { + "org.opencontainers.image.source" = "${SOURCE}" + } +} + +target "image-devel-local" { + inherits = ["image-devel"] + output = ["type=docker"] + tags = ["${APP}:${VERSION}-devel", "${APP}:devel", "${APP}:latest"] +} + +target "image-devel-all" { + inherits = ["image-devel"] + platforms = ["linux/amd64"] +} diff --git a/docs/superpowers/plans/2026-04-28-pytorch-base-image.md b/docs/superpowers/plans/2026-04-28-pytorch-base-image.md new file mode 100644 index 0000000..8b0351b --- /dev/null +++ b/docs/superpowers/plans/2026-04-28-pytorch-base-image.md @@ -0,0 +1,716 @@ +# `base/pytorch` Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Create a new slim PyTorch base image at `base/pytorch/` (CUDA 13.0, Python 3.13, PyTorch 2.11.0, uv-managed Python and venv), parallel to the existing `base/cuda-ml/` but devel-only, smaller, and minimal. + +**Architecture:** Single-stage Dockerfile `FROM nvidia/cuda:13.0.3-cudnn-devel-ubuntu24.04`. uv installs Python 3.13 (managed standalone build) to `/opt/python`, creates a venv at `/opt/venv`, and runs a single resolve installing torch + torchvision + torchaudio + xformers + triton from the cu130 PyTorch index plus PyPI for transitive `nvidia-*-cu13` deps. The existing CI workflow assumes every base image has both a devel and a runtime variant; we modify `release.yaml` to gate the runtime build job on the existence of a `Dockerfile.runtime` so that this devel-only image doesn't break the matrix. + +**Tech Stack:** Docker buildx + bake (HCL), uv 0.11.8, Python 3.13, PyTorch 2.11.0+cu130, NVIDIA CUDA 13.0.3 cudnn-devel base image, GitHub Actions. 
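+A usage sketch (not one of this plan's tasks): downstream app layers consume the image and its `/constraints.txt` roughly as below. The `transformers` install is purely illustrative, and the tag assumes the local bake build from Task 3.
+
+```bash
+# hypothetical downstream layer: extend the image and inherit its torch pins
+docker build -t myapp - <<'EOF'
+FROM pytorch:cuda13.0-torch2.11-devel
+RUN uv pip install -c /constraints.txt transformers
+EOF
+```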
+ +**Spec:** `docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md` + +--- + +## File structure + +**New files (under `base/pytorch/`):** +- `Dockerfile` — single-stage devel image +- `docker-bake.hcl` — buildx bake config with devel-only targets +- `.dockerignore` — minimal, ignore everything but Dockerfile and bake file + +**Modified files:** +- `.github/workflows/release.yaml` — add a `changed-bases-runtime` output to the `prepare` job that filters `changed-bases` to those that have a `Dockerfile.runtime`; switch the `build-bases` (runtime) job to use that filtered list + +**Files NOT touched:** +- `base/cuda-ml/*` — sibling image stays untouched +- `.github/workflows/image-builder.yaml` — no changes needed; the gate happens in `release.yaml` +- `.renovaterc.json5` — no annotations on the new bake/Dockerfile (matches `cuda-ml` sibling pattern) +- `build-push-local.sh` — does not work for this image (assumes a `builder` target that doesn't exist in a single-stage Dockerfile); local builds use `docker buildx bake image-devel-local` instead. Out of scope for this plan. + +--- + +### Task 1: Scaffold `base/pytorch/` directory with `.dockerignore` and `docker-bake.hcl` + +**Files:** +- Create: `base/pytorch/.dockerignore` +- Create: `base/pytorch/docker-bake.hcl` + +- [ ] **Step 1: Create the directory and `.dockerignore`** + +```bash +mkdir -p base/pytorch +``` + +Write `base/pytorch/.dockerignore` with: + +``` +# Ignore everything except the files we explicitly need in the build context. +* + +!Dockerfile +!docker-bake.hcl +``` + +- [ ] **Step 2: Create `base/pytorch/docker-bake.hcl`** + +Write `base/pytorch/docker-bake.hcl`: + +```hcl +target "docker-metadata-action" {} + +variable "APP" { + default = "pytorch" +} + +variable "VERSION" { + // Format: cuda{CUDA_VERSION}-torch{TORCH_VERSION} + default = "cuda13.0-torch2.11" +} + +variable "SOURCE" { + default = "https://github.com/arsac/containers" +} + +variable "VENDOR" { + default = "arsac" +} + +group "default" { + targets = ["image-devel-local"] +} + +target "image-devel" { + inherits = ["docker-metadata-action"] + dockerfile = "Dockerfile" + labels = { + "org.opencontainers.image.source" = "${SOURCE}" + } +} + +target "image-devel-local" { + inherits = ["image-devel"] + output = ["type=docker"] + tags = ["${APP}:${VERSION}-devel", "${APP}:devel", "${APP}:latest"] +} + +target "image-devel-all" { + inherits = ["image-devel"] + platforms = ["linux/amd64"] +} +``` + +- [ ] **Step 3: Verify bake config parses** + +Run: + +```bash +cd base/pytorch && docker buildx bake image-devel-local --print +``` + +Expected: prints a JSON structure containing the `image-devel-local` target with `tags` `["pytorch:cuda13.0-torch2.11-devel","pytorch:devel","pytorch:latest"]` and `dockerfile` `"Dockerfile"`. No error. + +If you see `target "image-devel-local" does not exist` or HCL parse errors, fix the bake file before continuing. + +- [ ] **Step 4: Verify `app-options` GitHub action would parse `VERSION` and `SOURCE`** + +Run from inside `base/pytorch/`: + +```bash +docker buildx bake --list type=variables,format=json --progress=quiet | jq -r '.[] | select(.name == "VERSION" or .name == "SOURCE") | "\(.name)=\(.value)"' +``` + +Expected output: + +``` +VERSION=cuda13.0-torch2.11 +SOURCE=https://github.com/arsac/containers +``` + +This is the exact extraction the CI's `.github/actions/app-options/action.yaml` does. If it fails, the CI integration will fail later. 
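+Task 1 Step 3's tag check can also be made mechanical rather than eyeballed (a sketch; the `.target` key layout matches `docker buildx bake --print` JSON output):
+
+```bash
+cd base/pytorch && docker buildx bake image-devel-local --print --progress=quiet | \
+  jq -e '.target."image-devel-local".tags == ["pytorch:cuda13.0-torch2.11-devel","pytorch:devel","pytorch:latest"]'
+```
+
+`jq -e` sets the exit code from the comparison, so the check fails loudly in scripts.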
+
+---
+
+### Task 2: Write `base/pytorch/Dockerfile`
+
+**Files:**
+- Create: `base/pytorch/Dockerfile`
+
+- [ ] **Step 1: Write the Dockerfile**
+
+Write `base/pytorch/Dockerfile`:
+
+```dockerfile
+# CUDA 13.0 + PyTorch 2.11 + Python 3.13 base image.
+#
+# Slim PyTorch foundation. uv-managed Python; venv at /opt/venv.
+# Devel variant only — provides nvcc, NVRTC, CUPTI, cuDNN headers for
+# downstream apps that compile CUDA extensions.
+#
+# Build locally:
+#   docker buildx bake image-devel-local
+#
+# Tags: pytorch:cuda13.0-torch2.11-devel, pytorch:devel, pytorch:latest
+
+ARG CUDA_VERSION="13.0.3"
+ARG CUDA_DISTRO="ubuntu24.04"
+ARG UV_VERSION="0.11.8"
+
+# Named uv stage so ${UV_VERSION} can be expanded — `COPY --from=<image>:${VAR}`
+# does not expand ARGs even when declared in global scope; only `FROM` does.
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv
+
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-${CUDA_DISTRO}
+
+ARG PYTHON_VERSION="3.13"
+ARG TORCH_VERSION="2.11.0"
+ARG TORCHVISION_VERSION="0.26.0"
+ARG TORCHAUDIO_VERSION="2.11.0"
+ARG XFORMERS_VERSION="0.0.35"
+ARG TRITON_VERSION="3.6.0"
+ARG TORCH_INDEX="https://download.pytorch.org/whl/cu130"
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    CUDA_HOME=/usr/local/cuda \
+    CPATH=/usr/local/cuda/include \
+    TORCH_CUDA_ARCH_LIST="12.0" \
+    UV_COMPILE_BYTECODE=1 \
+    UV_LINK_MODE=copy \
+    UV_HTTP_TIMEOUT=300 \
+    UV_PYTHON_INSTALL_DIR=/opt/python \
+    UV_PYTHON_PREFERENCE=only-managed \
+    VIRTUAL_ENV=/opt/venv \
+    PATH=/usr/local/cuda/bin:/opt/venv/bin:${PATH}
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential ninja-build git curl ca-certificates tini && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+COPY --from=uv /uv /uvx /usr/local/bin/
+
+RUN uv python install ${PYTHON_VERSION} && \
+    uv venv /opt/venv --python ${PYTHON_VERSION}
+
+# Single resolve across all packages — internally consistent pins.
+# `--index-strategy unsafe-best-match` makes resolution deterministic across
+# the cu130 index + PyPI mix.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv pip install \
+    --index-url ${TORCH_INDEX} \
+    --extra-index-url https://pypi.org/simple \
+    --index-strategy unsafe-best-match \
+    torch==${TORCH_VERSION} \
+    torchvision==${TORCHVISION_VERSION} \
+    torchaudio==${TORCHAUDIO_VERSION} \
+    xformers==${XFORMERS_VERSION} \
+    triton==${TRITON_VERSION} \
+    accelerate numpy safetensors nvidia-ml-py \
+    sympy packaging pybind11 ninja psutil wheel
+
+# Constraints file for downstream apps to inherit pins (torch ecosystem
+# plus the nvidia-* transitive deps and the cuda-toolkit umbrella).
+RUN uv pip freeze | grep -E \
+    "^(torch|torchvision|torchaudio|xformers|triton|numpy|safetensors|accelerate|nvidia-|cuda-toolkit)==" \
+    > /constraints.txt && \
+    echo "Constraints:" && cat /constraints.txt
+
+# Build-time validation. torch.cuda.is_available() requires --gpus all and is
+# intentionally not checked here — only that torch was built against CUDA and
+# that xformers' py3-none wheel imports against torch's C++ ABI on cp313.
+RUN python -c "import torch; print(f'PyTorch {torch.__version__} CUDA {torch.version.cuda}'); assert torch.version.cuda is not None" && \ + python -c "import xformers; print(f'xformers {xformers.__version__}')" + +ENTRYPOINT ["/usr/bin/tini", "--"] +``` + +- [ ] **Step 2: Lint the Dockerfile syntactically** + +Run: + +```bash +docker buildx build --check base/pytorch/ +``` + +Expected: no warnings other than possibly a hint about pinning the uv image to a digest. If you see syntax errors (e.g., bad heredoc, malformed `ENV`), fix them before building. + +--- + +### Task 3: Build the image locally and run smoke tests + +This task pulls ~6 GB of base image and downloads ~3 GB of wheels. First build can take 10-20 minutes; subsequent builds are cache-hot. + +**Files:** +- None (verification only) + +- [ ] **Step 1: Build the image** + +Run: + +```bash +cd base/pytorch && docker buildx bake image-devel-local +``` + +Expected: build completes, ending with three tags applied: +``` +=> => naming to docker.io/library/pytorch:cuda13.0-torch2.11-devel +=> => naming to docker.io/library/pytorch:devel +=> => naming to docker.io/library/pytorch:latest +``` + +If any of the in-build `RUN python -c "import torch"` or `import xformers` smoke tests fail, the build will halt with the failing assertion. Read the error and fix the spec/Dockerfile before proceeding — do **not** patch around a real ABI mismatch. + +- [ ] **Step 2: Verify Python version** + +Run: + +```bash +docker run --rm pytorch:latest python --version +``` + +Expected: `Python 3.13.x` (some patch version). + +- [ ] **Step 3: Verify tini entrypoint** + +Run: + +```bash +docker run --rm pytorch:latest /bin/sh -c 'echo $$; ps -o pid,comm 1' +``` + +Expected: PID 1 is `tini` (or `tini-static`); the shell is PID 2 or higher. This confirms `ENTRYPOINT ["/usr/bin/tini", "--"]` is wired up. + +- [ ] **Step 4: Verify nvcc and NVRTC are available** + +Run: + +```bash +docker run --rm pytorch:latest /bin/sh -c 'nvcc --version && ls /usr/local/cuda/lib64/libnvrtc.so*' +``` + +Expected: prints CUDA 13.0.x release info and lists at least `libnvrtc.so` and `libnvrtc.so.13`. + +- [ ] **Step 5: Verify torch imports and CUDA libs are wired up** + +Run: + +```bash +docker run --rm pytorch:latest python -c " +import torch +print(f'torch {torch.__version__} CUDA {torch.version.cuda} cuDNN {torch.backends.cudnn.version()}') +import ctypes, os +nvidia_dir = os.path.join(os.path.dirname(torch.__file__), '..', 'nvidia') +print(f'nvidia/ libs dir: {os.path.realpath(nvidia_dir)}') +print(f'cusparselt present: {os.path.isdir(os.path.join(nvidia_dir, \"cusparselt\"))}') +print(f'nvshmem present: {os.path.isdir(os.path.join(nvidia_dir, \"nvshmem\"))}') +" +``` + +Expected: +- `torch 2.11.0+cu130 CUDA 13.0` (or similar) +- `cuDNN ...` prints a non-`None` integer (e.g., `91900` for 9.19.0) +- `cusparselt present: True` +- `nvshmem present: True` + +If any of these are False or `None`, torch is missing a transitive nvidia-* package; re-check the cu130 wheel METADATA against the spec's "Validated requirements" table. 
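+To cross-check those declared dependencies from inside the image, `importlib.metadata` can list torch's `Requires-Dist` entries (a sketch; filtered to the `nvidia-` prefix):
+
+```bash
+docker run --rm pytorch:latest python -c "
+from importlib.metadata import requires
+print('\n'.join(r for r in requires('torch') if r.startswith('nvidia-')))"
+```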
+ +- [ ] **Step 6: Verify the constraints file** + +Run: + +```bash +docker run --rm pytorch:latest cat /constraints.txt +``` + +Expected: lines for at least: +``` +torch==2.11.0+cu130 +torchvision==0.26.0+cu130 +torchaudio==2.11.0+cu130 +xformers==0.0.35 +triton==3.6.0 +nvidia-cudnn-cu13==9.19.0.56 +nvidia-cusparselt-cu13==0.8.0 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nccl-cu13==2.28.9 +cuda-toolkit==13.0.2 +numpy== +safetensors== +accelerate== +``` + +Plus other `nvidia-*-cu13` transitive deps. If any of the explicit pins above are missing, the build's regex grep is wrong. + +- [ ] **Step 7: Verify image size is in the expected range** + +Run: + +```bash +docker images pytorch:latest --format '{{.Size}}' +``` + +Expected: roughly 12-14 GB. If it's under 8 GB you've likely lost the cuDNN base or the torch wheel; if it's over 20 GB, an apt cache layer wasn't cleaned. + +- [ ] **Step 8: Tag with the ghcr-ready name and verify** + +Run: + +```bash +docker tag pytorch:latest ghcr.io/arsac/pytorch:cuda13.0-torch2.11 +docker images ghcr.io/arsac/pytorch +``` + +Expected: shows the new tag pointing at the same image ID as `pytorch:latest`. (Don't push yet — that's CI's job once the PR merges.) + +--- + +### Task 4: Modify `.github/workflows/release.yaml` to gate runtime builds on `Dockerfile.runtime` existence + +**Files:** +- Modify: `.github/workflows/release.yaml` + +The current `prepare` job emits `changed-bases` (a JSON array of changed base directory names). The `build-bases` job (which builds runtime variants) iterates that list as a matrix. For `base/pytorch/`, no `Dockerfile.runtime` exists, so the runtime build would fail. + +**Fix:** add a new step in `prepare` that filters `changed-bases` to only those with a `Dockerfile.runtime`, exposed as `changed-bases-runtime`. Switch `build-bases` to consume that filtered output. + +- [ ] **Step 1: Read the current `prepare` job** + +Open `.github/workflows/release.yaml` and locate the `prepare` job (around line 30-55 in the current file). Note its existing `outputs` block: + +```yaml +outputs: + changed-apps: ${{ steps.changed-apps.outputs.changed_files }} + changed-bases: ${{ steps.changed-bases.outputs.changed_files }} +``` + +- [ ] **Step 2: Add a `changed-bases-runtime` output and a filter step** + +Edit the `prepare` job. After the `Get Changed Bases` step, add a checkout step (so the filter can read `base/*/Dockerfile.runtime`) and a filter step. Update the `outputs` block. 
+ +Replace: + +```yaml + prepare: + name: Prepare + runs-on: ubuntu-latest + outputs: + changed-apps: ${{ steps.changed-apps.outputs.changed_files }} + changed-bases: ${{ steps.changed-bases.outputs.changed_files }} + steps: + - name: Get Changed Apps + id: changed-apps + uses: bjw-s-labs/action-changed-files@930cef8463348e168cab7235c47fe95a7a235f65 # v0.3.3 + with: + path: apps + include_only_directories: true + max_depth: 1 + + - name: Get Changed Bases + id: changed-bases + uses: bjw-s-labs/action-changed-files@930cef8463348e168cab7235c47fe95a7a235f65 # v0.3.3 + with: + path: base + include_only_directories: true + max_depth: 1 +``` + +with: + +```yaml + prepare: + name: Prepare + runs-on: ubuntu-latest + outputs: + changed-apps: ${{ steps.changed-apps.outputs.changed_files }} + changed-bases: ${{ steps.changed-bases.outputs.changed_files }} + changed-bases-runtime: ${{ steps.filter-runtime.outputs.bases }} + steps: + - name: Get Changed Apps + id: changed-apps + uses: bjw-s-labs/action-changed-files@930cef8463348e168cab7235c47fe95a7a235f65 # v0.3.3 + with: + path: apps + include_only_directories: true + max_depth: 1 + + - name: Get Changed Bases + id: changed-bases + uses: bjw-s-labs/action-changed-files@930cef8463348e168cab7235c47fe95a7a235f65 # v0.3.3 + with: + path: base + include_only_directories: true + max_depth: 1 + + - name: Checkout (for runtime filter) + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + - name: Filter Bases With Runtime Variant + id: filter-runtime + env: + CHANGED_BASES: ${{ steps.changed-bases.outputs.changed_files }} + DISPATCH_TYPE: ${{ github.event_name == 'workflow_dispatch' && inputs.type || '' }} + DISPATCH_IMAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.image || '' }} + shell: bash + run: | + set -euo pipefail + if [[ "$DISPATCH_TYPE" == "base" ]]; then + candidates=$(jq -nc --arg b "$DISPATCH_IMAGE" '[$b]') + else + candidates="${CHANGED_BASES:-[]}" + fi + bases='[]' + for b in $(echo "$candidates" | jq -r '.[]'); do + if [[ -f "base/$b/Dockerfile.runtime" ]]; then + bases=$(echo "$bases" | jq --arg b "$b" '. + [$b]') + fi + done + echo "Runtime-variant bases: $bases" + echo "bases=$bases" >> "$GITHUB_OUTPUT" +``` + +- [ ] **Step 3: Switch `build-bases` to consume the filtered output** + +Locate the `build-bases` job. 
Replace its `if:` and `strategy.matrix.base`: + +Replace: + +```yaml + build-bases: + if: ${{ always() && !failure() && !cancelled() && (needs.prepare.outputs.changed-bases != '[]' || (github.event_name == 'workflow_dispatch' && inputs.type == 'base')) }} + name: Build Base ${{ matrix.base }} + needs: ["prepare", "build-bases-devel"] + uses: ./.github/workflows/image-builder.yaml + permissions: + attestations: write + contents: write + id-token: write + packages: write + security-events: write + secrets: inherit + strategy: + matrix: + base: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.image)) || fromJSON(needs.prepare.outputs.changed-bases) }} + fail-fast: false + with: + image: ${{ matrix.base }} + path: base + release: ${{ github.event_name == 'workflow_dispatch' && inputs.release || github.event_name == 'push' }} +``` + +with: + +```yaml + build-bases: + if: ${{ always() && !failure() && !cancelled() && needs.prepare.outputs.changed-bases-runtime != '[]' }} + name: Build Base ${{ matrix.base }} + needs: ["prepare", "build-bases-devel"] + uses: ./.github/workflows/image-builder.yaml + permissions: + attestations: write + contents: write + id-token: write + packages: write + security-events: write + secrets: inherit + strategy: + matrix: + base: ${{ fromJSON(needs.prepare.outputs.changed-bases-runtime) }} + fail-fast: false + with: + image: ${{ matrix.base }} + path: base + release: ${{ github.event_name == 'workflow_dispatch' && inputs.release || github.event_name == 'push' }} +``` + +The `build-bases-devel` job stays unchanged — every base still needs a devel build. + +The downstream `build-apps` and `status` jobs both reference `build-bases` in `needs:`. When the runtime matrix is empty, `build-bases` resolves to `skipped`. The existing `status` job's checks (`contains(needs.*.result, 'failure')`) treat `skipped` as not-failure, so it still passes. No further changes required to those jobs. + +- [ ] **Step 4: Verify YAML parses** + +Run: + +```bash +python3 -c "import yaml; yaml.safe_load(open('.github/workflows/release.yaml'))" +``` + +Expected: no output (parses cleanly). If it errors with a `yaml.scanner.ScannerError` or `yaml.parser.ParserError`, the indentation or quoting is wrong. + +- [ ] **Step 5: Sanity-check the matrix-filter logic locally with mock data** + +Run: + +```bash +# Simulate: changed-bases=["cuda-ml","pytorch"], DISPATCH_TYPE empty +CHANGED_BASES='["cuda-ml","pytorch"]' +candidates="$CHANGED_BASES" +bases='[]' +for b in $(echo "$candidates" | jq -r '.[]'); do + if [[ -f "base/$b/Dockerfile.runtime" ]]; then + bases=$(echo "$bases" | jq --arg b "$b" '. + [$b]') + fi +done +echo "$bases" +``` + +Expected: `["cuda-ml"]` — `cuda-ml` has a `Dockerfile.runtime`, `pytorch` does not. + +If the output is `[]` or `["cuda-ml","pytorch"]`, the filter logic is wrong; re-check the `[[ -f ... ]]` test. 
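+The `workflow_dispatch` branch of the filter can be mocked the same way (a sketch; for the devel-only image the expected output is an empty list):
+
+```bash
+# Simulate: workflow_dispatch with type=base, image=pytorch
+DISPATCH_IMAGE='pytorch'
+candidates=$(jq -nc --arg b "$DISPATCH_IMAGE" '[$b]')
+bases='[]'
+for b in $(echo "$candidates" | jq -r '.[]'); do
+  if [[ -f "base/$b/Dockerfile.runtime" ]]; then
+    bases=$(echo "$bases" | jq --arg b "$b" '. + [$b]')
+  fi
+done
+echo "$bases" # expected: [] (pytorch has no Dockerfile.runtime)
+```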
+ +--- + +### Task 5: Commit all changes + +**Files:** +- All files from tasks 1, 2, and 4 + +- [ ] **Step 1: Review the diff** + +Run: + +```bash +git status +git diff --stat +``` + +Expected file changes: + +- `base/pytorch/.dockerignore` (new) +- `base/pytorch/Dockerfile` (new) +- `base/pytorch/docker-bake.hcl` (new) +- `.github/workflows/release.yaml` (modified) +- `docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md` (new — already exists from brainstorming) +- `docs/superpowers/plans/2026-04-28-pytorch-base-image.md` (new — this file) + +- [ ] **Step 2: Stage and commit** + +Run: + +```bash +git checkout -b feat/base-pytorch +git add base/pytorch/Dockerfile base/pytorch/docker-bake.hcl base/pytorch/.dockerignore \ + .github/workflows/release.yaml \ + docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md \ + docs/superpowers/plans/2026-04-28-pytorch-base-image.md +git commit -m "$(cat <<'EOF' +feat(base): add slim pytorch image (cuda13.0, py3.13, torch2.11) + +New base/pytorch/ image: CUDA 13.0.3 + cuDNN devel + Python 3.13 (uv-managed) + +PyTorch 2.11.0+cu130 + xformers 0.0.35 + triton 3.6.0, plus the nvidia-* runtime +libraries torch dlopens at startup (cuSPARSELt, NVSHMEM, cuDNN, NCCL). +Devel-only — no Dockerfile.runtime, since downstream apps need nvcc/NVRTC/CUPTI. + +Also gates the release.yaml `build-bases` (runtime) job on the existence of a +Dockerfile.runtime in each changed base directory, so the new devel-only image +doesn't break the matrix. +EOF +)" +``` + +- [ ] **Step 3: Verify the commit looks right** + +Run: + +```bash +git log -1 --stat +``` + +Expected: shows the six file changes listed above and the commit message. + +--- + +### Task 6: Open the PR and verify CI + +**Files:** +- None (CI verification only) + +- [ ] **Step 1: Push the branch** + +Run: + +```bash +git push -u origin feat/base-pytorch +``` + +- [ ] **Step 2: Open the PR** + +Run: + +```bash +gh pr create --title "feat(base): add slim pytorch image (cuda13.0, py3.13, torch2.11)" --body "$(cat <<'EOF' +## Summary +- New `base/pytorch/` slim PyTorch base image: CUDA 13.0.3 + Python 3.13 (uv-managed) + PyTorch 2.11.0+cu130 + xformers + triton + utility deps. Devel-only (deployment target), no Dockerfile.runtime. +- Gates `release.yaml` runtime-build matrix on `Dockerfile.runtime` existence so devel-only images don't break the matrix. + +## Test plan +- [ ] `docker buildx bake image-devel-local --print` succeeds locally +- [ ] `docker buildx bake image-devel-local` builds successfully locally +- [ ] In-image: `python -c "import torch; assert torch.version.cuda is not None"` passes +- [ ] In-image: `python -c "import xformers"` passes +- [ ] In-image: `cat /constraints.txt` shows expected pins +- [ ] CI `Build Base pytorch (devel)` job succeeds and pushes `ghcr.io/arsac/pytorch:cuda13.0-torch2.11-devel` etc. +- [ ] CI `Build Base pytorch` (runtime) is skipped (no `Dockerfile.runtime`) +- [ ] CI `Build Base cuda-ml` (runtime) still runs and succeeds (regression check on the workflow change) + +Spec: `docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md` +Plan: `docs/superpowers/plans/2026-04-28-pytorch-base-image.md` +EOF +)" +``` + +- [ ] **Step 3: Watch CI** + +Run: + +```bash +gh pr checks --watch +``` + +Expected: + +- `Build Base pytorch (devel)` job runs and succeeds. This pushes `ghcr.io/arsac/pytorch:cuda13.0-torch2.11-devel`, `:devel`, plus semver-derived tags. 
+- `Build Base pytorch` (the runtime matrix) is **skipped** because `pytorch` is not in `changed-bases-runtime`. +- If `cuda-ml` is in the changed-bases list (it shouldn't be unless its files were touched), its `Build Base cuda-ml` runtime job should still run normally — confirms the workflow change didn't break the existing image. + +If the devel build fails: read the job logs, fix locally, push again. Do **not** disable the smoke tests inside the Dockerfile to "make CI pass" — the smoke tests catch real ABI/install issues. + +If the runtime job is *not* skipped for `pytorch`: the `changed-bases-runtime` filter or the `build-bases` `if:` conditional is wrong; re-check Task 4 Step 2 and Step 3. + +- [ ] **Step 4: Verify the published image is reachable** + +Once CI is green: + +```bash +docker pull ghcr.io/arsac/pytorch:cuda13.0-torch2.11-devel +docker run --rm ghcr.io/arsac/pytorch:cuda13.0-torch2.11-devel python -c "import torch, xformers; print(torch.__version__, xformers.__version__)" +``` + +Expected: `2.11.0+cu130 0.0.35`. + +- [ ] **Step 5: Merge** + +Run: + +```bash +gh pr merge --merge --auto +``` + +Or merge via GitHub UI per repo policy. Once merged, the published `:latest` tag will be pinned to the new build. + +--- + +## Self-review notes + +**Spec coverage:** +- Goals 1-9 (Python 3.13, CUDA 13.0, PyTorch 2.11 cu130, cuDNN, NVRTC, cuSPARSELt, NVSHMEM, uv, devel variant) — all satisfied by Task 2's Dockerfile and verified in Task 3 Steps 4-6. +- Architecture (single-stage, uv-managed Python, venv at /opt/venv, constraints file, ENTRYPOINT tini) — Task 2. +- File-by-file (Dockerfile + docker-bake.hcl + .dockerignore) — Tasks 1 and 2. +- Build & CI section's runtime-gating fix — Task 4. +- All five Risks have mitigations baked into the Dockerfile (in-build smoke tests for ABI; UV_VERSION pinned; UV_LINK_MODE=copy; constraints regex includes nvidia- and cuda-toolkit). + +**No placeholders detected.** Every step has concrete commands, code, or file paths. + +**Type/name consistency:** target names (`image-devel`, `image-devel-local`, `image-devel-all`), env var names (`UV_*`, `VIRTUAL_ENV`, `CPATH`), and ARG names match across Tasks 1, 2, and the spec. Output names (`changed-bases-runtime`) match between Task 4 Steps 2 and 3. diff --git a/docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md b/docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md new file mode 100644 index 0000000..32710c7 --- /dev/null +++ b/docs/superpowers/specs/2026-04-28-pytorch-base-image-design.md @@ -0,0 +1,278 @@ +# `base/pytorch` — slim PyTorch base image (CUDA 13.0, Python 3.13) + +**Date:** 2026-04-28 +**Status:** Design approved, ready for implementation plan + +## Overview + +A new sibling base image at `base/pytorch/`, parallel to the existing `base/cuda-ml/`. Provides a slim PyTorch foundation on CUDA 13.0 + Python 3.13 + PyTorch 2.11.0, intended as the deployment target for downstream apps that need a CUDA-13 dev environment with PyTorch and uv preinstalled. Apps build on top of this image rather than reinstalling torch. + +## Goals + +- Standalone, deployable image (devel variant only — see non-goals). +- uv-managed Python 3.13 (`UV_PYTHON_PREFERENCE=only-managed`). +- PyTorch 2.11.0 + torchvision 0.26.0 + torchaudio 2.11.0 + xformers 0.0.35 + triton 3.6.0, all from the cu130 PyTorch index. +- All NVIDIA runtime libraries that torch dlopens at startup are present (cuDNN, cuSPARSELt, NVSHMEM, NCCL, CUDA toolkit pip umbrella). 
+- Full devel toolchain available for downstream apps that compile CUDA extensions (nvcc, NVRTC, CUPTI, cuDNN headers). +- Common build/utility helpers (`accelerate`, `numpy`, `safetensors`, `nvidia-ml-py`, `sympy`, `packaging`, `pybind11`, `ninja`, `psutil`, `wheel`). +- Constraints file at `/constraints.txt` for downstream apps to pin against. + +## Non-goals + +- **No runtime (slim) variant.** `nvidia/cuda:*-runtime-*` images deliberately omit nvcc / NVRTC / CUPTI, and these are required for `torch.compile` / Triton JIT and for downstream apps that compile CUDA extensions. Ship only the devel image as the deployment target. +- **No flash-attn.** v2.8.x has no upstream cu13 wheels and multiple open build-failure issues against CUDA 13. v4 is alpha and on consumer Blackwell (SM 12.0) it falls back to SM80 kernels (~5% slower than FA2 in the only published benchmark, with open crash reports). Apps that need flash attention install per-app or use `torch.nn.functional.scaled_dot_product_attention` (cuDNN-backed FA-style kernels via PyTorch SDPA on Blackwell). +- **No heavy ML extras.** No xformers-source-builds, hunyuan3d, diso, nvdiffrast, sageattention, opencv, librosa, ffmpeg, etc. Those stay in `base/cuda-ml/` (cu128/py312/full stack) and apps that need them either use `cuda-ml` directly or install on top of `base/pytorch/`. +- **No `pyproject.toml` + `uv.lock`.** A base image isn't a project; the lockfile pretense (`package = false`) adds two committed files for what amounts to a version-pin list. Pins live in `Dockerfile` `ARG`s, matching the existing `base/cuda-ml/` pattern. +- **No `--generate-hashes` / wheel-hash reproducibility.** Not conventional for ML/AI base images at this scale (`pytorch/pytorch`, `nvidia/cuda`, the existing `cuda-ml` all use plain version pins). Version pin + immutable index covers the realistic threat model. + +## Validated requirements + +| # | Requirement | How satisfied | +|---|---|---| +| 1 | Python 3.13 | `uv python install 3.13` to `/opt/python`, managed standalone build | +| 2 | CUDA 13.0 | `nvidia/cuda:13.0.3-cudnn-devel-ubuntu24.04` (13.0.0 tag does not exist on Docker Hub; .3 is the latest patch) | +| 3 | PyTorch 2.11 cu130 | `torch==2.11.0` from `https://download.pytorch.org/whl/cu130`, plus matching torchvision 0.26.0 + torchaudio 2.11.0 + xformers 0.0.35 + triton 3.6.0 | +| 4 | cuDNN | `cudnn-devel` base image variant ships system cuDNN at `/usr/lib/x86_64-linux-gnu/`. Torch additionally pulls `nvidia-cudnn-cu13==9.19.0.56` as a transitive dep — torch uses the wheel-bundled version at runtime | +| 5 | NVRTC | Present in `cuda-13-0` toolkit installed by the `*-devel-*` image. Torch also transitively pulls `cuda-toolkit[...nvrtc]==13.0.2` (PyPI umbrella) | +| 6 | cuSPARSELt | Torch declares `Requires-Dist: nvidia-cusparselt-cu13==0.8.0; platform_system == "Linux"`, auto-installed. Ships `libcusparseLt.so.0` at `site-packages/nvidia/cusparselt/lib/`. Torch's `__init__.py` adds the path to the loader | +| 7 | NVSHMEM | Same pattern: `Requires-Dist: nvidia-nvshmem-cu13==3.4.5; platform_system == "Linux"`. Ships `libnvshmem_host.so.3` at `site-packages/nvidia/nvshmem/lib/` | +| 8 | uv | `COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/`. Python managed by uv (`UV_PYTHON_PREFERENCE=only-managed`, `UV_PYTHON_INSTALL_DIR=/opt/python`); venv created by uv at `/opt/venv`; packages installed via `uv pip install --python /opt/venv/bin/python` | +| 9 | devel variant | `cudnn-devel` base includes nvcc, NVRTC, CUPTI, cuDNN headers, full toolkit. 
No runtime variant ships |
+
+## Architecture
+
+```
+base/pytorch/
+├── Dockerfile        # devel image, deployment target
+├── docker-bake.hcl   # build config + tags
+└── .dockerignore     # ignore everything but the Dockerfile and bake file
+```
+
+**Single-stage Dockerfile** built `FROM nvidia/cuda:13.0.3-cudnn-devel-ubuntu24.04`. uv installs Python 3.13 to `/opt/python`, creates a venv at `/opt/venv`, and installs torch + ecosystem from the cu130 index plus PyPI for the nvidia-* transitive deps. `/opt/venv/bin` is at the front of `PATH` so `python`, `pip` (via uv), and tools resolve correctly without explicit activation in `RUN`/`CMD` layers or `docker run`. Constraints file written to `/constraints.txt` for downstream apps to pin against.
+
+### Image contents
+
+**System packages (apt):** `build-essential`, `ninja-build`, `git`, `curl`, `ca-certificates`, `tini`. Plus the `cuda-13-0` toolkit + `libcudnn9-cuda-13` already provided by the base image.
+
+**Python packages (cu130 index):**
+- `torch==2.11.0`
+- `torchvision==0.26.0`
+- `torchaudio==2.11.0`
+- `xformers==0.0.35` (py3-none wheel on the cu130 index, see Risks; requires torch≥2.10 ✓)
+- `triton==3.6.0` (only on the PyTorch index — not on PyPI)
+
+**Python packages (PyPI, pulled transitively by torch):**
+- `nvidia-cudnn-cu13==9.19.0.56`
+- `nvidia-cusparselt-cu13==0.8.0`
+- `nvidia-nvshmem-cu13==3.4.5`
+- `nvidia-nccl-cu13==2.28.9`
+- `cuda-toolkit==13.0.2` (umbrella package: cublas, cudart, cufft, cufile, cupti, curand, cusolver, cusparse, nvjitlink, nvrtc, nvtx)
+
+**Python packages (PyPI, explicit utility deps):**
+- `accelerate`, `numpy`, `safetensors`, `nvidia-ml-py`, `sympy`, `packaging`, `pybind11`, `ninja`, `psutil`, `wheel`
+
+### Environment
+
+```
+DEBIAN_FRONTEND=noninteractive
+PYTHONDONTWRITEBYTECODE=1
+PYTHONUNBUFFERED=1
+CUDA_HOME=/usr/local/cuda
+CPATH=/usr/local/cuda/include   # broader than CPLUS_INCLUDE_PATH; covers C and C++
+TORCH_CUDA_ARCH_LIST="12.0"     # consumer Blackwell (RTX 5090, RTX PRO 6000 Workstation)
+UV_COMPILE_BYTECODE=1
+UV_LINK_MODE=copy               # load-bearing — see Risks
+UV_HTTP_TIMEOUT=300
+UV_PYTHON_INSTALL_DIR=/opt/python
+UV_PYTHON_PREFERENCE=only-managed
+VIRTUAL_ENV=/opt/venv           # directs uv pip to the venv without --python
+PATH=/usr/local/cuda/bin:/opt/venv/bin:${PATH}
+```
+
+### Constraints file
+
+Written by `uv pip freeze` (auto-targets `/opt/venv` via `VIRTUAL_ENV`) filtered through `grep -E` for the regex:
+
+```
+^(torch|torchvision|torchaudio|xformers|triton|numpy|safetensors|accelerate|nvidia-|cuda-toolkit)==
+```
+
+Wider than the existing `cuda-ml/constraints.txt` regex — adds `nvidia-` and `cuda-toolkit` so downstream apps inherit the locked NVIDIA library pins and don't accidentally upgrade `nvidia-cusparselt-cu13` or `cuda-toolkit` out from under torch.
+
+## File-by-file design
+
+### `base/pytorch/Dockerfile`
+
+```dockerfile
+ARG CUDA_VERSION="13.0.3"
+ARG CUDA_DISTRO="ubuntu24.04"
+ARG UV_VERSION="0.11.8"
+
+# Named uv stage so ${UV_VERSION} can be expanded — `COPY --from=<image>:${VAR}`
+# does not expand ARGs even when they're declared in global scope; only `FROM`
+# does. The named-stage indirection is the canonical Docker workaround.
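+# Concretely: `COPY --from=uv` (the named stage declared below) works, while
+# `COPY --from=ghcr.io/astral-sh/uv:${UV_VERSION}` would see the ARG unexpanded.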
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv + +FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-${CUDA_DISTRO} + +ARG PYTHON_VERSION="3.13" +ARG TORCH_VERSION="2.11.0" +ARG TORCHVISION_VERSION="0.26.0" +ARG TORCHAUDIO_VERSION="2.11.0" +ARG XFORMERS_VERSION="0.0.35" +ARG TRITON_VERSION="3.6.0" +ARG TORCH_INDEX="https://download.pytorch.org/whl/cu130" + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + CUDA_HOME=/usr/local/cuda \ + CPATH=/usr/local/cuda/include \ + TORCH_CUDA_ARCH_LIST="12.0" \ + UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + UV_HTTP_TIMEOUT=300 \ + UV_PYTHON_INSTALL_DIR=/opt/python \ + UV_PYTHON_PREFERENCE=only-managed \ + VIRTUAL_ENV=/opt/venv \ + PATH=/usr/local/cuda/bin:/opt/venv/bin:${PATH} + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential ninja-build git curl ca-certificates tini && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +COPY --from=uv /uv /uvx /usr/local/bin/ + +RUN uv python install ${PYTHON_VERSION} && \ + uv venv /opt/venv --python ${PYTHON_VERSION} + +# Single resolve across all packages — internally consistent pins. +# `--index-strategy unsafe-best-match` makes resolution deterministic across the +# cu130 index + PyPI mix (default `first-index` stops at the first index that has +# a candidate, which is brittle when transitive deps live on PyPI only). +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install \ + --index-url ${TORCH_INDEX} \ + --extra-index-url https://pypi.org/simple \ + --index-strategy unsafe-best-match \ + torch==${TORCH_VERSION} \ + torchvision==${TORCHVISION_VERSION} \ + torchaudio==${TORCHAUDIO_VERSION} \ + xformers==${XFORMERS_VERSION} \ + triton==${TRITON_VERSION} \ + accelerate numpy safetensors nvidia-ml-py \ + sympy packaging pybind11 ninja psutil wheel + +RUN uv pip freeze | grep -E \ + "^(torch|torchvision|torchaudio|xformers|triton|numpy|safetensors|accelerate|nvidia-|cuda-toolkit)==" \ + > /constraints.txt && \ + echo "Constraints:" && cat /constraints.txt + +# Build-time validation. `torch.cuda.is_available()` requires `--gpus all` and is +# intentionally not checked here — only that torch was built against CUDA and that +# xformers' py3-none wheel imports against torch's C++ ABI on cp313. +RUN python -c "import torch; print(f'PyTorch {torch.__version__} CUDA {torch.version.cuda}'); assert torch.version.cuda is not None" && \ + python -c "import xformers; print(f'xformers {xformers.__version__}')" + +ENTRYPOINT ["/usr/bin/tini", "--"] +``` + +### `base/pytorch/docker-bake.hcl` + +```hcl +target "docker-metadata-action" {} + +variable "APP" { + default = "pytorch" +} + +variable "VERSION" { + // Format: cuda{CUDA_VERSION}-torch{TORCH_VERSION} + default = "cuda13.0-torch2.11" +} + +variable "SOURCE" { + default = "https://github.com/arsac/containers" +} + +variable "VENDOR" { + default = "arsac" +} + +group "default" { + targets = ["image-devel-local"] +} + +target "image-devel" { + inherits = ["docker-metadata-action"] + dockerfile = "Dockerfile" + labels = { + "org.opencontainers.image.source" = "${SOURCE}" + } +} + +target "image-devel-local" { + inherits = ["image-devel"] + output = ["type=docker"] + tags = ["${APP}:${VERSION}-devel", "${APP}:devel", "${APP}:latest"] +} + +target "image-devel-all" { + inherits = ["image-devel"] + platforms = ["linux/amd64"] +} +``` + +Notes: +- Only devel targets exist (no `image` / `image-local` / `image-all` runtime targets). 
+- `image-devel-local` adds `${APP}:latest` so a freshly-built local image is reachable as `pytorch:latest` for downstream-app local builds.
+- `cudnn-devel` adds ~3.5 GB to the base layer vs. plain `devel`. Estimated final image ~12-14 GB. Acceptable since this is the deployment target and the CUDA libs are required.
+
+## Build & CI
+
+**Local build:**
+```
+cd base/pytorch
+docker buildx bake image-devel-local
+```
+
+**Root `build-push-local.sh`:** does not work for this image; it assumes a `builder` target that a single-stage Dockerfile doesn't define. Use `docker buildx bake image-devel-local` for local builds instead (see the implementation plan).
+
+**GitHub Actions:** `.github/workflows/release.yaml` auto-detects new directories under `base/**` via `bjw-s-labs/action-changed-files` and dispatches to `image-builder.yaml`. The existing `build-bases-devel` job builds devel targets; the `build-bases` job builds runtime targets.
+
+**Implication for CI:** the existing `release.yaml` calls `build-bases` (runtime) for every changed base. Since `base/pytorch/`'s bake file has no runtime targets, that job will fail at `docker buildx bake --print` for the missing target.
+
+**Resolution:** modify `.github/workflows/release.yaml` to gate the `build-bases` (runtime) job on the existence of a `Dockerfile.runtime` in the base directory. A small composite-action or inline check at the matrix level — e.g., a `runtime-bases` job output that filters `changed-bases` by whether `base/<name>/Dockerfile.runtime` exists — keeps the workflow declarative and avoids publishing duplicate-tag aliases (which would be misleading: `:runtime` consumers would receive the full 12-14 GB devel image). The earlier alternative (stub `image` targets in the bake file aliasing `:devel`) is rejected because publishing identical content under different tag suffixes hides the fact that this image has no runtime variant.
+
+**Renovate:** existing `.renovaterc.json5` regex manager scans for `# datasource=X depName=Y` annotations next to bake variables. The existing `base/cuda-ml/` doesn't use these (manual bumps). Match the pattern: no annotations on the new bake file — manual bumps via PR.
+
+## Risks & open questions
+
+**`cuda-toolkit` PyPI umbrella + system toolkit duplication.**
+The base image installs the CUDA 13 toolkit via apt (in `/usr/local/cuda`), and torch's transitive deps install the `cuda-toolkit==13.0.2` PyPI umbrella into `site-packages/nvidia/`. They coexist; torch loads from `site-packages/nvidia/`, while `nvcc` and CUDA headers come from `/usr/local/cuda`. Disk overhead ~1-2 GB. Eliminating it would require either (a) skipping the apt toolkit and relying entirely on the pip-installed one (loses nvcc, headers — fails req #5/#9), or (b) excluding the pip umbrella from torch's deps (would break torch). Accept the duplication; document in the Dockerfile.
+
+**Triton 3.6.0 not on PyPI.**
+PyPI's latest is triton 3.5.x; 3.6.0 only exists on the PyTorch index. Resolved deterministically by `--index-strategy unsafe-best-match` on `uv pip install`: uv considers all configured indexes and selects the best version match for each name, regardless of which index it was found on first. (Default `first-index` strategy stops at the first index with any candidate, which works coincidentally today because triton is on the torch index but is brittle.)
+
+**xformers 0.0.35 wheel tag vs cp313.**
+The cu130 index ships xformers as a `py3-none-manylinux_2_28_x86_64.whl` (a generic py3 wheel that declares no CPython ABI tag), not a cp313-tagged one.
py3-none wheels still link against torch's C++ ABI, which is cp-version-sensitive — if xformers' published wheel was built against a different torch ABI hash than 2.11+cu130/cp313 ships, you get import-time `_C` symbol errors. Mitigated by the `import xformers` smoke test in the Dockerfile build, which fails the image build if the ABI doesn't line up. + +**`cuda-toolkit` PyPI umbrella patch-version skew.** +The apt-installed system toolkit is 13.0.3 (in `/usr/local/cuda`), the wheel-installed toolkit is `cuda-toolkit==13.0.2` (in `site-packages/nvidia/`). Downstream extensions built against `/usr/local/cuda/include` and then dlopening from `site-packages/nvidia/` at runtime can hit subtle ABI skew in `nvjitlink`/`nvrtc` minor versions. Low impact in practice (NVIDIA maintains intra-13.0.x ABI), but worth knowing when debugging extension build failures. Future cu130 patch updates may close the gap. + +**`LD_LIBRARY_PATH` not set; wheel-side libs found only via torch's loader.** +`libcusparseLt.so.0` (in `site-packages/nvidia/cusparselt/lib/`) and `libnvshmem_host.so.3` (in `site-packages/nvidia/nvshmem/lib/`) are added to the dlopen path by `torch/__init__.py` at import time. Code that calls `ctypes.CDLL("libcusparseLt.so.0")` *before* importing torch — uncommon but possible in profiling/diagnostic code — will not find these libraries. If a downstream app needs this guarantee, set `LD_LIBRARY_PATH` to include both directories in its own image layer. + +**`UV_LINK_MODE=copy` is load-bearing.** +`uv venv` and `uv pip install` default to `hardlink`, which fails when the venv (`/opt/venv`) and the uv cache (`/root/.cache/uv`) are on different filesystems — common in Docker layered storage. `UV_LINK_MODE=copy` is set explicitly to avoid this. A future "drop it for speed" change would silently break the build; do not remove without re-verifying both build paths. + +**Image-rebuild cache invalidation when uv version bumps.** +The `UV_VERSION` ARG pin to `0.11.8` (vs `:latest`) means uv version is part of the layer cache key — bumping it invalidates the apt layer below. This is the right tradeoff: silent bumps via `:latest` defeat reproducibility and can change resolver behavior under your feet. Bump `UV_VERSION` deliberately via PR; expect a full rebuild. + +## Out of scope / future work + +- **`base/cuda-ml/` cu130 migration.** Currently cu128/py312 with a heavy stack. A future PR could either bump it in place or layer it `FROM ghcr.io/arsac/pytorch:cuda13.0-torch2.11` once both share cu130/py313, eliminating duplicate torch installs across the base family. +- **Adding flash-attn back** when v4 has functional SM 12.0 kernels (PRs #2349, #2406, #2499 merged) or when v2.x publishes confirmed cu13 wheels. +- **Multi-arch CUDA support.** Currently `TORCH_CUDA_ARCH_LIST="12.0"`. If the cluster adds Hopper (H100, SM 9.0) or Ada (L40, SM 8.9) nodes, expand the list and rebuild. +- **Renovate annotations** on the bake file and Dockerfile `ARG`s once added to `base/cuda-ml/` too — keep siblings consistent. +- **`--torch-backend=cu130` flag.** uv's modern torch-CUDA selection flag (`uv pip install torch --torch-backend=cu130`) is declined here because it only handles `torch` itself; we still need `torchvision`, `torchaudio`, `xformers`, and `triton` from the same cu130 index. Sticking with `--index-url` + `--index-strategy unsafe-best-match` covers all five names uniformly. Re-evaluate if uv extends `--torch-backend` to cover the broader ecosystem.
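+
+For the `LD_LIBRARY_PATH` risk above, a downstream layer that wants the pre-torch `dlopen` guarantee could compute the two wheel lib directories and bake them into its own `ENV` (a sketch, assuming this image's `/opt/venv` layout):
+
+```bash
+docker run --rm pytorch:latest python -c "
+import os, sysconfig
+sp = sysconfig.get_paths()['purelib']
+print(':'.join(os.path.join(sp, 'nvidia', d, 'lib') for d in ('cusparselt', 'nvshmem')))"
+# put the printed value into the downstream image, e.g.
+#   ENV LD_LIBRARY_PATH=<printed-paths>:${LD_LIBRARY_PATH}
+```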