Changes from all commits
19 commits
40d7b1a
feat(weights): managed weights pipeline (config, OCI bundles, store, …
michaeldwan Apr 28, 2026
b8e1864
test(weights): integration tests and managed-weights example
michaeldwan Apr 28, 2026
5d66e8e
docs(spec): add internal draft spec for managed weights
michaeldwan Apr 28, 2026
4252d59
refactor(weights): simplify review cleanup
michaeldwan Apr 28, 2026
d157af5
refactor(weights): remove cog inspect command
michaeldwan Apr 29, 2026
435cc24
test(weights): drop integration tests for removed commands
michaeldwan Apr 29, 2026
2f462a1
test(weights): remove unused mock-weights testscript helper
michaeldwan Apr 29, 2026
4593416
style: gofmt fixes
michaeldwan Apr 29, 2026
bb0e472
test(weights): repair integration tests broken by v1 design changes
michaeldwan Apr 29, 2026
6b4b5ae
fix: address security and correctness issues from PR review
michaeldwan Apr 29, 2026
c4d0aff
fix: prune orphaned entries from weights.lock when config changes
michaeldwan Apr 29, 2026
0ebfcba
chore(examples): remove orphaned qwen weight from managed-weights exa…
michaeldwan Apr 29, 2026
0cf6083
feat: detect weights.lock drift and fail build/push/predict/train early
michaeldwan Apr 29, 2026
2dd1713
feat: make Resolver.Build read-only, HEAD-check weights during push
michaeldwan Apr 29, 2026
e15c11f
chore(examples): add directory listing to predict.py setup for debugging
michaeldwan Apr 29, 2026
a73fcd3
fix: URL-escape path segments in HuggingFace URL builder
michaeldwan Apr 30, 2026
95cb315
chore: fix formatting
michaeldwan Apr 30, 2026
5e92ae6
fix: remove stale 'Pushing weights' assertion from oci_bundle_push test
michaeldwan Apr 30, 2026
8885599
feat(examples): add resnet50 example with HuggingFace managed weights
michaeldwan Apr 30, 2026
4 changes: 0 additions & 4 deletions .gitignore
@@ -23,10 +23,6 @@ flag_file
bin/*
.beads/

# Test directory for weights testing
/test-weights/
weights.lock

# Auto-generated version files from setuptools-scm
python/cog/_version.py
coglet/python/coglet/_version.py
1 change: 0 additions & 1 deletion architecture/06-cli.md
@@ -168,7 +168,6 @@ Stores credentials for `cog push`.
These commands exist but are hidden from `cog --help`:

- **`cog debug`** -- Generates the Dockerfile from cog.yaml without building (useful for debugging build issues)
- **`cog inspect`** -- Inspects model images and OCI indices
- **`cog weights`** -- Parent command for `weights build`, `weights push`, `weights inspect`

There's also a separate `base-image` binary (`cmd/base-image/`) with subcommands for managing Cog base images (`dockerfile`, `build`, `generate-matrix`). This isn't a `cog` subcommand.
17 changes: 17 additions & 0 deletions examples/managed-weights/.dockerignore
@@ -0,0 +1,17 @@
# Keep the weights/ directory out of the docker build context.
#
# With v1 managed weights, `cog.yaml`'s weights: entries are packed into
# separate OCI layers and land at their `target` paths at runtime via the
# image index — they must NOT be baked into the model image by `cog build`.
# Without this exclude, buildkit ships the full (multi-GB) weights/ directory
# to the docker daemon on every build.
weights/

# Packed layer cache written by `cog weights build`. Do NOT exclude all of
# .cog/ — `cog build` stages the SDK + coglet wheels and CA cert under
# .cog/tmp/ and references them from its generated Dockerfile, so excluding
# the whole directory breaks the image build.
.cog/weights-cache/

# Git metadata doesn't belong in the image.
.git/
5 changes: 5 additions & 0 deletions examples/managed-weights/.gitignore
@@ -0,0 +1,5 @@
# Weight files (multi-GB, populated manually — see README.md)
weights/

# Cog build artifacts (packed layers, cached wheels, etc.)
.cog/
92 changes: 92 additions & 0 deletions examples/managed-weights/README.md
@@ -0,0 +1,92 @@
# examples/managed-weights

A minimal cog model used to exercise the v1 managed-weights OCI pipeline
end-to-end. It produces an OCI image index carrying a model image manifest
and per-weight manifests.

The predictor validates weight files on disk against `weights.lock`
(generated by `cog weights import`), fails setup on any missing or
mismatched files, and returns a per-weight status summary from `predict()`.
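For orientation, the image index is (per the OCI image spec) a JSON document whose `manifests` array references the model image and each weight manifest by digest. A hedged sketch of its general shape; the digests, sizes, and the annotation key below are invented for illustration:

```python
import json

# Hedged sketch of an OCI image index: one model-image manifest plus one
# per-weight manifest. The mediaType strings come from the OCI image spec;
# the digests, sizes, and annotation key are made up.
index = {
    "schemaVersion": 2,
    "mediaType": "application/vnd.oci.image.index.v1+json",
    "manifests": [
        {
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "digest": "sha256:" + "a" * 64,  # fake digest
            "size": 1234,
        },
        {
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "digest": "sha256:" + "b" * 64,  # fake digest
            "size": 5678,
            "annotations": {"example.weight.name": "parakeet"},  # made-up key
        },
    ],
}

print(json.dumps(index, indent=2))
```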

## Populating `weights/`

The `weights/` directory is git-ignored because it's ~5 GB. Populate it by
cloning the HuggingFace repo and copying everything except `.git/`:

```bash
# One-time: clone the weights somewhere outside this repo
git clone https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3 ~/hf/parakeet

# Copy everything except .git into examples/managed-weights/weights/
mkdir -p examples/managed-weights/weights
rsync -a --exclude=.git/ ~/hf/parakeet/ examples/managed-weights/weights/
```

You can substitute any directory of model files; the pipeline is
content-agnostic.

## Importing weights

After populating (or changing) `weights/`, regenerate the lockfile:

```bash
cd examples/managed-weights
cog weights import
```

This writes `weights.lock`. The predictor's `setup()` reads this file and
validates that all expected files exist at the declared targets.
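The lockfile schema isn't spelled out here, but from the way `setup()` consumes it (see `predict.py`), it is roughly a `weights` list whose entries carry `name`, `target`, and per-file `path`/`size`/`digest` records. A sketch with invented values:

```python
import hashlib

# Invented example content; the real file is written by `cog weights import`.
payload = b"fake weight bytes"
lock = {
    "weights": [
        {
            "name": "parakeet",
            "target": "/src/weights/parakeet",
            "files": [
                {
                    "path": "model.safetensors",
                    "size": len(payload),
                    "digest": "sha256:" + hashlib.sha256(payload).hexdigest(),
                },
            ],
        },
    ],
}

# setup() walks lock["weights"], checking each file's size and then digest.
for entry in lock["weights"]:
    for f in entry["files"]:
        print(entry["name"], f["path"], f["size"], f["digest"][:19])
```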

## Running the pipeline

Start a local registry (or point at any registry you control):

```bash
docker run -d --rm -p 5000:5000 --name cog-test-registry registry:3
```

Build and push the full bundle. The presence of a `weights:` section in
`cog.yaml` triggers the OCI bundle format automatically.

```bash
cd examples/managed-weights
cog push localhost:5000/managed-weights
```

Or run the weight pipeline in isolation (no model image):

```bash
cd examples/managed-weights
cog weights build
cog weights push localhost:5000/managed-weights
```

## Testing locally

Build the image and run it with weights bind-mounted:

```bash
cd examples/managed-weights
cog build -t managed-weights-local
docker run --rm -p 5050:5000 \
-v $(pwd)/weights:/src/weights/parakeet:ro \
managed-weights-local
```

Then hit predict:

```bash
curl -s -X POST http://localhost:5050/predictions \
-H 'Content-Type: application/json' \
-d '{"input":{}}' | jq '.output | fromjson'
```
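The `fromjson` step is needed because this predictor returns its summary as a JSON string, so the response body decodes in two passes: once for the prediction envelope, once for the output itself. A sketch of the same double decode against a canned body (no live server assumed):

```python
import json

# Canned /predictions response body; a real one comes from the server above.
body = '{"status": "succeeded", "output": "[{\\"name\\": \\"parakeet\\", \\"status\\": \\"ok\\"}]"}'

resp = json.loads(body)               # pass 1: the prediction envelope
summary = json.loads(resp["output"])  # pass 2: predict()'s JSON string

print(summary[0]["name"], summary[0]["status"])  # parakeet ok
```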

## Inspecting the output

```bash
crane manifest localhost:5000/managed-weights:latest | jq .
crane ls localhost:5000/managed-weights
```

Weight manifests are pushed under tags of the shape
`weights-<name>-<12-hex-digest>` (see `pkg/model/weight_pusher.go`).
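Given that shape, a tag could be derived as below; the helper name and the exact truncation rule are assumptions from the text above, not taken from `pkg/model/weight_pusher.go`:

```python
import hashlib

def weight_tag(name: str, digest: str) -> str:
    # "weights-<name>-<12-hex-digest>": drop the algorithm prefix and keep
    # the first 12 hex characters of the digest.
    hex_part = digest.removeprefix("sha256:")[:12]
    return f"weights-{name}-{hex_part}"

digest = "sha256:" + hashlib.sha256(b"example").hexdigest()
print(weight_tag("parakeet", digest))
```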
48 changes: 48 additions & 0 deletions examples/managed-weights/cog.yaml
@@ -0,0 +1,48 @@
# Example model for testing the v1 managed-weights OCI artifact format.
#
# The weights/ directory is populated by a human (see README.md) with
# nvidia/parakeet-tdt-0.6b-v3 from HuggingFace. It's listed in .gitignore
# so the ~5GB payload never hits git.
#
# Import weights and generate the lockfile:
# cog weights import
#
# Build and push the full bundle:
# cog push localhost:5000/managed-weights

image: registry.cloudflare.com/3515b24d58ec616d11f4ce4290a02ac4/md/examples/managed-weights
# image: localhost:5000/md/examples/managed-weights

build:
gpu: false
python_version: "3.12"
python_requirements: requirements.txt

predict: "predict.py:Predictor"

weights:
- name: parakeet
source:
uri: weights
include:
- "*.safetensors" # HF-format weights (skip the .nemo bundle)
- "*.json" # model + tokenizer configs
target: /src/weights/parakeet
- name: minilm
source:
uri: hf://sentence-transformers/all-MiniLM-L6-v2
exclude:
- "onnx/" # ONNX runtime variants (~474 MB)
- "openvino/" # OpenVINO runtime variants (~113 MB)
- "pytorch_model.bin" # legacy format, redundant with model.safetensors
- "tf_model.h5" # TensorFlow weights
- "rust_model.ot" # Rust (tch-rs) weights
- "train_script.py" # training artifact
- "data_config.json" # training data config
- "README.md"
- ".gitattributes"
target: /src/weights/minilm
# - name: qwen3.6-27b-fp8
# source:
# uri: hf://Qwen/Qwen3.6-27B-FP8
# target: /src/weights/qwen
168 changes: 168 additions & 0 deletions examples/managed-weights/predict.py
@@ -0,0 +1,168 @@
# Infra verification predictor for the v1 managed-weights OCI pipeline.
# Validates weight files on disk against weights.lock at setup; predict()
# returns a per-weight status summary.

import hashlib
import json
import sys
from pathlib import Path
from typing import Any

from cog import BasePredictor

LOCK_PATH = Path("/src/weights.lock")


def _file_sha256(path: Path) -> str:
h = hashlib.sha256()
with open(path, "rb") as f:
while chunk := f.read(8 * 1024 * 1024):
h.update(chunk)
return f"sha256:{h.hexdigest()}"


def _validate_weight(
name: str, target: str, expected_files: list[dict[str, Any]]
) -> dict[str, Any]:
"""Validate a single weight entry from the lockfile.

Checks presence and size first (cheap), then hashes only files whose
size matches (expensive). This way missing or truncated files fail fast
without reading gigabytes of data.
"""
target_dir = Path(target)

if not target_dir.is_dir():
return {
"name": name,
"target": target,
"errors": [f"weight directory {target} does not exist"],
"warnings": [],
"ok": [],
"missing": [f["path"] for f in expected_files],
"extra": [],
"mismatch": [],
}

# Walk the directory once — just stat, no hashing yet.
actual_by_path: dict[str, Path] = {}
actual_sizes: dict[str, int] = {}
for p in sorted(target_dir.rglob("*")):
if not p.is_file():
continue
rel = str(p.relative_to(target_dir))
actual_by_path[rel] = p
actual_sizes[rel] = p.stat().st_size

ok: list[str] = []
missing: list[str] = []
mismatch: list[str] = []
errors: list[str] = []

for entry in expected_files:
path = entry["path"]

if path not in actual_by_path:
missing.append(path)
errors.append(f"missing: {path}")
continue

disk_size = actual_sizes[path]
if disk_size != entry["size"]:
mismatch.append(path)
errors.append(
f"size mismatch: {path} (expected {entry['size']}, got {disk_size})"
)
actual_by_path.pop(path)
continue

# Size matches — hash to confirm content.
digest = _file_sha256(actual_by_path.pop(path))
if digest != entry["digest"]:
mismatch.append(path)
errors.append(f"digest mismatch: {path}")
else:
ok.append(path)

extra = sorted(actual_by_path.keys())
warnings = [f"extra file: {p}" for p in extra]

return {
"name": name,
"target": target,
"errors": errors,
"warnings": warnings,
"ok": ok,
"missing": missing,
"extra": extra,
"mismatch": mismatch,
}


class Predictor(BasePredictor):
def setup(self) -> None:
if not LOCK_PATH.exists():
raise RuntimeError(f"{LOCK_PATH} not found — cannot validate weights")

lock = json.loads(LOCK_PATH.read_text())

self.results: list[dict[str, Any]] = []
all_errors: list[str] = []

for entry in lock["weights"]:
name = entry["name"]
target = entry["target"]
expected_files = [
{"path": f["path"], "size": f["size"], "digest": f["digest"]}
for f in entry["files"]
]

# Dump directory contents before validation for debugging.
target_dir = Path(target)
if target_dir.is_dir():
print(f"--- find {target} ---", file=sys.stderr)
for p in sorted(target_dir.rglob("*")):
suffix = "/" if p.is_dir() else f" ({p.stat().st_size})"
print(f" {p.relative_to(target_dir)}{suffix}", file=sys.stderr)
print("---", file=sys.stderr)
else:
print(f"--- {target}: does not exist ---", file=sys.stderr)

print(
f"validating weight '{name}' at {target} ({len(expected_files)} files)",
file=sys.stderr,
)
result = _validate_weight(name, target, expected_files)
self.results.append(result)

for w in result["warnings"]:
print(f" WARNING: {w}", file=sys.stderr)

if result["errors"]:
for e in result["errors"]:
all_errors.append(f"[{name}] {e}")
else:
print(f" OK ({len(result['ok'])} files)", file=sys.stderr)

if all_errors:
msg = "weight validation failed:\n" + "\n".join(
f" - {e}" for e in all_errors
)
raise RuntimeError(msg)

print("all weights validated", file=sys.stderr)

def predict(self) -> str:
summary = []
for r in self.results:
entry: dict[str, Any] = {
"name": r["name"],
"target": r["target"],
"status": "ok" if not r["errors"] else "error",
"ok": len(r["ok"]),
"missing": r["missing"],
"extra": r["extra"],
"mismatch": r["mismatch"],
}
summary.append(entry)
return json.dumps(summary)