diff --git a/tests/scripts/codex-desc-avg-budget.bats b/tests/scripts/codex-desc-avg-budget.bats
new file mode 100644
index 000000000..0b0c712df
--- /dev/null
+++ b/tests/scripts/codex-desc-avg-budget.bats
@@ -0,0 +1,46 @@
+#!/usr/bin/env bats
+# ag-vzbt: the codex-description catalog budget must be a per-skill AVERAGE (scales
+# with skill count) instead of a hard aggregate that walls off the Nth+ skill.
+# Fixture-driven via BUDGET_REPO_ROOT. We assert on the codex-catalog output line
+# (robust to unrelated checks in the gate).
+
+setup() { # bats per-test hook: locate the gate script and build an empty fixture repo
+  GATE="$BATS_TEST_DIRNAME/../../tests/skills/test-token-budgets.sh"  # script under test
+  FIX="$(mktemp -d)"  # throwaway repo root, handed to the gate via BUDGET_REPO_ROOT
+  mkdir -p "$FIX/skills" "$FIX/skills-codex"  # the two SKILL_ROOTS the gate derives from its repo root
+  mk() { # mk <name> <description>: write skills-codex/<name>/SKILL.md with that name/description frontmatter
+    mkdir -p "$FIX/skills-codex/$1"
+    printf -- '---\nname: %s\ndescription: %s\n---\n# %s\n' "$1" "$2" "$1" > "$FIX/skills-codex/$1/SKILL.md"
+  }
+  export -f mk  # NOTE(review): FIX itself is not exported, so mk is only usable in this shell; confirm the export is needed
+}
+
+teardown() { rm -rf "$FIX"; }  # remove the fixture tree that setup created with mktemp -d
+
+@test "codex catalog PASSES when average description length is under budget" {
+  mk a "short terse codex description here"   # 34 chars
+  mk b "another short terse codex description"  # 37 chars; together avg ~35, under the cap of 45
+  run env BUDGET_REPO_ROOT="$FIX" bash "$GATE"  # point the gate at the fixture instead of the real repo
+  echo "$output"  # echoed so the gate output is available when diagnosing a failure
+  [[ "$output" == *"skills-codex description catalog"* ]]  # the catalog line must be present at all
+  echo "$output" | grep "skills-codex description catalog" | grep -q "PASS"  # and that line must carry PASS
+}
+
+@test "codex catalog FAILS when average description length is over budget" {
+  # Each ~120 chars → avg ~120 >> 45 per-skill-avg cap (CODEX_DESC_AVG_FAIL_CHARS), but still < 180 per-skill hard cap (DESC_FAIL_CHARS).
+  long="this is a deliberately verbose codex description that pushes the per skill average well over the configured budget ceiling"
+  mk a "$long"
+  mk b "$long"
+  run env BUDGET_REPO_ROOT="$FIX" bash "$GATE"  # exit status is not asserted; only the catalog line is inspected below
+  echo "$output"  # echoed so the gate output is available when diagnosing a failure
+  echo "$output" | grep "skills-codex description catalog" | grep -q "FAIL"  # the catalog line must carry FAIL
+}
+
+@test "budget scales: 100 short-desc skills pass even though total > old 2800 wall" {
+  # 100 x ~38 chars = ~3800 total (> the old 2800 hard aggregate) but avg ~38 < 45.
+  # Passes ONLY under the per-skill-average rule — proves the wall is gone.
+  for i in $(seq 1 100); do mk "skill$i" "terse codex description number $i here"; done  # 37-39 chars each, depending on the digits in $i
+  run env BUDGET_REPO_ROOT="$FIX" bash "$GATE"  # point the gate at the 100-skill fixture
+  echo "$output"  # echoed so the gate output is available when diagnosing a failure
+  echo "$output" | grep "skills-codex description catalog" | grep -q "PASS"  # the catalog line must carry PASS
+}
diff --git a/tests/skills/test-token-budgets.sh b/tests/skills/test-token-budgets.sh
index a4a5593e4..cc4daf634 100755
--- a/tests/skills/test-token-budgets.sh
+++ b/tests/skills/test-token-budgets.sh
@@ -12,7 +12,9 @@
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+# BUDGET_REPO_ROOT overrides the repo root for fixture tests
+# (tests/scripts/codex-desc-avg-budget.bats); production derives it from the script location.
+REPO_ROOT="${BUDGET_REPO_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}"
 SKILL_ROOTS=("$REPO_ROOT/skills" "$REPO_ROOT/skills-codex")
 
 # Colors
@@ -27,11 +29,13 @@ SKILL_FAIL_LIMIT=10000
 SKILL_WARN_LIMIT=8000
 SESSION_FAIL_LIMIT=8000
 DESC_FAIL_CHARS=180
-# Always-loaded codex skill catalog. Sized to fit the catalog with modest
-# headroom: ~81 skills x ~34 avg description chars. Raised from 2600 (calibrated
-# at exactly 77 skills with zero slack) to 2700 when expert-council landed, then
-# to 2800 when using-gc landed (the catalog grew to 81 skills, ag-p4p).
-CODEX_DESC_TOTAL_FAIL_CHARS=2800
+# Always-loaded codex skill catalog. The budget is a PER-SKILL AVERAGE, not a hard
+# aggregate (ag-vzbt): the old hard total (raised 2600→2700→2800 as skills landed)
+# walled off the Nth+ skill and forced /burndown into a 17-char stub. An average
+# scales with the catalog — terse descriptions keep it low, so the gate fails only
+# if descriptions are bloated on average. Current avg ~35; cap 45 = comfortable
+# headroom, preserving terse-description discipline (avg is floored: trips at 46+).
+CODEX_DESC_AVG_FAIL_CHARS=45
 
 # Token estimation: bytes / 4
 estimate_tokens() {
@@ -195,12 +199,12 @@ fi
 
 if [[ "$codex_desc_count" -gt 0 ]]; then
     codex_desc_avg=$((codex_desc_total / codex_desc_count))
-    if [[ "$codex_desc_total" -gt "$CODEX_DESC_TOTAL_FAIL_CHARS" ]]; then
-        echo -e "  ${RED}[FAIL]${NC} skills-codex description catalog: ${codex_desc_total} chars > ${CODEX_DESC_TOTAL_FAIL_CHARS} aggregate limit"
+    if [[ "$codex_desc_avg" -gt "$CODEX_DESC_AVG_FAIL_CHARS" ]]; then
+        echo -e "  ${RED}[FAIL]${NC} skills-codex description catalog: avg ${codex_desc_avg} chars/skill > ${CODEX_DESC_AVG_FAIL_CHARS} per-skill-avg limit (${codex_desc_total} chars over ${codex_desc_count} skills)"
         ((failed++)) || true
     else
-        pct=$((codex_desc_total * 100 / CODEX_DESC_TOTAL_FAIL_CHARS))
-        echo -e "  ${GREEN}[PASS]${NC} skills-codex description catalog: ${codex_desc_total} chars (${pct}% of ${CODEX_DESC_TOTAL_FAIL_CHARS}, avg ${codex_desc_avg})"
+        pct=$((codex_desc_avg * 100 / CODEX_DESC_AVG_FAIL_CHARS))
+        echo -e "  ${GREEN}[PASS]${NC} skills-codex description catalog: avg ${codex_desc_avg} chars/skill (${pct}% of ${CODEX_DESC_AVG_FAIL_CHARS} per-skill-avg; ${codex_desc_total} chars over ${codex_desc_count} skills)"
         ((passed++)) || true
     fi
 else