cloudflare · whoiskatrin · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/.changeset/workflow-safe-generated-ids.md b/.changeset/workflow-safe-generated-ids.md
@@ -0,0 +1,5 @@
+---
+"agents": patch
+---
+
+Generate workflow instance IDs with a Cloudflare-safe alphabet so `runWorkflow()` no longer produces invalid IDs containing `_`.
diff --git a/.github/workflows/playground-e2e.yml b/.github/workflows/playground-e2e.yml
@@ -0,0 +1,61 @@
+name: Playground E2E
+
+on:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 (GitHub evaluates cron in UTC)
+  workflow_dispatch:  # also allow manually triggered runs
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow + ref
+  cancel-in-progress: true  # a newer run in the same group cancels the running one
+
+jobs:
+  e2e:  # installs dependencies and Chromium, runs the playground suite, uploads reports
+    name: Playground browser tests
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 1  # shallow clone: only the latest commit is fetched
+
+      - uses: actions/setup-node@v6
+        with:
+          node-version: 24
+          cache: npm
+
+      - run: npm ci
+
+      - name: Get Playwright version  # read from package-lock.json; keys the browser cache below
+        id: playwright-version
+        run: echo "version=$(jq -r '.packages["node_modules/playwright"].version' package-lock.json)" >> "$GITHUB_OUTPUT"
+
+      - name: Cache Playwright browsers
+        uses: actions/cache@v5
+        id: playwright-cache
+        with:
+          path: ~/.cache/ms-playwright
+          key: ${{ runner.os }}-playwright-${{ steps.playwright-version.outputs.version }}  # new key per Playwright version
+
+      - name: Install Playwright browsers
+        if: steps.playwright-cache.outputs.cache-hit != 'true'  # NOTE(review): a cache hit also skips the --with-deps OS packages; confirm the runner image provides them
+        run: npx playwright install --with-deps chromium
+
+      - name: Run playground e2e tests
+        env:  # Cloudflare credentials, read from the secrets context
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CF_AI_GATEWAY_TOKEN }}
+          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_AI_GATEWAY_ACCOUNT_ID }}
+          CLOUDFLARE_GATEWAY_ID: ${{ secrets.CF_AI_GATEWAY_NAME }}
+        run: npm run test:playground:e2e
+
+      - name: Upload Playwright report
+        if: always()  # run even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: playground-playwright-report
+          path: |
+            playwright-report
+            examples/playground/playwright-report
+            test-results
+            examples/playground/test-results
+          if-no-files-found: ignore  # missing report directories are not an error
diff --git a/.github/workflows/pullrequest.yml b/.github/workflows/pullrequest.yml
@@ -17,7 +17,7 @@ env:
 
 jobs:
   ci:
-    timeout-minutes: 20
+    timeout-minutes: 30
     runs-on: ubuntu-24.04
     steps:
       - uses: actions/checkout@v6
@@ -58,4 +58,22 @@ jobs:
         run: npx playwright install --with-deps chromium
 
       - run: CI=true npx nx run-many -t test
+
+      - name: Run playground E2E tests
+        env:  # Cloudflare credentials, read from the secrets context
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CF_AI_GATEWAY_TOKEN }}
+          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_AI_GATEWAY_ACCOUNT_ID }}
+          CLOUDFLARE_GATEWAY_ID: ${{ secrets.CF_AI_GATEWAY_NAME }}
+        run: npm run test:playground:e2e
+
+      - name: Upload Playwright report
+        if: failure()  # only when an earlier step in this job failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: playground-playwright-report
+          path: |
+            examples/playground/playwright-report
+            examples/playground/test-results
+          if-no-files-found: ignore  # missing report directories are not an error
+
       - run: npx pkg-pr-new publish --peerDeps ./packages/*
diff --git a/.gitignore b/.gitignore
@@ -148,6 +148,7 @@ __screenshots__
 
 # Playwright test artifacts
 test-results/
+playwright-report/
 
 # Nx
 .nx/cache

diff --git a/examples/playground/README.md b/examples/playground/README.md
@@ -99,7 +99,30 @@ playground/
 
 ## Testing
 
-See [testing.md](./testing.md) for a comprehensive guide on manually testing every feature.
+See [testing.md](./testing.md) for the source-of-truth test plan. **All E2E tests are AI-driven** — the test runner parses `testing.md` into scenarios, then uses an LLM to translate each scenario's natural-language actions and assertions into Playwright commands at runtime.
+
+```bash
+# Run the browser suite locally
+npm run test:e2e
+```
+
+**How it works:**
+
+1. `e2e/parse-testing-md.ts` parses `testing.md` into structured scenario objects
+2. `e2e/ai-runner.spec.ts` creates one Playwright `test()` per scenario
+3. `e2e/ai-executor.ts` navigates to the route, takes an accessibility snapshot, sends the scenario + snapshot to a Workers AI LLM, and executes the returned actions
+4. Scenarios flagged `deployed-only` are auto-skipped in local/CI environments
+
+**Required environment variables:**
+
+- `CLOUDFLARE_API_TOKEN` — Cloudflare API token with Workers AI access
+- `CLOUDFLARE_ACCOUNT_ID` — Cloudflare account ID
+
+**Adding a new test:** Edit `testing.md` — no Playwright code needed. The AI runner will pick it up automatically.
+
+The test command includes a dependency-preparation step that rebuilds `agents`, `@cloudflare/ai-chat`, `@cloudflare/codemode`, and `@cloudflare/voice` only when their source is newer than their built `dist/` output.
+
+GitHub Actions runs the playground browser suite on every pull request (blocking merge) and nightly.
 
 ## Configuration
 

diff --git a/examples/playground/e2e/.gitignore b/examples/playground/e2e/.gitignore
@@ -0,0 +1,3 @@
+# Playwright test artifacts
+test-results/
+playwright-report/