Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .github/workflows/deploy-network.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ on:
description: "Source tag that triggered this deploy"
required: false
type: string
notify_on_failure:
description: "Whether this workflow should send its own failure notification"
required: false
type: boolean
default: true
workflow_dispatch:
inputs:
network:
Expand Down Expand Up @@ -74,6 +79,11 @@ on:
description: "Source tag that triggered this deploy"
required: false
type: string
notify_on_failure:
description: "Whether this workflow should send its own failure notification"
required: false
type: boolean
default: true

concurrency:
group: deploy-network-${{ inputs.network }}-${{ inputs.namespace || inputs.network }}-${{ inputs.aztec_docker_image || inputs.semver }}-${{ github.ref || github.ref_name }}
Expand Down Expand Up @@ -242,7 +252,7 @@ jobs:
} >> "$GITHUB_STEP_SUMMARY"

- name: Notify Slack and dispatch ClaudeBox on failure
if: failure()
if: failure() && inputs.notify_on_failure
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GH_TOKEN: ${{ secrets.AZTEC_BOT_GITHUB_TOKEN }}
Expand Down
121 changes: 101 additions & 20 deletions .github/workflows/weekly-proving-bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Weekly Real Proving Benchmark

on:
schedule:
- cron: "0 6 * * 1" # Every Monday at 6 AM UTC
- cron: "0 6 * * 1" # Every Monday at 6 AM UTC
workflow_dispatch:
inputs:
nightly_tag:
Expand All @@ -15,8 +15,11 @@ concurrency:
cancel-in-progress: true

jobs:
real-proving-benchmark:
select-image:
runs-on: ubuntu-latest
outputs:
nightly_tag: ${{ steps.nightly-tag.outputs.nightly_tag }}
docker_image: ${{ steps.nightly-tag.outputs.docker_image }}
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
Expand All @@ -32,12 +35,15 @@ jobs:
current_version=$(jq -r '."."' .release-please-manifest.json)
nightly_tag="${current_version}-nightly.$(date -u +%Y%m%d)"
fi
echo "nightly_tag=$nightly_tag" >> $GITHUB_OUTPUT

docker_image="aztecprotocol/aztec:${nightly_tag}"
echo "nightly_tag=$nightly_tag" >> "$GITHUB_OUTPUT"
echo "docker_image=$docker_image" >> "$GITHUB_OUTPUT"
echo "Using nightly tag: $nightly_tag"

- name: Check if Docker image exists
run: |
DOCKER_IMAGE="aztecprotocol/aztec:${{ steps.nightly-tag.outputs.nightly_tag }}"
DOCKER_IMAGE="${{ steps.nightly-tag.outputs.docker_image }}"
echo "Checking if Docker image exists: $DOCKER_IMAGE"
if docker manifest inspect "$DOCKER_IMAGE" > /dev/null 2>&1; then
echo "Docker image exists: $DOCKER_IMAGE"
Expand All @@ -46,6 +52,53 @@ jobs:
exit 1
fi

deploy-real-proving-network:
needs: select-image
uses: ./.github/workflows/deploy-network.yml
with:
network: prove-n-tps-real
namespace: prove-n-tps-real
aztec_docker_image: ${{ needs.select-image.outputs.docker_image }}
ref: next
notify_on_failure: false
secrets: inherit

wait-for-first-l2-block:
needs: deploy-real-proving-network
runs-on: ubuntu-latest
timeout-minutes: 120
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
ref: next

- name: Authenticate to Google Cloud
uses: google-github-actions/auth@6fc4af4b145ae7821d527454aa9bd537d1f2dc5f
with:
credentials_json: ${{ secrets.GCP_SA_KEY }}

- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@6189d56e4096ee891640bb02ac264be376592d6a
with:
install_components: gke-gcloud-auth-plugin

- name: Wait for first L2 block
env:
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
run: |
cd spartan
./bootstrap.sh wait_for_l2_block prove-n-tps-real

benchmark:
needs: wait-for-first-l2-block
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
ref: next

- name: Run real proving benchmarks
timeout-minutes: 180
env:
Expand All @@ -59,20 +112,9 @@ jobs:
RUN_ID: ${{ github.run_id }}
AWS_SHUTDOWN_TIME: 180
NO_SPOT: 1
SKIP_NETWORK_DEPLOY: "1"
run: |
./.github/ci3.sh network-proving-bench prove-n-tps-real prove-n-tps-real "aztecprotocol/aztec:${{ steps.nightly-tag.outputs.nightly_tag }}"

- name: Cleanup network resources
if: always()
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
GITHUB_TOKEN: ${{ secrets.AZTEC_BOT_GITHUB_TOKEN }}
BUILD_INSTANCE_SSH_KEY: ${{ secrets.BUILD_INSTANCE_SSH_KEY }}
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
NO_SPOT: 1
run: ./.github/ci3.sh network-teardown prove-n-tps-real prove-n-tps-real
./.github/ci3.sh network-proving-bench prove-n-tps-real prove-n-tps-real

- name: Download benchmarks
if: always()
Expand All @@ -81,7 +123,7 @@ jobs:
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: |
if ./ci.sh gh-spartan-proving-bench; then
echo "ENABLE_DEPLOY_BENCH=1" >> $GITHUB_ENV
echo "ENABLE_DEPLOY_BENCH=1" >> "$GITHUB_ENV"
fi

- name: Upload benchmarks
Expand All @@ -100,13 +142,52 @@ jobs:
fail-on-alert: false
max-items-in-chart: 100

cleanup:
if: always()
needs:
- select-image
- deploy-real-proving-network
- wait-for-first-l2-block
- benchmark
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
ref: next

- name: Cleanup network resources
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
GITHUB_TOKEN: ${{ secrets.AZTEC_BOT_GITHUB_TOKEN }}
BUILD_INSTANCE_SSH_KEY: ${{ secrets.BUILD_INSTANCE_SSH_KEY }}
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
NO_SPOT: 1
run: ./.github/ci3.sh network-teardown prove-n-tps-real prove-n-tps-real

notify-failure:
if: ${{ always() && failure() && github.event_name != 'workflow_dispatch' }}
needs:
- select-image
- deploy-real-proving-network
- wait-for-first-l2-block
- benchmark
- cleanup
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
ref: next

- name: Notify Slack and dispatch ClaudeBox on failure
if: failure() && github.event_name != 'workflow_dispatch'
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GITHUB_TOKEN: ${{ secrets.AZTEC_BOT_GITHUB_TOKEN }}
run: |
TAG="${{ steps.nightly-tag.outputs.nightly_tag }}"
TAG="${{ needs.select-image.outputs.nightly_tag || 'unknown' }}"
RUN_URL="https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
./ci3/slack_notify_with_claudebox_kickoff "#alerts-next-scenario" \
"Weekly Real Proving Benchmark FAILED (nightly tag ${TAG}): <${RUN_URL}|View Run> (🤖)" \
Expand Down
22 changes: 12 additions & 10 deletions bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -740,22 +740,24 @@ case "$cmd" in
;;
"ci-network-proving-bench")
# Args: <env_file> <namespace> [docker_image]
# Deploys network and runs proving benchmarks. Cleanup should be done separately.
# Deploys network and runs proving benchmarks. Set SKIP_NETWORK_DEPLOY=1 to run against an existing network.
export CI=1
env_file="${1:?env_file is required}"
namespace="${2:?namespace is required}"
docker_image="${3:-}"
build
# If no docker image provided, build and push to aztecdev
if [ -z "$docker_image" ]; then
release-image/bootstrap.sh push_pr
docker_image="aztecprotocol/aztecdev:$(git rev-parse HEAD)"
fi
# Set up environment and deploy using spartan
export NAMESPACE="$namespace"
export AZTEC_DOCKER_IMAGE="$docker_image"
spartan/bootstrap.sh network_deploy "${env_file}"
# Run proving benchmarks
if [ "${SKIP_NETWORK_DEPLOY:-0}" != "1" ]; then
# If no docker image provided, build and push to aztecdev
if [ -z "$docker_image" ]; then
release-image/bootstrap.sh push_pr
docker_image="aztecprotocol/aztecdev:$(git rev-parse HEAD)"
fi
export AZTEC_DOCKER_IMAGE="$docker_image"
spartan/bootstrap.sh network_deploy "${env_file}"
else
echo "SKIP_NETWORK_DEPLOY=1, running proving benchmarks against existing network '$namespace'."
fi
spartan/bootstrap.sh proving_bench "${env_file}"
rm -rf bench-out
mkdir -p bench-out
Expand Down
7 changes: 5 additions & 2 deletions ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ function print_usage {
echo_cmd "network-scenarios" "Spin up EC2 instances to run network scenario tests in parallel."
echo_cmd "network-tests" "Spin up an EC2 instance to run tests on a network."
echo_cmd "network-bench" "Spin up an EC2 instance to run benchmarks on a network."
echo_cmd "network-proving-bench" "Spin up an EC2 instance to deploy a network and run proving benchmarks. Set SKIP_NETWORK_DEPLOY=1 to skip deploy."
echo_cmd "network-bench-10tps" "Spin up an EC2 instance to run the 10 TPS benchmark on bench-10tps."
echo_cmd "network-teardown" "Spin up an EC2 instance to teardown a network deployment."
echo_cmd "network-tests-kind" "Spin up an EC2 instance to run a KIND-based spartan test."
Expand Down Expand Up @@ -253,11 +254,13 @@ case "$cmd" in
;;
network-proving-bench)
# Args: <scenario> <namespace> [docker_image]
# Deploys network and runs proving benchmarks.
# Deploys network and runs proving benchmarks. Set SKIP_NETWORK_DEPLOY=1 to run against an existing network.
export CI_DASHBOARD="network"
export JOB_ID="x-${2:?namespace is required}-network-proving-bench" CPUS=16
export INSTANCE_POSTFIX="n-proving-bench"
bootstrap_ec2 "./bootstrap.sh ci-network-proving-bench $*"
skip_network_deploy=0
[ "${SKIP_NETWORK_DEPLOY:-0}" = "1" ] && skip_network_deploy=1
bootstrap_ec2 "SKIP_NETWORK_DEPLOY=$skip_network_deploy ./bootstrap.sh ci-network-proving-bench $*"
;;
network-block-capacity-bench)
# Args: <scenario> <namespace> [docker_image]
Expand Down
7 changes: 7 additions & 0 deletions spartan/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,13 @@ case "$cmd" in
fi
fi
;;
"wait_for_l2_block")
env_file="$1"
source_env_basic "$env_file"
gcp_auth
source_network_env "$env_file"
./scripts/wait_for_l2_block.sh "$NAMESPACE"
;;
"single_test")
run_network_tests "$1" "$2"
;;
Expand Down
48 changes: 34 additions & 14 deletions spartan/scripts/wait_for_l2_block.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,61 @@
# AZTEC_SLOT_DURATION - seconds per L2 slot
# AZTEC_EPOCH_DURATION - slots per epoch
# AZTEC_LAG_IN_EPOCHS_FOR_VALIDATOR_SET - epochs to wait for validator set
# AZTEC_LAG_IN_EPOCHS_FOR_RANDAO - epochs to wait for RANDAO seed

set -euo pipefail

namespace="${1:?namespace is required}"

slot_duration="${AZTEC_SLOT_DURATION:?AZTEC_SLOT_DURATION must be set}"
epoch_duration="${AZTEC_EPOCH_DURATION:?AZTEC_EPOCH_DURATION must be set}"
lag_epochs="${AZTEC_LAG_IN_EPOCHS_FOR_VALIDATOR_SET:?AZTEC_LAG_IN_EPOCHS_FOR_VALIDATOR_SET must be set}"
validator_lag_epochs="${AZTEC_LAG_IN_EPOCHS_FOR_VALIDATOR_SET:?AZTEC_LAG_IN_EPOCHS_FOR_VALIDATOR_SET must be set}"
randao_lag_epochs="${AZTEC_LAG_IN_EPOCHS_FOR_RANDAO:-$validator_lag_epochs}"

# Time to first block = lag_epochs * epoch_duration * slot_duration + buffer
# Add 2x buffer for deployment overhead, validator registration, etc.
expected_wait=$((lag_epochs * epoch_duration * slot_duration))
max_wait=$((expected_wait * 2 + 120)) # 2x expected + 2min buffer
if [ "$validator_lag_epochs" -gt "$randao_lag_epochs" ]; then
lag_epochs="$validator_lag_epochs"
else
lag_epochs="$randao_lag_epochs"
fi

# A fresh rollup needs lag + 1 complete epochs before the first committee-backed
# block can be proposed. Add half an epoch plus 5m for deployment and RPC jitter.
warmup_epochs=$((lag_epochs + 1))
expected_wait=$((warmup_epochs * epoch_duration * slot_duration))
buffer=$((epoch_duration * slot_duration / 2 + 300))
max_wait="${L2_BLOCK_WAIT_TIMEOUT_SECONDS:-$((expected_wait + buffer))}"
poll_interval=10

echo "Waiting for L2 blocks (slot=${slot_duration}s, epoch=${epoch_duration} slots, lag=${lag_epochs} epochs)"
echo "Expected first block in ~${expected_wait}s, max wait ${max_wait}s"
echo "Waiting for L2 blocks (slot=${slot_duration}s, epoch=${epoch_duration} slots, validator_lag=${validator_lag_epochs}, randao_lag=${randao_lag_epochs})"
echo "Expected first block after ~${expected_wait}s from genesis, max wait ${max_wait}s from now"

rpc_pod="${namespace}-rpc-aztec-node-0"
block_number_request="{\"jsonrpc\":\"2.0\",\"method\":\"node_getBlockNumber\",\"params\":[],\"id\":1}"
elapsed=0
while [ $elapsed -lt $max_wait ]; do
block_number=$(kubectl exec -n "$namespace" "$rpc_pod" -- \
curl -s -X POST http://localhost:8080 \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","method":"node_getBlockNumber","params":[],"id":1}' 2>/dev/null \
| grep -o '"result":[0-9]*' | grep -o '[0-9]*' || echo "0")
block_number=$(kubectl --request-timeout=10s exec -n "$namespace" "$rpc_pod" -- \
sh -c "curl --max-time 5 -s -X POST http://localhost:8080 \
-H \"Content-Type: application/json\" \
-d \"\$1\" \
| jq -r \".result // 0\"" \
sh "$block_number_request" 2>/dev/null || echo "0")

if [ "$block_number" -ge 1 ] 2>/dev/null; then
echo "L2 block $block_number mined after ${elapsed}s"
exit 0
fi

echo "Waiting for L2 blocks... (${elapsed}s/${max_wait}s, block: ${block_number:-0})"
sleep $poll_interval
elapsed=$((elapsed + poll_interval))
sleep_for=$poll_interval
remaining=$((max_wait - elapsed))
if [ "$remaining" -lt "$sleep_for" ]; then
sleep_for=$remaining
fi
if [ "$sleep_for" -le 0 ]; then
break
fi
sleep "$sleep_for"
elapsed=$((elapsed + sleep_for))
done

echo "Warning: No L2 blocks mined after ${max_wait}s"
Expand Down
Loading