diff --git a/.semaphore/end-to-end/scripts/README.md b/.semaphore/end-to-end/scripts/README.md new file mode 100644 index 00000000000..9250540ed5c --- /dev/null +++ b/.semaphore/end-to-end/scripts/README.md @@ -0,0 +1,79 @@ +# End-to-end CI scripts + +Orchestrator scripts for the Semaphore e2e jobs. Two top-level entry points +drive two different job shapes: + +| Entry point | Job shape | +|---|---| +| `body_standard.sh` | Standard e2e: provision a cluster, install Calico, optionally migrate/upgrade, run tests | +| `body_flannel-migration.sh` | Flannel-to-Calico migration test with a pre- and post-migration test run | + +Both dispatch to single-purpose **phase scripts** under `phases/`. Each phase +is self-contained, documents its required env vars at the top, and can be +sourced individually when reproducing part of a CI run locally. + +## Phases + +| Phase | Purpose | +|---|---| +| `phases/provision.sh` | `bz provision` + Semaphore cache store | +| `phases/install.sh` | `bz install` (install Calico on the provisioned cluster) | +| `phases/configure.sh` | Post-install env setup: PATH, external-node creds, IPAM pool, failsafe ports | +| `phases/migrate.sh` | Optional operator migration, AKS migration, `bz upgrade` | +| `phases/run_tests.sh` | Acquire and run the e2e binary (local build, hashrelease download, or `bz tests` fallback) | +| `phases/hcp.sh` | Hosted control plane flow (separate provision + test tooling) | + +## Reproducing a CI run locally + +Each phase script lists its required env vars in its header comment. In the +common case, reproducing a CI job looks like: + +```bash +cd "${BZ_HOME}" +source phases/provision.sh +source phases/install.sh +source phases/configure.sh +source phases/run_tests.sh +``` + +Phases are **sourced**, not executed, so env vars exported by earlier phases +(e.g. `PATH`, `EXT_IP`) flow into later phases. Running a phase standalone +works the same way -- source it from a shell you've set up with the +required env vars. 
+ + ## Adding a new phase + +1. Create `phases/<name>.sh` with a header comment listing required env vars. +2. Omit `set -eo pipefail` from the phase -- the orchestrator sets it once + and phases inherit via sourcing. +3. Add the phase to the appropriate body script's dispatch logic. +4. Add a row to the phase table above. + +## The test runner + +`phases/run_tests.sh` selects the test execution strategy automatically: + +| Condition | Strategy | +|---|---| +| `RUN_LOCAL_TESTS` is set | Build the e2e binary from local source (per-PR CI) | +| `TEST_TYPE == k8s-e2e` | Download the pre-built binary from the hashrelease (scheduled CI) | +| Otherwise | Fall back to `bz tests` (benchmarks, certification, etc.) | + +The first two paths run the binary via `make e2e-run` inside +`calico/go-build`. Developers can use the same target directly: + +```bash +KUBECONFIG=/path/to/kubeconfig \ + E2E_TEST_CONFIG=e2e/config/gcp-bpf.yaml \ + make e2e-run +``` + +See `e2e/config/*.yaml` for available test-selection configs and +`e2e/pkg/testconfig/` for the config format. + +## Legacy notes + +- `body_flannel-migration.sh` still uses `./bz.sh tests:run` for its pre- + and post-migration test runs -- that's a different legacy runner than + `bz tests` and has tests the in-repo binary doesn't yet cover. Migrate + to `make e2e-run` when parity lands. diff --git a/.semaphore/end-to-end/scripts/body_flannel-migration.sh b/.semaphore/end-to-end/scripts/body_flannel-migration.sh index e467671b692..3e6be2f5e98 100755 --- a/.semaphore/end-to-end/scripts/body_flannel-migration.sh +++ b/.semaphore/end-to-end/scripts/body_flannel-migration.sh @@ -1,5 +1,17 @@ #!/usr/bin/env bash +# body_flannel-migration.sh - flannel-to-Calico migration test flow. +# +# Provisions a cluster, installs flannel + a CNI plugin helper, runs a basic +# connectivity smoke test, applies Calico + the flannel-migration job, waits +# for the migration to complete, then runs the full e2e suite on Calico. 
+# +# Uses the legacy `./bz.sh tests:run` test runner (not the in-repo binary). +# When the in-repo binary reaches parity, this script can migrate to +# `make e2e-run` like body_standard.sh's run_tests.sh phase. set -exo pipefail + +PHASES="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/phases" # absolute: phases are sourced after the cd to BZ_HOME below + echo "[INFO] starting job..." export CNI_VERSION=${CNI_VERSION:-"v1.1.1"} @@ -9,7 +21,7 @@ export CALICO_MANIFEST=${CALICO_MANIFEST:-"manifests/flannel-migration/calico.ya export MIGRATION_MANIFEST=${MIGRATION_MANIFEST:-"manifests/flannel-migration/migration-job.yaml"} if [ "${USE_HASH_RELEASE}" == "true" ]; then - echo "[INFO] Using hash release for flannel migration" + echo "[INFO] Using hash release for flannel migration" LATEST_HASHREL="https://latest-os.docs.eng.tigera.net/${RELEASE_STREAM}.txt" echo "Checking ${LATEST_HASHREL} for latest hash release url..." DOCS_URL=$(curl --retry 9 --retry-all-errors -sS ${LATEST_HASHREL}) @@ -28,16 +40,15 @@ export BZ_LOCAL=${BZ_HOME}/.local export KUBECONFIG=$BZ_LOCAL/kubeconfig export PATH=$PATH:$BZ_LOCAL/bin -# Seems like modern OSes no longer include br_netfilter by default which breaks flannel. Install it in case we need it. +# Modern OSes no longer include br_netfilter by default, which breaks flannel. echo "[INFO] installing br_netfilter..." sudo modprobe br_netfilter mkdir -p "$BZ_LOGS_DIR" cd "${BZ_HOME}" -bz provision |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/provision.log.gz") -cache store "$SEMAPHORE_JOB_ID" ../bz +source "${PHASES}/provision.sh" -# Install bridge CNI plugin (needed by kube-flannel manifest) +# Install bridge CNI plugin (needed by kube-flannel manifest). kubectl apply -f - <(gzip --stdout > "${BZ_LOGS_DIR}/e2e-tests-pre.log") + +# Run a basic services test to check that flannel networking is working. 
+K8S_E2E_FLAGS='--ginkgo.focus=should.serve.a.basic.endpoint.from.pods' \ + ./bz.sh tests:run |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/e2e-tests-pre.log") + kubectl delete -n kube-system ds cni-installer || true # remove the CNI installer daemonset kubectl apply -f "$DOCS_URL/$CALICO_MANIFEST" wget -O calico-migration.yaml "$DOCS_URL/$MIGRATION_MANIFEST" kubectl apply -f - < ./calico-migration.yaml -sleep 5 # to make sure the job has started before we check its status +sleep 5 # make sure the job has started before we check its status kubectl -n kube-system get jobs flannel-migration kubectl -n kube-system describe jobs flannel-migration kubectl get po -A -owide @@ -117,9 +132,11 @@ kubectl -n kube-system get jobs flannel-migration kubectl -n kube-system describe jobs flannel-migration kubectl -n kube-system logs -l k8s-app=flannel-migration-controller kubectl get po -A -owide -# delete the migration job because the presence of a non-Running pod in kube-system upsets the e2es. + +# Delete the migration job because the presence of a non-Running pod in +# kube-system upsets the e2es. kubectl -n kube-system delete job/flannel-migration || true kubectl -n kube-system delete po -l k8s-app=flannel-migration-controller || true -# Run e2e on uplevel calico +# Run e2e on uplevel calico. ./bz.sh tests:run |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/e2e-tests.log") diff --git a/.semaphore/end-to-end/scripts/body_standard.sh b/.semaphore/end-to-end/scripts/body_standard.sh index 9df97bd596d..389a684984e 100755 --- a/.semaphore/end-to-end/scripts/body_standard.sh +++ b/.semaphore/end-to-end/scripts/body_standard.sh @@ -1,139 +1,50 @@ #!/usr/bin/env bash +# body_standard.sh - orchestrator for the standard e2e flow. +# +# Dispatches to phase scripts in scripts/phases/. Each phase is self-contained +# and documents its required env vars. See scripts/README.md for the phase +# model and guidance on running phases standalone. set -eo pipefail -echo "[INFO] starting job..." 
+PHASES="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/phases" # absolute: phases are sourced after cd "${BZ_HOME}" + if [[ "${BZ_VERBOSE}" == "true" ]]; then VERBOSE="--verbose" else VERBOSE="" fi +export VERBOSE -if [[ "${HCP_ENABLED}" == "true" ]]; then - echo "[INFO] starting hcp job..." - - echo "[INFO] starting hcp provision..." - hcp-provision.sh |& tee ${BZ_LOGS_DIR}/provision.log - - cache delete ${SEMAPHORE_JOB_ID} - cache store ${SEMAPHORE_JOB_ID} ${BZ_HOME} - - echo "[INFO] Test logs will be available here after the run: ${SEMAPHORE_ORGANIZATION_URL}/artifacts/jobs/${SEMAPHORE_JOB_ID}?path=semaphore%2Flogs" - echo "[INFO] Alternatively, you can view logs while job is running using 'sem attach ${SEMAPHORE_JOB_ID}' and then 'tail -f ${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log'" - - echo "[INFO] starting hcp testing..." - hcp-test.sh |& tee ${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log - -else - echo "[INFO] starting job..." - echo "[INFO] BZ_HOME=${BZ_HOME}" - - cd "${BZ_HOME}" - if [[ "${HCP_STAGE}" == "hosting" || "${HCP_STAGE}" == "destroy-hosting" ]]; then - : # Skip provisioning for hosting stages as cluster already exists - else - echo "[INFO] starting bz provision..." - bz provision $VERBOSE |& tee >(gzip --stdout > ${BZ_LOGS_DIR}/provision.log.gz) - - cache delete $SEMAPHORE_JOB_ID - cache store ${SEMAPHORE_JOB_ID} ${BZ_HOME} - - echo "[INFO] starting bz install..." 
- bz install $VERBOSE |& tee >(gzip --stdout > ${BZ_LOGS_DIR}/install.log.gz) - - if [[ "${HCP_STAGE}" == "setup-hosting" ]]; then - echo "[INFO] HCP_STAGE=${HCP_STAGE}, storing hosting cluster profile in cache" - cache store ${SEMAPHORE_WORKFLOW_ID}-hosting-${HOSTING_CLUSTER} ${BZ_HOME} - fi - fi - - # Put the bin dir into the PATH - export PATH=$PATH:${BZ_LOCAL_DIR}/bin - - if [[ "${ENABLE_EXTERNAL_NODE}" == "true" ]]; then - export EXT_USER=ubuntu - EXT_IP=$(cat "${BZ_LOCAL_DIR}"/external_ip) - export EXT_IP - export EXT_KEY=${BZ_LOCAL_DIR}/external_key - export K8S_E2E_DOCKER_EXTRA_FLAGS="-v $EXT_KEY:/key --env EXT_USER --env EXT_KEY=/key --env EXT_IP $K8S_E2E_DOCKER_EXTRA_FLAGS" - echo "EXT_USER=ubuntu EXT_IP=$EXT_IP, EXT_KEY=$EXT_KEY" - echo "K8S_E2E_DOCKER_EXTRA_FLAGS=$K8S_E2E_DOCKER_EXTRA_FLAGS" - fi - - if [ -n "${IPAM_TEST_POOL_SUBNET}" ]; then - export K8S_E2E_DOCKER_EXTRA_FLAGS="$K8S_E2E_DOCKER_EXTRA_FLAGS --env IPAM_TEST_POOL_SUBNET" - echo "IPAM_TEST_POOL_SUBNET=$IPAM_TEST_POOL_SUBNET" - fi +echo "[INFO] starting job..." +echo "[INFO] BZ_HOME=${BZ_HOME}" - if [ "${FAILSAFE_443}" == "true" ]; then - KUBECONFIG=${BZ_LOCAL_DIR}/kubeconfig kubectl patch felixconfiguration default --type=merge -p '{"spec":{"failsafeOutboundHostPorts": [{"protocol": "udp", "port":53},{"protocol": "udp", "port":67},{"protocol": "tcp", "port":179},{"protocol": "tcp", "port":2379},{"protocol": "tcp", "port":2380},{"protocol": "tcp", "port":5473},{"protocol": "tcp", "port":443},{"protocol": "tcp", "port":6666},{"protocol": "tcp", "port":6667}]}}' - fi +# HCP jobs take a separate path with their own provision/test tooling. 
+if [[ "${HCP_ENABLED}" == "true" ]]; then + source "${PHASES}/hcp.sh" + exit 0 +fi - # Perform the operator migration following the instructions here: - # https://projectcalico.docs.tigera.io/maintenance/operator-migration - if [[ -n "$OPERATOR_MIGRATE" ]]; then - ${HOME}/${SEMAPHORE_GIT_DIR}/.semaphore/end-to-end/scripts/test_scripts/operator_migrate.sh |& tee >(gzip --stdout > ${BZ_LOGS_DIR}/operator_migrate.log.gz) - fi - # Perform the AKS migration following the instructions here: - # https://docs.tigera.io/calico/latest/getting-started/kubernetes/managed-public-cloud/aks-migrate - if [[ -n "$DESIRED_POLICY" ]]; then - echo "[INFO] starting AKS migration..." - bz addons run aks-migrate:setup - fi +cd "${BZ_HOME}" - if [[ -n "$UPLEVEL_RELEASE_STREAM" ]]; then - echo "[INFO] starting bz upgrade..." - bz upgrade $VERBOSE | tee >(gzip --stdout > ${BZ_LOGS_DIR}/upgrade.log.gz) - fi +# HCP hosting/destroy-hosting stages join an existing cluster provisioned by a +# prior workflow step, so they skip provisioning and install entirely. +if [[ "${HCP_STAGE}" != "hosting" && "${HCP_STAGE}" != "destroy-hosting" ]]; then + source "${PHASES}/provision.sh" + source "${PHASES}/install.sh" +fi - if [[ ${MCM_STAGE:-} != *-mgmt* ]] && [[ ${HCP_STAGE:-} != *-hosting* ]]; then - echo "[INFO] Test logs will be available here after the run: ${SEMAPHORE_ORGANIZATION_URL}/artifacts/jobs/${SEMAPHORE_JOB_ID}?path=semaphore%2Flogs" - echo "[INFO] Alternatively, you can view logs while job is running using 'sem attach ${SEMAPHORE_JOB_ID}' and then 'tail -f ${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log'" +source "${PHASES}/configure.sh" +source "${PHASES}/migrate.sh" - if [[ -n "$RUN_LOCAL_TESTS" ]]; then - echo "[INFO] starting e2e testing from local binary..." 
- pushd "${HOME}/calico" - make -C e2e build |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/${TEST_TYPE}-build.log.gz") - GO_BUILD_VER=$(grep '^GO_BUILD_VER=' ./metadata.mk | cut -d= -f2) - # Disable shellcheck double quote validation for ${K8S_E2E_FLAGS} as this var can contain multiple args and should be word split - # Capture the exit code so that the JUnit copy below runs even when - # tests fail (set -e would otherwise bail out before the cp). - #shellcheck disable=SC2086 - e2e_rc=0 - docker run --rm --init --net=host \ - -e LOCAL_USER_ID="$(id -u)" \ - -e GOCACHE=/go-cache \ - -e GOPATH=/go \ - -e KUBECONFIG=/kubeconfig \ - -e PRODUCT=calico \ - -e CREATE_WINDOWS_NODES \ - -e FUNCTIONAL_AREA \ - -e INSTALLER \ - -e PROVISIONER \ - -e K8S_VERSION \ - -e DATAPLANE \ - -e ENCAPSULATION_TYPE \ - -e WINDOWS_OS \ - -e USE_VENDORED_CNI \ - -v "$(pwd)":/go/src/github.com/projectcalico/calico:rw \ - -v "$(pwd)"/.go-pkg-cache:/go-cache:rw \ - -v "${BZ_LOCAL_DIR}/kubeconfig:/kubeconfig:ro" \ - -w /go/src/github.com/projectcalico/calico \ - "calico/go-build:${GO_BUILD_VER}" \ - go run github.com/onsi/ginkgo/v2/ginkgo -procs="${E2E_PROCS:-4}" \ - --junit-report=junit.xml --output-dir=report \ - ./e2e/bin/k8s/e2e.test -- ${K8S_E2E_FLAGS} \ - |& tee "${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log" || e2e_rc=$? +# MCM (Multi-Cluster Management) management stages and HCP hosting stages +# only provision infrastructure for other jobs to test against - they don't +# run tests themselves. These are enterprise-only flows; MCM_STAGE and +# HCP_STAGE are unset for OSS jobs. +if [[ ${MCM_STAGE:-} == *-mgmt* || ${HCP_STAGE:-} == *-hosting* ]]; then + exit 0 +fi - # Copy JUnit XML to REPORT_DIR so the epilogue publishes it. 
- mkdir -p "${REPORT_DIR}" - cp report/junit.xml "${REPORT_DIR}/junit.xml" 2>/dev/null || true - popd +echo "[INFO] Test logs will be available here after the run: ${SEMAPHORE_ORGANIZATION_URL}/artifacts/jobs/${SEMAPHORE_JOB_ID}?path=semaphore%2Flogs" +echo "[INFO] Alternatively, you can view logs while job is running using 'sem attach ${SEMAPHORE_JOB_ID}' and then 'tail -f ${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log'" - # Propagate the original test exit code. - exit $e2e_rc - else - echo "[INFO] starting bz testing..." - bz tests $VERBOSE |& tee >(gzip --stdout > ${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log.gz) - fi - fi -fi +source "${PHASES}/run_tests.sh" diff --git a/.semaphore/end-to-end/scripts/phases/configure.sh b/.semaphore/end-to-end/scripts/phases/configure.sh new file mode 100755 index 00000000000..d53b6c1aaa7 --- /dev/null +++ b/.semaphore/end-to-end/scripts/phases/configure.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# configure.sh - configure test environment after cluster install. +# +# Sets PATH to include the bz-provisioned bin dir, exports external-node +# credentials when ENABLE_EXTERNAL_NODE=true, propagates IPAM test config, +# and applies the optional failsafe patch. +# +# Required env: +# BZ_LOCAL_DIR +# Optional env: +# ENABLE_EXTERNAL_NODE, IPAM_TEST_POOL_SUBNET, FAILSAFE_443, +# K8S_E2E_DOCKER_EXTRA_FLAGS +# +# Exports consumed by later phases: +# PATH, EXT_USER, EXT_IP, EXT_KEY, K8S_E2E_DOCKER_EXTRA_FLAGS +# +# Sourced from body_*.sh. 
+ +if [[ -z "${BZ_LOCAL_DIR}" ]]; then echo "[ERROR] BZ_LOCAL_DIR is required but not set"; exit 1; fi + +export PATH=$PATH:${BZ_LOCAL_DIR}/bin + +if [[ "${ENABLE_EXTERNAL_NODE}" == "true" ]]; then + export EXT_USER=ubuntu + EXT_IP=$(cat "${BZ_LOCAL_DIR}/external_ip") + export EXT_IP + export EXT_KEY=${BZ_LOCAL_DIR}/external_key + export K8S_E2E_DOCKER_EXTRA_FLAGS="-v $EXT_KEY:/key --env EXT_USER --env EXT_KEY=/key --env EXT_IP $K8S_E2E_DOCKER_EXTRA_FLAGS" + echo "EXT_USER=ubuntu EXT_IP=$EXT_IP, EXT_KEY=$EXT_KEY" + echo "K8S_E2E_DOCKER_EXTRA_FLAGS=$K8S_E2E_DOCKER_EXTRA_FLAGS" +fi + +if [ -n "${IPAM_TEST_POOL_SUBNET}" ]; then + export K8S_E2E_DOCKER_EXTRA_FLAGS="$K8S_E2E_DOCKER_EXTRA_FLAGS --env IPAM_TEST_POOL_SUBNET" + echo "IPAM_TEST_POOL_SUBNET=$IPAM_TEST_POOL_SUBNET" +fi + +# Some pipelines (e.g., VPP) need port 443 added to the failsafe outbound rules +# so nodes can reach the kube-apiserver. This replaces the default failsafe list +# with one that includes 443. TODO: consider making 443 a default failsafe port +# so this patch isn't needed. +if [ "${FAILSAFE_443}" == "true" ]; then + KUBECONFIG=${BZ_LOCAL_DIR}/kubeconfig kubectl patch felixconfiguration default --type=merge \ + -p '{"spec":{"failsafeOutboundHostPorts": [{"protocol": "udp", "port":53},{"protocol": "udp", "port":67},{"protocol": "tcp", "port":179},{"protocol": "tcp", "port":2379},{"protocol": "tcp", "port":2380},{"protocol": "tcp", "port":5473},{"protocol": "tcp", "port":443},{"protocol": "tcp", "port":6666},{"protocol": "tcp", "port":6667}]}}' +fi diff --git a/.semaphore/end-to-end/scripts/phases/hcp.sh b/.semaphore/end-to-end/scripts/phases/hcp.sh new file mode 100755 index 00000000000..b0f6a8a3eca --- /dev/null +++ b/.semaphore/end-to-end/scripts/phases/hcp.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# hcp.sh - hosted control plane (HCP) provision + test flow. 
+# +# HCP jobs have their own provisioning tool (hcp-provision.sh) and test +# runner (hcp-test.sh), and don't share phases with the standard bz flow. +# +# Required env: +# BZ_HOME, BZ_LOGS_DIR, SEMAPHORE_JOB_ID, SEMAPHORE_ORGANIZATION_URL, +# TEST_TYPE +# +# Sourced from body_*.sh. + +for _var in BZ_HOME BZ_LOGS_DIR SEMAPHORE_JOB_ID SEMAPHORE_ORGANIZATION_URL TEST_TYPE; do + if [[ -z "${!_var}" ]]; then echo "[ERROR] ${_var} is required but not set"; exit 1; fi +done + +echo "[INFO] starting hcp job..." + +echo "[INFO] starting hcp provision..." +hcp-provision.sh |& tee "${BZ_LOGS_DIR}/provision.log" + +cache delete "${SEMAPHORE_JOB_ID}" +cache store "${SEMAPHORE_JOB_ID}" "${BZ_HOME}" + +echo "[INFO] Test logs will be available here after the run: ${SEMAPHORE_ORGANIZATION_URL}/artifacts/jobs/${SEMAPHORE_JOB_ID}?path=semaphore%2Flogs" +echo "[INFO] Alternatively, you can view logs while job is running using 'sem attach ${SEMAPHORE_JOB_ID}' and then 'tail -f ${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log'" + +echo "[INFO] starting hcp testing..." +hcp-test.sh |& tee "${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log" diff --git a/.semaphore/end-to-end/scripts/phases/install.sh b/.semaphore/end-to-end/scripts/phases/install.sh new file mode 100755 index 00000000000..4c8fff0b831 --- /dev/null +++ b/.semaphore/end-to-end/scripts/phases/install.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# install.sh - install Calico onto a provisioned cluster via bz. +# +# Required env: +# BZ_HOME, BZ_LOGS_DIR +# Optional env: +# VERBOSE, HCP_STAGE, HOSTING_CLUSTER, SEMAPHORE_WORKFLOW_ID +# +# Sourced from body_*.sh. Assumes cwd == $BZ_HOME. + +for _var in BZ_HOME BZ_LOGS_DIR; do + if [[ -z "${!_var}" ]]; then echo "[ERROR] ${_var} is required but not set"; exit 1; fi +done + +echo "[INFO] starting bz install..." 
+bz install ${VERBOSE} |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/install.log.gz") + +if [[ "${HCP_STAGE}" == "setup-hosting" ]]; then + echo "[INFO] HCP_STAGE=${HCP_STAGE}, storing hosting cluster profile in cache" + cache store "${SEMAPHORE_WORKFLOW_ID}-hosting-${HOSTING_CLUSTER}" "${BZ_HOME}" +fi diff --git a/.semaphore/end-to-end/scripts/phases/migrate.sh b/.semaphore/end-to-end/scripts/phases/migrate.sh new file mode 100755 index 00000000000..39f904298be --- /dev/null +++ b/.semaphore/end-to-end/scripts/phases/migrate.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# migrate.sh - optional cluster modifications before tests run. +# +# Handles three independent migration/upgrade workflows, each gated on its +# own env var: +# OPERATOR_MIGRATE - run the operator migration script +# (see https://projectcalico.docs.tigera.io/maintenance/operator-migration) +# DESIRED_POLICY - run the AKS migration add-on +# (see https://docs.tigera.io/calico/latest/getting-started/kubernetes/managed-public-cloud/aks-migrate) +# UPLEVEL_RELEASE_STREAM - run bz upgrade +# +# Required env: +# BZ_LOGS_DIR, HOME, SEMAPHORE_GIT_DIR +# Optional env: +# VERBOSE +# +# Sourced from body_*.sh. + +for _var in BZ_LOGS_DIR HOME SEMAPHORE_GIT_DIR; do + if [[ -z "${!_var}" ]]; then echo "[ERROR] ${_var} is required but not set"; exit 1; fi +done + +if [[ -n "${OPERATOR_MIGRATE}" ]]; then + "${HOME}/${SEMAPHORE_GIT_DIR}/.semaphore/end-to-end/scripts/test_scripts/operator_migrate.sh" \ + |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/operator_migrate.log.gz") +fi + +if [[ -n "${DESIRED_POLICY}" ]]; then + echo "[INFO] starting AKS migration..." + bz addons run aks-migrate:setup +fi + +if [[ -n "${UPLEVEL_RELEASE_STREAM}" ]]; then + echo "[INFO] starting bz upgrade..." 
+ bz upgrade ${VERBOSE} |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/upgrade.log.gz") +fi diff --git a/.semaphore/end-to-end/scripts/phases/provision.sh b/.semaphore/end-to-end/scripts/phases/provision.sh new file mode 100755 index 00000000000..18ca0e8c247 --- /dev/null +++ b/.semaphore/end-to-end/scripts/phases/provision.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# provision.sh - provision a test cluster with bz. +# +# Required env: +# BZ_HOME, BZ_LOGS_DIR, SEMAPHORE_JOB_ID +# Optional env: +# VERBOSE (e.g. "--verbose") +# +# Sourced from body_*.sh. Assumes cwd == $BZ_HOME. Safe to run standalone once +# env is set. + +for _var in BZ_HOME BZ_LOGS_DIR SEMAPHORE_JOB_ID; do + if [[ -z "${!_var}" ]]; then echo "[ERROR] ${_var} is required but not set"; exit 1; fi +done + +echo "[INFO] starting bz provision..." +bz provision ${VERBOSE} |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/provision.log.gz") + +cache delete "${SEMAPHORE_JOB_ID}" +cache store "${SEMAPHORE_JOB_ID}" "${BZ_HOME}" diff --git a/.semaphore/end-to-end/scripts/phases/run_tests.sh b/.semaphore/end-to-end/scripts/phases/run_tests.sh new file mode 100755 index 00000000000..089a2b1294a --- /dev/null +++ b/.semaphore/end-to-end/scripts/phases/run_tests.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# run_tests.sh - acquire an e2e binary and run it, or defer to bz tests. +# +# Three modes, selected automatically: +# 1. RUN_LOCAL_TESTS is set → build the e2e binary from local source +# (per-PR CI / GCP e2e block). +# 2. TEST_TYPE == k8s-e2e → download the pre-built binary from the +# hashrelease (scheduled CI). +# 3. Otherwise → fall back to `bz tests` for non-e2e test +# types (benchmarks, certification, etc.). +# +# Required env: +# BZ_LOCAL_DIR, BZ_LOGS_DIR, HOME, REPORT_DIR, TEST_TYPE +# Required for local builds: +# E2E_TEST_CONFIG +# Required for hashrelease downloads: +# RELEASE_STREAM +# +# Sourced from body_*.sh. Exits with the test exit code. 
+ +for _var in BZ_LOCAL_DIR BZ_LOGS_DIR HOME REPORT_DIR TEST_TYPE; do + if [[ -z "${!_var}" ]]; then echo "[ERROR] ${_var} is required but not set"; exit 1; fi +done + +if [[ -n "${RUN_LOCAL_TESTS:-}" ]]; then + # Per-PR CI: build the e2e binary from the local source tree. + echo "[INFO] building e2e binary from local source..." + pushd "${HOME}/calico" || exit + make -C e2e build |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/${TEST_TYPE}-build.log.gz") + E2E_BINARY=/go/src/github.com/projectcalico/calico/e2e/bin/k8s/e2e.test + popd || exit +elif [[ "${TEST_TYPE}" == "k8s-e2e" ]]; then + # Scheduled CI: download the pre-built e2e binary from the hashrelease. + echo "[INFO] downloading e2e binary from hashrelease..." + HASHREL_URL=$(curl --retry 9 --retry-all-errors -fsS "https://latest-os.docs.eng.tigera.net/${RELEASE_STREAM:?RELEASE_STREAM is required for hashrelease downloads}.txt") + echo "[INFO] hashrelease URL: ${HASHREL_URL}" + ARCH=$(uname -m); [[ "$ARCH" == "x86_64" ]] && ARCH=amd64; [[ "$ARCH" == "aarch64" ]] && ARCH=arm64 + mkdir -p "${HOME}/calico/e2e/bin/k8s" + curl --retry 9 --retry-all-errors -fsSL "${HASHREL_URL}/files/e2e/e2e-linux-${ARCH}.test" -o "${HOME}/calico/e2e/bin/k8s/e2e.test" + chmod +x "${HOME}/calico/e2e/bin/k8s/e2e.test" + echo "[INFO] downloaded e2e binary to ${HOME}/calico/e2e/bin/k8s/e2e.test" + E2E_BINARY=/go/src/github.com/projectcalico/calico/e2e/bin/k8s/e2e.test +fi + +if [[ -n "${E2E_BINARY:-}" ]]; then + # Run the e2e binary directly via ginkgo inside calico/go-build. + echo "[INFO] starting e2e tests..." + pushd "${HOME}/calico" || exit + GO_BUILD_VER=$(grep '^GO_BUILD_VER=' ./metadata.mk | cut -d= -f2) + + # Capture the exit code so the JUnit copy below runs even when tests fail + # (set -e would otherwise bail out before the cp). 
+ e2e_rc=0 + docker run --rm --init --net=host \ + -e LOCAL_USER_ID="$(id -u)" \ + -e GOCACHE=/go-cache \ + -e GOPATH=/go \ + -e KUBECONFIG=/kubeconfig \ + -e PRODUCT="${PRODUCT:-calico}" \ + ${K8S_E2E_DOCKER_EXTRA_FLAGS:-} \ + -v "$(pwd)":/go/src/github.com/projectcalico/calico:rw \ + -v "$(pwd)"/.go-pkg-cache:/go-cache:rw \ + -v "${BZ_LOCAL_DIR}/kubeconfig:/kubeconfig:ro" \ + -w /go/src/github.com/projectcalico/calico \ + "calico/go-build:${GO_BUILD_VER}" \ + make e2e-run \ + KUBECONFIG=/kubeconfig \ + E2E_TEST_CONFIG="${E2E_TEST_CONFIG}" \ + E2E_OUTPUT_DIR=report \ + E2E_JUNIT_REPORT=junit.xml \ + |& tee "${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log" || e2e_rc=$? + + # Copy JUnit XML to REPORT_DIR so the epilogue publishes it. + mkdir -p "${REPORT_DIR}" + cp report/junit.xml "${REPORT_DIR}/junit.xml" 2>/dev/null || true + popd || exit + + # Propagate the original test exit code. + exit "${e2e_rc}" +else + # Non-e2e test types (benchmarks, certification, etc.) -- defer to bz. + echo "[INFO] starting bz testing..." 
+ bz tests ${VERBOSE} |& tee >(gzip --stdout > "${BZ_LOGS_DIR}/${TEST_TYPE}-tests.log.gz") +fi diff --git a/.semaphore/semaphore-scheduled-builds.yml b/.semaphore/semaphore-scheduled-builds.yml index f06cc8628f5..0d01f04836e 100644 --- a/.semaphore/semaphore-scheduled-builds.yml +++ b/.semaphore/semaphore-scheduled-builds.yml @@ -607,22 +607,16 @@ blocks: value: calico - name: FUNCTIONAL_AREA value: "e2e-gcp.yml" - - name: RUN_LOCAL_TESTS - value: "true" - name: USE_PRIVATE_REGISTRY value: "true" - name: PRIVATE_REGISTRY value: "quay.io/" - - name: K8S_E2E_FLAGS - # Label filter selects sig-calico tests and excludes: - # - Slow: long-running tests not suitable for CI - # - Disruptive: tests that break cluster state for other tests - # - RequiresBGPMesh: requires BGP routing, but this cluster uses VXLAN - # - NoEncap: requires non-encapsulated routing, but this cluster uses VXLAN - # - Feature:Istio: Calico policy not enforced with Istio ambient on BPF dataplane - # - ExternalNode: requires a non-cluster node with EXT_IP/EXT_KEY/EXT_USER configured - # - RequiresXtables: requires iptables or nftables dataplane, but this cluster uses BPF - value: "--ginkgo.label-filter=sig-calico&&!Slow&&!Disruptive&&!RequiresBGPMesh&&!NoEncap&&!Feature:Istio&&!ExternalNode&&!RequiresXtables" + - name: RUN_LOCAL_TESTS + value: "true" + - name: E2E_TEST_CONFIG + # Test selection is driven by the config file; see gcp-bpf.yaml for the + # include/exclude rationale. 
+ value: "e2e/config/gcp-bpf.yaml" - name: USE_API_SERVER value: "false" jobs: diff --git a/.semaphore/semaphore.yml b/.semaphore/semaphore.yml index 41d1a90a9e5..b4ad7ea3428 100644 --- a/.semaphore/semaphore.yml +++ b/.semaphore/semaphore.yml @@ -1035,22 +1035,16 @@ blocks: value: calico - name: FUNCTIONAL_AREA value: "e2e-gcp.yml" - - name: RUN_LOCAL_TESTS - value: "true" - name: USE_PRIVATE_REGISTRY value: "true" - name: PRIVATE_REGISTRY value: "quay.io/" - - name: K8S_E2E_FLAGS - # Label filter selects sig-calico tests and excludes: - # - Slow: long-running tests not suitable for CI - # - Disruptive: tests that break cluster state for other tests - # - RequiresBGPMesh: requires BGP routing, but this cluster uses VXLAN - # - NoEncap: requires non-encapsulated routing, but this cluster uses VXLAN - # - Feature:Istio: Calico policy not enforced with Istio ambient on BPF dataplane - # - ExternalNode: requires a non-cluster node with EXT_IP/EXT_KEY/EXT_USER configured - # - RequiresXtables: requires iptables or nftables dataplane, but this cluster uses BPF - value: "--ginkgo.label-filter=sig-calico&&!Slow&&!Disruptive&&!RequiresBGPMesh&&!NoEncap&&!Feature:Istio&&!ExternalNode&&!RequiresXtables" + - name: RUN_LOCAL_TESTS + value: "true" + - name: E2E_TEST_CONFIG + # Test selection is driven by the config file; see gcp-bpf.yaml for the + # include/exclude rationale. 
+ value: "e2e/config/gcp-bpf.yaml" - name: USE_API_SERVER value: "false" jobs: diff --git a/.semaphore/semaphore.yml.d/blocks/20-e2e-gcp.yml b/.semaphore/semaphore.yml.d/blocks/20-e2e-gcp.yml index dc678646950..23c5e20785b 100644 --- a/.semaphore/semaphore.yml.d/blocks/20-e2e-gcp.yml +++ b/.semaphore/semaphore.yml.d/blocks/20-e2e-gcp.yml @@ -22,22 +22,16 @@ value: calico - name: FUNCTIONAL_AREA value: "e2e-gcp.yml" - - name: RUN_LOCAL_TESTS - value: "true" - name: USE_PRIVATE_REGISTRY value: "true" - name: PRIVATE_REGISTRY value: "quay.io/" - - name: K8S_E2E_FLAGS - # Label filter selects sig-calico tests and excludes: - # - Slow: long-running tests not suitable for CI - # - Disruptive: tests that break cluster state for other tests - # - RequiresBGPMesh: requires BGP routing, but this cluster uses VXLAN - # - NoEncap: requires non-encapsulated routing, but this cluster uses VXLAN - # - Feature:Istio: Calico policy not enforced with Istio ambient on BPF dataplane - # - ExternalNode: requires a non-cluster node with EXT_IP/EXT_KEY/EXT_USER configured - # - RequiresXtables: requires iptables or nftables dataplane, but this cluster uses BPF - value: "--ginkgo.label-filter=sig-calico&&!Slow&&!Disruptive&&!RequiresBGPMesh&&!NoEncap&&!Feature:Istio&&!ExternalNode&&!RequiresXtables" + - name: RUN_LOCAL_TESTS + value: "true" + - name: E2E_TEST_CONFIG + # Test selection is driven by the config file; see gcp-bpf.yaml for the + # include/exclude rationale. 
+ value: "e2e/config/gcp-bpf.yaml" - name: USE_API_SERVER value: "false" jobs: diff --git a/Makefile b/Makefile index c1663f418ad..24c4839866b 100644 --- a/Makefile +++ b/Makefile @@ -228,8 +228,11 @@ push-chart: bin/helm ############################################################################### E2E_PROCS ?= 4 E2E_TEST_CONFIG ?= e2e/config/kind.yaml +E2E_OUTPUT_DIR ?= report +E2E_JUNIT_REPORT ?= e2e_conformance.xml K8S_NETPOL_SUPPORTED_FEATURES ?= "ClusterNetworkPolicy,ClusterNetworkPolicyNamedPorts" K8S_NETPOL_UNSUPPORTED_FEATURES ?= "" +CLUSTER_ROUTING ?= BIRD ## Build all test images, create a kind cluster, and deploy Calico on it. .PHONY: kind-up @@ -248,24 +251,27 @@ kind-migration-test: ## Create a kind cluster and run all e2e tests. e2e-test: $(MAKE) -C e2e build - $(MAKE) kind-up - $(MAKE) e2e-run-test - $(MAKE) e2e-run-cnp-test + CLUSTER_ROUTING=$(CLUSTER_ROUTING) $(MAKE) kind-up + $(MAKE) e2e-run KUBECONFIG=$(KIND_KUBECONFIG) + $(MAKE) e2e-run-cnp KUBECONFIG=$(KIND_KUBECONFIG) ## Create a kind cluster and run the ClusterNetworkPolicy specific e2e tests. e2e-test-clusternetworkpolicy: $(MAKE) -C e2e build - $(MAKE) kind-up - $(MAKE) e2e-run-cnp-test - -## Run the general e2e tests against a pre-existing kind cluster. -e2e-run-test: - mkdir -p report - KUBECONFIG=$(KIND_KUBECONFIG) go run github.com/onsi/ginkgo/v2/ginkgo -procs=$(E2E_PROCS) --junit-report=e2e_conformance.xml --output-dir=report/ ./e2e/bin/k8s/e2e.test -- --calico.test-config=$(abspath $(E2E_TEST_CONFIG)) - -## Run the ClusterNetworkPolicy specific e2e tests against a pre-existing kind cluster. -e2e-run-cnp-test: - KUBECONFIG=$(KIND_KUBECONFIG) ./e2e/bin/clusternetworkpolicy/e2e.test \ + CLUSTER_ROUTING=$(CLUSTER_ROUTING) $(MAKE) kind-up + $(MAKE) e2e-run-cnp KUBECONFIG=$(KIND_KUBECONFIG) + +## Run the general e2e tests against the cluster at $KUBECONFIG. +## Callers must set KUBECONFIG explicitly (e.g. $(KIND_KUBECONFIG) for kind). 
+e2e-run: + @if [ -z "$(KUBECONFIG)" ]; then echo "e2e-run: KUBECONFIG must be set"; exit 1; fi + mkdir -p $(E2E_OUTPUT_DIR) + KUBECONFIG=$(KUBECONFIG) go run github.com/onsi/ginkgo/v2/ginkgo -procs=$(E2E_PROCS) --junit-report=$(E2E_JUNIT_REPORT) --output-dir=$(E2E_OUTPUT_DIR)/ ./e2e/bin/k8s/e2e.test -- --calico.test-config=$(abspath $(E2E_TEST_CONFIG)) + +## Run the ClusterNetworkPolicy specific e2e tests against the cluster at $KUBECONFIG. +e2e-run-cnp: + @if [ -z "$(KUBECONFIG)" ]; then echo "e2e-run-cnp: KUBECONFIG must be set"; exit 1; fi + KUBECONFIG=$(KUBECONFIG) ./e2e/bin/clusternetworkpolicy/e2e.test \ -exempt-features=$(K8S_NETPOL_UNSUPPORTED_FEATURES) \ -supported-features=$(K8S_NETPOL_SUPPORTED_FEATURES) diff --git a/e2e/config/gcp-bpf.yaml b/e2e/config/gcp-bpf.yaml index d6a8e62bd4c..ddec7b1c1e1 100644 --- a/e2e/config/gcp-bpf.yaml +++ b/e2e/config/gcp-bpf.yaml @@ -1,12 +1,26 @@ # gcp-bpf.yaml - GCP kubeadm with BPF dataplane. # # Used by the in-repo e2e CI block (20-e2e-gcp.yml). Runs sig-calico tests -# and networking conformance on a VXLAN cluster. +# on a VXLAN cluster with the BPF dataplane. +# +# This config is standalone (doesn't extend base.yaml) because it uses a +# narrower include scope (sig-calico only, no sig-network Conformance) that +# can't be expressed as an additive extension of base's broader includes. -extends: base.yaml +include: + - sig-calico exclude: labels: + - label: Slow + reason: "long-running tests not suitable for CI" + + - label: Disruptive + reason: "breaks cluster state for other parallel tests" + + - label: ExternalNode + reason: "requires a non-cluster node with EXT_IP/EXT_KEY/EXT_USER configured" + - label: RequiresBGPMesh reason: "requires BGP routing, but this cluster uses VXLAN"