diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml index 939ce9e91b0..2ea1a167763 100644 --- a/docker-compose.dev.yaml +++ b/docker-compose.dev.yaml @@ -1,5 +1,3 @@ -version: '3.9' - services: postgres: image: postgres:16-alpine @@ -53,7 +51,7 @@ services: - neo4j_data:/data - neo4j_logs:/logs healthcheck: - test: [ 'CMD-SHELL', 'cypher-shell -u ${NEO4J_USERNAME:-neo4j} -p ${NEO4J_PASSWORD} "RETURN 1"' ] + test: [ 'CMD-SHELL', 'cypher-shell -u ${NEO4J_USERNAME:-neo4j} -p ${NEO4J_PASSWORD:-summit_dev_pw} "RETURN 1"' ] interval: 15s timeout: 10s retries: 10 @@ -128,7 +126,7 @@ services: OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4318 OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: http://otel-collector:4318/v1/metrics ports: - - '4000:4000' + - '4001:4000' depends_on: postgres: condition: service_healthy @@ -147,6 +145,32 @@ services: - summit labels: - 'prometheus.job=summit_api' + - 'prometheus.port=4001' + + api-gateway: + build: + context: . + dockerfile: services/api-gateway/Dockerfile + container_name: summit-api-gateway + restart: unless-stopped + environment: + - PORT=4000 + - NODE_ENV=development + - GRAPH_SERVICE_URL=http://server:4000 + ports: + - '4000:4000' + depends_on: + server: + condition: service_healthy + networks: + - summit + healthcheck: + test: ['CMD', 'curl', '-f', 'http://localhost:4000/health/live'] + interval: 15s + timeout: 5s + retries: 10 + labels: + - 'prometheus.job=summit_api_gateway' - 'prometheus.port=4000' nginx: @@ -155,31 +179,36 @@ services: restart: unless-stopped ports: - '4100:4100' - - '9464:9464' # Prometheus port for Nginx metrics if exposed + - '9464:9464' volumes: - ./services/nginx/nginx.conf:/etc/nginx/nginx.conf:ro depends_on: server: condition: service_healthy - # Update dependencies if they were tied to the old gateway service - # Assuming 'api' might have been a placeholder dependency or also proxied - # policy-compiler: - # condition: service_healthy - # typesense: - # condition: service_healthy networks: - summit labels: - 'prometheus.job=summit_nginx_gateway' - 'prometheus.port=4100' - - 'prometheus.path=/health' # Assuming Nginx exposes /health for healthcheck + - 'prometheus.path=/health' prov-ledger: - build: ./services/prov-ledger + build: + context: . + dockerfile: services/prov-ledger/Dockerfile ports: [ "4010:4010" ] + healthcheck: + test: [ 'CMD', 'curl', '-f', 'http://localhost:4010/health' ] + interval: 15s + timeout: 5s + retries: 10 + networks: + - summit policy-compiler: - build: ./services/policy-compiler + build: + context: . + dockerfile: services/policy-compiler/Dockerfile ports: [ "8102:8080" ] healthcheck: test: [ "CMD", "curl", "-f", "http://localhost:8080/health" ] @@ -190,43 +219,81 @@ services: - summit ai-nlq: - build: ./services/ai-nlq + build: + context: . + dockerfile: services/ai-nlq/Dockerfile ports: [ "8103:8080" ] + healthcheck: + test: [ 'CMD', 'curl', '-f', 'http://localhost:8080/health' ] + interval: 15s + timeout: 5s + retries: 10 + networks: + - summit er-service: - build: ./services/er-service + build: + context: . + dockerfile: services/er-service/Dockerfile ports: [ "8104:8080" ] + healthcheck: + test: [ 'CMD', 'curl', '-f', 'http://localhost:8080/health' ] + interval: 15s + timeout: 5s + retries: 10 + networks: + - summit ingest: - build: ./services/ingest + build: + context: . + dockerfile: services/ingest/Dockerfile ports: [ "8105:8080" ] + healthcheck: + test: [ 'CMD', 'curl', '-f', 'http://localhost:8080/health' ] + interval: 15s + timeout: 5s + retries: 10 + networks: + - summit zk-tx: - build: ./services/zk-tx + build: + context: . + dockerfile: services/zk-tx/Dockerfile ports: [ "8106:8080" ] + healthcheck: + test: [ 'CMD', 'curl', '-f', 'http://localhost:8080/health' ] + interval: 15s + timeout: 5s + retries: 10 + networks: + - summit predictd: build: context: . dockerfile: services/predictd/Dockerfile + networks: + - summit slo-exporter: build: - context: ./apps/slo-exporter - dockerfile: Dockerfile + context: . + dockerfile: apps/slo-exporter/Dockerfile container_name: summit-slo-exporter restart: unless-stopped environment: PORT: 9092 PROMETHEUS_URL: http://prometheus:9090 - API_URL: http://api:4000 + API_URL: http://server:4000 PREDICTD_PORT: 4001 ports: - '9092:9092' depends_on: prometheus: condition: service_started - api: + server: condition: service_healthy networks: - summit @@ -263,6 +330,8 @@ services: - "/policies" volumes: - ./services/opa/policies:/policies + networks: + - summit web: build: @@ -273,14 +342,14 @@ services: env_file: - ${DEV_ENV_FILE:-.env} environment: - VITE_API_URL: http://server:4000/graphql - VITE_WS_URL: ws://server:4000/graphql + VITE_API_URL: http://api-gateway:4000/graphql + VITE_WS_URL: ws://api-gateway:4000/graphql VITE_WEBSOCKET_URL: ws://websocket-server:9001 VITE_PORT: 3000 ports: - '3000:3000' depends_on: - server: + api-gateway: condition: service_healthy healthcheck: test: [ 'CMD', 'curl', '-f', 'http://localhost:3000' ] @@ -292,8 +361,8 @@ services: websocket-server: build: - context: ./services/websocket-server - dockerfile: Dockerfile + context: . + dockerfile: services/websocket-server/Dockerfile command: node dist/index.js container_name: summit-websocket-server restart: unless-stopped @@ -452,8 +521,8 @@ services: ai-sandbox: build: - context: ./services/ai-sandbox - dockerfile: Dockerfile + context: . + dockerfile: services/ai-sandbox/Dockerfile command: node dist/index.js container_name: summit-ai-sandbox restart: unless-stopped @@ -483,8 +552,8 @@ services: agentic-mesh-evaluation: build: - context: ./services/agentic-mesh-evaluation - dockerfile: Dockerfile + context: . + dockerfile: services/agentic-mesh-evaluation/Dockerfile container_name: summit-agentic-mesh-evaluation restart: unless-stopped environment: @@ -522,6 +591,11 @@ services: OTEL_SERVICE_NAME: summit-ai OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: http://otel-collector:4318/v1/traces OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: http://otel-collector:4318/v1/metrics + healthcheck: + test: [ 'CMD', 'curl', '-f', 'http://localhost:8000/health' ] + interval: 15s + timeout: 5s + retries: 10 networks: - summit labels: diff --git a/docs/roadmap/STATUS.json b/docs/roadmap/STATUS.json index c8a844a900f..f11c36e4f9d 100644 --- a/docs/roadmap/STATUS.json +++ b/docs/roadmap/STATUS.json @@ -1,7 +1,13 @@ { - "last_updated": "2026-03-25T03:00:00Z", - "revision_note": "Integrated GA control plane integrity checks and PR9 trust intelligence layer hardening with deterministic manifest verification.", + "last_updated": "2026-03-28T01:30:00Z", + "revision_note": "Reassessed GA readiness and reduced branch-protection drift false positives by distinguishing unknown verification states from confirmed policy drift.", "initiatives": [ + { + "id": "branch-protection-drift-signal-hardening", + "status": "completed", + "owner": "codex", + "notes": "Updated branch-protection drift checker to emit drift_status (in_sync|drift_detected|unknown), avoid false drift positives when GitHub metadata is inaccessible, and add explicit fail-on-unknown behavior with regression tests." + }, { "id": "pr9-trust-intelligence-layer", "status": "completed", diff --git a/scripts/release/check_branch_protection_drift.sh b/scripts/release/check_branch_protection_drift.sh index 5435f1efd7a..30bf51e2fcd 100755 --- a/scripts/release/check_branch_protection_drift.sh +++ b/scripts/release/check_branch_protection_drift.sh @@ -28,6 +28,7 @@ EXCEPTIONS_FILE="${REPO_ROOT}/docs/ci/REQUIRED_CHECKS_EXCEPTIONS.yml" OUT_DIR="artifacts/release-train" VERBOSE=false FAIL_ON_DRIFT=false +FAIL_ON_UNKNOWN=false usage() { cat << 'EOF' @@ -42,6 +43,7 @@ Options: --exceptions FILE Exceptions file path (default: docs/ci/REQUIRED_CHECKS_EXCEPTIONS.yml) --out-dir DIR Output directory (default: artifacts/release-train) --fail-on-drift Exit with code 1 if drift is detected + --fail-on-unknown Exit with code 1 if branch protection cannot be evaluated --verbose Enable verbose logging --help Show this help @@ -100,6 +102,10 @@ while [[ $# -gt 0 ]]; do FAIL_ON_DRIFT=true shift ;; + --fail-on-unknown) + FAIL_ON_UNKNOWN=true + shift + ;; --verbose) VERBOSE=true shift @@ -212,40 +218,47 @@ GITHUB_CHECKS="" GITHUB_COUNT=0 API_ACCESSIBLE=true -# Try to fetch branch protection -set +e -API_RESPONSE=$(gh api "$API_ENDPOINT" 2>&1) -API_EXIT_CODE=$? -set -e - -if [[ $API_EXIT_CODE -ne 0 ]]; then +if ! command -v gh &>/dev/null; then API_ACCESSIBLE=false - - if echo "$API_RESPONSE" | grep -q "404"; then - API_ERROR="Branch protection not configured for $BRANCH" - log_warn "$API_ERROR" - elif echo "$API_RESPONSE" | grep -q "403"; then - API_ERROR="Insufficient permissions to read branch protection (requires admin or read:org scope)" - log_warn "$API_ERROR" - else - API_ERROR="API error: $API_RESPONSE" - log_warn "$API_ERROR" - fi + API_ERROR="GitHub CLI (gh) is not installed; cannot query branch protection" + log_warn "$API_ERROR" else - # Extract required contexts (check names) - GITHUB_CHECKS=$(echo "$API_RESPONSE" | jq -r '.contexts[]? // empty' 2>/dev/null | sort || echo "") + # Try to fetch branch protection + set +e + API_RESPONSE=$(gh api "$API_ENDPOINT" 2>&1) + API_EXIT_CODE=$? + set -e + + if [[ $API_EXIT_CODE -ne 0 ]]; then + API_ACCESSIBLE=false + + if echo "$API_RESPONSE" | grep -q "404"; then + API_ERROR="Branch protection not configured for $BRANCH" + log_warn "$API_ERROR" + elif echo "$API_RESPONSE" | grep -q "403"; then + API_ERROR="Insufficient permissions to read branch protection (requires admin or read:org scope)" + log_warn "$API_ERROR" + else + API_ERROR="API error: $API_RESPONSE" + log_warn "$API_ERROR" + fi + else + # Extract required contexts (check names) + GITHUB_CHECKS=$(echo "$API_RESPONSE" | jq -r '.contexts[]? // empty' 2>/dev/null | sort || echo "") - # Also try the newer 'checks' array format - if [[ -z "$GITHUB_CHECKS" ]]; then - GITHUB_CHECKS=$(echo "$API_RESPONSE" | jq -r '.checks[]?.context // empty' 2>/dev/null | sort || echo "") - fi + # Also try the newer 'checks' array format + if [[ -z "$GITHUB_CHECKS" ]]; then + GITHUB_CHECKS=$(echo "$API_RESPONSE" | jq -r '.checks[]?.context // empty' 2>/dev/null | sort || echo "") + fi - GITHUB_COUNT=$(echo "$GITHUB_CHECKS" | grep -c . || echo 0) - log "GitHub requires $GITHUB_COUNT status checks" + GITHUB_COUNT=$(echo "$GITHUB_CHECKS" | grep -c . || echo 0) + log "GitHub requires $GITHUB_COUNT status checks" + fi fi # --- Step 4: Compare sets --- DRIFT_DETECTED=false +DRIFT_STATUS="in_sync" MISSING_IN_GITHUB=() EXTRA_IN_GITHUB=() EXCEPTED_MISSING=() @@ -285,7 +298,11 @@ if [[ "$API_ACCESSIBLE" == "true" ]]; then log "Missing in GitHub: ${#MISSING_IN_GITHUB[@]} (${#EXCEPTED_MISSING[@]} excepted)" log "Extra in GitHub: ${#EXTRA_IN_GITHUB[@]} (${#EXCEPTED_EXTRA[@]} excepted)" else - DRIFT_DETECTED=true # Unknown state is treated as potential drift + DRIFT_STATUS="unknown" +fi + +if [[ "$DRIFT_STATUS" != "unknown" && "$DRIFT_DETECTED" == "true" ]]; then + DRIFT_STATUS="drift_detected" fi # --- Step 5: Generate reports --- @@ -311,6 +328,7 @@ cat > "$OUT_DIR/branch_protection_drift_report.json" << EOF "exceptions_loaded": $EXCEPTIONS_LOADED, "api_accessible": $API_ACCESSIBLE, "api_error": $(jq -n --arg err "$API_ERROR" 'if $err == "" then null else $err end'), + "drift_status": "$DRIFT_STATUS", "drift_detected": $DRIFT_DETECTED, "summary": { "policy_check_count": $POLICY_COUNT, @@ -354,6 +372,7 @@ cat > "$OUT_DIR/branch_protection_drift_report.md" << EOF | Extra in GitHub | ${#EXTRA_IN_GITHUB[@]} | | Excepted (Missing) | ${#EXCEPTED_MISSING[@]} | | Excepted (Extra) | ${#EXCEPTED_EXTRA[@]} | +| Drift Status | $DRIFT_STATUS | | **Drift Detected** | $DRIFT_DETECTED | --- @@ -382,6 +401,20 @@ Unable to read branch protection settings. This could mean: EOF fi +if [[ "$DRIFT_STATUS" == "unknown" ]]; then + cat >> "$OUT_DIR/branch_protection_drift_report.md" << EOF +## Status: Unknown (Verification Blocked) + +Branch protection drift could not be evaluated because GitHub branch protection metadata +was not accessible in this environment. + +> This is a governance visibility blocker, not confirmed policy drift. + +--- + +EOF +fi + if [[ ${#MISSING_IN_GITHUB[@]} -gt 0 ]]; then cat >> "$OUT_DIR/branch_protection_drift_report.md" << EOF ## Missing in GitHub Branch Protection @@ -537,7 +570,7 @@ log_info "Drift report generated:" log_info " JSON: $OUT_DIR/branch_protection_drift_report.json" log_info " Markdown: $OUT_DIR/branch_protection_drift_report.md" -if [[ "$DRIFT_DETECTED" == "true" ]]; then +if [[ "$DRIFT_STATUS" == "drift_detected" ]]; then log_warn "DRIFT DETECTED - Policy and GitHub branch protection are out of sync" if [[ ${#MISSING_IN_GITHUB[@]} -gt 0 ]]; then log_warn " Missing in GitHub: ${MISSING_IN_GITHUB[*]}" @@ -550,6 +583,12 @@ if [[ "$DRIFT_DETECTED" == "true" ]]; then log_error "Failing due to detected drift (--fail-on-drift active)" exit 1 fi +elif [[ "$DRIFT_STATUS" == "unknown" ]]; then + log_warn "DRIFT STATUS UNKNOWN - unable to evaluate branch protection in this environment" + if [[ "$FAIL_ON_UNKNOWN" == "true" ]]; then + log_error "Failing due to unknown drift status (--fail-on-unknown active)" + exit 1 + fi else log_info "No drift detected - Policy and GitHub branch protection are in sync" fi diff --git a/scripts/release/tests/check_branch_protection_drift.test.sh b/scripts/release/tests/check_branch_protection_drift.test.sh new file mode 100755 index 00000000000..5d1365054fe --- /dev/null +++ b/scripts/release/tests/check_branch_protection_drift.test.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +# check_branch_protection_drift.test.sh +# Focused regression tests for unknown-vs-drift handling in drift checks. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RELEASE_SCRIPTS="${SCRIPT_DIR}/.." +TEMP_DIR="" + +TESTS_RUN=0 +TESTS_FAILED=0 + +setup() { + TEMP_DIR=$(mktemp -d) +} + +teardown() { + if [[ -n "${TEMP_DIR}" && -d "${TEMP_DIR}" ]]; then + rm -rf "${TEMP_DIR}" + TEMP_DIR="" + fi +} + +trap teardown EXIT + +assert_eq() { + local expected="$1" + local actual="$2" + local message="$3" + TESTS_RUN=$((TESTS_RUN + 1)) + if [[ "${expected}" != "${actual}" ]]; then + echo "[FAIL] ${message} (expected='${expected}' actual='${actual}')" + TESTS_FAILED=$((TESTS_FAILED + 1)) + else + echo "[PASS] ${message}" + fi +} + +test_unknown_when_gh_unavailable() { + setup + + local out_dir="${TEMP_DIR}/out" + mkdir -p "${out_dir}" + + # Ensure gh is not discoverable for this test process. + PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \ + "${RELEASE_SCRIPTS}/check_branch_protection_drift.sh" \ + --repo BHG/summit \ + --branch main \ + --out-dir "${out_dir}" >/dev/null 2>&1 || true + + local report="${out_dir}/branch_protection_drift_report.json" + local drift_status + local drift_detected + drift_status=$(jq -r '.drift_status' "${report}") + drift_detected=$(jq -r '.drift_detected' "${report}") + + assert_eq "unknown" "${drift_status}" "drift_status is unknown when gh metadata is inaccessible" + assert_eq "false" "${drift_detected}" "drift_detected is false when status is unknown" + + teardown +} + +test_fail_on_unknown_exits_nonzero() { + setup + + local out_dir="${TEMP_DIR}/out" + mkdir -p "${out_dir}" + + set +e + PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \ + "${RELEASE_SCRIPTS}/check_branch_protection_drift.sh" \ + --repo BHG/summit \ + --branch main \ + --fail-on-unknown \ + --out-dir "${out_dir}" >/dev/null 2>&1 + local exit_code=$? + set -e + + assert_eq "1" "${exit_code}" "--fail-on-unknown exits with status 1 when status is unknown" + + teardown +} + +main() { + test_unknown_when_gh_unavailable + test_fail_on_unknown_exits_nonzero + + echo + echo "Tests run: ${TESTS_RUN}" + if [[ "${TESTS_FAILED}" -gt 0 ]]; then + echo "Failures: ${TESTS_FAILED}" + exit 1 + fi + echo "All tests passed." +} + +main "$@"