Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 115 additions & 83 deletions .github/workflows/deploy-aws.yml
Original file line number Diff line number Diff line change
@@ -1,34 +1,54 @@
name: Deploy to AWS
name: Deploy Release to AWS

on:
push:
branches: ["main"]
release:
types: [published]
workflow_dispatch:
inputs:
image_tag:
description: 'Image tag to deploy (defaults to release tag or commit SHA)'
required: false
type: string
apply_infra:
description: 'Apply terraform before deploying workloads'
required: false
type: boolean
default: false

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
group: deploy-aws-${{ github.ref_name || github.sha }}
cancel-in-progress: false

env:
AWS_REGION: us-east-1
ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com
EKS_CLUSTER: summit-prod-eks
K8S_NAMESPACE: default
AWS_ROLE_NAME: github-actions-deploy-role
ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com

permissions:
id-token: write
contents: read

jobs:
pre-deploy-gate:
uses: ./.github/workflows/gate.yml
with:
region: us-east-1
preflight:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4

- name: Validate release deployment prerequisites
run: |
set -euo pipefail
test -n "${{ secrets.AWS_ACCOUNT_ID }}"
test -f terraform/environments/prod/main.tf
test -f charts/universal-app/Chart.yaml
test -f scripts/verify-deployment.sh

build-and-push:
needs: pre-deploy-gate
needs: preflight
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
service: [maestro, prov-ledger, policy-lac]
include:
Expand All @@ -48,125 +68,137 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Configure AWS Credentials
- name: Setup pnpm
uses: pnpm/action-setup@v4
with:
version: 9.15.4

- name: Setup Node
uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'pnpm'

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/github-actions-deploy-role
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ env.AWS_ROLE_NAME }}
aws-region: ${{ env.AWS_REGION }}

- name: Dependency Audit
- name: Dependency audit
run: |
if [ "${{ matrix.service }}" == "maestro" ]; then
pip install safety && safety check
set -euo pipefail
if [ "${{ matrix.service }}" = "maestro" ]; then
python -m pip install --upgrade pip safety
safety check --full-report
else
pnpm install --frozen-lockfile
pnpm audit --audit-level=high
fi

- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2

- name: Security Scan (Trivy)
uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
with:
scan-type: "fs"
scan-ref: "."
trivy-config: trivy.yaml
exit-code: "0" # Don't fail build yet, just report
ignore-unfixed: true
severity: "CRITICAL,HIGH"

- name: Build and Push Docker Image
- name: Build and push image
env:
ECR_REPOSITORY: summit/${{ matrix.service }}
IMAGE_TAG: ${{ github.sha }}
RELEASE_TAG: ${{ inputs.image_tag || github.event.release.tag_name || github.sha }}
run: |
set -euo pipefail
docker build \
-t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG \
-f ${{ matrix.dockerfile }} \
--build-arg SERVICE_PATH=${{ matrix.path }} \
${{ matrix.context }}
docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
-t "$ECR_REGISTRY/$ECR_REPOSITORY:$RELEASE_TAG" \
-f "${{ matrix.dockerfile }}" \
--build-arg SERVICE_PATH="${{ matrix.path }}" \
"${{ matrix.context }}"
docker push "$ECR_REGISTRY/$ECR_REPOSITORY:$RELEASE_TAG"

deploy-infra:
needs: [build-and-push, pre-deploy-gate]
needs: build-and-push
runs-on: ubuntu-22.04
if: inputs.apply_infra == true
steps:
- uses: actions/checkout@v4

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ env.AWS_ROLE_NAME }}
aws-region: ${{ env.AWS_REGION }}

- name: Setup Terraform
uses: hashicorp/setup-terraform@v3

- name: Terraform Apply
- name: Terraform apply
working-directory: terraform/environments/prod
run: |
terraform init
terraform apply -auto-approve
set -euo pipefail
terraform init -input=false
terraform apply -auto-approve -input=false

deploy-k8s:
needs: deploy-infra
needs: [build-and-push, deploy-infra]
if: ${{ always() && needs.build-and-push.result == 'success' && (needs.deploy-infra.result == 'success' || needs.deploy-infra.result == 'skipped') }}
runs-on: ubuntu-22.04
environment: production
steps:
- uses: actions/checkout@v4

- name: Configure AWS Credentials
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/github-actions-deploy-role
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ env.AWS_ROLE_NAME }}
aws-region: ${{ env.AWS_REGION }}

- name: Capture Governance Evidence
- name: Setup Helm
uses: azure/setup-helm@v4

- name: Setup kubectl
uses: azure/setup-kubectl@v4

- name: Update kubeconfig
run: aws eks update-kubeconfig --name "$EKS_CLUSTER" --region "$AWS_REGION"

- name: Capture governance evidence
run: |
set -euo pipefail
mkdir -p evidence-artifacts
aws sts get-caller-identity > evidence-artifacts/caller-identity.json
# Attempt to capture role policy, ignore failure if permission denied
aws iam get-role --role-name github-actions-deploy-role --query 'Role.AssumeRolePolicyDocument' --output json > evidence-artifacts/trust-policy.json || echo "Could not fetch trust policy" > evidence-artifacts/trust-policy-error.txt

- name: Upload Governance Evidence
uses: actions/upload-artifact@v4
with:
name: governance-evidence-deploy-k8s
path: evidence-artifacts/
aws eks describe-cluster --name "$EKS_CLUSTER" --region "$AWS_REGION" > evidence-artifacts/cluster.json

- name: Update Kubeconfig
run: aws eks update-kubeconfig --name ${{ env.EKS_CLUSTER }}

- name: Deploy Manifests
- name: Deploy workloads
env:
RELEASE_TAG: ${{ inputs.image_tag || github.event.release.tag_name || github.sha }}
run: |
# Deploy Maestro
set -euo pipefail
helm upgrade --install maestro charts/universal-app \
--set image.repository=$ECR_REGISTRY/summit/maestro \
--set image.tag=${{ github.sha }} \
--set service.targetPort=8001 \
--set ingress.enabled=true \
--set ingress.hosts[0].host=api.summit.internal \
--set ingress.hosts[0].paths[0].path=/maestro \
--set ingress.hosts[0].paths[0].pathType=Prefix \
--namespace default

# Deploy Prov Ledger
--namespace "$K8S_NAMESPACE" \
--set image.repository="$ECR_REGISTRY/summit/maestro" \
--set image.tag="$RELEASE_TAG" \
--set service.targetPort=8001
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve Maestro ingress overrides during Helm upgrade

This Helm invocation no longer sets ingress.enabled=true (or the hosts/paths overrides) for maestro. Because charts/universal-app/values.yaml defaults ingress.enabled to false, the chart will omit the ingress manifest and Helm will delete the existing ingress on upgrade. In environments that rely on the /maestro ingress route, this causes an external availability outage immediately after deployment. Fix: re-add --set ingress.enabled=true along with the ingress.hosts[0] host/path/pathType overrides to the maestro helm upgrade command (or move those settings into a committed values file passed via -f).

Useful? React with 👍 / 👎.


helm upgrade --install prov-ledger charts/universal-app \
--set image.repository=$ECR_REGISTRY/summit/prov-ledger \
--set image.tag=${{ github.sha }} \
--set service.targetPort=4010 \
--namespace default
--namespace "$K8S_NAMESPACE" \
--set image.repository="$ECR_REGISTRY/summit/prov-ledger" \
--set image.tag="$RELEASE_TAG" \
--set service.targetPort=4010

# Deploy Policy LAC
helm upgrade --install policy-lac charts/universal-app \
--set image.repository=$ECR_REGISTRY/summit/policy-lac \
--set image.tag=${{ github.sha }} \
--set service.targetPort=4000 \
--namespace default
--namespace "$K8S_NAMESPACE" \
--set image.repository="$ECR_REGISTRY/summit/policy-lac" \
--set image.tag="$RELEASE_TAG" \
--set service.targetPort=4000

- name: Post-Deployment Smoke Test
- name: Verify rollouts and smoke check
run: |
# Wait for rollout
kubectl rollout status deployment/maestro --timeout=118s

# Run the project's internal smoke test script
# We use kubectl exec to run it from inside a pod or curl the ingress
echo "Running Health Check..."
set -euo pipefail
kubectl rollout status deployment/maestro --namespace "$K8S_NAMESPACE" --timeout=180s
kubectl rollout status deployment/prov-ledger --namespace "$K8S_NAMESPACE" --timeout=180s
kubectl rollout status deployment/policy-lac --namespace "$K8S_NAMESPACE" --timeout=180s
./scripts/verify-deployment.sh

# Optional: Run app-level functional smoke tests
# pnpm run test:smoke
- name: Upload governance evidence
uses: actions/upload-artifact@v4
with:
name: aws-deploy-evidence-${{ github.run_id }}
path: evidence-artifacts/
Comment on lines +205 to +210
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Upload deployment evidence even when rollout fails

By default the artifact upload step runs only when every preceding step succeeds, so any failure in rollout verification or verify-deployment.sh prevents evidence-artifacts/ from being published. That removes the caller-identity and cluster evidence needed for incident triage and compliance in exactly the failure scenarios where it is most valuable. Fix: add if: always() to the upload step (or move the upload before the verification step) so evidence is published for failed runs as well.

Useful? React with 👍 / 👎.

retention-days: 30
Loading