Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 118 additions & 79 deletions .github/workflows/deploy-aws.yml
Original file line number Diff line number Diff line change
@@ -1,34 +1,54 @@
name: Deploy to AWS
name: Deploy Release to AWS

on:
push:
branches: ["main"]
release:
types: [published]
workflow_dispatch:
inputs:
image_tag:
description: 'Image tag to deploy (defaults to release tag or commit SHA)'
required: false
type: string
apply_infra:
description: 'Apply terraform before deploying workloads'
required: false
type: boolean
default: false

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
group: deploy-aws-production
cancel-in-progress: false

env:
AWS_REGION: us-east-1
ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com
EKS_CLUSTER: summit-prod-eks
K8S_NAMESPACE: default
AWS_ROLE_NAME: github-actions-deploy-role
ECR_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com

permissions:
id-token: write
contents: read

jobs:
pre-deploy-gate:
uses: ./.github/workflows/gate.yml
with:
region: us-east-1
preflight:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4

- name: Validate release deployment prerequisites
run: |
set -euo pipefail
test -n "${{ secrets.AWS_ACCOUNT_ID }}"
test -f terraform/environments/prod/main.tf
test -f charts/universal-app/Chart.yaml
test -f scripts/verify-deployment.sh

build-and-push:
needs: pre-deploy-gate
needs: preflight
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
service: [maestro, prov-ledger, policy-lac]
include:
Expand All @@ -48,125 +68,144 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Configure AWS Credentials
- name: Setup pnpm
uses: pnpm/action-setup@v4
with:
version: 9.15.4

- name: Setup Node
uses: actions/setup-node@v4
with:
node-version: '20'
cache: 'pnpm'

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/github-actions-deploy-role
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ env.AWS_ROLE_NAME }}
aws-region: ${{ env.AWS_REGION }}

- name: Dependency Audit
- name: Dependency audit
run: |
if [ "${{ matrix.service }}" == "maestro" ]; then
pip install safety && safety check
set -euo pipefail
if [ "${{ matrix.service }}" = "maestro" ]; then
python -m pip install --upgrade pip safety
safety check -r maestro/requirements.txt --full-report
else
pnpm install --frozen-lockfile
pnpm audit --audit-level=high
fi

- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2

- name: Security Scan (Trivy)
uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
with:
scan-type: "fs"
scan-ref: "."
trivy-config: trivy.yaml
exit-code: "0" # Don't fail build yet, just report
ignore-unfixed: true
severity: "CRITICAL,HIGH"

- name: Build and Push Docker Image
- name: Build and push image
if: ${{ github.event_name == 'release' || inputs.image_tag == '' }}
env:
ECR_REPOSITORY: summit/${{ matrix.service }}
IMAGE_TAG: ${{ github.sha }}
RELEASE_TAG: ${{ inputs.image_tag || github.event.release.tag_name || github.sha }}
run: |
set -euo pipefail
docker build \
-t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG \
-f ${{ matrix.dockerfile }} \
--build-arg SERVICE_PATH=${{ matrix.path }} \
${{ matrix.context }}
docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
-t "$ECR_REGISTRY/$ECR_REPOSITORY:$RELEASE_TAG" \
-f "${{ matrix.dockerfile }}" \
--build-arg SERVICE_PATH="${{ matrix.path }}" \
"${{ matrix.context }}"
docker push "$ECR_REGISTRY/$ECR_REPOSITORY:$RELEASE_TAG"

deploy-infra:
needs: [build-and-push, pre-deploy-gate]
needs: build-and-push
runs-on: ubuntu-22.04
if: inputs.apply_infra == true
steps:
- uses: actions/checkout@v4

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ env.AWS_ROLE_NAME }}
aws-region: ${{ env.AWS_REGION }}

- name: Setup Terraform
uses: hashicorp/setup-terraform@v3

- name: Terraform Apply
- name: Terraform apply
env:
TF_VAR_aws_region: ${{ env.AWS_REGION }}
working-directory: terraform/environments/prod
run: |
terraform init
terraform apply -auto-approve
set -euo pipefail
terraform init -input=false
terraform apply -auto-approve -input=false

deploy-k8s:
needs: deploy-infra
needs: [build-and-push, deploy-infra]
if: ${{ always() && needs.build-and-push.result == 'success' && (needs.deploy-infra.result == 'success' || needs.deploy-infra.result == 'skipped') }}
runs-on: ubuntu-22.04
environment: production
steps:
- uses: actions/checkout@v4

- name: Configure AWS Credentials
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/github-actions-deploy-role
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ env.AWS_ROLE_NAME }}
aws-region: ${{ env.AWS_REGION }}

- name: Capture Governance Evidence
- name: Setup Helm
uses: azure/setup-helm@v4

- name: Setup kubectl
uses: azure/setup-kubectl@v4

- name: Update kubeconfig
run: aws eks update-kubeconfig --name "$EKS_CLUSTER" --region "$AWS_REGION"

- name: Capture governance evidence
run: |
set -euo pipefail
mkdir -p evidence-artifacts
aws sts get-caller-identity > evidence-artifacts/caller-identity.json
# Attempt to capture role policy, ignore failure if permission denied
aws iam get-role --role-name github-actions-deploy-role --query 'Role.AssumeRolePolicyDocument' --output json > evidence-artifacts/trust-policy.json || echo "Could not fetch trust policy" > evidence-artifacts/trust-policy-error.txt
aws eks describe-cluster --name "$EKS_CLUSTER" --region "$AWS_REGION" > evidence-artifacts/cluster.json

- name: Upload Governance Evidence
uses: actions/upload-artifact@v4
with:
name: governance-evidence-deploy-k8s
path: evidence-artifacts/

- name: Update Kubeconfig
run: aws eks update-kubeconfig --name ${{ env.EKS_CLUSTER }}

- name: Deploy Manifests
- name: Deploy workloads
env:
RELEASE_TAG: ${{ inputs.image_tag || github.event.release.tag_name || github.sha }}
run: |
# Deploy Maestro
set -euo pipefail
helm upgrade --install maestro charts/universal-app \
--set image.repository=$ECR_REGISTRY/summit/maestro \
--set image.tag=${{ github.sha }} \
--namespace "$K8S_NAMESPACE" \
--set image.repository="$ECR_REGISTRY/summit/maestro" \
--set image.tag="$RELEASE_TAG" \
--set service.targetPort=8001 \
--set ingress.enabled=true \
--set ingress.hosts[0].host=api.summit.internal \
--set ingress.hosts[0].paths[0].path=/maestro \
--set ingress.hosts[0].paths[0].pathType=Prefix \
--namespace default
--set fullnameOverride=maestro

# Deploy Prov Ledger
helm upgrade --install prov-ledger charts/universal-app \
--set image.repository=$ECR_REGISTRY/summit/prov-ledger \
--set image.tag=${{ github.sha }} \
--namespace "$K8S_NAMESPACE" \
--set image.repository="$ECR_REGISTRY/summit/prov-ledger" \
--set image.tag="$RELEASE_TAG" \
--set service.targetPort=4010 \
--namespace default
--set fullnameOverride=prov-ledger

# Deploy Policy LAC
helm upgrade --install policy-lac charts/universal-app \
--set image.repository=$ECR_REGISTRY/summit/policy-lac \
--set image.tag=${{ github.sha }} \
--namespace "$K8S_NAMESPACE" \
--set image.repository="$ECR_REGISTRY/summit/policy-lac" \
--set image.tag="$RELEASE_TAG" \
--set service.targetPort=4000 \
--namespace default
--set fullnameOverride=policy-lac

- name: Post-Deployment Smoke Test
- name: Verify rollouts and smoke check
run: |
# Wait for rollout
kubectl rollout status deployment/maestro --timeout=118s

# Run the project's internal smoke test script
# We use kubectl exec to run it from inside a pod or curl the ingress
echo "Running Health Check..."
set -euo pipefail
kubectl rollout status deployment/maestro --namespace "$K8S_NAMESPACE" --timeout=180s
kubectl rollout status deployment/prov-ledger --namespace "$K8S_NAMESPACE" --timeout=180s
kubectl rollout status deployment/policy-lac --namespace "$K8S_NAMESPACE" --timeout=180s
./scripts/verify-deployment.sh

# Optional: Run app-level functional smoke tests
# pnpm run test:smoke
- name: Upload governance evidence
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: aws-deploy-evidence-${{ github.run_id }}-attempt-${{ github.run_attempt }}
path: evidence-artifacts/
Comment on lines +205 to +210
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2] Upload deployment evidence even when rollout fails

The artifact upload runs only on success by default, so any failure in rollout verification or `verify-deployment.sh` prevents `evidence-artifacts/` from being published. That removes the caller/cluster evidence needed for incident triage and compliance in exactly the failure scenarios where it is most valuable; adding `if: always()` to the upload step (or moving the upload earlier) keeps evidence available for failed runs.

Useful? React with 👍 / 👎.

retention-days: 30
Loading