Skip to content
Draft
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
a11c07a
Adding initial scalable setup
yl-nuwan Jun 5, 2026
b17ebc1
fixed the yaml
yl-nuwan Jun 5, 2026
b23c318
removed the conditional exit added for testing
yl-nuwan Jun 5, 2026
1c7ba00
fixed the typo
yl-nuwan Jun 5, 2026
9b228cc
feat(aws-queue-mode): separate ECS execution and task roles with Dyna…
yl-nuwan Jun 5, 2026
b6f6d39
feat(cleanup): add clean and rebuild scripts for scalable containeriz…
yl-nuwan Jun 5, 2026
8fb5329
feat(ecs): add command override variables for ECS tasks and update ma…
yl-nuwan Jun 5, 2026
cc5ef72
feat(aws-containerized): add queue-aware REST handler for ECS deploym…
yl-nuwan Jun 5, 2026
dfcc8fe
fix(ecs-queue-handler): correct message attribute parameter in queue …
yl-nuwan Jun 5, 2026
2c981bb
feat(aws-containerized): add structured logging for agent request lif…
yl-nuwan Jun 5, 2026
ed49d79
testing with the rest async mode
yl-nuwan Jun 8, 2026
8efdd03
Fixed the mode type
yl-nuwan Jun 8, 2026
1afc5e6
fix(aws-containerized): correct path parameter substitution in async …
yl-nuwan Jun 8, 2026
36b6794
chore(aws-containerized): rename sqs.tf to queue.tf and remove redund…
yl-nuwan Jun 8, 2026
8faeac4
docs(aws-containerized): add queue mode documentation and update exam…
yl-nuwan Jun 8, 2026
d3858f3
style(aws-containerized): format code and improve string consistency
yl-nuwan Jun 8, 2026
79f1230
formated the documant
yl-nuwan Jun 8, 2026
b6e8e5e
Enabled a basic containerized test to check the backward compatibility
yl-nuwan Jun 8, 2026
189275e
fix(aws-deployment): add session ID validation for REST_ASYNC polling
yl-nuwan Jun 8, 2026
b53802a
fix(aws-containerized): improve error handling and validation in ECS …
yl-nuwan Jun 8, 2026
e3eb26e
fix(aws): correct HTTP status code and error messages for missing res…
yl-nuwan Jun 8, 2026
2d4b53b
feat(aws-containerized): add SQS-based autoscaling for Agent Runner
yl-nuwan Jun 8, 2026
256fc97
removed duplicate definition
yl-nuwan Jun 8, 2026
2d62c50
load testing
yl-nuwan Jun 9, 2026
cfab4f3
disabled the cache tempararly
yl-nuwan Jun 9, 2026
ecaad3c
Merge branch 'develop' into CNT-scalability
yl-nuwan Jun 9, 2026
9bd2f92
Merge branch 'develop' into CNT-scalability
yl-nuwan Jun 9, 2026
517dce4
fix: update agentkernel dependency version to 0.5.1 in deploy scripts…
yl-nuwan Jun 9, 2026
66cef74
fix: correct indentation in deploy script for config file copy
yl-nuwan Jun 9, 2026
81436c3
feat: add agent runner autoscaling documentation and configuration de…
yl-nuwan Jun 14, 2026
c61869e
refactor: clean up comments and enabled integration test configuration
yl-nuwan Jun 15, 2026
fd84cc3
re enabled the cache
yl-nuwan Jun 15, 2026
85dc2ce
linted
yl-nuwan Jun 15, 2026
fae5c49
enabled the agent code after load testing
yl-nuwan Jun 18, 2026
45ebaf2
fix: update autoscaling conditions to use local.enable_autoscaling in…
yl-nuwan Jun 18, 2026
5362d28
Potential fix for pull request finding
yl-nuwan Jun 18, 2026
0db8c89
Potential fix for pull request finding
yl-nuwan Jun 18, 2026
234b3fe
resolve pr review suggestions
yl-nuwan Jun 20, 2026
8729ad7
refactor(containerized): restructure infrastructure into modular comp…
yl-nuwan Jun 20, 2026
1ba0207
reduced test scope
yl-nuwan Jun 21, 2026
b475aaf
refactor: update containerized module configurations for consistency …
yl-nuwan Jun 21, 2026
b8faff2
feat: add create_dynamodb_memory_table variable and update IAM polici…
yl-nuwan Jun 21, 2026
c7a11d3
refactor: update package_path references in containerized module conf…
yl-nuwan Jun 21, 2026
71b4243
switch to sync mode for testing for the application
yl-nuwan Jun 21, 2026
b8cfb32
reduced test scope
yl-nuwan Jun 21, 2026
fbc2bee
containerized code refactor
lakindu-yl Jun 26, 2026
8dda440
containerized mode documentation updates
lakindu-yl Jun 26, 2026
b2c5c34
refactor sqs_consumer
lakindu-yl Jun 28, 2026
66ee9fa
function naming update
lakindu-yl Jun 28, 2026
5e4e09d
update groups thread execution
lakindu-yl Jun 28, 2026
4965e5c
updates, and refactors
lakindu-yl Jun 28, 2026
3fbe6ce
example import fixes
lakindu-yl Jun 28, 2026
8b4e1c7
example import fix 2
lakindu-yl Jun 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/integration-test-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ weekly:
- type: aws-containerized
path: examples/aws-containerized/mcp/multi
deploy_dir: deploy
- type: aws-containerized
path: examples/aws-containerized/openai-dynamodb-scalable
deploy_dir: deploy

# AWS Serverless
- type: aws-serverless
Expand Down Expand Up @@ -97,7 +100,7 @@ weekly:
path: examples/gcp-serverless/openai-auth
deploy_dir: deploy

# # GCP Containerized
# GCP Containerized
- type: gcp-containerized
path: examples/gcp-containerized/openai
deploy_dir: deploy
Expand Down
183 changes: 183 additions & 0 deletions ak-deployment/ak-aws/containerized/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,172 @@ resource "aws_cloudwatch_metric_alarm" "high_cpu" {
}
```

### Scalable Queue Mode with SQS

For high-throughput workloads with asynchronous processing:

```hcl
module "agent_runner_image" {
source = "yaalalabs/ak-common/aws//modules/ecr"

env_alias = var.env_alias
module_name = "${var.module_name}-runner"
product_alias = var.product_alias
source_path = "${path.module}/dist-agent-runner"
}

module "scalable_queue_app" {
source = "yaalalabs/ak-containerized/aws"

region = "us-west-2"
product_alias = "myapp"
env_alias = "prod"
product_display_name = "Scalable Agent Service"

module_name = "scalable"
package_path = "${path.module}/dist-rest-service"

vpc_id = var.vpc_id
private_subnet_ids = var.private_subnet_ids

# ECS Configuration for REST Service
ecs_cpu = 1024
ecs_memory = 2048
ecs_desired_count = 2
ecs_container_port = 8000

# Override container command (ECR module injects Lambda-style CMD)
ecs_container_command = ["python", "app_rest_service.py"]

# Session storage
create_dynamodb_memory_table = true

# Enable Queue Mode
enable_queue_mode = true
queue_mode_type = "sync" # or "async" for polling mode

# Enable Autoscaling (optional)
enable_agent_runner_autoscaling = true
agent_runner_min_count = 1
agent_runner_max_count = 10
agent_runner_backlog_target = 10

# Agent Runner Configuration
agent_runner_image_uri = module.agent_runner_image.docker_image_uri
agent_runner_command = ["python", "app_agent_runner.py"]
agent_runner_cpu = 1024
agent_runner_memory = 2048
agent_runner_desired_count = 2

# SQS Configuration
sqs_input_visibility_timeout = 120 # Should exceed agent processing time
sqs_output_visibility_timeout = 60

environment_variables = {
OPENAI_API_KEY = var.openai_api_key
}

api_version = "v1"
agent_endpoint = "chat"
}
```

**Queue Mode Architecture:**
- **REST Service** (Thread 1): Accepts HTTP requests, enqueues to Input Queue
- **REST Service** (Thread 2): Polls Output Queue, writes to DynamoDB Response Store
- **Agent Runner**: Polls Input Queue, executes agents, sends to Output Queue
- **REST_SYNC** (`queue_mode_type = "sync"`): Client blocks until the response is ready in the DynamoDB Response Store
- **REST_ASYNC** (`queue_mode_type = "async"`): Client receives a `request_id` and polls the GET endpoint for the result

**Use Cases:**
- Long-running agent workflows (>30s)
- Workloads requiring independent scaling of API and processing
- High-throughput scenarios with bursty traffic
- Decoupling request handling from agent execution

See [examples/aws-containerized/openai-dynamodb-scalable](../../../examples/aws-containerized/openai-dynamodb-scalable/) for a complete implementation.

### Agent Runner Autoscaling

When `enable_agent_runner_autoscaling = true`, the Agent Runner automatically scales based on queue backlog.

#### How It Works

A Lambda function runs every **1 minute** and calculates:

```
BacklogPerTask = ApproximateNumberOfMessages / max(RunningTasks, 1)
```

AWS Target Tracking monitors this metric and scales using:

```
New Capacity = Current Capacity × (Current Metric / Target)
```

- **Scale-out**: Aggressive and fast (~3-4 minutes)
- **Scale-in**: Conservative and gradual (~15-30 minutes, multiple cycles)

#### Configuration

```hcl
enable_agent_runner_autoscaling = true
agent_runner_min_count = 1 # Minimum tasks (0 = scale to zero)
agent_runner_max_count = 10 # Maximum tasks
agent_runner_backlog_target = 10 # Target messages per task
agent_runner_scale_in_cooldown = 120 # Seconds between scale-in
agent_runner_scale_out_cooldown = 30 # Seconds between scale-out
```

**Choosing Target Value:**
- **Lower target** (e.g., 5): Scales out sooner, more tasks, faster processing, higher cost
- **Higher target** (e.g., 20): Scales out later, fewer tasks, slower processing, lower cost
- Start with the default (10), monitor queue depth and task count, then tune

#### Scaling Examples

**Scale-Out:**
```
Queue=100, Tasks=1, BacklogPerTask=100, Target=10
→ Scales to ~10 tasks quickly (~3-4 minutes)
```

**Scale-In (gradual over multiple cycles):**
```
Queue=20, Tasks=10, BacklogPerTask=2, Target=10

Cycle 1: Remove 3 tasks → 7 tasks
Wait 2min (cooldown)
Cycle 2: Remove 2 tasks → 5 tasks
...continues until stable
```

#### Monitoring

**CloudWatch Metrics:**
- `Custom/ECS/BacklogPerTask` - Custom metric
- `RunningTaskCount` - ECS service
- `ApproximateNumberOfMessages` - SQS queue

**View Lambda Logs:**
```bash
aws logs tail /aws/lambda/<prefix>-backlog-metric --follow
```

**View Scaling Activity:**
```bash
aws application-autoscaling describe-scaling-activities \
--service-namespace ecs \
--resource-id service/<cluster>/<service>
```

#### Troubleshooting

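- **Scaling too aggressively**: Increase `agent_runner_backlog_target` or the cooldown periods (`agent_runner_scale_in_cooldown`, `agent_runner_scale_out_cooldown`)
- **Not scaling fast enough**: Decrease `agent_runner_backlog_target` and verify the backlog-metric Lambda is running
- **Stuck at min/max**: Check the capacity limits (`agent_runner_min_count`, `agent_runner_max_count`) and verify messages are being processed
- **Scale to zero not working**: Set `agent_runner_min_count = 0` and wait 30+ minutes for the gradual scale-in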

## 📥 Inputs

| Name | Description | Type | Default | Required |
Expand Down Expand Up @@ -304,6 +470,23 @@ resource "aws_cloudwatch_metric_alarm" "high_cpu" {
| **State Management** |
| `create_redis_cluster` | Enable Redis ElastiCache cluster | `bool` | `false` | no |
| `create_dynamodb_memory_table` | Enable DynamoDB table for session storage | `bool` | `false` | no |
| **Queue Mode (Scalable Architecture)** |
| `enable_queue_mode` | Enable SQS queue mode with separate Agent Runner service | `bool` | `false` | no |
| `enable_agent_runner_autoscaling` | Enable SQS-based autoscaling for Agent Runner (requires `enable_queue_mode=true`) | `bool` | `false` | no |
| `queue_mode_type` | Queue mode type: `sync` (client blocks) or `async` (client polls) | `string` | `"sync"` | no |
| `agent_runner_image_uri` | Docker image URI for Agent Runner (required when `enable_queue_mode=true`) | `string` | `null` | conditional |
| `agent_runner_command` | Command override for Agent Runner container | `list(string)` | `null` | no |
| `agent_runner_cpu` | Fargate CPU units for Agent Runner | `number` | `1024` | no |
| `agent_runner_memory` | Fargate memory in MiB for Agent Runner | `number` | `2048` | no |
| `agent_runner_desired_count` | Number of Agent Runner tasks | `number` | `1` | no |
| `agent_runner_min_count` | Minimum tasks when autoscaling enabled | `number` | `0` | no |
| `agent_runner_max_count` | Maximum tasks when autoscaling enabled | `number` | `10` | no |
| `agent_runner_backlog_target` | Target messages per task for autoscaling (lower=more aggressive) | `number` | `10` | no |
| `agent_runner_scale_in_cooldown` | Seconds to wait before scaling in again | `number` | `120` | no |
| `agent_runner_scale_out_cooldown` | Seconds to wait before scaling out again | `number` | `30` | no |
| `sqs_input_visibility_timeout` | SQS Input Queue visibility timeout (seconds) | `number` | `30` | no |
| `sqs_output_visibility_timeout` | SQS Output Queue visibility timeout (seconds) | `number` | `30` | no |
| `ecs_container_command` | Command override for REST Service container | `list(string)` | `null` | no |

## 📤 Outputs

Expand Down
20 changes: 16 additions & 4 deletions ak-deployment/ak-aws/containerized/ecs.tf

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improve the documentation to mention that in queue mode this becomes the REST handler.

Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ module "ecs" {
image = module.docker_image[0].docker_image_uri
essential = true
readonlyRootFilesystem = false

# Command override - if provided, replaces the Docker image's CMD
command = var.ecs_container_command

portMappings = [
{
name = local.container_name,
Expand All @@ -95,11 +99,19 @@ module "ecs" {
]
enable_cloudwatch_logging = true
environment = [
for k, v in merge(var.environment_variables, local.redis_url != null ? {
AK_SESSION__REDIS__URL = local.redis_url
} : {},
local.dynamodb_memory_table_arn != null ? {
for k, v in merge(
var.environment_variables,
local.redis_url != null ? {
AK_SESSION__REDIS__URL = local.redis_url
} : {},
local.dynamodb_memory_table_arn != null ? {
AK_SESSION__DYNAMODB__TABLE_NAME = local.dynamodb_memory_table_name
} : {},
# Queue mode — inject queue URLs and response store table name
var.enable_queue_mode ? {
AK_EXECUTION__QUEUES__INPUT__URL = module.input_queue[0].queue_url
AK_EXECUTION__QUEUES__OUTPUT__URL = module.output_queue[0].queue_url
AK_EXECUTION__RESPONSE_STORE__DYNAMODB__TABLE_NAME = aws_dynamodb_table.response_store[0].name
} : {}
) : {
name = k
Expand Down
25 changes: 25 additions & 0 deletions ak-deployment/ak-aws/containerized/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,28 @@ output "private_subnet_ids" {
description = "Private subnet IDs used for the deployment"
value = local.subnet_ids
}

# SQS Input Queue URL.
# Resolves to null when var.enable_queue_mode is false; otherwise reads the
# first instance of module.input_queue.
# NOTE(review): module.input_queue is declared outside this file — presumably
# with a count gated on enable_queue_mode; confirm the [0] index stays valid.
output "input_queue_url" {
  description = "URL of the SQS Input Queue (queue mode only)"
  value = (
    var.enable_queue_mode
    ? module.input_queue[0].queue_url
    : null
  )
}

# SQS Output Queue URL.
# Resolves to null when var.enable_queue_mode is false; otherwise reads the
# first instance of module.output_queue.
# NOTE(review): module.output_queue is declared outside this file — presumably
# with a count gated on enable_queue_mode; confirm the [0] index stays valid.
output "output_queue_url" {
  description = "URL of the SQS Output Queue (queue mode only)"
  value = (
    var.enable_queue_mode
    ? module.output_queue[0].queue_url
    : null
  )
}

# Name of the DynamoDB Response Store table.
# Resolves to null when var.enable_queue_mode is false; otherwise reads the
# first instance of aws_dynamodb_table.response_store.
# NOTE(review): the table resource is declared outside this file — presumably
# with a count gated on enable_queue_mode; confirm the [0] index stays valid.
output "response_store_table_name" {
  description = "DynamoDB Response Store table name (queue mode only)"
  value = (
    var.enable_queue_mode
    ? aws_dynamodb_table.response_store[0].name
    : null
  )
}

# Name of the Agent Runner ECS service.
# Resolves to null when var.enable_queue_mode is false; otherwise reads the
# first instance of aws_ecs_service.agent_runner.
# NOTE(review): the service resource is declared outside this file — presumably
# with a count gated on enable_queue_mode; confirm the [0] index stays valid.
output "agent_runner_service_name" {
  description = "ECS Agent Runner service name (queue mode only)"
  value = (
    var.enable_queue_mode
    ? aws_ecs_service.agent_runner[0].name
    : null
  )
}

# Docker image URI of the REST Service container, taken from the first
# instance of module.docker_image (the same reference ecs.tf uses for the
# container's `image`).
# NOTE(review): this output is unconditional — it assumes module.docker_image
# always has at least one instance; confirm against the module's count.
output "rest_service_image_uri" {
  description = "Docker image URI used by the REST Service ECS task"
  value       = module.docker_image[0].docker_image_uri
}
Loading