diff --git a/crates/cli/src/config.rs b/crates/cli/src/config.rs index a3297efba..f52495e4b 100644 --- a/crates/cli/src/config.rs +++ b/crates/cli/src/config.rs @@ -325,6 +325,12 @@ pub(crate) struct ServerArgs { /// Generic plugin configuration JSON for process-level gateway plugin activation. #[arg(long, env = "NEMO_RELAY_PLUGIN_CONFIG")] pub(crate) plugin_config: Option, + /// Maximum accepted coding-agent hook payload size, in bytes. + #[arg(long, env = "NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES")] + pub(crate) max_hook_payload_bytes: Option, + /// Maximum accepted provider passthrough request body size, in bytes. + #[arg(long, env = "NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES")] + pub(crate) max_passthrough_body_bytes: Option, } impl ServerArgs { @@ -338,10 +344,15 @@ impl ServerArgs { || self.openai_base_url.is_some() || self.anthropic_base_url.is_some() || self.plugin_config.is_some() + || self.max_hook_payload_bytes.is_some() + || self.max_passthrough_body_bytes.is_some() || self.config.is_some() } } +pub(crate) const DEFAULT_MAX_HOOK_PAYLOAD_BYTES: usize = 20 * 1024 * 1024; +pub(crate) const DEFAULT_MAX_PASSTHROUGH_BODY_BYTES: usize = 100 * 1024 * 1024; + #[derive(Debug, Clone)] pub(crate) struct GatewayConfig { pub(crate) bind: SocketAddr, @@ -349,6 +360,8 @@ pub(crate) struct GatewayConfig { pub(crate) anthropic_base_url: String, pub(crate) metadata: Option, pub(crate) plugin_config: Option, + pub(crate) max_hook_payload_bytes: usize, + pub(crate) max_passthrough_body_bytes: usize, } #[derive(Debug, Clone, Args)] @@ -506,11 +519,18 @@ impl Default for CursorAgentConfig { // `PluginConfig` activation path. 
#[derive(Debug, Clone, Default, Deserialize)] struct FileConfig { + gateway: Option, upstream: Option, plugins: Option, agents: Option, } +#[derive(Debug, Clone, Default, Deserialize)] +struct FileGatewayConfig { + max_hook_payload_bytes: Option, + max_passthrough_body_bytes: Option, +} + #[derive(Debug, Clone, Default, Deserialize)] struct FileUpstreamConfig { openai_base_url: Option, @@ -559,6 +579,8 @@ impl Default for GatewayConfig { anthropic_base_url: "https://api.anthropic.com".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, } } } @@ -655,6 +677,13 @@ fn apply_server_overrides(config: &mut GatewayConfig, args: &ServerArgs) -> Resu if let Some(value) = &args.plugin_config { apply_cli_plugin_config(config, value)?; } + if let Some(value) = args.max_hook_payload_bytes { + config.max_hook_payload_bytes = validate_body_limit("max hook payload bytes", value)?; + } + if let Some(value) = args.max_passthrough_body_bytes { + config.max_passthrough_body_bytes = + validate_body_limit("max passthrough body bytes", value)?; + } Ok(()) } @@ -709,7 +738,7 @@ fn load_shared_config(explicit: Option<&PathBuf>) -> Result Resul let config: FileConfig = value.try_into().map_err(|error| { CliError::Config(format!("invalid gateway configuration shape: {error}")) })?; + apply_file_gateway_config(&mut resolved.gateway, config.gateway)?; apply_file_upstream_config(&mut resolved.gateway, config.upstream); apply_file_plugins_config(&mut resolved.gateway, config.plugins); apply_file_agents_config(&mut resolved.agents, config.agents); Ok(()) } +fn apply_file_gateway_config( + gateway: &mut GatewayConfig, + config: Option, +) -> Result<(), CliError> { + let Some(config) = config else { + return Ok(()); + }; + if let Some(value) = config.max_hook_payload_bytes { + gateway.max_hook_payload_bytes = + validate_body_limit("gateway.max_hook_payload_bytes", value)?; + } + if 
let Some(value) = config.max_passthrough_body_bytes { + gateway.max_passthrough_body_bytes = + validate_body_limit("gateway.max_passthrough_body_bytes", value)?; + } + Ok(()) +} + // Applies upstream LLM provider URLs. These are the bases for OpenAI- and Anthropic-shaped // gateway routes; transparent `run` mode can still override them per invocation. fn apply_file_upstream_config(gateway: &mut GatewayConfig, upstream: Option) { @@ -923,7 +971,7 @@ fn apply_file_agents_config(agents: &mut AgentConfigs, file_agents: Option Result<(), CliError> { if let Ok(value) = std::env::var("NEMO_RELAY_GATEWAY_BIND") && let Ok(value) = value.parse() { @@ -935,6 +983,29 @@ fn apply_env_config(config: &mut GatewayConfig) { if let Ok(value) = std::env::var("NEMO_RELAY_ANTHROPIC_BASE_URL") { config.anthropic_base_url = value; } + if let Ok(value) = std::env::var("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES") { + config.max_hook_payload_bytes = + parse_env_body_limit("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES", &value)?; + } + if let Ok(value) = std::env::var("NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES") { + config.max_passthrough_body_bytes = + parse_env_body_limit("NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES", &value)?; + } + Ok(()) +} + +fn parse_env_body_limit(name: &str, raw: &str) -> Result { + let value = raw.parse::().map_err(|error| { + CliError::Config(format!("{name} must be a positive byte count: {error}")) + })?; + validate_body_limit(name, value) +} + +fn validate_body_limit(name: &str, value: usize) -> Result { + if value == 0 { + return Err(CliError::Config(format!("{name} must be greater than 0"))); + } + Ok(value) } // Recursively merges TOML tables and replaces scalar/array values from the higher-priority side. 
diff --git a/crates/cli/src/error.rs b/crates/cli/src/error.rs index fdaa9053e..3c9796781 100644 --- a/crates/cli/src/error.rs +++ b/crates/cli/src/error.rs @@ -13,6 +13,8 @@ pub(crate) enum CliError { GuardrailRejected(String), #[error("invalid hook payload: {0}")] InvalidPayload(String), + #[error("payload too large: {0}")] + PayloadTooLarge(String), #[error("gateway upstream error: {0}")] Upstream(#[from] reqwest::Error), #[error("http error: {0}")] @@ -50,6 +52,7 @@ impl IntoResponse for CliError { let guardrail_reason = self.guardrail_rejection_reason().map(ToOwned::to_owned); let status = match (guardrail_reason.is_some(), self) { (true, _) => StatusCode::FORBIDDEN, + (false, Self::PayloadTooLarge(_)) => StatusCode::PAYLOAD_TOO_LARGE, (false, Self::InvalidPayload(_)) => StatusCode::BAD_REQUEST, (false, Self::Upstream(_)) => StatusCode::BAD_GATEWAY, ( diff --git a/crates/cli/src/gateway.rs b/crates/cli/src/gateway.rs index 9562491ab..2d5f9b797 100644 --- a/crates/cli/src/gateway.rs +++ b/crates/cli/src/gateway.rs @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use std::error::Error; use std::sync::{Arc, Mutex}; use async_stream::stream; @@ -8,6 +9,7 @@ use axum::body::{Body, Bytes}; use axum::extract::State; use axum::http::{HeaderMap, HeaderName, HeaderValue, Method, Request, Response, StatusCode}; use futures_util::StreamExt; +use http_body_util::LengthLimitError; use nemo_relay::api::llm::{ LlmCallExecuteParams, LlmRequest, LlmStreamCallExecuteParams, llm_call_execute, llm_stream_call_execute, @@ -29,8 +31,6 @@ use crate::error::CliError; use crate::server::AppState; use crate::session::{GatewayCallPrep, LlmGatewayStart, SessionManager}; -const MAX_BODY_BYTES: usize = 100 * 1024 * 1024; - /// Proxies supported LLM API requests through NeMo Relay's managed execution pipeline. 
/// /// The gateway buffers the inbound body once, opens a managed LLM call against the resolved @@ -79,9 +79,9 @@ async fn prepare_gateway_request( let provider = ProviderRoute::from_path(parts.uri.path()).ok_or_else(|| { CliError::InvalidPayload(format!("unsupported gateway path {}", parts.uri.path())) })?; - let body_bytes = axum::body::to_bytes(body, MAX_BODY_BYTES) + let body_bytes = axum::body::to_bytes(body, config.max_passthrough_body_bytes) .await - .map_err(|error| CliError::InvalidPayload(error.to_string()))?; + .map_err(passthrough_body_error)?; let request_json = serde_json::from_slice::(&body_bytes).unwrap_or(Value::Null); let path_and_query = parts .uri @@ -106,6 +106,19 @@ async fn prepare_gateway_request( }) } +fn passthrough_body_error(error: axum::Error) -> CliError { + if error.source().is_some_and(|source| { + source.is::() + || source + .source() + .is_some_and(|source| source.is::()) + }) { + CliError::PayloadTooLarge(error.to_string()) + } else { + CliError::InvalidPayload(error.to_string()) + } +} + // Builds the [`LlmGatewayStart`] payload from a prepared request. Identifier resolution is shared // across streaming and non-streaming paths so correlation behavior is consistent for every route. 
// Provider-specific fallbacks are resolved here, before request execution leaves the gateway path, diff --git a/crates/cli/src/launcher.rs b/crates/cli/src/launcher.rs index acf5d268a..9961f3624 100644 --- a/crates/cli/src/launcher.rs +++ b/crates/cli/src/launcher.rs @@ -607,6 +607,14 @@ impl PreparedRun { "anthropic_base_url = {}", resolved.gateway.anthropic_base_url ); + println!( + "max_hook_payload_bytes = {}", + resolved.gateway.max_hook_payload_bytes + ); + println!( + "max_passthrough_body_bytes = {}", + resolved.gateway.max_passthrough_body_bytes + ); let destinations = exporter_destinations(&resolved.gateway); if destinations.is_empty() { println!("exporters = not_configured"); diff --git a/crates/cli/src/server.rs b/crates/cli/src/server.rs index 690cc4ad9..4329a4aec 100644 --- a/crates/cli/src/server.rs +++ b/crates/cli/src/server.rs @@ -4,7 +4,8 @@ use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; -use axum::extract::State; +use axum::extract::rejection::JsonRejection; +use axum::extract::{DefaultBodyLimit, State}; use axum::http::HeaderMap; use axum::routing::{get, post}; use axum::{Json, Router}; @@ -144,6 +145,7 @@ impl AppState { } fn router_with_state(state: AppState) -> Router { + let max_hook_payload_bytes = state.config.max_hook_payload_bytes; Router::new() .route("/healthz", get(healthz)) .route("/hooks/codex", post(codex_hook)) @@ -158,6 +160,7 @@ fn router_with_state(state: AppState) -> Router { .route("/v1/messages", post(gateway::passthrough)) .route("/v1/messages/count_tokens", post(gateway::passthrough)) .route("/v1/models", get(gateway::models)) + .layer(DefaultBodyLimit::max(max_hook_payload_bytes)) .with_state(state) } @@ -240,9 +243,10 @@ impl Drop for PluginActivation { async fn codex_hook( State(state): State, headers: HeaderMap, - Json(payload): Json, + payload: Result, JsonRejection>, ) -> Result, CliError> { state.touch(); + let Json(payload) = payload.map_err(hook_payload_rejection)?; let outcome = 
codex::adapt(payload, &headers); state .sessions @@ -256,9 +260,10 @@ async fn codex_hook( async fn claude_code_hook( State(state): State, headers: HeaderMap, - Json(payload): Json, + payload: Result, JsonRejection>, ) -> Result, CliError> { state.touch(); + let Json(payload) = payload.map_err(hook_payload_rejection)?; let outcome = claude_code::adapt(payload, &headers); state .sessions @@ -272,9 +277,10 @@ async fn claude_code_hook( async fn cursor_hook( State(state): State, headers: HeaderMap, - Json(payload): Json, + payload: Result, JsonRejection>, ) -> Result, CliError> { state.touch(); + let Json(payload) = payload.map_err(hook_payload_rejection)?; let outcome = cursor::adapt(payload, &headers); state .sessions @@ -288,9 +294,10 @@ async fn cursor_hook( async fn hermes_hook( State(state): State, headers: HeaderMap, - Json(payload): Json, + payload: Result, JsonRejection>, ) -> Result, CliError> { state.touch(); + let Json(payload) = payload.map_err(hook_payload_rejection)?; let outcome = hermes::adapt(payload, &headers); state .sessions @@ -299,6 +306,14 @@ async fn hermes_hook( Ok(Json(outcome.response)) } +fn hook_payload_rejection(rejection: JsonRejection) -> CliError { + if rejection.status() == axum::http::StatusCode::PAYLOAD_TOO_LARGE { + CliError::PayloadTooLarge(rejection.to_string()) + } else { + CliError::InvalidPayload(rejection.to_string()) + } +} + #[cfg(test)] #[path = "../tests/coverage/server_tests.rs"] mod tests; diff --git a/crates/cli/tests/cli_tests.rs b/crates/cli/tests/cli_tests.rs index 354371fa3..65917236b 100644 --- a/crates/cli/tests/cli_tests.rs +++ b/crates/cli/tests/cli_tests.rs @@ -604,6 +604,8 @@ command = "codex --full-auto" .env("NEMO_RELAY_GATEWAY_BIND", "127.0.0.1:0") .env("NEMO_RELAY_OPENAI_BASE_URL", "http://env-openai") .env("NEMO_RELAY_ANTHROPIC_BASE_URL", "http://env-anthropic") + .env("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES", "444") + .env("NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES", "555") .args(["run", "--agent", "codex", 
"--dry-run"]) .output() .unwrap(); @@ -612,11 +614,38 @@ command = "codex --full-auto" let stdout = String::from_utf8_lossy(&output.stdout); assert!(stdout.contains("openai_base_url = http://env-openai")); assert!(stdout.contains("anthropic_base_url = http://env-anthropic")); + assert!(stdout.contains("max_hook_payload_bytes = 444")); + assert!(stdout.contains("max_passthrough_body_bytes = 555")); assert!(!stdout.contains("atif_dir")); assert!(!stdout.contains("openinference_endpoint")); assert!(stdout.contains("argv = codex")); } +#[test] +fn cli_run_rejects_zero_body_limit_env() { + let temp = tempfile::tempdir().unwrap(); + let config = temp.path().join("config.toml"); + std::fs::write(&config, "").unwrap(); + + let output = Command::new(gateway_bin()) + .env("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES", "0") + .args([ + "--config", + config.to_str().unwrap(), + "run", + "--agent", + "codex", + "--dry-run", + ]) + .output() + .unwrap(); + + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES")); + assert!(stderr.contains("greater than 0")); +} + #[test] fn cli_hook_forward_fails_open_without_gateway_url() { let mut child = Command::new(gateway_bin()) diff --git a/crates/cli/tests/coverage/config_tests.rs b/crates/cli/tests/coverage/config_tests.rs index e6ea8dda3..be80bb601 100644 --- a/crates/cli/tests/coverage/config_tests.rs +++ b/crates/cli/tests/coverage/config_tests.rs @@ -13,6 +13,8 @@ fn config() -> GatewayConfig { anthropic_base_url: "http://anthropic".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, } } @@ -100,6 +102,10 @@ fn explicit_toml_config_maps_supported_sections() { openai_base_url = "http://openai" anthropic_base_url = "http://anthropic" +[gateway] +max_hook_payload_bytes = 12345 
+max_passthrough_body_bytes = 67890 + [plugins] config = { components = [] } @@ -135,6 +141,8 @@ command = "hermes --yolo chat" assert_eq!(resolved.gateway.bind.to_string(), "127.0.0.1:0"); assert_eq!(resolved.gateway.openai_base_url, "http://openai"); assert_eq!(resolved.gateway.anthropic_base_url, "http://anthropic"); + assert_eq!(resolved.gateway.max_hook_payload_bytes, 12345); + assert_eq!(resolved.gateway.max_passthrough_body_bytes, 67890); assert_eq!(resolved.gateway.metadata, None); assert_eq!( resolved.gateway.plugin_config, @@ -710,6 +718,8 @@ fn server_resolution_applies_all_server_overrides() { openai_base_url: Some("http://cli-openai".into()), anthropic_base_url: Some("http://cli-anthropic".into()), plugin_config: Some(r#"{"version":1,"components":[]}"#.into()), + max_hook_payload_bytes: Some(222), + max_passthrough_body_bytes: Some(333), }; let resolved = resolve_server_config(&args).unwrap(); @@ -717,6 +727,8 @@ fn server_resolution_applies_all_server_overrides() { assert_eq!(resolved.gateway.bind.to_string(), "127.0.0.1:0"); assert_eq!(resolved.gateway.openai_base_url, "http://cli-openai"); assert_eq!(resolved.gateway.anthropic_base_url, "http://cli-anthropic"); + assert_eq!(resolved.gateway.max_hook_payload_bytes, 222); + assert_eq!(resolved.gateway.max_passthrough_body_bytes, 333); assert_eq!( resolved.gateway.plugin_config, Some(json!({ "version": 1, "components": [] })) @@ -724,6 +736,44 @@ fn server_resolution_applies_all_server_overrides() { assert!(args.requested_daemon_mode()); } +#[test] +fn gateway_body_limit_defaults_are_stable() { + let gateway = GatewayConfig::default(); + + assert_eq!( + gateway.max_hook_payload_bytes, + crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES + ); + assert_eq!( + gateway.max_passthrough_body_bytes, + crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES + ); +} + +#[test] +fn gateway_body_limit_file_values_must_be_nonzero() { + let temp = tempfile::tempdir().unwrap(); + let path = temp.path().join("config.toml"); + 
for (field, expected) in [ + ("max_hook_payload_bytes", "gateway.max_hook_payload_bytes"), + ( + "max_passthrough_body_bytes", + "gateway.max_passthrough_body_bytes", + ), + ] { + std::fs::write(&path, format!("[gateway]\n{field} = 0\n")).unwrap(); + let args = ServerArgs { + config: Some(path.clone()), + ..ServerArgs::default() + }; + + let error = resolve_server_config(&args).unwrap_err().to_string(); + + assert!(error.contains(expected)); + assert!(error.contains("greater than 0")); + } +} + #[test] fn run_resolution_applies_all_run_overrides() { let temp = tempfile::tempdir().unwrap(); diff --git a/crates/cli/tests/coverage/gateway_tests.rs b/crates/cli/tests/coverage/gateway_tests.rs index 221de911e..43b956fa6 100644 --- a/crates/cli/tests/coverage/gateway_tests.rs +++ b/crates/cli/tests/coverage/gateway_tests.rs @@ -111,6 +111,8 @@ fn provider_routes_preserve_path_query_and_choose_upstream() { anthropic_base_url: "http://anthropic/".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; assert_eq!( @@ -139,6 +141,8 @@ fn openai_upstream_url_accepts_origin_or_v1_base() { anthropic_base_url: "http://anthropic".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; assert_eq!( @@ -721,6 +725,8 @@ async fn passthrough_rejects_unsupported_provider_path_directly() { anthropic_base_url: "http://anthropic".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let state = AppState { config: config.clone(), @@ -748,6 +754,8 @@ async fn models_rejects_non_get_requests_directly() { anthropic_base_url: "http://anthropic".into(), metadata: 
None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let state = AppState { config: config.clone(), diff --git a/crates/cli/tests/coverage/server_tests.rs b/crates/cli/tests/coverage/server_tests.rs index b0ae57cf1..42e507b7f 100644 --- a/crates/cli/tests/coverage/server_tests.rs +++ b/crates/cli/tests/coverage/server_tests.rs @@ -163,6 +163,8 @@ fn test_config() -> GatewayConfig { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, } } @@ -188,6 +190,19 @@ fn find_scope_event<'a>( }) } +async fn assert_payload_too_large_response(response: axum::response::Response) { + assert_eq!(response.status(), StatusCode::PAYLOAD_TOO_LARGE); + let bytes = response.into_body().collect().await.unwrap().to_bytes(); + let body: Value = serde_json::from_slice(&bytes).unwrap(); + assert_eq!(body["error"]["type"], json!("nemo_relay_gateway_error")); + assert!( + body["error"]["message"] + .as_str() + .is_some_and(|message| message.contains("payload too large")), + "unexpected 413 body: {body}" + ); +} + #[tokio::test] async fn codex_hook_keeps_codex_response_shape() { let app = router(test_config()); @@ -214,6 +229,58 @@ async fn codex_hook_keeps_codex_response_shape() { assert_eq!(body, json!({})); } +#[tokio::test] +async fn hook_payload_above_axum_default_succeeds_with_relay_default_limit() { + let app = router(test_config()); + let response = app + .oneshot( + Request::builder() + .method("POST") + .uri("/hooks/codex") + .header("content-type", "application/json") + .body(Body::from( + json!({ + "session_id": "codex-large-hook", + "hook_event_name": "sessionStart", + "large": "x".repeat(2 * 1024 * 1024 + 1024) + }) + .to_string(), + )) + .unwrap(), + ) + .await + 
.unwrap(); + + assert_eq!(response.status(), StatusCode::OK); +} + +#[tokio::test] +async fn hook_payload_limit_returns_structured_413() { + let mut config = test_config(); + config.max_hook_payload_bytes = 128; + let app = router(config); + let response = app + .oneshot( + Request::builder() + .method("POST") + .uri("/hooks/codex") + .header("content-type", "application/json") + .body(Body::from( + json!({ + "session_id": "codex-too-large-hook", + "hook_event_name": "sessionStart", + "large": "x".repeat(1024) + }) + .to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + + assert_payload_too_large_response(response).await; +} + #[tokio::test] async fn healthz_returns_ok() { let app = router(test_config()); @@ -1662,6 +1729,10 @@ async fn gateway_errors_render_structured_json_responses() { let response = CliError::Config("bad config".into()).into_response(); assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR); + + let response = CliError::PayloadTooLarge("too much".into()).into_response(); + + assert_payload_too_large_response(response).await; } #[tokio::test] @@ -1976,6 +2047,34 @@ async fn gateway_returns_bad_gateway_when_upstream_is_unreachable() { assert_eq!(response.status(), StatusCode::BAD_GATEWAY); } +#[tokio::test] +async fn passthrough_body_limit_returns_structured_413() { + let upstream = spawn_upstream(false).await; + let mut config = test_config(); + config.openai_base_url = upstream.url(); + config.max_passthrough_body_bytes = 32; + let app = router(config); + let response = app + .oneshot( + Request::builder() + .method("POST") + .uri("/v1/responses") + .header("content-type", "application/json") + .body(Body::from( + json!({ + "model": "gpt-test", + "input": "x".repeat(1024) + }) + .to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + + assert_payload_too_large_response(response).await; +} + #[tokio::test] async fn models_route_forwards_get_requests() { let upstream = spawn_models_upstream().await; diff --git 
a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index aea67e1c0..cdab66c4c 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -595,6 +595,8 @@ async fn nests_agent_subagent_and_tool_lifecycle() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -1730,6 +1732,8 @@ async fn writes_atif_on_session_end_from_plugin_config() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let mut headers = HeaderMap::new(); @@ -1987,6 +1991,8 @@ async fn duplicate_agent_end_does_not_overwrite_atif_with_empty_session() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -2065,6 +2071,8 @@ async fn writes_hermes_api_hook_usage_to_atif_metrics() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -2146,6 +2154,8 @@ async fn writes_hermes_api_hook_reported_cost_to_atif_metrics() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + 
max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -3402,6 +3412,8 @@ async fn handles_out_of_order_subagent_and_tool_end_events() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -3477,6 +3489,8 @@ async fn out_of_order_started_subagent_end_does_not_leak_scope() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -3548,6 +3562,8 @@ async fn agent_end_closes_nested_active_subagents_lifo() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let headers = HeaderMap::new(); @@ -3603,6 +3619,8 @@ async fn llm_lifecycle_starts_implicit_gateway_session() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); let active = manager @@ -4085,6 +4103,8 @@ async fn llm_lifecycle_uses_single_active_hook_session_when_header_is_missing() anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: 
None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); manager @@ -4141,6 +4161,8 @@ async fn single_pending_llm_hint_claims_next_gateway_llm() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); manager @@ -4237,6 +4259,8 @@ async fn multiple_llm_hints_resolve_by_generation_id() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); manager @@ -4351,6 +4375,8 @@ async fn ambiguous_llm_hints_fall_back_to_agent_scope() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); manager @@ -4443,6 +4469,8 @@ async fn no_active_hint_reuses_last_llm_owner() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); manager @@ -6234,6 +6262,8 @@ fn session_test_config() -> GatewayConfig { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, 
} } @@ -6247,6 +6277,8 @@ async fn turn_ended_is_noop_without_active_turn_scope() { anthropic_base_url: "http://127.0.0.1".into(), metadata: None, plugin_config: None, + max_hook_payload_bytes: crate::config::DEFAULT_MAX_HOOK_PAYLOAD_BYTES, + max_passthrough_body_bytes: crate::config::DEFAULT_MAX_PASSTHROUGH_BODY_BYTES, }; let manager = SessionManager::new(config); manager diff --git a/docs/nemo-relay-cli/basic-usage.mdx b/docs/nemo-relay-cli/basic-usage.mdx index 847f0c159..30109ae34 100644 --- a/docs/nemo-relay-cli/basic-usage.mdx +++ b/docs/nemo-relay-cli/basic-usage.mdx @@ -135,6 +135,10 @@ Config file locations are: Example: ```toml +[gateway] +max_hook_payload_bytes = 20971520 +max_passthrough_body_bytes = 104857600 + [upstream] openai_base_url = "https://api.openai.com/v1" anthropic_base_url = "https://api.anthropic.com" @@ -261,6 +265,11 @@ Common environment variables for direct gateway server use are: - `NEMO_RELAY_GATEWAY_BIND` - `NEMO_RELAY_OPENAI_BASE_URL` - `NEMO_RELAY_ANTHROPIC_BASE_URL` +- `NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES` +- `NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES` + +The default hook payload limit is 20 MiB (`20971520`). The default provider passthrough body +limit is 100 MiB (`104857600`). Specify each limit as an integer number of bytes greater than 0. Plugin configuration controls process-level Observability exporters. Per-session configuration controls structured metadata on the top-level agent begin event