Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 73 additions & 2 deletions crates/cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,12 @@ pub(crate) struct ServerArgs {
/// Generic plugin configuration JSON for process-level gateway plugin activation.
#[arg(long, env = "NEMO_RELAY_PLUGIN_CONFIG")]
pub(crate) plugin_config: Option<String>,
/// Maximum accepted coding-agent hook payload size, in bytes.
#[arg(long, env = "NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES")]
pub(crate) max_hook_payload_bytes: Option<usize>,
/// Maximum accepted provider passthrough request body size, in bytes.
#[arg(long, env = "NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES")]
pub(crate) max_passthrough_body_bytes: Option<usize>,
}

impl ServerArgs {
Expand All @@ -338,17 +344,24 @@ impl ServerArgs {
|| self.openai_base_url.is_some()
|| self.anthropic_base_url.is_some()
|| self.plugin_config.is_some()
|| self.max_hook_payload_bytes.is_some()
|| self.max_passthrough_body_bytes.is_some()
|| self.config.is_some()
}
}

/// Default limit for a coding-agent hook payload: 20 MiB, expressed in bytes.
pub(crate) const DEFAULT_MAX_HOOK_PAYLOAD_BYTES: usize = 20 * 1024 * 1024;
/// Default limit for a provider passthrough request body: 100 MiB, expressed in bytes.
pub(crate) const DEFAULT_MAX_PASSTHROUGH_BODY_BYTES: usize = 100 * 1024 * 1024;

/// Resolved gateway settings. Values start from the `Default` impl and are then overwritten by
/// the file, environment, and CLI override layers.
#[derive(Debug, Clone)]
pub(crate) struct GatewayConfig {
/// Socket address the gateway binds to.
pub(crate) bind: SocketAddr,
/// Base URL used for OpenAI-shaped gateway routes.
pub(crate) openai_base_url: String,
/// Base URL used for Anthropic-shaped gateway routes.
pub(crate) anthropic_base_url: String,
/// Optional JSON metadata. NOTE(review): its consumers are not visible here — confirm usage.
pub(crate) metadata: Option<Value>,
/// Optional plugin configuration JSON.
pub(crate) plugin_config: Option<Value>,
/// Maximum accepted coding-agent hook payload size, in bytes.
pub(crate) max_hook_payload_bytes: usize,
/// Maximum accepted provider passthrough request body size, in bytes.
pub(crate) max_passthrough_body_bytes: usize,
}

#[derive(Debug, Clone, Args)]
Expand Down Expand Up @@ -506,11 +519,18 @@ impl Default for CursorAgentConfig {
// `PluginConfig` activation path.
#[derive(Debug, Clone, Default, Deserialize)]
struct FileConfig {
// Optional `gateway` section: request body size limits.
gateway: Option<FileGatewayConfig>,
// Optional `upstream` section: provider base URLs.
upstream: Option<FileUpstreamConfig>,
// Optional `plugins` section, applied to the gateway configuration.
plugins: Option<FilePluginsConfig>,
// Optional `agents` section, applied to the per-agent configuration.
agents: Option<FileAgentsConfig>,
}

// File-level `gateway` section. Both limits are optional; an absent key leaves the value from the
// earlier configuration layer in place.
#[derive(Debug, Clone, Default, Deserialize)]
struct FileGatewayConfig {
// Limit for coding-agent hook payloads, in bytes; rejected when zero.
max_hook_payload_bytes: Option<usize>,
// Limit for provider passthrough request bodies, in bytes; rejected when zero.
max_passthrough_body_bytes: Option<usize>,
}

#[derive(Debug, Clone, Default, Deserialize)]
struct FileUpstreamConfig {
openai_base_url: Option<String>,
Expand Down Expand Up @@ -559,6 +579,8 @@ impl Default for GatewayConfig {
anthropic_base_url: "https://api.anthropic.com".into(),
metadata: None,
plugin_config: None,
max_hook_payload_bytes: DEFAULT_MAX_HOOK_PAYLOAD_BYTES,
max_passthrough_body_bytes: DEFAULT_MAX_PASSTHROUGH_BODY_BYTES,
}
}
}
Expand Down Expand Up @@ -655,6 +677,13 @@ fn apply_server_overrides(config: &mut GatewayConfig, args: &ServerArgs) -> Resu
if let Some(value) = &args.plugin_config {
apply_cli_plugin_config(config, value)?;
}
if let Some(value) = args.max_hook_payload_bytes {
config.max_hook_payload_bytes = validate_body_limit("max hook payload bytes", value)?;
}
if let Some(value) = args.max_passthrough_body_bytes {
config.max_passthrough_body_bytes =
validate_body_limit("max passthrough body bytes", value)?;
}
Ok(())
}

Expand Down Expand Up @@ -709,7 +738,7 @@ fn load_shared_config(explicit: Option<&PathBuf>) -> Result<ResolvedConfig, CliE
config_toml_plugin_sources.first(),
plugin_toml,
)?;
apply_env_config(&mut resolved.gateway);
apply_env_config(&mut resolved.gateway)?;
Ok(resolved)
}

Expand Down Expand Up @@ -812,12 +841,31 @@ fn apply_file_config(resolved: &mut ResolvedConfig, value: toml::Value) -> Resul
let config: FileConfig = value.try_into().map_err(|error| {
CliError::Config(format!("invalid gateway configuration shape: {error}"))
})?;
apply_file_gateway_config(&mut resolved.gateway, config.gateway)?;
apply_file_upstream_config(&mut resolved.gateway, config.upstream);
apply_file_plugins_config(&mut resolved.gateway, config.plugins);
apply_file_agents_config(&mut resolved.agents, config.agents);
Ok(())
}

// Copies the body-size limits from the file's `gateway` section onto the gateway configuration.
// Each limit that is present is validated before it is stored; the first invalid limit aborts
// with a configuration error. Limits that are absent leave the current value untouched.
fn apply_file_gateway_config(
    gateway: &mut GatewayConfig,
    config: Option<FileGatewayConfig>,
) -> Result<(), CliError> {
    // A missing section behaves exactly like a section with neither limit set.
    let section = config.unwrap_or_default();

    if let Some(bytes) = section.max_hook_payload_bytes {
        let checked = validate_body_limit("gateway.max_hook_payload_bytes", bytes)?;
        gateway.max_hook_payload_bytes = checked;
    }
    if let Some(bytes) = section.max_passthrough_body_bytes {
        let checked = validate_body_limit("gateway.max_passthrough_body_bytes", bytes)?;
        gateway.max_passthrough_body_bytes = checked;
    }
    Ok(())
}

// Applies upstream LLM provider URLs. These are the bases for OpenAI- and Anthropic-shaped
// gateway routes; transparent `run` mode can still override them per invocation.
fn apply_file_upstream_config(gateway: &mut GatewayConfig, upstream: Option<FileUpstreamConfig>) {
Expand Down Expand Up @@ -923,7 +971,7 @@ fn apply_file_agents_config(agents: &mut AgentConfigs, file_agents: Option<FileA

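// Applies environment variables after file configuration. Invalid bind values are ignored here to
// preserve existing startup behavior, while string values replace earlier layers when present.
// Body-limit variables are stricter: a value that is not a non-zero byte count is returned as a
// configuration error instead of being ignored.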
fn apply_env_config(config: &mut GatewayConfig) {
fn apply_env_config(config: &mut GatewayConfig) -> Result<(), CliError> {
if let Ok(value) = std::env::var("NEMO_RELAY_GATEWAY_BIND")
&& let Ok(value) = value.parse()
{
Expand All @@ -935,6 +983,29 @@ fn apply_env_config(config: &mut GatewayConfig) {
if let Ok(value) = std::env::var("NEMO_RELAY_ANTHROPIC_BASE_URL") {
config.anthropic_base_url = value;
}
if let Ok(value) = std::env::var("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES") {
config.max_hook_payload_bytes =
parse_env_body_limit("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES", &value)?;
}
if let Ok(value) = std::env::var("NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES") {
config.max_passthrough_body_bytes =
parse_env_body_limit("NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES", &value)?;
}
Ok(())
}

fn parse_env_body_limit(name: &str, raw: &str) -> Result<usize, CliError> {
let value = raw.parse::<usize>().map_err(|error| {
CliError::Config(format!("{name} must be a positive byte count: {error}"))
})?;
validate_body_limit(name, value)
}

// Accepts any non-zero byte limit and hands it back unchanged. A zero limit is reported as a
// configuration error that names the offending setting via `name`.
fn validate_body_limit(name: &str, value: usize) -> Result<usize, CliError> {
    match value {
        0 => Err(CliError::Config(format!("{name} must be greater than 0"))),
        bytes => Ok(bytes),
    }
}

// Recursively merges TOML tables and replaces scalar/array values from the higher-priority side.
Expand Down
3 changes: 3 additions & 0 deletions crates/cli/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ pub(crate) enum CliError {
GuardrailRejected(String),
#[error("invalid hook payload: {0}")]
InvalidPayload(String),
#[error("payload too large: {0}")]
PayloadTooLarge(String),
#[error("gateway upstream error: {0}")]
Upstream(#[from] reqwest::Error),
#[error("http error: {0}")]
Expand Down Expand Up @@ -50,6 +52,7 @@ impl IntoResponse for CliError {
let guardrail_reason = self.guardrail_rejection_reason().map(ToOwned::to_owned);
let status = match (guardrail_reason.is_some(), self) {
(true, _) => StatusCode::FORBIDDEN,
(false, Self::PayloadTooLarge(_)) => StatusCode::PAYLOAD_TOO_LARGE,
(false, Self::InvalidPayload(_)) => StatusCode::BAD_REQUEST,
(false, Self::Upstream(_)) => StatusCode::BAD_GATEWAY,
(
Expand Down
21 changes: 17 additions & 4 deletions crates/cli/src/gateway.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use std::error::Error;
use std::sync::{Arc, Mutex};

use async_stream::stream;
use axum::body::{Body, Bytes};
use axum::extract::State;
use axum::http::{HeaderMap, HeaderName, HeaderValue, Method, Request, Response, StatusCode};
use futures_util::StreamExt;
use http_body_util::LengthLimitError;
use nemo_relay::api::llm::{
LlmCallExecuteParams, LlmRequest, LlmStreamCallExecuteParams, llm_call_execute,
llm_stream_call_execute,
Expand All @@ -29,8 +31,6 @@ use crate::error::CliError;
use crate::server::AppState;
use crate::session::{GatewayCallPrep, LlmGatewayStart, SessionManager};

const MAX_BODY_BYTES: usize = 100 * 1024 * 1024;

/// Proxies supported LLM API requests through NeMo Relay's managed execution pipeline.
///
/// The gateway buffers the inbound body once, opens a managed LLM call against the resolved
Expand Down Expand Up @@ -79,9 +79,9 @@ async fn prepare_gateway_request(
let provider = ProviderRoute::from_path(parts.uri.path()).ok_or_else(|| {
CliError::InvalidPayload(format!("unsupported gateway path {}", parts.uri.path()))
})?;
let body_bytes = axum::body::to_bytes(body, MAX_BODY_BYTES)
let body_bytes = axum::body::to_bytes(body, config.max_passthrough_body_bytes)
.await
.map_err(|error| CliError::InvalidPayload(error.to_string()))?;
.map_err(passthrough_body_error)?;
let request_json = serde_json::from_slice::<Value>(&body_bytes).unwrap_or(Value::Null);
let path_and_query = parts
.uri
Expand All @@ -106,6 +106,19 @@ async fn prepare_gateway_request(
})
}

fn passthrough_body_error(error: axum::Error) -> CliError {
if error.source().is_some_and(|source| {
source.is::<LengthLimitError>()
|| source
.source()
.is_some_and(|source| source.is::<LengthLimitError>())
}) {
CliError::PayloadTooLarge(error.to_string())
} else {
CliError::InvalidPayload(error.to_string())
}
}

// Builds the [`LlmGatewayStart`] payload from a prepared request. Identifier resolution is shared
// across streaming and non-streaming paths so correlation behavior is consistent for every route.
// Provider-specific fallbacks are resolved here, before request execution leaves the gateway path,
Expand Down
8 changes: 8 additions & 0 deletions crates/cli/src/launcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,14 @@ impl PreparedRun {
"anthropic_base_url = {}",
resolved.gateway.anthropic_base_url
);
println!(
"max_hook_payload_bytes = {}",
resolved.gateway.max_hook_payload_bytes
);
println!(
"max_passthrough_body_bytes = {}",
resolved.gateway.max_passthrough_body_bytes
);
let destinations = exporter_destinations(&resolved.gateway);
if destinations.is_empty() {
println!("exporters = not_configured");
Expand Down
25 changes: 20 additions & 5 deletions crates/cli/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use axum::extract::State;
use axum::extract::rejection::JsonRejection;
use axum::extract::{DefaultBodyLimit, State};
use axum::http::HeaderMap;
use axum::routing::{get, post};
use axum::{Json, Router};
Expand Down Expand Up @@ -144,6 +145,7 @@ impl AppState {
}

fn router_with_state(state: AppState) -> Router {
let max_hook_payload_bytes = state.config.max_hook_payload_bytes;
Router::new()
.route("/healthz", get(healthz))
.route("/hooks/codex", post(codex_hook))
Expand All @@ -158,6 +160,7 @@ fn router_with_state(state: AppState) -> Router {
.route("/v1/messages", post(gateway::passthrough))
.route("/v1/messages/count_tokens", post(gateway::passthrough))
.route("/v1/models", get(gateway::models))
.layer(DefaultBodyLimit::max(max_hook_payload_bytes))
.with_state(state)
}

Expand Down Expand Up @@ -240,9 +243,10 @@ impl Drop for PluginActivation {
async fn codex_hook(
State(state): State<AppState>,
headers: HeaderMap,
Json(payload): Json<Value>,
payload: Result<Json<Value>, JsonRejection>,
) -> Result<Json<Value>, CliError> {
state.touch();
let Json(payload) = payload.map_err(hook_payload_rejection)?;
let outcome = codex::adapt(payload, &headers);
state
.sessions
Expand All @@ -256,9 +260,10 @@ async fn codex_hook(
async fn claude_code_hook(
State(state): State<AppState>,
headers: HeaderMap,
Json(payload): Json<Value>,
payload: Result<Json<Value>, JsonRejection>,
) -> Result<Json<Value>, CliError> {
state.touch();
let Json(payload) = payload.map_err(hook_payload_rejection)?;
let outcome = claude_code::adapt(payload, &headers);
state
.sessions
Expand All @@ -272,9 +277,10 @@ async fn claude_code_hook(
async fn cursor_hook(
State(state): State<AppState>,
headers: HeaderMap,
Json(payload): Json<Value>,
payload: Result<Json<Value>, JsonRejection>,
) -> Result<Json<Value>, CliError> {
state.touch();
let Json(payload) = payload.map_err(hook_payload_rejection)?;
let outcome = cursor::adapt(payload, &headers);
state
.sessions
Expand All @@ -288,9 +294,10 @@ async fn cursor_hook(
async fn hermes_hook(
State(state): State<AppState>,
headers: HeaderMap,
Json(payload): Json<Value>,
payload: Result<Json<Value>, JsonRejection>,
) -> Result<Json<Value>, CliError> {
state.touch();
let Json(payload) = payload.map_err(hook_payload_rejection)?;
let outcome = hermes::adapt(payload, &headers);
state
.sessions
Expand All @@ -299,6 +306,14 @@ async fn hermes_hook(
Ok(Json(outcome.response))
}

// Converts a JSON extractor rejection into a `CliError`. A rejection whose status is
// 413 Payload Too Large maps to `PayloadTooLarge`; any other rejection maps to `InvalidPayload`.
// The rejection's display text is carried as the message in both cases.
fn hook_payload_rejection(rejection: JsonRejection) -> CliError {
    let too_large = rejection.status() == axum::http::StatusCode::PAYLOAD_TOO_LARGE;
    let detail = rejection.to_string();
    if too_large {
        CliError::PayloadTooLarge(detail)
    } else {
        CliError::InvalidPayload(detail)
    }
}

#[cfg(test)]
#[path = "../tests/coverage/server_tests.rs"]
mod tests;
29 changes: 29 additions & 0 deletions crates/cli/tests/cli_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,8 @@ command = "codex --full-auto"
.env("NEMO_RELAY_GATEWAY_BIND", "127.0.0.1:0")
.env("NEMO_RELAY_OPENAI_BASE_URL", "http://env-openai")
.env("NEMO_RELAY_ANTHROPIC_BASE_URL", "http://env-anthropic")
.env("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES", "444")
.env("NEMO_RELAY_MAX_PASSTHROUGH_BODY_BYTES", "555")
.args(["run", "--agent", "codex", "--dry-run"])
.output()
.unwrap();
Expand All @@ -612,11 +614,38 @@ command = "codex --full-auto"
let stdout = String::from_utf8_lossy(&output.stdout);
assert!(stdout.contains("openai_base_url = http://env-openai"));
assert!(stdout.contains("anthropic_base_url = http://env-anthropic"));
assert!(stdout.contains("max_hook_payload_bytes = 444"));
assert!(stdout.contains("max_passthrough_body_bytes = 555"));
assert!(!stdout.contains("atif_dir"));
assert!(!stdout.contains("openinference_endpoint"));
assert!(stdout.contains("argv = codex"));
}

// A zero hook payload limit supplied through the environment must make `run --dry-run` fail,
// with stderr naming the variable and stating the "greater than 0" requirement.
#[test]
fn cli_run_rejects_zero_body_limit_env() {
    // The config file is empty, so it sets no limits itself.
    let workdir = tempfile::tempdir().unwrap();
    let config_path = workdir.path().join("config.toml");
    std::fs::write(&config_path, "").unwrap();

    let mut command = Command::new(gateway_bin());
    command
        .env("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES", "0")
        .arg("--config")
        .arg(config_path.to_str().unwrap())
        .args(["run", "--agent", "codex", "--dry-run"]);
    let output = command.output().unwrap();

    assert!(!output.status.success());
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(stderr.contains("NEMO_RELAY_MAX_HOOK_PAYLOAD_BYTES"));
    assert!(stderr.contains("greater than 0"));
}

#[test]
fn cli_hook_forward_fails_open_without_gateway_url() {
let mut child = Command::new(gateway_bin())
Expand Down
Loading
Loading