Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion lib/llm/src/protocols/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ pub(crate) trait OpenAIStopConditionsProvider {

fn get_stop(&self) -> Option<Vec<String>>;

/// Returns the stop token IDs carried by the request, if any.
///
/// The default implementation reports that the request has none by
/// returning `Ok(None)`. Implementors whose request type can carry stop
/// token IDs override this method; an `Err` signals that the supplied
/// value could not be interpreted. The caller propagates that error with
/// `?` and stores a successful value in `stop_token_ids_hidden`.
fn get_stop_token_ids(&self) -> Result<Option<Vec<TokenIdType>>> {
Ok(None)
}

fn nvext(&self) -> Option<&nvext::NvExt>;

/// Get ignore_eos from CommonExt if the type supports it.
Expand Down Expand Up @@ -180,6 +184,7 @@ impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
let max_tokens = self.get_max_tokens();
let min_tokens = self.get_min_tokens();
let stop = self.get_stop();
let stop_token_ids_hidden = self.get_stop_token_ids()?;
let max_thinking_tokens = self.get_max_thinking_tokens();

if let Some(stop) = &stop
Expand All @@ -195,7 +200,7 @@ impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
max_tokens,
min_tokens,
stop,
stop_token_ids_hidden: None,
stop_token_ids_hidden,
ignore_eos,
max_thinking_tokens,
})
Expand Down
15 changes: 15 additions & 0 deletions lib/llm/src/protocols/openai/chat_completions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use validator::Validate;

use crate::engines::ValidateRequest;
use crate::preprocessor::media::MediaDecoder;
use crate::types::TokenIdType;

use super::{
OpenAIOutputOptionsProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
Expand Down Expand Up @@ -307,6 +308,20 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
})
}

/// Reads the optional `stop_token_ids` entry from the request's
/// `unsupported_fields` map and decodes it as a list of token IDs.
///
/// # Returns
/// * `Ok(None)` when the field is absent or explicitly JSON `null`.
/// * `Ok(Some(ids))` when the field deserializes into `Vec<TokenIdType>`.
///
/// # Errors
/// Returns an error describing the expected shape when the value is present
/// but cannot be deserialized into `Vec<TokenIdType>`.
fn get_stop_token_ids(&self) -> anyhow::Result<Option<Vec<TokenIdType>>> {
    match self.unsupported_fields.get("stop_token_ids") {
        // Field not supplied at all.
        None => Ok(None),
        // An explicit `null` is treated the same as an omitted field.
        Some(raw) if raw.is_null() => Ok(None),
        // `from_value` consumes its argument, so decode from a copy of the stored value.
        Some(raw) => match serde_json::from_value::<Vec<TokenIdType>>(raw.clone()) {
            Ok(ids) => Ok(Some(ids)),
            Err(err) => Err(anyhow::anyhow!(
                "stop_token_ids must be an array of unsigned token IDs: {err}"
            )),
        },
    }
}

/// Returns a reference to the optional `NvExt` extension, if available.
fn nvext(&self) -> Option<&NvExt> {
self.nvext.as_ref()
Expand Down
3 changes: 3 additions & 0 deletions lib/llm/src/protocols/openai/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ const PASSTHROUGH_EXTRA_FIELDS: &[&str] = &[
// Opt-in for `nvext.prompt_logprobs` on the response. Aliased through
// to vLLM's `sampling_params.prompt_logprobs` in a follow-up.
"return_prompt_logprobs",
// Renderer-style token stops. The OpenAI schema has string stops only,
// but Prime-RL/verifiers renderers already produce token IDs.
"stop_token_ids",
];

/// Validates that no unsupported fields are present in the request
Expand Down
38 changes: 31 additions & 7 deletions lib/llm/tests/test_common_ext.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use dynamo_llm::protocols::{
common::StopConditionsProvider,
openai::{
chat_completions::NvCreateChatCompletionRequest,
common_ext::{CommonExt, CommonExtProvider},
completions::NvCreateCompletionRequest,
nvext::NvExt,
use dynamo_llm::{
engines::ValidateRequest,
protocols::{
common::StopConditionsProvider,
openai::{
chat_completions::NvCreateChatCompletionRequest,
common_ext::{CommonExt, CommonExtProvider},
completions::NvCreateCompletionRequest,
nvext::NvExt,
},
},
};

Expand Down Expand Up @@ -213,6 +216,27 @@ fn test_max_thinking_tokens_extraction() {
assert_eq!(stop_conditions_none.max_thinking_tokens, None);
}

/// A chat-completions request carrying a top-level `stop_token_ids` array must
/// pass validation and surface those IDs as `stop_token_ids_hidden` in the
/// extracted stop conditions.
#[test]
fn test_chat_completions_stop_token_ids_extraction() {
    // Request body with token-in data under `nvext` plus the top-level token stops.
    let payload = r#"{
"model": "test-model",
"messages": [{"role": "user", "content": "(token-in mode)"}],
"nvext": {
"token_data": [1, 2, 3]
},
"stop_token_ids": [151645, 151643]
}"#;

    let parsed = serde_json::from_str::<NvCreateChatCompletionRequest>(payload).unwrap();

    // The field must be accepted by request validation before extraction is checked.
    parsed.validate().unwrap();

    let extracted = parsed.extract_stop_conditions().unwrap();
    assert_eq!(
        extracted.stop_token_ids_hidden,
        Some(vec![151645, 151643])
    );
}

#[test]
fn test_chat_completions_no_common_values() {
// Test that when no common values are set, we get None
Expand Down
Loading