Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions crates/puffer-provider-openai/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ pub fn extract_chat_completions_visible_text(response: &OpenAIChatCompletionsRes
response::extract_chat_completions_visible_text(response)
}

/// Re-export of the reasoning-text sanitizer applied to
/// `reasoning_content` and `<think>` blocks before they round-trip into
/// the next request. Exposed so other crates (request serializers,
/// session-store writers) can apply the same filter on data that
/// arrived from somewhere other than `extract_chat_completions_reasoning`.
pub use response::sanitize_reasoning_text;

/// Extracts tool calls from a parsed OpenAI Responses API payload.
pub fn extract_responses_tool_calls(
response: &OpenAIResponsesResponse,
Expand Down
32 changes: 30 additions & 2 deletions crates/puffer-provider-openai/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ pub struct OpenAIChatChoiceMessage {
/// under `message.reasoning.summary` or `message.reasoning.content`).
/// 3. A `<think>…</think>` block inside `message.content` (the
/// open-source / DeepSeek-R1 distill convention).
///
/// The returned text is sanitized via [`sanitize_reasoning_text`] so a
/// stray NUL or C0 control byte in the vendor's chain-of-thought
/// doesn't round-trip into the next request and get rejected by the
/// vendor's own validator.
pub fn extract_chat_completions_reasoning(
response: &OpenAIChatCompletionsResponse,
) -> Option<String> {
Expand All @@ -119,7 +124,7 @@ pub fn extract_chat_completions_reasoning(
.map(str::trim)
.filter(|s| !s.is_empty())
{
return Some(value.to_string());
return Some(sanitize_reasoning_text(value));
}

// (3) <think>…</think> inside content. Catches DeepSeek-R1-style
Expand All @@ -128,13 +133,36 @@ pub fn extract_chat_completions_reasoning(
if let Some(content) = message.content.as_ref() {
let raw = content_to_text(content);
if let Some(thinking) = extract_think_block(&raw) {
return Some(thinking);
return Some(sanitize_reasoning_text(&thinking));
}
}

None
}

/// Removes ASCII control characters from `text` and returns the cleaned copy.
///
/// Dropped: every C0 control character (`U+0000`..=`U+001F`) except the
/// whitespace controls `\t`, `\n` and `\r`, plus DEL (`U+007F`).
/// Kept: `\t`, `\n`, `\r` and every character at or above `U+0020` other
/// than DEL (so non-ASCII text passes through unchanged).
///
/// Background: Kimi K2.6 has been observed emitting a stray `\x00` inside
/// its own `reasoning_content` chain-of-thought and then rejecting that
/// same string when it is replayed on the next turn with
/// `400 "reasoning_content at position N must be a valid UTF-8 string:
/// string contains \x00"`. Cleaning the text once, at extraction time,
/// means downstream consumers (request serializer, session_store, trace
/// emitter) do not each need their own guard.
pub fn sanitize_reasoning_text(text: &str) -> String {
    let mut cleaned = String::new();
    for ch in text.chars() {
        let keep = match ch {
            // Whitespace controls are legitimate formatting; keep them.
            '\t' | '\n' | '\r' => true,
            // Remaining C0 controls (including NUL) and DEL are dropped.
            '\u{0000}'..='\u{001f}' | '\u{007f}' => false,
            // Everything else is ordinary text.
            _ => true,
        };
        if keep {
            cleaned.push(ch);
        }
    }
    cleaned
}

/// Returns the visible-to-user portion of the assistant message,
/// stripped of any `<think>…</think>` block. Used by the agent loop
/// after `extract_chat_completions_reasoning` so the same content
Expand Down
35 changes: 34 additions & 1 deletion crates/puffer-provider-openai/tests/reasoning_extraction.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use puffer_provider_openai::{
extract_chat_completions_reasoning, extract_chat_completions_visible_text,
parse_chat_completions_response,
parse_chat_completions_response, sanitize_reasoning_text,
};

#[test]
Expand Down Expand Up @@ -66,3 +66,36 @@ fn empty_reasoning_content_returns_none() {
let parsed = parse_chat_completions_response(payload).unwrap();
assert_eq!(extract_chat_completions_reasoning(&parsed), None);
}

#[test]
fn strips_nul_byte_from_reasoning_content() {
    // Regression guard for the Kimi K2.6 behaviour: the model emitted a
    // stray \x00 in the middle of its reasoning and then answered HTTP 400
    // when that string was replayed. The extracted reasoning must have the
    // NUL removed while \t, \n and \r are left in place.
    let payload = "{\"id\":\"x\",\"object\":\"chat.completion\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"answer\",\"reasoning_content\":\"first line\\nsecond\\u0000third\\tfourth\"},\"finish_reason\":\"stop\"}]}";
    let response = parse_chat_completions_response(payload).unwrap();
    let got = extract_chat_completions_reasoning(&response).expect("reasoning");

    // The NUL itself must be gone...
    let has_nul = got.contains('\u{0000}');
    assert!(!has_nul, "NUL leaked: {got:?}");

    // ...while whitespace controls survive...
    let has_newline = got.contains('\n');
    assert!(has_newline, "newline got stripped: {got:?}");
    let has_tab = got.contains('\t');
    assert!(has_tab, "tab got stripped: {got:?}");

    // ...and the text on either side of the NUL is joined directly.
    let spliced = got.contains("secondthird");
    assert!(spliced, "NUL boundary not spliced: {got:?}");
}

#[test]
fn strips_control_bytes_from_think_block() {
    // Reasoning recovered from the `<think>…</think>` fallback path must be
    // sanitized too, so DeepSeek-R1 distill output that leaks a C0 byte
    // still round-trips cleanly.
    let payload = "{\"id\":\"x\",\"object\":\"chat.completion\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"<think>good\\u0000bad\\u0001end</think>visible\"},\"finish_reason\":\"stop\"}]}";
    let response = parse_chat_completions_response(payload).unwrap();
    let reasoning = extract_chat_completions_reasoning(&response).expect("reasoning");

    // Both U+0000 and U+0001 are removed; the surrounding text is joined.
    let expected = "goodbadend";
    assert_eq!(reasoning, expected);
}

#[test]
fn sanitize_preserves_whitespace_and_strips_del() {
    // Input mixes the three preserved whitespace controls with a NUL and a
    // DEL; only the latter two should disappear.
    let input = "a\tb\nc\rd\u{0000}e\u{007f}f";
    let expected = "a\tb\nc\rdef";

    let cleaned = sanitize_reasoning_text(input);

    assert_eq!(cleaned, expected);
}