diff --git a/crates/puffer-provider-openai/src/lib.rs b/crates/puffer-provider-openai/src/lib.rs
index aaabbf826..f29aac91f 100644
--- a/crates/puffer-provider-openai/src/lib.rs
+++ b/crates/puffer-provider-openai/src/lib.rs
@@ -162,6 +162,13 @@ pub fn extract_chat_completions_visible_text(response: &OpenAIChatCompletionsRes
     response::extract_chat_completions_visible_text(response)
 }
 
+/// Re-export of the reasoning-text sanitizer applied to
+/// `reasoning_content` and `<think>` blocks before they round-trip into
+/// the next request. Exposed so other crates (request serializers,
+/// session-store writers) can apply the same filter on data that
+/// arrived from somewhere other than `extract_chat_completions_reasoning`.
+pub use response::sanitize_reasoning_text;
+
 /// Extracts tool calls from a parsed OpenAI Responses API payload.
 pub fn extract_responses_tool_calls(
     response: &OpenAIResponsesResponse,
diff --git a/crates/puffer-provider-openai/src/response.rs b/crates/puffer-provider-openai/src/response.rs
index 63096ab4d..229cef388 100644
--- a/crates/puffer-provider-openai/src/response.rs
+++ b/crates/puffer-provider-openai/src/response.rs
@@ -107,6 +107,11 @@ pub struct OpenAIChatChoiceMessage {
 ///    under `message.reasoning.summary` or `message.reasoning.content`).
 /// 3. A `<think>` block inside `message.content` (the
 ///    open-source / DeepSeek-R1 distill convention).
+///
+/// The returned text is sanitized via [`sanitize_reasoning_text`] so a
+/// stray NUL or C0 control byte in the vendor's chain-of-thought
+/// doesn't round-trip into the next request and get rejected by the
+/// vendor's own validator.
 pub fn extract_chat_completions_reasoning(
     response: &OpenAIChatCompletionsResponse,
 ) -> Option<String> {
@@ -119,7 +124,7 @@ pub fn extract_chat_completions_reasoning(
         .map(str::trim)
         .filter(|s| !s.is_empty())
     {
-        return Some(value.to_string());
+        return Some(sanitize_reasoning_text(value));
     }
 
     // (3) <think> inside content. Catches DeepSeek-R1-style
@@ -128,13 +133,34 @@ pub fn extract_chat_completions_reasoning(
     if let Some(content) = message.content.as_ref() {
         let raw = content_to_text(content);
         if let Some(thinking) = extract_think_block(&raw) {
-            return Some(thinking);
+            return Some(sanitize_reasoning_text(&thinking));
         }
     }
 
     None
 }
 
+/// Strips NUL (`\x00`), every other C0 control character, and DEL
+/// (`\x7f`) from `text`; only `\t`, `\n` and `\r` are kept.
+/// Characters outside the ASCII range pass through unchanged.
+///
+/// Kimi K2.6 has been observed emitting a stray `\x00` inside its own
+/// `reasoning_content` chain-of-thought, then refusing the same string
+/// on replay the next turn with `400 "reasoning_content at position N
+/// must be a valid UTF-8 string: string contains \x00"`. Filtering at
+/// extraction time keeps the multi-turn round-trip clean for every
+/// downstream consumer (request serializer, session_store, trace
+/// emitter) without needing per-call-site guards.
+pub fn sanitize_reasoning_text(text: &str) -> String {
+    // The result can only shrink, so one up-front allocation suffices.
+    let mut sanitized = String::with_capacity(text.len());
+    sanitized.extend(
+        text.chars()
+            .filter(|&c| matches!(c, '\t' | '\n' | '\r') || !c.is_ascii_control()),
+    );
+    sanitized
+}
+
 /// Returns the visible-to-user portion of the assistant message,
 /// stripped of any `<think>` block. Used by the agent loop
 /// after `extract_chat_completions_reasoning` so the same content
diff --git a/crates/puffer-provider-openai/tests/reasoning_extraction.rs b/crates/puffer-provider-openai/tests/reasoning_extraction.rs
index 873e44e93..50df55abc 100644
--- a/crates/puffer-provider-openai/tests/reasoning_extraction.rs
+++ b/crates/puffer-provider-openai/tests/reasoning_extraction.rs
@@ -1,6 +1,6 @@
 use puffer_provider_openai::{
     extract_chat_completions_reasoning, extract_chat_completions_visible_text,
-    parse_chat_completions_response,
+    parse_chat_completions_response, sanitize_reasoning_text,
 };
 
 #[test]
@@ -66,3 +66,36 @@ fn empty_reasoning_content_returns_none() {
     let parsed = parse_chat_completions_response(payload).unwrap();
     assert_eq!(extract_chat_completions_reasoning(&parsed), None);
 }
+
+#[test]
+fn strips_nul_byte_from_reasoning_content() {
+    // Kimi K2.6 has been observed emitting a stray \x00 mid-reasoning,
+    // then refusing the same string on replay with HTTP 400. Verify
+    // the NUL is filtered while \t \n \r survive.
+    let payload = "{\"id\":\"x\",\"object\":\"chat.completion\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"answer\",\"reasoning_content\":\"first line\\nsecond\\u0000third\\tfourth\"},\"finish_reason\":\"stop\"}]}";
+    let parsed = parse_chat_completions_response(payload).unwrap();
+    let got = extract_chat_completions_reasoning(&parsed).expect("reasoning");
+    assert!(!got.contains('\u{0000}'), "NUL leaked: {got:?}");
+    assert!(got.contains('\n'), "newline got stripped: {got:?}");
+    assert!(got.contains('\t'), "tab got stripped: {got:?}");
+    assert!(got.contains("secondthird"), "NUL boundary not spliced: {got:?}");
+}
+
+#[test]
+fn strips_control_bytes_from_think_block() {
+    // The <think> fallback path should also sanitize so DeepSeek-R1
+    // distill outputs round-trip cleanly when the reasoning leaks a
+    // C0 byte.
+    let payload = "{\"id\":\"x\",\"object\":\"chat.completion\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"<think>good\\u0000bad\\u0001end</think>visible\"},\"finish_reason\":\"stop\"}]}";
+    let parsed = parse_chat_completions_response(payload).unwrap();
+    let got = extract_chat_completions_reasoning(&parsed).expect("reasoning");
+    assert_eq!(got, "goodbadend");
+}
+
+#[test]
+fn sanitize_preserves_whitespace_and_strips_del() {
+    assert_eq!(
+        sanitize_reasoning_text("a\tb\nc\rd\u{0000}e\u{007f}f"),
+        "a\tb\nc\rdef"
+    );
+}