berabuddies · n-WN · May 13, 2026
diff --git a/crates/puffer-provider-openai/src/lib.rs b/crates/puffer-provider-openai/src/lib.rs
@@ -162,6 +162,13 @@ pub fn extract_chat_completions_visible_text(response: &OpenAIChatCompletionsRes
     response::extract_chat_completions_visible_text(response)
 }
 
+/// Re-export of the reasoning-text sanitizer applied to
+/// `reasoning_content` and `<think>` blocks before they round-trip into
+/// the next request. Exposed so other crates (request serializers,
+/// session-store writers) can apply the same filter on data that
+/// arrived from somewhere other than `extract_chat_completions_reasoning`.
+pub use response::sanitize_reasoning_text;
+
 /// Extracts tool calls from a parsed OpenAI Responses API payload.
 pub fn extract_responses_tool_calls(
     response: &OpenAIResponsesResponse,

diff --git a/crates/puffer-provider-openai/src/response.rs b/crates/puffer-provider-openai/src/response.rs
@@ -107,6 +107,11 @@ pub struct OpenAIChatChoiceMessage {
 ///    under `message.reasoning.summary` or `message.reasoning.content`).
 /// 3. A `<think>…</think>` block inside `message.content` (the
 ///    open-source / DeepSeek-R1 distill convention).
+///
+/// The returned text is sanitized via [`sanitize_reasoning_text`] so a
+/// stray NUL or C0 control byte in the vendor's chain-of-thought
+/// doesn't round-trip into the next request and get rejected by the
+/// vendor's own validator.
 pub fn extract_chat_completions_reasoning(
     response: &OpenAIChatCompletionsResponse,
 ) -> Option<String> {
@@ -119,7 +124,7 @@ pub fn extract_chat_completions_reasoning(
         .map(str::trim)
         .filter(|s| !s.is_empty())
     {
-        return Some(value.to_string());
+        return Some(sanitize_reasoning_text(value));
     }
 
     // (3) <think>…</think> inside content. Catches DeepSeek-R1-style
@@ -128,13 +133,36 @@ pub fn extract_chat_completions_reasoning(
     if let Some(content) = message.content.as_ref() {
         let raw = content_to_text(content);
         if let Some(thinking) = extract_think_block(&raw) {
-            return Some(thinking);
+            return Some(sanitize_reasoning_text(&thinking));
         }
     }
 
     None
 }
 
+/// Returns a copy of `text` without NUL (`\x00`), the other C0 control
+/// characters (except `\t`, `\n`, `\r`, which are kept) and DEL (`\x7f`).
+///
+/// Kimi K2.6 has been observed emitting a stray `\x00` inside its own
+/// `reasoning_content` chain-of-thought, then refusing the same string
+/// on replay the next turn with `400 "reasoning_content at position N
+/// must be a valid UTF-8 string: string contains \x00"`. Filtering at
+/// extraction time keeps the multi-turn round-trip clean for every
+/// downstream consumer (request serializer, session_store, trace
+/// emitter) without needing per-call-site guards.
+pub fn sanitize_reasoning_text(text: &str) -> String {
+    text.chars()
+        .filter(|c| {
+            let cp = *c as u32;
+            if cp < 0x20 {
+                matches!(*c, '\t' | '\n' | '\r') // C0 range: keep only tab, LF, CR
+            } else {
+                cp != 0x7f // drop DEL; every other char (incl. non-ASCII) is kept
+            }
+        })
+        .collect()
+}
+
 /// Returns the visible-to-user portion of the assistant message,
 /// stripped of any `<think>…</think>` block. Used by the agent loop
 /// after `extract_chat_completions_reasoning` so the same content

diff --git a/crates/puffer-provider-openai/tests/reasoning_extraction.rs b/crates/puffer-provider-openai/tests/reasoning_extraction.rs
@@ -1,6 +1,6 @@
 use puffer_provider_openai::{
     extract_chat_completions_reasoning, extract_chat_completions_visible_text,
-    parse_chat_completions_response,
+    parse_chat_completions_response, sanitize_reasoning_text,
 };
 
 #[test]
@@ -66,3 +66,36 @@ fn empty_reasoning_content_returns_none() {
     let parsed = parse_chat_completions_response(payload).unwrap();
     assert_eq!(extract_chat_completions_reasoning(&parsed), None);
 }
+
+#[test]
+fn strips_nul_byte_from_reasoning_content() {
+    // Kimi K2.6 has been observed emitting a stray \x00 mid-reasoning,
+    // then refusing the same string on replay with HTTP 400. Verify the
+    // NUL is removed (its neighbours joined) while \n and \t survive.
+    let payload = "{\"id\":\"x\",\"object\":\"chat.completion\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"answer\",\"reasoning_content\":\"first line\\nsecond\\u0000third\\tfourth\"},\"finish_reason\":\"stop\"}]}";
+    let parsed = parse_chat_completions_response(payload).unwrap();
+    let got = extract_chat_completions_reasoning(&parsed).expect("reasoning");
+    assert!(!got.contains('\u{0000}'), "NUL leaked: {got:?}");
+    assert!(got.contains('\n'), "newline got stripped: {got:?}");
+    assert!(got.contains('\t'), "tab got stripped: {got:?}");
+    assert!(got.contains("secondthird"), "NUL boundary not spliced: {got:?}");
+}
+
+#[test]
+fn strips_control_bytes_from_think_block() {
+    // The payload carries no `reasoning_content`, only a <think> block
+    // inside `content` (DeepSeek-R1 distill style). That fallback path
+    // must also be sanitized: both \u0000 and \u0001 are removed.
+    let payload = "{\"id\":\"x\",\"object\":\"chat.completion\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"<think>good\\u0000bad\\u0001end</think>visible\"},\"finish_reason\":\"stop\"}]}";
+    let parsed = parse_chat_completions_response(payload).unwrap();
+    let got = extract_chat_completions_reasoning(&parsed).expect("reasoning");
+    assert_eq!(got, "goodbadend");
+}
+
+#[test]
+fn sanitize_preserves_whitespace_and_strips_del() {
+    assert_eq!(
+        sanitize_reasoning_text("a\tb\nc\rd\u{0000}e\u{007f}f"),
+        "a\tb\nc\rdef" // tab, LF and CR are kept; NUL and DEL are dropped
+    );
+}