brainlid · brainlid · May 15, 2026 · May 15, 2026
diff --git a/lib/chat_models/chat_req_llm.ex b/lib/chat_models/chat_req_llm.ex
@@ -476,6 +476,54 @@ if Code.ensure_loaded?(ReqLLM) do
       {[delta], state}
     end
 
+    # OpenAI-style streaming initial tool_call chunk: metadata carries :index
+    # (and usually :id), and the name is present. When :arguments is an empty
+    # map (placeholder for "args will arrive as tool_call_args fragments"),
+    # emit an :incomplete ToolCall with :arguments left nil so subsequent
+    # string fragments can be concatenated by ToolCall.append_arguments/2 and
+    # later JSON-decoded by ToolCall.complete/1. When :arguments is already a
+    # non-empty map (single-shot delivery — no fragments will follow), emit
+    # the ToolCall as :complete. In both cases the :index is preserved so the
+    # ToolCall lands in the same MessageDelta slot the fragment chunks target.
+    defp process_stream_chunk(
+           %ReqLLM.StreamChunk{
+             type: :tool_call,
+             name: tool_name,
+             arguments: arguments,
+             metadata: %{index: slot} = metadata
+           },
+           state
+         )
+         when is_binary(tool_name) and is_integer(slot) do
+      # Use the :id from the chunk metadata when present; otherwise generate one.
+      call_id = metadata[:id] || "tool_#{:erlang.unique_integer([:positive])}"
+
+      # A non-empty arguments map is treated as single-shot delivery: the call
+      # is :complete as-is. Otherwise :arguments stays unset and the call is
+      # :incomplete, so later string fragments can be appended and decoded.
+      status_attrs =
+        case arguments do
+          %{} = given when map_size(given) > 0 -> %{status: :complete, arguments: given}
+          _placeholder -> %{status: :incomplete}
+        end
+
+      # :index is carried over so this ToolCall shares a slot with its fragments.
+      tool_call =
+        %{type: :function, call_id: call_id, name: tool_name, index: slot}
+        |> Map.merge(status_attrs)
+        |> ToolCall.new!()
+
+      message_delta =
+        MessageDelta.new!(%{
+          status: :incomplete,
+          role: :assistant,
+          index: 0,
+          tool_calls: [tool_call]
+        })
+
+      {[message_delta], state}
+    end
+
     # Tool call arg fragment: emit incomplete ToolCall delta with the partial JSON string.
     # ToolCall.merge/2 will concatenate binary arguments strings across deltas.
     defp process_stream_chunk(

diff --git a/test/chat_models/chat_req_llm_test.exs b/test/chat_models/chat_req_llm_test.exs
@@ -1067,6 +1067,95 @@ if Code.ensure_loaded?(ReqLLM) do
         assert Enum.all?(deltas, &match?(%MessageDelta{}, &1))
       end
 
+      test "openai-style streaming tool call assembles a valid Message (regression)" do
+        # Mirrors the chunk sequence emitted by ReqLLM's default OpenAI decoder
+        # (used by direct OpenAI, LiteLLM proxy, etc.):
+        #   1. a :tool_call chunk carrying name, arguments: %{} and
+        #      metadata: %{id, index}
+        #   2. one or more :meta chunks carrying tool_call_args: %{index, fragment}
+        #   3. a terminal :meta chunk with finish_reason: :tool_calls
+        #
+        # Before the fix the initial chunk landed at index: nil and the fragments
+        # at index: 0, leaving an orphan ToolCall without call_id/name and a
+        # "delta_conversion_failed" error from Message.new (the LiteLLM failure).
+        llm = ChatReqLLM.new!(%{model: "openai:gpt-4o", stream: true})
+
+        opening_chunk = %ReqLLM.StreamChunk{
+          type: :tool_call,
+          name: "ask_user",
+          arguments: %{},
+          metadata: %{id: "call_abc123", index: 0}
+        }
+
+        fragment_chunks =
+          for json_part <- ["{\"question\":", "\"hi?\"}"] do
+            %ReqLLM.StreamChunk{
+              type: :meta,
+              metadata: %{tool_call_args: %{index: 0, fragment: json_part}}
+            }
+          end
+
+        closing_chunk = %ReqLLM.StreamChunk{
+          type: :meta,
+          metadata: %{finish_reason: :tool_calls, terminal?: true}
+        }
+
+        stream_chunks = [opening_chunk | fragment_chunks] ++ [closing_chunk]
+
+        stub(ReqLLM, :stream_text, fn _model, _context, _opts ->
+          {:ok, fake_stream_response(stream_chunks)}
+        end)
+
+        chain = LLMChain.add_message(LLMChain.new!(%{llm: llm}), Message.new_user!("ask"))
+
+        assert {:ok, finished_chain} = LLMChain.run(chain)
+
+        assert %Message{role: :assistant, status: :complete, tool_calls: tool_calls} =
+                 List.last(finished_chain.messages)
+
+        assert [%ToolCall{} = tool_call] = tool_calls
+        assert tool_call.call_id == "call_abc123"
+        assert tool_call.name == "ask_user"
+        assert tool_call.status == :complete
+        assert tool_call.arguments == %{"question" => "hi?"}
+      end
+
+      test "openai-style streaming tool call with no follow-up fragments still completes" do
+        # If args fit in the initial chunk (non-empty map), no fragment :meta
+        # chunks will follow. The ToolCall must be emitted as :complete (asserted
+        # explicitly, as the test name promises) with the arguments populated.
+        model = ChatReqLLM.new!(%{model: "openai:gpt-4o", stream: true})
+
+        chunks = [
+          %ReqLLM.StreamChunk{
+            type: :tool_call,
+            name: "get_time",
+            arguments: %{"zone" => "UTC"},
+            metadata: %{id: "call_xyz", index: 0}
+          },
+          %ReqLLM.StreamChunk{
+            type: :meta,
+            metadata: %{finish_reason: :tool_calls, terminal?: true}
+          }
+        ]
+
+        stub(ReqLLM, :stream_text, fn _model, _context, _opts ->
+          {:ok, fake_stream_response(chunks)}
+        end)
+
+        assert {:ok, chain} =
+                 %{llm: model}
+                 |> LLMChain.new!()
+                 |> LLMChain.add_message(Message.new_user!("time?"))
+                 |> LLMChain.run()
+
+        assert [%ToolCall{} = tc] = List.last(chain.messages).tool_calls
+        assert tc.call_id == "call_xyz"
+        assert tc.name == "get_time"
+        assert tc.status == :complete
+        assert tc.arguments == %{"zone" => "UTC"}
+      end
+
       test "LLMChain runs successfully with stream:true" do
         model = ChatReqLLM.new!(%{model: @live_model, stream: true})