From 36cc95437459405da8d9510ef924d0e0a3698a08 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 23 Dec 2025 20:53:56 +0700 Subject: [PATCH] fix: jan parser is broken with nested tool call tag in arguments --- .../tool_parsers/test_jan_tool_parser.py | 575 ++++++++++++++++++ .../openai/tool_parsers/jan_tool_parser.py | 59 +- 2 files changed, 622 insertions(+), 12 deletions(-) create mode 100644 tests/entrypoints/openai/tool_parsers/test_jan_tool_parser.py diff --git a/tests/entrypoints/openai/tool_parsers/test_jan_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_jan_tool_parser.py new file mode 100644 index 000000000000..08d270bc50d1 --- /dev/null +++ b/tests/entrypoints/openai/tool_parsers/test_jan_tool_parser.py @@ -0,0 +1,575 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json + +import pytest + +from vllm.entrypoints.openai.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.tool_parsers.jan_tool_parser import JanToolParser +from vllm.tokenizers import TokenizerLike + +from .utils import run_tool_extraction + +# Test fixtures + + +@pytest.fixture +def qwen_tokenizer() -> TokenizerLike: + from vllm.tokenizers import get_tokenizer + + return get_tokenizer("Qwen/Qwen3-32B") + + +@pytest.fixture +def jan_parser(qwen_tokenizer: TokenizerLike) -> JanToolParser: + return JanToolParser(qwen_tokenizer) + + +@pytest.fixture +def chat_request() -> ChatCompletionRequest: + return ChatCompletionRequest( + seed=42, + model="Qwen/Qwen3-32B", + messages=[], + ) + + +# Unit tests for non-streaming tool call extraction + + +def test_jan_parser_non_streaming_no_tool_call( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test that plain text without tool calls is handled correctly.""" + text = """This is not a tool call.""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert not tool_call.tools_called + assert tool_call.content == text + + +def test_jan_parser_non_streaming_tool_call_between_tags( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test tool call extraction with complete opening and closing tags.""" + text = """ +{"name": "get_current_weather", "arguments": {"location": "Boston", "unit": "celsius"}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert len(tool_call.tool_calls) == 1 + assert tool_call.tool_calls[0].function.name == "get_current_weather" + + arguments = json.loads(tool_call.tool_calls[0].function.arguments) + assert arguments["location"] == "Boston" + assert arguments["unit"] == "celsius" + + +def test_jan_parser_non_streaming_tool_call_until_eos( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test tool call extraction without closing tag (until end of string).""" + text = """ +{"name": "get_current_weather", "arguments": {"location": "Boston"}}""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert len(tool_call.tool_calls) == 1 + assert tool_call.tool_calls[0].function.name == "get_current_weather" + + arguments = json.loads(tool_call.tool_calls[0].function.arguments) + assert arguments["location"] == "Boston" + + +def test_jan_parser_non_streaming_tool_call_with_content( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test tool call extraction when there's content before the tool call.""" + text = """Let me check the weather for you. + +{"name": "get_current_weather", "arguments": {"location": "San Francisco"}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert tool_call.content == "Let me check the weather for you.\n" + assert len(tool_call.tool_calls) == 1 + assert tool_call.tool_calls[0].function.name == "get_current_weather" + + +def test_jan_parser_non_streaming_tool_call_invalid_json( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test that invalid JSON in tool call is handled gracefully.""" + # Missing closing brace to trigger exception + text = """ +{"name": "get_current_weather", "arguments": {"location": "Boston"}""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert not tool_call.tools_called + + +def test_jan_parser_non_streaming_multiple_tool_calls( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test extraction of multiple tool calls in sequence.""" + text = """ +{"name": "get_current_weather", "arguments": {"location": "Boston"}} + + +{"name": "get_current_weather", "arguments": {"location": "New York"}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert len(tool_call.tool_calls) == 2 + assert tool_call.tool_calls[0].function.name == "get_current_weather" + assert tool_call.tool_calls[1].function.name == "get_current_weather" + + args_0 = json.loads(tool_call.tool_calls[0].function.arguments) + args_1 = json.loads(tool_call.tool_calls[1].function.arguments) + assert args_0["location"] == "Boston" + assert args_1["location"] == "New York" + + +def test_jan_parser_non_streaming_boolean_parameter( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test tool call with boolean parameter.""" + text = """ +{"name": "final_answer", "arguments": {"trigger": true}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert tool_call.tool_calls[0].function.name == "final_answer" + + arguments = json.loads(tool_call.tool_calls[0].function.arguments) + assert arguments["trigger"] is True + + +def test_jan_parser_non_streaming_integer_parameter( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test tool call with integer parameter.""" + text = """ +{"name": "get_product_info", "arguments": {"product_id": 12345, "quantity": 10}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert tool_call.tool_calls[0].function.name == "get_product_info" + + arguments = json.loads(tool_call.tool_calls[0].function.arguments) + assert arguments["product_id"] == 12345 + assert arguments["quantity"] == 10 + + +# Unit tests for streaming tool call extraction + + +def test_jan_parser_streaming_just_forward_text( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test that plain text without tool calls is streamed correctly.""" + text = """This is some prior text that has nothing to do with tool calling.""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert content == text + assert len(tool_calls) == 0 + + +def test_jan_parser_streaming_simple_tool_call( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test streaming extraction of a simple tool call.""" + text = """ +{"name": "get_current_weather", "arguments": {"location": "San Francisco", "unit": "celsius"}} +""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 1 + assert tool_calls[0].function.name == "get_current_weather" + + arguments = json.loads(tool_calls[0].function.arguments) + assert arguments["location"] == "San Francisco" + assert arguments["unit"] == "celsius" + + +def test_jan_parser_streaming_tool_call_with_boolean( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test streaming extraction of tool call with boolean parameter.""" + text = """ +{"name": "final_answer", "arguments": {"trigger": true}} +""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 1 + assert tool_calls[0].function.name == "final_answer" + + arguments = json.loads(tool_calls[0].function.arguments) + assert arguments["trigger"] is True + + +def test_jan_parser_streaming_tool_call_with_content( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test streaming when there's text content before the tool call.""" + text = """Let me search for that information. + +{"name": "search_database", "arguments": {"query": "vllm documentation"}} +""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 1 + assert tool_calls[0].function.name == "search_database" + + # Content before tool call should be preserved + assert content is not None + assert "Let me search" in content + + +def test_jan_parser_streaming_nested_arguments( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test streaming extraction with nested JSON arguments.""" + text = """ +{"name": "complex_query", "arguments": {"filters": {"category": "electronics", "price_range": {"min": 100, "max": 500}}, "sort": "relevance"}} +""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 1 + assert tool_calls[0].function.name == "complex_query" + + arguments = json.loads(tool_calls[0].function.arguments) + assert arguments["filters"]["category"] == "electronics" + assert arguments["filters"]["price_range"]["min"] == 100 + assert arguments["filters"]["price_range"]["max"] == 500 + assert arguments["sort"] == "relevance" + + +def test_jan_parser_streaming_unicode_content( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test streaming with unicode characters in arguments.""" + text = """ +{"name": "translate", "arguments": {"text": "Hello 世界", "target_lang": "español"}} +""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 1 + assert tool_calls[0].function.name == "translate" + + arguments = json.loads(tool_calls[0].function.arguments) + assert "世界" in arguments["text"] + assert arguments["target_lang"] == "español" + + +def test_jan_parser_adjust_request_with_tools( + jan_parser: JanToolParser, +) -> None: + """Test that adjust_request correctly sets skip_special_tokens to False when tools are present.""" + request = ChatCompletionRequest( + model="test-model", + messages=[], + tools=[ + { + "type": "function", + "function": { + "name": "test_function", + "description": "A test function", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + tool_choice="auto", + ) + + adjusted_request = jan_parser.adjust_request(request) + assert adjusted_request.skip_special_tokens is False + + +def test_jan_parser_adjust_request_tool_choice_none( + jan_parser: JanToolParser, +) -> None: + """Test that adjust_request doesn't modify skip_special_tokens when tool_choice is 'none'.""" + request = ChatCompletionRequest( + model="test-model", + messages=[], + tools=[ + { + "type": "function", + "function": { + "name": "test_function", + "description": "A test function", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + tool_choice="none", + ) + + adjusted_request = jan_parser.adjust_request(request) + # When tool_choice is "none", skip_special_tokens should not be set to False + assert adjusted_request.skip_special_tokens is not False or adjusted_request.skip_special_tokens is None + + +def test_jan_parser_adjust_request_no_tools( + jan_parser: JanToolParser, +) -> None: + """Test that adjust_request doesn't modify request when no tools are present.""" + request = ChatCompletionRequest( + model="test-model", + messages=[], + ) + + adjusted_request = jan_parser.adjust_request(request) + # Without tools, the request should pass through parent's adjust_request + assert adjusted_request is not None + + +# Edge cases and error handling + + +def test_jan_parser_empty_tool_call( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test handling of empty tool call tags.""" + text = """""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + # Should not crash, but also should not consider this a valid tool call + assert not tool_call.tools_called + + +def test_jan_parser_malformed_tool_tags( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test handling of malformed tool call tags.""" + text = """ +{"name": "test" +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + # Should handle gracefully without crashing + assert not tool_call.tools_called + + +def test_jan_parser_array_arguments( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test tool call with array arguments.""" + text = """ +{"name": "batch_process", "arguments": {"items": [1, 2, 3, 4, 5], "operation": "sum"}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert tool_call.tool_calls[0].function.name == "batch_process" + + arguments = json.loads(tool_call.tool_calls[0].function.arguments) + assert arguments["items"] == [1, 2, 3, 4, 5] + assert arguments["operation"] == "sum" + +def test_jan_parser_streaming_tool_call_with_nested_tag( + qwen_tokenizer: TokenizerLike, + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test streaming extraction of tool call with boolean parameter.""" + text = """ +{"name": "final_answer", "arguments": {"q": "test call"}} +""" + + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 1 + assert tool_calls[0].function.name == "final_answer" + + arguments = json.loads(tool_calls[0].function.arguments) + assert arguments["q"] == "test call" + + +def test_jan_parser_non_streaming_multiple_tool_calls_auto_complete( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test extraction of multiple tool calls in sequence then auto complete the missing close bracket.""" + text = """ +{"name": "get_current_weather", "arguments": {"location": "Boston"}} + + +{"name": "get_current_weather", "arguments": {"location": "New York"}} + + +{"name": "get_current_weather", "arguments": {"location": "Los Angeles"}} + + +{"name": "get_current_weather", "arguments": {"location": "Chicago"}} +""" + tool_call = jan_parser.extract_tool_calls( + model_output=text, + request=chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert len(tool_call.tool_calls) == 4 + assert tool_call.tool_calls[0].function.name == "get_current_weather" + assert tool_call.tool_calls[1].function.name == "get_current_weather" + assert tool_call.tool_calls[2].function.name == "get_current_weather" + assert tool_call.tool_calls[3].function.name == "get_current_weather" + + args_0 = json.loads(tool_call.tool_calls[0].function.arguments) + args_1 = json.loads(tool_call.tool_calls[1].function.arguments) + args_2 = json.loads(tool_call.tool_calls[2].function.arguments) + args_3 = json.loads(tool_call.tool_calls[3].function.arguments) + assert args_0["location"] == "Boston" + assert args_1["location"] == "New York" + assert args_2["location"] == "Los Angeles" + assert args_3["location"] == "Chicago" + +def test_jan_parser_streaming_multiple_tool_calls_auto_complete( + jan_parser: JanToolParser, + chat_request: ChatCompletionRequest, +) -> None: + """Test extraction of multiple tool calls in sequence with token-by-token streaming, then auto complete the missing close bracket.""" + text = """ +{"name": "get_current_weather", "arguments": {"location": "Boston"}} + + +{"name": "get_current_weather", "arguments": {"location": "New York"} + + +{"name": "get_current_weather", "arguments": {"location": "Los Angeles"} +""" + # Use streaming mode to process token by token + content, tool_calls = run_tool_extraction( + jan_parser, + text, + chat_request, + streaming=True, + ) + + assert len(tool_calls) == 3 + assert tool_calls[0].function.name == "get_current_weather" + assert tool_calls[1].function.name == "get_current_weather" + assert tool_calls[2].function.name == "get_current_weather" + + args_0 = json.loads(tool_calls[0].function.arguments) + args_1 = json.loads(tool_calls[1].function.arguments) + args_2 = json.loads(tool_calls[2].function.arguments) + assert args_0["location"] == "Boston" + assert args_1["location"] == "New York" + assert args_2["location"] == "Los Angeles" \ No newline at end of file diff --git a/vllm/entrypoints/openai/tool_parsers/jan_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/jan_tool_parser.py index 546b1247b3e7..ae184230a258 100644 --- a/vllm/entrypoints/openai/tool_parsers/jan_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/jan_tool_parser.py @@ -83,7 +83,15 @@ def __init__(self, tokenizer: TokenizerLike): # When the last token is encountered, empty the buffer and return it. # If a token appears in an incorrect sequence while storing in the buffer, # return the preceding buffer along with the token. + # IMPORTANT: Disable buffering when parsing a tool call to preserve content + # that looks like tags (e.g., "" in JSON arguments). def tool_call_delta_buffer(self, delta_text: str): + # If we're already parsing a tool call, don't buffer anything + # This preserves content like "" or "" in arguments + # We detect the actual end tag using tag counting in the main logic + if self.parsing_tool: + return delta_text + # If the sequence of tool_call_start or tool_call_end tokens is not yet # complete, fill the buffer with the token and return "". if ( @@ -251,15 +259,6 @@ def extract_tool_calls_streaming( ): self.parsing_tool = True self._buf = delta_text - if len(delta_token_ids) > 1: - tool_call_portion = current_text.split(self.tool_call_start_token)[ - -1 - ] - else: - tool_call_portion = None - delta = None - - text_portion = None # set cursors and state appropriately self.current_tool_id += 1 @@ -267,13 +266,49 @@ def extract_tool_calls_streaming( self.streamed_args_for_tool.append("") logger.debug("Starting on a new tool %s", self.current_tool_id) + if len(delta_token_ids) > 1: + # Extract the current tool call portion by finding tag positions + # Find the position of the current_tool_id-th start tag + pos = 0 + for _ in range(self.current_tool_id + 1): + pos = current_text.find(self.tool_call_start_token, pos) + if pos == -1: + break + pos += len(self.tool_call_start_token) + + if pos > 0: + # Extract from this position to the next end tag or end of string + end_pos = current_text.find(self.tool_call_end_token, pos) + tool_call_portion = current_text[pos:end_pos] if end_pos != -1 else current_text[pos:] + else: + tool_call_portion = current_text.split(self.tool_call_start_token)[-1] + else: + tool_call_portion = None + delta = None + + text_portion = None + # case -- we're updating an existing tool call elif ( cur_tool_start_count > cur_tool_end_count - and cur_tool_start_count == prev_tool_start_count + and (cur_tool_start_count == prev_tool_start_count or self.parsing_tool) ): - # get the portion of the text that's the tool call - tool_call_portion = current_text.split(self.tool_call_start_token)[-1] + # get the portion of the text that's the current tool call + # Find the position of the current_tool_id-th start tag + pos = 0 + for _ in range(self.current_tool_id + 1): + pos = current_text.find(self.tool_call_start_token, pos) + if pos == -1: + break + pos += len(self.tool_call_start_token) + + if pos > 0: + # Extract from this position to the next end tag or end of string + end_pos = current_text.find(self.tool_call_end_token, pos) + tool_call_portion = current_text[pos:end_pos] if end_pos != -1 else current_text[pos:] + else: + # Fallback to splitting approach + tool_call_portion = current_text.split(self.tool_call_start_token)[-1] text_portion = None # case -- the current tool call is being closed.