From 936415d986b501654bef177d0e199b1af70c4971 Mon Sep 17 00:00:00 2001 From: Taksh Date: Sat, 9 May 2026 08:40:51 +0530 Subject: [PATCH] Fix create_completion letting one long prompt reduce request.max_tokens for all prompts --- fastchat/serve/openai_api_server.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/fastchat/serve/openai_api_server.py b/fastchat/serve/openai_api_server.py index a6ffee96b..30891d638 100644 --- a/fastchat/serve/openai_api_server.py +++ b/fastchat/serve/openai_api_server.py @@ -558,9 +558,6 @@ async def create_completion(request: CompletionRequest): if error_check_ret is not None: return error_check_ret - if isinstance(max_tokens, int) and max_tokens < request.max_tokens: - request.max_tokens = max_tokens - if request.stream: generator = generate_completion_stream_generator( request, request.n, worker_addr