diff --git a/openrag/routers/openai.py b/openrag/routers/openai.py
index e659c59e..e61a7fd2 100644
--- a/openrag/routers/openai.py
+++ b/openrag/routers/openai.py
@@ -320,14 +320,13 @@ async def openai_chat_completion(
             detail="The last message must be a non-empty user message",
         )
 
-    check_tokens_limit(request, log)
-
     log.debug(
         "Received chat completion request with messages: {}",
         truncate(str(request.messages)),
     )
 
     if is_direct_llm_model(request):
+        check_tokens_limit(request, log)
         partitions = None
     else:
         partitions = await get_partition_name(model_name, user_partitions, is_admin=user["is_admin"])
@@ -421,9 +420,8 @@ async def openai_completion(
             detail="Streaming is not supported for this endpoint",
         )
 
-    check_tokens_limit(request, log)
-
     if is_direct_llm_model(request):
+        check_tokens_limit(request, log)
         partitions = None
     else:
         partitions = await get_partition_name(model_name, user_partitions, is_admin=user["is_admin"])