Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions lmdeploy/serve/managers/session_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,18 @@ def __init__(self):
"""Initialize the session manager."""

self.sessions = {}
self.session_id_generator = itertools.count(1)
self.session_id_generator = itertools.count(0)
self.request_handle_pool = None
Comment thread
lvhan028 marked this conversation as resolved.
self.loop = None

def get(self, session_id: int | None = None, **kwargs) -> Session:
def get(self, session_id: int | None = None, create_if_not_exists: bool = True, **kwargs) -> Session | None:
"""Create a new session."""
session_id = session_id or next(self.session_id_generator)
if not create_if_not_exists:
return self.sessions.get(session_id, None)
Comment thread
lvhan028 marked this conversation as resolved.

if session_id is None:
session_id = next(self.session_id_generator)

if session_id in self.sessions:
logger.debug(f'[SessionManager] session {session_id} already exists. Updating...')
session = self.sessions[session_id]
Expand Down
58 changes: 40 additions & 18 deletions lmdeploy/serve/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,25 +103,43 @@ class VariableInterface:
tool_parser: ToolParser | None = None
allow_terminate_by_client: bool = False
enable_abort_handling: bool = False

@staticmethod
def get_session(session_id: int) -> Session:
session_mgr = VariableInterface.get_session_manager()
if session_id == -1:
# map user input session_id to inside session_id
user_session_id_map: dict[int, int] = {}

Comment thread
lvhan028 marked this conversation as resolved.
Outdated
@classmethod
def create_session(cls, user_session_id: int | None = None) -> Session:
session_mgr = cls.get_session_manager()
if user_session_id is None or user_session_id == -1:
# user doesn't input session_id, so we need to generate a new one
session = session_mgr.get()
else:
session = session_mgr.get(session_id)
# find the inside session_id by user_session_id, create a new one
# if it doesn't exist and update the user_session_id_map
session_id = cls.user_session_id_map.get(user_session_id, None)
session = session_mgr.get(session_id, create_if_not_exists=True)
cls.user_session_id_map[user_session_id] = session.session_id
Comment thread
lvhan028 marked this conversation as resolved.
Outdated
# Stamp epoch for ``stop_all_session`` / ``abort_all`` coordination in ``AsyncEngine.generate``.
session.epoch = VariableInterface.async_engine.epoch
session.epoch = cls.async_engine.epoch
return session

@staticmethod
def get_session_manager():
return VariableInterface.async_engine.session_mgr
@classmethod
def find_session(cls, user_session_id: int) -> Session | None:
"""Find the session by user_session_id.

Users cannot access inner session_id directly.
"""
if user_session_id not in cls.user_session_id_map:
return None
session_id = cls.user_session_id_map.get(user_session_id, None)
return cls.get_session_manager().get(session_id, create_if_not_exists=False)

@staticmethod
def get_engine_config():
return VariableInterface.async_engine.backend_config
@classmethod
def get_session_manager(cls):
return cls.async_engine.session_mgr

@classmethod
def get_engine_config(cls):
return cls.async_engine.backend_config


router = APIRouter()
Expand Down Expand Up @@ -419,7 +437,7 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
return error_check_ret
if VariableInterface.tool_parser is not None:
request = VariableInterface.tool_parser.adjust_request(request)
session = VariableInterface.get_session(request.session_id)
session = VariableInterface.create_session(request.session_id)
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

chat_completions_v1 now obtains its session through create_session(), which maps the user-provided session_id to an internal session id. Later in this handler the response id is derived from session.session_id, which is now the internal id rather than the user-provided session_id, so internal ids are effectively exposed to clients. Consider deriving response ids from the user-provided session id (or from a separate request UUID) so that internal ids remain an implementation detail, and keep the behavior consistent with /v1/completions.

Copilot uses AI. Check for mistakes.

json_request = await raw_request.json()
migration_request = json_request.pop('migration_request', None)
Expand Down Expand Up @@ -793,10 +811,10 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
sessions = []
if isinstance(request.prompt, str):
request.prompt = [request.prompt]
sessions.append(VariableInterface.get_session(request.session_id))
sessions.append(VariableInterface.create_session(request.session_id))
elif isinstance(request.prompt, list):
for i in range(len(request.prompt)):
sessions.append(VariableInterface.get_session(i + 1))
sessions.append(VariableInterface.create_session(i + 1))
if isinstance(request.stop, str):
request.stop = [request.stop]
random_seed = request.seed if request.seed else None
Expand Down Expand Up @@ -971,7 +989,7 @@ async def generate(request: GenerateReqInput, raw_request: Request = None):
if error_check_ret is not None:
return error_check_ret

session = VariableInterface.get_session(request.session_id)
session = VariableInterface.create_session(request.session_id)

prompt = request.prompt
input_ids = request.input_ids
Expand Down Expand Up @@ -1265,8 +1283,12 @@ async def abort_request(request: AbortRequest, raw_request: Request = None):
if request.abort_all:
await VariableInterface.async_engine.stop_all_session()
else:
session = VariableInterface.get_session(request.session_id)
session = VariableInterface.find_session(request.session_id)
if session is None:
return create_error_response(HTTPStatus.BAD_REQUEST, f'Session {request.session_id} not found.')
await session.async_abort()
session_mgr = VariableInterface.get_session_manager()
session_mgr.remove(session)
Comment thread
lvhan028 marked this conversation as resolved.
return Response(status_code=200)


Expand Down
Loading