Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ __pycache__

.env
config.debug.yaml
data/
data/

tests/
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,35 @@ gemini:
- model_name: "gemini-3.0-pro"
model_header:
x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]'
gems:
# Disabled by default to avoid accidental creation/update/deletion of gems.
enabled: false
# Policy mode:
# - off: disabled
# - fetch_only: load existing server-managed gems only
# - create_on_demand: create missing managed gems when needed (rate-limited)
# - privacy: reserved for ephemeral request-time flow (startup sync skipped)
policy: "off"
create_rate_limit_per_minute: 12
managed_gems_max_total: 200
cleanup:
enabled: false
unused_days: 7
touch_interval_minutes: 60
dry_run: false
max_deletes_per_run: 5
require_managed_marker: true
fetch_on_init: true
include_hidden_on_fetch: false
Comment thread
Vigno04 marked this conversation as resolved.
policies:
enabled: false
prefix: "fastapi_policy_"
Comment thread
Vigno04 marked this conversation as resolved.
default_policy:
enabled: false
key: "general_capability_guardrail"
      # If `prompt` is null (or omitted), the implementation falls back to
      # its built-in base system prompt.
prompt: null
```

#### Environment Variables
Expand Down
123 changes: 116 additions & 7 deletions app/server/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,28 @@ def _prepare_messages_for_model(
return prepared


def _extract_leading_system_prompt(messages: list[Message]) -> tuple[str | None, list[Message]]:
    """Split off the system messages at the head of *messages*.

    Returns ``(system_text, remainder)`` where ``system_text`` is the
    non-empty leading system-message texts joined by blank lines (``None``
    when no usable leading system text exists) and ``remainder`` is the
    conversation with those leading system messages removed.

    Only *leading* system messages are consumed; a system message that
    appears later in the conversation is left in place so the regular
    message flow is preserved. When every leading system message is blank
    (or there are none), the original list is returned unchanged.
    """
    head_texts: list[str] = []
    consumed = 0
    for message in messages:
        if message.role != "system":
            break
        stripped = text_from_message(message).strip()
        if stripped:
            head_texts.append(stripped)
        consumed += 1

    if not head_texts:
        return None, messages

    return "\n\n".join(head_texts), messages[consumed:]


def _response_items_to_messages(
items: str | list[ResponseInputItem],
) -> tuple[list[Message], str | list[ResponseInputItem]]:
Expand Down Expand Up @@ -1773,18 +1795,64 @@ async def create_chat_completion(
structured_requirement = _build_structured_requirement(request.response_format)
extra_instr = [structured_requirement.instruction] if structured_requirement else None

# This ensures that server-injected system instructions are part of the history
msgs = _prepare_messages_for_model(
# Split leading user-provided system prompt so we can attach it as a managed gem
# when create_on_demand is enabled.
system_prompt_text, non_system_messages = _extract_leading_system_prompt(request.messages)

if not non_system_messages:
Comment thread
Vigno04 marked this conversation as resolved.
Outdated
non_system_messages = request.messages

# Prepared messages with system prompt removed (candidate gem path).
msgs_without_system = _prepare_messages_for_model(
non_system_messages,
request.tools,
request.tool_choice,
extra_instr,
)

# Prepared messages with full system prompt retained (fallback path).
msgs_with_system = _prepare_messages_for_model(
request.messages,
request.tools,
request.tool_choice,
extra_instr,
)

# Prefer searching reusable sessions against system-stripped history because
# gem-based sessions persist that history shape.
msgs = msgs_without_system if system_prompt_text else msgs_with_system

session, client, remain = await _find_reusable_session(db, pool, model, msgs)
reused_session = session is not None
use_google_temporary_mode = g_config.gemini.chat_mode == ChatMode.TEMPORARY

# Fallback search for legacy sessions that still contain explicit system messages.
if session is None and system_prompt_text and msgs_with_system != msgs_without_system:
session, client, remain = await _find_reusable_session(db, pool, model, msgs_with_system)
if session is not None:
msgs = msgs_with_system

managed_system_gem_id: str | None = None
if system_prompt_text:
target_client = client
if target_client is None:
target_client = await pool.acquire()
client = target_client

managed_system_gem_id = await target_client.system_prompt_gem_id_or_create(system_prompt_text)
if managed_system_gem_id:
# When gem is available, keep system text out of the prompt payload.
msgs = msgs_without_system
if session is not None:
session.gem = managed_system_gem_id
else:
# Fall back to explicit system-text path.
msgs = msgs_with_system

# If we changed message mode after initial reuse lookup, re-check reuse quickly.
if session is None and msgs in (msgs_without_system, msgs_with_system):
session, client, remain = await _find_reusable_session(db, pool, model, msgs)

if session:
if not remain:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
Expand All @@ -1810,8 +1878,9 @@ async def create_chat_completion(
)
else:
try:
client = await pool.acquire()
session = client.start_chat(model=model)
if client is None:
client = await pool.acquire()
session = client.start_chat(model=model, gem=managed_system_gem_id)
# Use the already prepared 'msgs' for a fresh session
m_input, files = await _process_conversation_with_compaction(
msgs,
Expand Down Expand Up @@ -1972,12 +2041,26 @@ async def create_response(
request.tool_choice if isinstance(request.tool_choice, (str, ToolChoiceFunction)) else None
)

messages = _prepare_messages_for_model(
# Split leading system/instruction content so it can be mapped to a managed
# gem when create_on_demand is enabled.
system_prompt_text, conv_without_system = _extract_leading_system_prompt(conv_messages)
if not conv_without_system:
conv_without_system = conv_messages

messages_without_system = _prepare_messages_for_model(
conv_without_system,
Comment thread
Vigno04 marked this conversation as resolved.
Outdated
standard_tools or None,
model_tool_choice,
extra_instr or None,
)
messages_with_system = _prepare_messages_for_model(
conv_messages,
standard_tools or None,
model_tool_choice,
extra_instr or None,
)
messages = messages_without_system if system_prompt_text else messages_with_system

pool, db = GeminiClientPool(), LMDBConversationStore()
try:
model = _get_model_by_name(request.model)
Expand All @@ -1987,6 +2070,31 @@ async def create_response(
session, client, remain = await _find_reusable_session(db, pool, model, messages)
reused_session = session is not None
use_google_temporary_mode = g_config.gemini.chat_mode == ChatMode.TEMPORARY

# Fallback reuse search for legacy sessions that still included explicit system text.
if session is None and system_prompt_text and messages_with_system != messages_without_system:
session, client, remain = await _find_reusable_session(db, pool, model, messages_with_system)
if session is not None:
messages = messages_with_system

managed_system_gem_id: str | None = None
if system_prompt_text:
target_client = client
if target_client is None:
target_client = await pool.acquire()
client = target_client

managed_system_gem_id = await target_client.system_prompt_gem_id_or_create(system_prompt_text)
if managed_system_gem_id:
messages = messages_without_system
if session is not None:
session.gem = managed_system_gem_id
else:
messages = messages_with_system

# If message shape changed after gem resolution, search reusable session again.
if session is None and messages in (messages_without_system, messages_with_system):
session, client, remain = await _find_reusable_session(db, pool, model, messages)
if session:
msgs = _prepare_messages_for_model(
remain,
Expand All @@ -2008,8 +2116,9 @@ async def create_response(
)
else:
try:
client = await pool.acquire()
session = client.start_chat(model=model)
if client is None:
client = await pool.acquire()
session = client.start_chat(model=model, gem=managed_system_gem_id)
m_input, files = await _process_conversation_with_compaction(
messages,
tmp_dir,
Expand Down
Loading
Loading