diff --git a/.changeset/swift-pandas-juggle.md b/.changeset/swift-pandas-juggle.md
new file mode 100644
index 000000000..ae0dbb90d
--- /dev/null
+++ b/.changeset/swift-pandas-juggle.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents-plugin-google': patch
+---
+
+fix Gemini 3.1 realtime `generateReply()` continuation and restricted-model history handling
diff --git a/plugins/google/src/beta/realtime/realtime_api.test.ts b/plugins/google/src/beta/realtime/realtime_api.test.ts
new file mode 100644
index 000000000..c008eab20
--- /dev/null
+++ b/plugins/google/src/beta/realtime/realtime_api.test.ts
@@ -0,0 +1,196 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { llm } from '@livekit/agents';
+import { describe, expect, it } from 'vitest';
+import {
+  buildGenerateReplyClientEvents,
+  isRestrictedClientContentModel,
+  RealtimeModel,
+  supportsServerSideChatContext,
+} from './realtime_api.js';
+import type * as api_proto from './api_proto.js';
+
+describe('Google realtime generateReply compatibility helpers', () => {
+  it('detects restricted client-content models', () => {
+    expect(isRestrictedClientContentModel('gemini-3.1-flash-live-preview')).toBe(true);
+    expect(isRestrictedClientContentModel('gemini-2.5-flash-native-audio-preview-12-2025')).toBe(
+      false,
+    );
+  });
+
+  it('tracks whether server-side chat context syncing is supported', () => {
+    expect(supportsServerSideChatContext('gemini-3.1-flash-live-preview')).toBe(false);
+    expect(
+      supportsServerSideChatContext('gemini-2.5-flash-native-audio-preview-12-2025'),
+    ).toBe(true);
+  });
+
+  it('builds the 2.5 placeholder user turn event', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+        instructions: 'Say hello in one short sentence.',
+      }),
+    ).toEqual([
+      {
+        type: 'content',
+        value: {
+          turns: [
+            {
+              parts: [{ text: 'Say hello in one short sentence.' }],
+              role: 'model',
+            },
+            {
+              parts: [{ text: '.' }],
+              role: 'user',
+            },
+          ],
+          turnComplete: true,
+        },
+      },
+    ]);
+  });
+
+  it('builds a 2.5 event without instructions', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+      }),
+    ).toEqual([
+      {
+        type: 'content',
+        value: {
+          turns: [
+            {
+              parts: [{ text: '.' }],
+              role: 'user',
+            },
+          ],
+          turnComplete: true,
+        },
+      },
+    ]);
+  });
+
+  it('builds a Gemini 3.1 realtimeInput event with instructions', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-3.1-flash-live-preview',
+        instructions: 'Continue naturally after the tool result.',
+      }),
+    ).toEqual([
+      {
+        type: 'realtime_input',
+        value: {
+          text: 'Continue naturally after the tool result.',
+        },
+      },
+    ]);
+  });
+
+  it('builds a Gemini 3.1 realtimeInput dot trigger without instructions', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-3.1-flash-live-preview',
+      }),
+    ).toEqual([
+      {
+        type: 'realtime_input',
+        value: {
+          text: '.',
+        },
+      },
+    ]);
+  });
+
+  it('prepends activityEnd when inUserActivity is true (3.1)', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-3.1-flash-live-preview',
+        instructions: 'Hello',
+        inUserActivity: true,
+      }),
+    ).toEqual([
+      {
+        type: 'realtime_input',
+        value: {
+          activityEnd: {},
+        },
+      },
+      {
+        type: 'realtime_input',
+        value: {
+          text: 'Hello',
+        },
+      },
+    ]);
+  });
+
+  it('prepends activityEnd when inUserActivity is true (2.5)', () => {
+    const events = buildGenerateReplyClientEvents({
+      model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+      inUserActivity: true,
+    });
+    expect(events[0]).toEqual({
+      type: 'realtime_input',
+      value: { activityEnd: {} },
+    });
+    expect(events[1]!.type).toBe('content');
+  });
+
+  it('restricted models still send tool responses from updateChatCtx', async () => {
+    const session = new RealtimeModel({
+      apiKey: 'test',
+      model: 'gemini-3.1-flash-live-preview',
+    }).session() as unknown as {
+      activeSession?: unknown;
+      messageChannel: {
+        items: api_proto.ClientEvents[];
+        put(event: api_proto.ClientEvents): Promise<void>;
+      };
+      updateChatCtx(chatCtx: llm.ChatContext): Promise<void>;
+    };
+
+    const events: api_proto.ClientEvents[] = [];
+    Object.defineProperty(session, 'activeSession', {
+      configurable: true,
+      get: () => ({}),
+      set: () => undefined,
+    });
+    session.messageChannel.put = async (event) => {
+      events.push(event);
+    };
+
+    const chatCtx = llm.ChatContext.empty();
+    chatCtx.insert([
+      llm.ChatMessage.create({
+        role: 'assistant',
+        content: 'The tool finished successfully.',
+      }),
+      llm.FunctionCallOutput.create({
+        callId: 'call_123',
+        isError: false,
+        name: 'lookup_weather',
+        output: '{"temperature_c":21}',
+      }),
+    ]);
+
+    await session.updateChatCtx(chatCtx);
+
+    expect(events).toEqual([
+      {
+        type: 'tool_response',
+        value: {
+          functionResponses: [
+            {
+              id: 'call_123',
+              name: 'lookup_weather',
+              response: { output: '{"temperature_c":21}' },
+            },
+          ],
+        },
+      },
+    ]);
+  });
+});
diff --git a/plugins/google/src/beta/realtime/realtime_api.ts b/plugins/google/src/beta/realtime/realtime_api.ts
index 2f9caf087..655a90a70 100644
--- a/plugins/google/src/beta/realtime/realtime_api.ts
+++ b/plugins/google/src/beta/realtime/realtime_api.ts
@@ -77,6 +77,73 @@ function setsEqual<T>(a: Set<T>, b: Set<T>): boolean {
   return a.size === b.size && [...a].every((x) => b.has(x));
 }
 
+// Restricted Gemini Live models reject mid-session sendClientContent.
+// Currently this applies to the Gemini 3.1 live preview models.
+// generateReply must use sendRealtimeInput, and the current JS SDK path cannot
+// keep chat context synchronized server-side for these models.
+const RESTRICTED_CLIENT_CONTENT_MODELS = new Set(['gemini-3.1-flash-live-preview']);
+
+export function isRestrictedClientContentModel(model: string): boolean {
+  return RESTRICTED_CLIENT_CONTENT_MODELS.has(model);
+}
+
+export function supportsServerSideChatContext(model: string): boolean {
+  return !isRestrictedClientContentModel(model);
+}
+
+export function buildGenerateReplyClientEvents(options: {
+  model: string;
+  instructions?: string;
+  inUserActivity?: boolean;
+}): api_proto.ClientEvents[] {
+  const events: api_proto.ClientEvents[] = [];
+
+  if (options.inUserActivity) {
+    events.push({
+      type: 'realtime_input',
+      value: {
+        activityEnd: {},
+      },
+    });
+  }
+
+  if (isRestrictedClientContentModel(options.model)) {
+    // Gemini 3.1+ rejects sendClientContent mid-session.
+    // Use sendRealtimeInput({ text }) instead — it triggers generation on all Live models.
+    events.push({
+      type: 'realtime_input',
+      value: {
+        text: options.instructions ?? '.',
+      },
+    });
+
+    return events;
+  }
+
+  // Gemini 2.5 generateReply relies on ending with a synthetic user turn.
+  const turns: types.Content[] = [];
+  if (options.instructions !== undefined) {
+    turns.push({
+      parts: [{ text: options.instructions }],
+      role: 'model',
+    });
+  }
+  turns.push({
+    parts: [{ text: '.' }],
+    role: 'user',
+  });
+
+  events.push({
+    type: 'content',
+    value: {
+      turns,
+      turnComplete: true,
+    },
+  });
+
+  return events;
+}
+
 /**
  * Internal realtime options for Google Realtime API
  */
@@ -288,9 +355,8 @@ export class RealtimeModel extends llm.RealtimeModel {
       /**
        * Thinking configuration for native audio models.
-       * If not set, the model's default thinking behavior is used.
-       * Gemini 3.1 live models use `thinkingLevel`.
-       * Gemini 2.5 live models use `thinkingBudget`.
+       * Use `{ thinkingBudget: 0 }` to disable thinking.
+       * Use `{ thinkingBudget: -1 }` for automatic/dynamic thinking.
        */
       thinkingConfig?: types.ThinkingConfig;
     } = {},
@@ -573,6 +639,7 @@ export class RealtimeSession extends llm.RealtimeSession {
     }
 
     if (!this.realtimeModel.capabilities.midSessionInstructionsUpdate) {
+      this.markRestartNeeded();
       return;
     }
 
@@ -627,6 +694,24 @@
     const toolResults = this.getToolResultsForRealtime(appendCtx, this.options.vertexai);
 
+    if (!supportsServerSideChatContext(this.options.model)) {
+      if (turns.length > 0) {
+        this.#logger.warn(
+          'updateChatCtx is not currently applied on restricted model ' +
+            this.options.model +
+            '. Storing chat context locally only until the JS SDK exposes a supported history path.',
+        );
+      }
+      if (toolResults) {
+        this.sendClientEvent({
+          type: 'tool_response',
+          value: toolResults,
+        });
+      }
+      this._chatCtx = chatCtx.copy();
+      return;
+    }
+
     if (turns.length > 0) {
       const shouldSendRealtimeText = this.pendingInterruptText;
 
@@ -728,13 +813,6 @@
   }
 
   async generateReply(instructions?: string): Promise {
-    if (!this.realtimeModel.capabilities.midSessionChatCtxUpdate) {
-      this.#logger.warn(
-        `generateReply is not compatible with '${this.options.model}' and will be ignored.`,
-      );
-      throw new Error(`generateReply is not compatible with '${this.options.model}'`);
-    }
-
     if (this.pendingGenerationFut && !this.pendingGenerationFut.done) {
       this.#logger.warn(
         'generateReply called while another generation is pending, cancelling previous.',
@@ -745,37 +823,19 @@
     const fut = new Future();
     this.pendingGenerationFut = fut;
 
+    const events = buildGenerateReplyClientEvents({
+      model: this.options.model,
+      instructions,
+      inUserActivity: this.inUserActivity,
+    });
+
     if (this.inUserActivity) {
-      this.sendClientEvent({
-        type: 'realtime_input',
-        value: {
-          activityEnd: {},
-        },
-      });
       this.inUserActivity = false;
     }
 
-    // Gemini requires the last message to end with user's turn
-    // so we need to add a placeholder user turn in order to trigger a new generation
-    const turns: types.Content[] = [];
-    if (instructions !== undefined) {
-      turns.push({
-        parts: [{ text: instructions }],
-        role: 'model',
-      });
+    for (const event of events) {
+      this.sendClientEvent(event);
     }
-    turns.push({
-      parts: [{ text: '.' }],
-      role: 'user',
-    });
-
-    this.sendClientEvent({
-      type: 'content',
-      value: {
-        turns,
-        turnComplete: true,
-      },
-    });
 
     const timeoutHandle = setTimeout(() => {
       if (!fut.done) {
@@ -929,18 +989,26 @@
     try {
       this.activeSession = session;
 
-      // Send existing chat context
-      const [turns] = await this._chatCtx
-        .copy({
-          excludeFunctionCall: true,
-        })
-        .toProviderFormat('google', false);
-
-      if (turns.length > 0) {
-        await session.sendClientContent({
-          turns,
-          turnComplete: false,
-        });
+      // Send existing chat context when the current SDK path can sync history.
+      if (supportsServerSideChatContext(this.options.model)) {
+        const [turns] = await this._chatCtx
+          .copy({
+            excludeFunctionCall: true,
+          })
+          .toProviderFormat('google', false);
+
+        if (turns.length > 0) {
+          await session.sendClientContent({
+            turns,
+            turnComplete: false,
+          });
+        }
+      } else if (this._chatCtx.items.length > 0) {
+        this.#logger.warn(
+          'Initial chat context is not currently applied for restricted model ' +
+            this.options.model +
+            '. The current JS SDK path cannot seed prior chat history for this model.',
+        );
       }
     } finally {
       unlock();
@@ -1017,6 +1085,15 @@
     switch (msg.type) {
       case 'content':
+        // Gemini 3.1+ rejects sendClientContent mid-session — drop the event
+        // instead of crashing the session with a 1007 error.
+        if (isRestrictedClientContentModel(this.options.model)) {
+          this.#logger.warn(
+            'Dropping sendClientContent event for restricted model. ' +
+              'Use reconnect-based updates or sendRealtimeInput instead.',
+          );
+          break;
+        }
        const { turns, turnComplete } = msg.value;
        if (LK_GOOGLE_DEBUG) {
          this.#logger.debug(`(client) -> ${JSON.stringify(this.loggableClientEvent(msg))}`);
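For reference, a minimal usage sketch (not part of the patch) of the exported helper on a restricted model; it mirrors the expectations in realtime_api.test.ts above, and the relative import path assumes a file sitting next to realtime_api.ts:

import { buildGenerateReplyClientEvents } from './realtime_api.js';

// With pending user activity and no explicit instructions, the Gemini 3.1 path
// emits an activityEnd realtime_input followed by a '.' text trigger.
const events = buildGenerateReplyClientEvents({
  model: 'gemini-3.1-flash-live-preview',
  inUserActivity: true,
});
// events[0] -> { type: 'realtime_input', value: { activityEnd: {} } }
// events[1] -> { type: 'realtime_input', value: { text: '.' } }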