diff --git a/.changeset/swift-pandas-juggle.md b/.changeset/swift-pandas-juggle.md
new file mode 100644
index 000000000..ae0dbb90d
--- /dev/null
+++ b/.changeset/swift-pandas-juggle.md
@@ -0,0 +1,5 @@
+---
+'@livekit/agents-plugin-google': patch
+---
+
+fix Gemini 3.1 realtime `generateReply()` continuation and restricted-model history handling
diff --git a/plugins/google/src/beta/realtime/realtime_api.test.ts b/plugins/google/src/beta/realtime/realtime_api.test.ts
new file mode 100644
index 000000000..c008eab20
--- /dev/null
+++ b/plugins/google/src/beta/realtime/realtime_api.test.ts
@@ -0,0 +1,196 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { llm } from '@livekit/agents';
+import { describe, expect, it } from 'vitest';
+import {
+  buildGenerateReplyClientEvents,
+  isRestrictedClientContentModel,
+  RealtimeModel,
+  supportsServerSideChatContext,
+} from './realtime_api.js';
+import type * as api_proto from './api_proto.js';
+
+describe('Google realtime generateReply compatibility helpers', () => {
+  it('detects restricted client-content models', () => {
+    expect(isRestrictedClientContentModel('gemini-3.1-flash-live-preview')).toBe(true);
+    expect(isRestrictedClientContentModel('gemini-2.5-flash-native-audio-preview-12-2025')).toBe(
+      false,
+    );
+  });
+
+  it('tracks whether server-side chat context syncing is supported', () => {
+    expect(supportsServerSideChatContext('gemini-3.1-flash-live-preview')).toBe(false);
+    expect(
+      supportsServerSideChatContext('gemini-2.5-flash-native-audio-preview-12-2025'),
+    ).toBe(true);
+  });
+
+  it('builds the 2.5 placeholder user turn event', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+        instructions: 'Say hello in one short sentence.',
+      }),
+    ).toEqual([
+      {
+        type: 'content',
+        value: {
+          turns: [
+            {
+              parts: [{ text: 'Say hello in one short sentence.' }],
+              role: 'model',
+            },
+            {
+              parts: [{ text: '.' }],
+              role: 'user',
+            },
+          ],
+          turnComplete: true,
+        },
+      },
+    ]);
+  });
+
+  it('builds a 2.5 event without instructions', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+      }),
+    ).toEqual([
+      {
+        type: 'content',
+        value: {
+          turns: [
+            {
+              parts: [{ text: '.' }],
+              role: 'user',
+            },
+          ],
+          turnComplete: true,
+        },
+      },
+    ]);
+  });
+
+  it('builds a Gemini 3.1 realtimeInput event with instructions', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-3.1-flash-live-preview',
+        instructions: 'Continue naturally after the tool result.',
+      }),
+    ).toEqual([
+      {
+        type: 'realtime_input',
+        value: {
+          text: 'Continue naturally after the tool result.',
+        },
+      },
+    ]);
+  });
+
+  it('builds a Gemini 3.1 realtimeInput dot trigger without instructions', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-3.1-flash-live-preview',
+      }),
+    ).toEqual([
+      {
+        type: 'realtime_input',
+        value: {
+          text: '.',
+        },
+      },
+    ]);
+  });
+
+  it('prepends activityEnd when inUserActivity is true (3.1)', () => {
+    expect(
+      buildGenerateReplyClientEvents({
+        model: 'gemini-3.1-flash-live-preview',
+        instructions: 'Hello',
+        inUserActivity: true,
+      }),
+    ).toEqual([
+      {
+        type: 'realtime_input',
+        value: {
+          activityEnd: {},
+        },
+      },
+      {
+        type: 'realtime_input',
+        value: {
+          text: 'Hello',
+        },
+      },
+    ]);
+  });
+
+  it('prepends activityEnd when inUserActivity is true (2.5)', () => {
+    const events = buildGenerateReplyClientEvents({
+      model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+      inUserActivity: true,
+    });
+    expect(events[0]).toEqual({
+      type: 'realtime_input',
+      value: { activityEnd: {} },
+    });
+    expect(events[1]!.type).toBe('content');
+  });
+
+  it('restricted models still send tool responses from updateChatCtx', async () => {
+    const session = new RealtimeModel({
+      apiKey: 'test',
+      model: 'gemini-3.1-flash-live-preview',
+    }).session() as unknown as {
+      activeSession?: unknown;
+      messageChannel: {
+        items: api_proto.ClientEvents[];
+        put(event: api_proto.ClientEvents): Promise<void>;
+      };
+      updateChatCtx(chatCtx: llm.ChatContext): Promise<void>;
+    };
+
+    const events: api_proto.ClientEvents[] = [];
+    Object.defineProperty(session, 'activeSession', {
+      configurable: true,
+      get: () => ({}),
+      set: () => undefined,
+    });
+    session.messageChannel.put = async (event) => {
+      events.push(event);
+    };
+
+    const chatCtx = llm.ChatContext.empty();
+    chatCtx.insert([
+      llm.ChatMessage.create({
+        role: 'assistant',
+        content: 'The tool finished successfully.',
+      }),
+      llm.FunctionCallOutput.create({
+        callId: 'call_123',
+        isError: false,
+        name: 'lookup_weather',
+        output: '{"temperature_c":21}',
+      }),
+    ]);
+
+    await session.updateChatCtx(chatCtx);
+
+    expect(events).toEqual([
+      {
+        type: 'tool_response',
+        value: {
+          functionResponses: [
+            {
+              id: 'call_123',
+              name: 'lookup_weather',
+              response: { output: '{"temperature_c":21}' },
+            },
+          ],
+        },
+      },
+    ]);
+  });
+});
diff --git a/plugins/google/src/beta/realtime/realtime_api.ts b/plugins/google/src/beta/realtime/realtime_api.ts
index 2f9caf087..655a90a70 100644
--- a/plugins/google/src/beta/realtime/realtime_api.ts
+++ b/plugins/google/src/beta/realtime/realtime_api.ts
@@ -77,6 +77,73 @@ function setsEqual<T>(a: Set<T>, b: Set<T>): boolean {
   return a.size === b.size && [...a].every((x) => b.has(x));
 }
 
+// Restricted Gemini Live models reject mid-session sendClientContent.
+// Currently this applies to the Gemini 3.1 live preview models.
+// generateReply must use sendRealtimeInput, and the current JS SDK path cannot
+// keep chat context synchronized server-side for these models.
+const RESTRICTED_CLIENT_CONTENT_MODELS = new Set(['gemini-3.1-flash-live-preview']);
+
+export function isRestrictedClientContentModel(model: string): boolean {
+  return RESTRICTED_CLIENT_CONTENT_MODELS.has(model);
+}
+
+export function supportsServerSideChatContext(model: string): boolean {
+  return !isRestrictedClientContentModel(model);
+}
+
+export function buildGenerateReplyClientEvents(options: {
+  model: string;
+  instructions?: string;
+  inUserActivity?: boolean;
+}): api_proto.ClientEvents[] {
+  const events: api_proto.ClientEvents[] = [];
+
+  if (options.inUserActivity) {
+    events.push({
+      type: 'realtime_input',
+      value: {
+        activityEnd: {},
+      },
+    });
+  }
+
+  if (isRestrictedClientContentModel(options.model)) {
+    // Gemini 3.1+ rejects sendClientContent mid-session.
+    // Use sendRealtimeInput({ text }) instead — it triggers generation on all Live models.
+    events.push({
+      type: 'realtime_input',
+      value: {
+        text: options.instructions ?? '.',
+      },
+    });
+
+    return events;
+  }
+
+  // Gemini 2.5 generateReply relies on ending with a synthetic user turn.
+  const turns: types.Content[] = [];
+  if (options.instructions !== undefined) {
+    turns.push({
+      parts: [{ text: options.instructions }],
+      role: 'model',
+    });
+  }
+  turns.push({
+    parts: [{ text: '.' }],
+    role: 'user',
+  });
+
+  events.push({
+    type: 'content',
+    value: {
+      turns,
+      turnComplete: true,
+    },
+  });
+
+  return events;
+}
+
 /**
  * Internal realtime options for Google Realtime API
  */
@@ -288,9 +355,8 @@ export class RealtimeModel extends llm.RealtimeModel {
       /**
        * Thinking configuration for native audio models.
-       * If not set, the model's default thinking behavior is used.
-       * Gemini 3.1 live models use `thinkingLevel`.
-       * Gemini 2.5 live models use `thinkingBudget`.
+       * Use `{ thinkingBudget: 0 }` to disable thinking.
+       * Use `{ thinkingBudget: -1 }` for automatic/dynamic thinking.
        */
       thinkingConfig?: types.ThinkingConfig;
     } = {},
@@ -573,6 +639,7 @@ export class RealtimeSession extends llm.RealtimeSession {
     }
 
     if (!this.realtimeModel.capabilities.midSessionInstructionsUpdate) {
+      this.markRestartNeeded();
       return;
     }
 
@@ -627,6 +694,24 @@
     const toolResults = this.getToolResultsForRealtime(appendCtx, this.options.vertexai);
 
+    if (!supportsServerSideChatContext(this.options.model)) {
+      if (turns.length > 0) {
+        this.#logger.warn(
+          'updateChatCtx is not currently applied on restricted model ' +
+            this.options.model +
+            '. Storing chat context locally only until the JS SDK exposes a supported history path.',
+        );
+      }
+      if (toolResults) {
+        this.sendClientEvent({
+          type: 'tool_response',
+          value: toolResults,
+        });
+      }
+      this._chatCtx = chatCtx.copy();
+      return;
+    }
+
     if (turns.length > 0) {
       const shouldSendRealtimeText = this.pendingInterruptText;
 
@@ -728,13 +813,6 @@
   }
 
   async generateReply(instructions?: string): Promise {
-    if (!this.realtimeModel.capabilities.midSessionChatCtxUpdate) {
-      this.#logger.warn(
-        `generateReply is not compatible with '${this.options.model}' and will be ignored.`,
-      );
-      throw new Error(`generateReply is not compatible with '${this.options.model}'`);
-    }
-
     if (this.pendingGenerationFut && !this.pendingGenerationFut.done) {
       this.#logger.warn(
         'generateReply called while another generation is pending, cancelling previous.',
@@ -745,37 +823,19 @@
     const fut = new Future();
     this.pendingGenerationFut = fut;
 
+    const events = buildGenerateReplyClientEvents({
+      model: this.options.model,
+      instructions,
+      inUserActivity: this.inUserActivity,
+    });
+
     if (this.inUserActivity) {
-      this.sendClientEvent({
-        type: 'realtime_input',
-        value: {
-          activityEnd: {},
-        },
-      });
       this.inUserActivity = false;
     }
 
-    // Gemini requires the last message to end with user's turn
-    // so we need to add a placeholder user turn in order to trigger a new generation
-    const turns: types.Content[] = [];
-    if (instructions !== undefined) {
-      turns.push({
-        parts: [{ text: instructions }],
-        role: 'model',
-      });
+    for (const event of events) {
+      this.sendClientEvent(event);
     }
-    turns.push({
-      parts: [{ text: '.' }],
-      role: 'user',
-    });
-
-    this.sendClientEvent({
-      type: 'content',
-      value: {
-        turns,
-        turnComplete: true,
-      },
-    });
 
     const timeoutHandle = setTimeout(() => {
       if (!fut.done) {
@@ -929,18 +989,26 @@
     try {
       this.activeSession = session;
 
-      // Send existing chat context
-      const [turns] = await this._chatCtx
-        .copy({
-          excludeFunctionCall: true,
-        })
-        .toProviderFormat('google', false);
-
-      if (turns.length > 0) {
-        await session.sendClientContent({
-          turns,
-          turnComplete: false,
-        });
+      // Send existing chat context when the current SDK path can sync history.
+      if (supportsServerSideChatContext(this.options.model)) {
+        const [turns] = await this._chatCtx
+          .copy({
+            excludeFunctionCall: true,
+          })
+          .toProviderFormat('google', false);
+
+        if (turns.length > 0) {
+          await session.sendClientContent({
+            turns,
+            turnComplete: false,
+          });
+        }
+      } else if (this._chatCtx.items.length > 0) {
+        this.#logger.warn(
+          'Initial chat context is not currently applied for restricted model ' +
+            this.options.model +
+            '. The current JS SDK path cannot seed prior chat history for this model.',
+        );
       }
     } finally {
       unlock();
@@ -1017,6 +1085,15 @@
     switch (msg.type) {
       case 'content':
+        // Gemini 3.1+ rejects sendClientContent mid-session — drop the event
+        // instead of crashing the session with a 1007 error.
+        if (isRestrictedClientContentModel(this.options.model)) {
+          this.#logger.warn(
+            'Dropping sendClientContent event for restricted model. ' +
+              'Use reconnect-based updates or sendRealtimeInput instead.',
+          );
+          break;
+        }
        const { turns, turnComplete } = msg.value;
        if (LK_GOOGLE_DEBUG) {
          this.#logger.debug(`(client) -> ${JSON.stringify(this.loggableClientEvent(msg))}`);
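For reference, a minimal usage sketch (not part of the patch) of the exported helper on a restricted model; it mirrors the expectations in realtime_api.test.ts above, and the relative import path assumes a file sitting next to realtime_api.ts:

import { buildGenerateReplyClientEvents } from './realtime_api.js';

// With pending user activity and no explicit instructions, the Gemini 3.1 path
// emits an activityEnd realtime_input followed by a '.' text trigger.
const events = buildGenerateReplyClientEvents({
  model: 'gemini-3.1-flash-live-preview',
  inUserActivity: true,
});
// events[0] -> { type: 'realtime_input', value: { activityEnd: {} } }
// events[1] -> { type: 'realtime_input', value: { text: '.' } }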