MoonshotAI · wbxl2000 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/.changeset/fix-web-streaming-render-jank.md b/.changeset/fix-web-streaming-render-jank.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Keep the web chat responsive during long streaming replies by isolating live token text from the rest of the UI state, so it no longer stalls the main thread.
diff --git a/apps/kimi-web/src/api/daemon/eventReducer.ts b/apps/kimi-web/src/api/daemon/eventReducer.ts
@@ -84,7 +84,13 @@ export function createInitialState(): KimiClientState {
 function cloneState(s: KimiClientState): KimiClientState {
   return {
     ...s,
-    sessions: [...s.sessions],
+    // Reuse the `sessions` array reference when an event does not touch it.
+    // Every session-mutating case below already builds its own array via
+    // `[...]` / `.map` / `.filter`, so sharing the reference is safe — and it
+    // keeps `rawState.sessions` stable for events that don't change sessions,
+    // so the sidebar computeds (sessionsForView / workspaceGroups /
+    // mergedWorkspaces) are not dirtied by unrelated events.
+    sessions: s.sessions,
     messagesBySession: { ...s.messagesBySession },
     approvalsBySession: { ...s.approvalsBySession },
     planReviewByToolCallId: { ...s.planReviewByToolCallId },

diff --git a/apps/kimi-web/src/components/chat/ChatPane.vue b/apps/kimi-web/src/components/chat/ChatPane.vue
@@ -5,6 +5,7 @@ import { useI18n } from 'vue-i18n';
 import type { ChatTurn, ApprovalBlock, FilePreviewRequest, ToolMedia } from '../../types';
 import ToolCall from './ToolCall.vue';
 import Markdown from './Markdown.vue';
+import StreamingBlocks from './StreamingBlocks.vue';
 import ThinkingBlock from './ThinkingBlock.vue';
 import ActivityNotice from './ActivityNotice.vue';
 import AgentCard from './AgentCard.vue';
@@ -44,6 +45,12 @@ onUnmounted(() => {
 const props = withDefaults(
   defineProps<{
     turns: ChatTurn[];
+    /**
+     * The session these turns belong to. Used by the streaming renderer to look
+     * up the live text in the streaming store. Optional so SideChatPanel (which
+     * renders a subagent transcript, not a streaming session) can omit it.
+     */
+    sessionId?: string;
     approvals?: { approvalId: string; block: ApprovalBlock; agentName?: string }[];
     /**
      * Bubble chat layout: render each turn as a chat bubble (user = right-aligned
@@ -537,6 +544,14 @@ function isStreamingRenderBlock(turn: ChatTurn, block: { sourceIndex: number }):
           <AgentGroup v-else-if="blk.kind === 'agentGroup'" :members="blk.members" @open="emit('openAgent', { turnId: turn.id, blockIndex: blk.sourceIndex, memberId: $event })" />
           <ToolCall v-else-if="blk.kind === 'tool'" :tool="blk.tool" :mobile="childBubble" :tool-diff-panel="toolDiffPanel" @open-media="emit('openMedia', $event)" @open-file="emit('openFile', $event)" @open-tool-diff="emit('openToolDiff', $event)" />
         </template>
+        <StreamingBlocks
+          v-if="sessionId && turn.id === streamingTurnId"
+          :session-id="sessionId"
+          :turn-id="turn.id"
+          :mobile="childBubble"
+          @open-file="(target) => emit('openFile', target)"
+          @open-thinking="emit('openThinking', $event)"
+        />
         <div v-if="turn.id !== streamingTurnId && isAssistantRunEnd(ti) && (assistantRunFinalText(ti).trim().length > 0 || turn.durationMs !== undefined)" class="a-msg-ft">
           <span v-if="turn.durationMs !== undefined" class="a-duration" :title="`${turn.durationMs} ms`">{{ formatDuration(turn.durationMs) }}</span>
           <button
@@ -679,6 +694,14 @@ function isStreamingRenderBlock(turn: ChatTurn, block: { sourceIndex: number }):
               <AgentGroup v-else-if="blk.kind === 'agentGroup'" :members="blk.members" @open="emit('openAgent', { turnId: turn.id, blockIndex: blk.sourceIndex, memberId: $event })" />
               <ToolCall v-else-if="blk.kind === 'tool'" :tool="blk.tool" :tool-diff-panel="toolDiffPanel" @open-media="emit('openMedia', $event)" @open-file="emit('openFile', $event)" @open-tool-diff="emit('openToolDiff', $event)" />
             </template>
+            <StreamingBlocks
+              v-if="sessionId && turn.id === streamingTurnId"
+              :session-id="sessionId"
+              :turn-id="turn.id"
+              :mobile="childBubble"
+              @open-file="(target) => emit('openFile', target)"
+              @open-thinking="emit('openThinking', $event)"
+            />
           </template>
         </div>
 

diff --git a/apps/kimi-web/src/components/chat/ConversationPane.vue b/apps/kimi-web/src/components/chat/ConversationPane.vue
@@ -1008,6 +1008,7 @@ defineExpose({ loadComposerForEdit });
               ref="chatPaneRef"
               :key="fileReloadKey ?? 'no-session'"
               :turns="turns"
+              :session-id="sessionId"
               :approvals="approvals"
               :bubble="bubble"
               :mobile="mobile"

diff --git a/apps/kimi-web/src/components/chat/Markdown.vue b/apps/kimi-web/src/components/chat/Markdown.vue
@@ -70,12 +70,15 @@ const renderPlan = computed(() => {
 // Code blocks follow the app colour scheme (shiki re-renders on flip).
 const isDark = useIsDark();
 
-// markstream's chat mode can batch nodes and defer offscreen nodes. Batching is
-// safe for settled history, but viewport deferral can leave individual code
-// blocks blank in our internal chat scroller when visibility events are missed
-// during a session/theme switch. Keep batching for history, but always mount the
-// actual nodes so every code block has at least its plain fallback immediately.
-const allowBatchRender = computed(() => !props.streaming);
+// markstream's chat mode batches node mounting across frames (frame-budget
+// scheduling) and can defer offscreen nodes. Viewport deferral can leave
+// individual code blocks blank in our internal chat scroller when visibility
+// events are missed, so it stays disabled below (`deferNodesUntilVisible:
+// false`). Batching itself only spreads mounting by a frame or two and is
+// exactly the scenario streaming needs, so it stays on for both live and
+// settled content (the `loading: false` code-block prop already removes the
+// skeleton, so a not-yet-mounted block simply appears a frame later).
+const allowBatchRender = computed(() => true);
 
 // ---------------------------------------------------------------------------
 // Local image resolution — rewrite the SOURCE TEXT before markstream sees it.

diff --git a/apps/kimi-web/src/components/chat/StreamingBlocks.vue b/apps/kimi-web/src/components/chat/StreamingBlocks.vue
@@ -0,0 +1,51 @@
+<!-- apps/kimi-web/src/components/chat/StreamingBlocks.vue -->
+<!--
+  Renders the live (still-streaming) text/thinking blocks of the active
+  assistant message. This is the ONLY component that re-renders on each
+  `assistantDelta`: it subscribes to the fine-grained streaming store, so the
+  rest of the app (App, sidebar, the turn list) does not move on every token.
+
+  Mounted by ChatPane only for the turn that is currently streaming; unmounts
+  when the turn settles (the committed content in `messagesBySession` takes
+  over).
+-->
+<script setup lang="ts">
+import { computed } from 'vue';
+import Markdown from './Markdown.vue';
+import ThinkingBlock from './ThinkingBlock.vue';
+import { streamingBySession } from '../../composables/client/streamingStore';
+import type { FilePreviewRequest } from '../../types';
+
+const props = withDefaults(
+  defineProps<{ sessionId: string; turnId: string; mobile?: boolean }>(),
+  { mobile: false },
+);
+
+const emit = defineEmits<{
+  openFile: [target: FilePreviewRequest];
+  openThinking: [target: { turnId: string; blockIndex: number }];
+}>();
+
+// Subscribe to this session's live blocks. Only this computed (and therefore
+// only this component) is dirtied when a delta appends to the store.
+const blocks = computed(() => streamingBySession[props.sessionId]?.blocks ?? []);
+
+// One stable callback shared by every <Markdown>: an inline arrow bound as a
+// prop is a new value on each render, which changes the prop on every token.
+const forwardOpenFile = (target: FilePreviewRequest): void => emit('openFile', target);
+</script>
+
+<template>
+  <template v-for="blk in blocks" :key="`stream-${blk.kind}-${blk.contentIndex}`">
+    <ThinkingBlock
+      v-if="blk.kind === 'thinking'"
+      :text="blk.text"
+      :mobile="mobile"
+      :streaming="true"
+      @open="emit('openThinking', { turnId, blockIndex: blk.contentIndex })"
+    />
+    <div v-else-if="blk.kind === 'text' && blk.text" class="msg">
+      <Markdown :text="blk.text" :streaming="true" :open-file="forwardOpenFile" />
+    </div>
+  </template>
+</template>
diff --git a/apps/kimi-web/src/composables/client/streamingStore.ts b/apps/kimi-web/src/composables/client/streamingStore.ts
@@ -0,0 +1,71 @@
+// apps/kimi-web/src/composables/client/streamingStore.ts
+//
+// Fine-grained streaming-text store, kept OUTSIDE `rawState` on purpose.
+//
+// `assistantDelta` is the only genuinely high-frequency event (dozens to
+// hundreds per second). Routing it through the immutable reducer + the coarse
+// `rawState` graph makes every delta re-render the whole App and recompute the
+// sidebar computeds (see the main-thread-jank investigation). Instead, deltas
+// append here and only the single `StreamingBlocks` component subscribed to a
+// session re-renders.
+//
+// Lifecycle: deltas append; `messageUpdated` (authoritative full content),
+// turn-end (`sessionStatusChanged` idle/aborted), snapshot resync and session
+// removal clear the entry so committed content takes over without duplication.
+
+import { reactive } from 'vue';
+
+export interface StreamingBlock {
+  contentIndex: number; // `contentIndex` of the deltas accumulated into this block
+  kind: 'text' | 'thinking'; // 'text' when the delta carried `text`, else 'thinking'
+  text: string; // concatenation of every chunk appended so far
+}
+
+export interface StreamingState {
+  /** id of the assistant message currently being streamed. */
+  messageId: string;
+  /** Ordered live text/thinking blocks (always trailing in the message). */
+  blocks: StreamingBlock[];
+}
+
+/**
+ * Per-session live streaming state. A session has at most one in-flight
+ * assistant message (its trailing one), so a single entry per session suffices.
+ */
+export const streamingBySession = reactive<Record<string, StreamingState>>({});
+
+/**
+ * Append one `assistantDelta` chunk to the session's live entry, starting a
+ * fresh entry when `messageId` changes (the previous message was committed by
+ * `messageUpdated`). Only `streamingBySession` is written, never `rawState`.
+ * Chunks sharing a `contentIndex` and kind are merged into a single block.
+ */
+export function appendStreamingDelta(
+  sessionId: string,
+  messageId: string,
+  contentIndex: number,
+  delta: { text?: string; thinking?: string },
+): void {
+  let state = streamingBySession[sessionId];
+  if (!state || state.messageId !== messageId) {
+    // Hold the proxy, not the raw literal an assignment expression evaluates
+    // to: writes through the raw object are invisible to Vue's tracking.
+    state = reactive<StreamingState>({ messageId, blocks: [] });
+    streamingBySession[sessionId] = state;
+  }
+
+  const kind: 'text' | 'thinking' = delta.text !== undefined ? 'text' : 'thinking';
+  const chunk = delta.text ?? delta.thinking ?? '';
+  if (chunk.length === 0) return;
+
+  // Usually the trailing block; searching keeps (kind, contentIndex) unique,
+  // which the renderer's v-for key depends on, if indices ever interleave.
+  const hit = state.blocks.find((b) => b.contentIndex === contentIndex && b.kind === kind);
+  if (hit) hit.text += chunk;
+  else state.blocks.push({ contentIndex, kind, text: chunk });
+}
+
+/** Drop a session's live entry (commit, turn end, resync, removal); no-op if absent. */
+export function clearStreaming(sessionId: string): void {
+  delete streamingBySession[sessionId];
+}
diff --git a/apps/kimi-web/src/composables/useKimiWebClient.ts b/apps/kimi-web/src/composables/useKimiWebClient.ts
@@ -19,6 +19,7 @@ import {
   STORAGE_KEYS,
 } from '../lib/storage';
 import { createEventBatcher, isRenderEvent } from './client/eventBatcher';
+import { appendStreamingDelta, clearStreaming } from './client/streamingStore';
 import { useAppearance } from './client/useAppearance';
 import { useNotification } from './client/useNotification';
 import { useTaskPoller } from './client/useTaskPoller';
@@ -486,6 +487,7 @@ function forgetSession(sessionId: string): void {
   // That would make hasLoadedMessages() treat the stale empty cache as
   // authoritative and skip the next snapshot fetch for this id.
   enqueueEvent.flush();
+  clearStreaming(sessionId);
   removeSession(sessionId);
   removeSessionMessages(sessionId);
   delete rawState.approvalsBySession[sessionId];
@@ -639,8 +641,28 @@ function nextOptimisticMsgId(): string {
 // past the queue check and clobber promptIdBySession (breaking abort).
 const inFlightPromptSessions = new Set<string>();
 
+// Advances the per-session seq cursor in place for `assistantDelta`, the one
+// event that skips the reducer (this mirrors the reducer's advanceSeq). The
+// cursor only ever moves forward. lastSeqBySession is a resync cursor with no
+// rendering dependencies, so in-place mutation is both safe and cheap.
+function advanceSeqCursor(sessionId: string | undefined, seq: number | undefined): void {
+  if (sessionId === undefined || seq === undefined || !(seq > 0)) return;
+  const current = rawState.lastSeqBySession[sessionId] ?? 0;
+  if (seq > current) rawState.lastSeqBySession[sessionId] = seq;
+}
+
 // Helper: mutate rawState by applying a reducer on a snapshot then re-assigning fields
 function applyEvent(event: ReturnType<typeof toAppEvent>, sessionId: string, seq: number): void {
+  // Streaming text/thinking deltas bypass the reducer entirely. Appending to the
+  // fine-grained streaming store is O(1) and dirties only the single
+  // StreamingBlocks component — instead of cloning all of `rawState` and
+  // re-rendering the whole App + sidebar on every token.
+  if (event.type === 'assistantDelta') {
+    appendStreamingDelta(sessionId, event.messageId, event.contentIndex, event.delta);
+    advanceSeqCursor(sessionId, seq);
+    return;
+  }
+
   const snapshot: KimiClientState = {
     sessions: rawState.sessions,
     activeSessionId: rawState.activeSessionId,
@@ -670,6 +692,20 @@ function applyEvent(event: ReturnType<typeof toAppEvent>, sessionId: string, seq
   rawState.config = next.config ?? null;
   rawState.warnings = next.warnings;
 
+  // `messageUpdated` carries the authoritative full content of a message (tool
+  // slot / step end / turn end): drop the live streaming entry so the just-
+  // committed content takes over without rendering the same text twice.
+  if (event.type === 'messageUpdated') {
+    clearStreaming(sessionId);
+  }
+  // Turn end: release the streaming entry for the session.
+  if (
+    event.type === 'sessionStatusChanged' &&
+    (event.status === 'idle' || event.status === 'aborted')
+  ) {
+    clearStreaming(sessionId);
+  }
+
   if (event.type === 'configChanged') {
     rawState.defaultModel = event.config.defaultModel ?? null;
   }
@@ -1028,8 +1064,14 @@ async function syncSessionFromSnapshot(sessionId: string): Promise<SyncSessionRe
     // messagesBySession[sessionId]. The snapshot is authoritative (it already
     // contains everything up to asOfSeq); applying stale queued deltas on top
     // of it would duplicate text / tool output. Flushing here applies them to
-    // the pre-snapshot array, which the snapshot then overwrites.
+    // the pre-snapshot state, which the snapshot then overwrites.
     enqueueEvent.flush();
+    // The snapshot is authoritative for the live streaming text too: any deltas
+    // the flush just landed in the streaming store are superseded by the
+    // snapshot (and the in-flight seed below), so drop them. Without this, a
+    // reconnect or delta-gap resync mid-stream would render stale live chunks
+    // on top of the seeded snapshot.
+    clearStreaming(sessionId);
 
     updateSession(sessionId, (s) => ({
       ...snap.session,

diff --git a/apps/kimi-web/test/event-reducer.test.ts b/apps/kimi-web/test/event-reducer.test.ts
@@ -149,3 +149,44 @@ describe('reduceAppEvent taskProgress', () => {
     expect(lines?.at(-1)).toBe('line 59');
   });
 });
+
+describe('reduceAppEvent sessions reference stability', () => {
+  // The sidebar computeds (sessionsForView / workspaceGroups / mergedWorkspaces)
+  // depend on `rawState.sessions`. Events that do not change sessions must keep
+  // the SAME array reference so those computeds are not dirtied; events that do
+  // change sessions must produce a NEW array.
+
+  it('reuses the sessions reference for an event that does not touch sessions', () => {
+    const state = {
+      ...createInitialState(),
+      sessions: [makeSession('s1', '2026-01-01T00:00:00.000Z')],
+      messagesBySession: { s1: [makeMessage('s1', '2026-01-01T00:00:00.000Z')] },
+    };
+    const next = reduceAppEvent(
+      state,
+      {
+        type: 'messageUpdated',
+        sessionId: 's1',
+        messageId: 'msg_2026-01-01T00:00:00.000Z',
+        content: [{ type: 'text', text: 'updated' }],
+        status: 'completed',
+      },
+      { sessionId: 's1', seq: 2 },
+    );
+    expect(next.sessions).toBe(state.sessions);
+  });
+
+  it('produces a new sessions array for an event that changes sessions', () => {
+    const state = {
+      ...createInitialState(),
+      sessions: [makeSession('s1', '2026-01-01T00:00:00.000Z')],
+    };
+    const next = reduceAppEvent(
+      state,
+      { type: 'sessionCreated', session: makeSession('s2', '2026-02-01T00:00:00.000Z') },
+      { sessionId: 's2', seq: 3 },
+    );
+    expect(next.sessions).not.toBe(state.sessions);
+    expect(next.sessions.map((s) => s.id)).toEqual(['s2', 's1']);
+  });
+});