Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-web-streaming-render-jank.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Keep the web chat responsive during long streaming replies by isolating live token text from the rest of the UI state, so it no longer stalls the main thread.
8 changes: 7 additions & 1 deletion apps/kimi-web/src/api/daemon/eventReducer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,13 @@ export function createInitialState(): KimiClientState {
function cloneState(s: KimiClientState): KimiClientState {
return {
...s,
sessions: [...s.sessions],
// Reuse the `sessions` array reference when an event does not touch it.
// Every session-mutating case below already builds its own array via
// `[...]` / `.map` / `.filter`, so sharing the reference is safe — and it
// keeps `rawState.sessions` stable for events that don't change sessions,
// so the sidebar computeds (sessionsForView / workspaceGroups /
// mergedWorkspaces) are not dirtied by unrelated events.
sessions: s.sessions,
messagesBySession: { ...s.messagesBySession },
approvalsBySession: { ...s.approvalsBySession },
planReviewByToolCallId: { ...s.planReviewByToolCallId },
Expand Down
23 changes: 23 additions & 0 deletions apps/kimi-web/src/components/chat/ChatPane.vue
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { useI18n } from 'vue-i18n';
import type { ChatTurn, ApprovalBlock, FilePreviewRequest, ToolMedia } from '../../types';
import ToolCall from './ToolCall.vue';
import Markdown from './Markdown.vue';
import StreamingBlocks from './StreamingBlocks.vue';
import ThinkingBlock from './ThinkingBlock.vue';
import ActivityNotice from './ActivityNotice.vue';
import AgentCard from './AgentCard.vue';
Expand Down Expand Up @@ -44,6 +45,12 @@ onUnmounted(() => {
const props = withDefaults(
defineProps<{
turns: ChatTurn[];
/**
* The session these turns belong to. Used by the streaming renderer to look
* up the live text in the streaming store. Optional so SideChatPanel (which
* renders a subagent transcript, not a streaming session) can omit it.
*/
sessionId?: string;
approvals?: { approvalId: string; block: ApprovalBlock; agentName?: string }[];
/**
* Bubble chat layout: render each turn as a chat bubble (user = right-aligned
Expand Down Expand Up @@ -537,6 +544,14 @@ function isStreamingRenderBlock(turn: ChatTurn, block: { sourceIndex: number }):
<AgentGroup v-else-if="blk.kind === 'agentGroup'" :members="blk.members" @open="emit('openAgent', { turnId: turn.id, blockIndex: blk.sourceIndex, memberId: $event })" />
<ToolCall v-else-if="blk.kind === 'tool'" :tool="blk.tool" :mobile="childBubble" :tool-diff-panel="toolDiffPanel" @open-media="emit('openMedia', $event)" @open-file="emit('openFile', $event)" @open-tool-diff="emit('openToolDiff', $event)" />
</template>
<StreamingBlocks
v-if="sessionId && turn.id === streamingTurnId"
:session-id="sessionId"
:turn-id="turn.id"
:mobile="childBubble"
@open-file="(target) => emit('openFile', target)"
@open-thinking="emit('openThinking', $event)"
/>
<div v-if="turn.id !== streamingTurnId && isAssistantRunEnd(ti) && (assistantRunFinalText(ti).trim().length > 0 || turn.durationMs !== undefined)" class="a-msg-ft">
<span v-if="turn.durationMs !== undefined" class="a-duration" :title="`${turn.durationMs} ms`">{{ formatDuration(turn.durationMs) }}</span>
<button
Expand Down Expand Up @@ -679,6 +694,14 @@ function isStreamingRenderBlock(turn: ChatTurn, block: { sourceIndex: number }):
<AgentGroup v-else-if="blk.kind === 'agentGroup'" :members="blk.members" @open="emit('openAgent', { turnId: turn.id, blockIndex: blk.sourceIndex, memberId: $event })" />
<ToolCall v-else-if="blk.kind === 'tool'" :tool="blk.tool" :tool-diff-panel="toolDiffPanel" @open-media="emit('openMedia', $event)" @open-file="emit('openFile', $event)" @open-tool-diff="emit('openToolDiff', $event)" />
</template>
<StreamingBlocks
v-if="sessionId && turn.id === streamingTurnId"
:session-id="sessionId"
:turn-id="turn.id"
:mobile="childBubble"
@open-file="(target) => emit('openFile', target)"
@open-thinking="emit('openThinking', $event)"
/>
</template>
</div>

Expand Down
1 change: 1 addition & 0 deletions apps/kimi-web/src/components/chat/ConversationPane.vue
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,7 @@ defineExpose({ loadComposerForEdit });
ref="chatPaneRef"
:key="fileReloadKey ?? 'no-session'"
:turns="turns"
:session-id="sessionId"
:approvals="approvals"
:bubble="bubble"
:mobile="mobile"
Expand Down
15 changes: 9 additions & 6 deletions apps/kimi-web/src/components/chat/Markdown.vue
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,15 @@ const renderPlan = computed(() => {
// Code blocks follow the app colour scheme (shiki re-renders on flip).
const isDark = useIsDark();

// markstream's chat mode can batch nodes and defer offscreen nodes. Batching is
// safe for settled history, but viewport deferral can leave individual code
// blocks blank in our internal chat scroller when visibility events are missed
// during a session/theme switch. Keep batching for history, but always mount the
// actual nodes so every code block has at least its plain fallback immediately.
const allowBatchRender = computed(() => !props.streaming);
// markstream's chat mode batches node mounting across frames (frame-budget
// scheduling) and can defer offscreen nodes. Viewport deferral can leave
// individual code blocks blank in our internal chat scroller when visibility
// events are missed, so it stays disabled below (`deferNodesUntilVisible:
// false`). Batching itself only spreads mounting by a frame or two and is
// exactly the scenario streaming needs, so it stays on for both live and
// settled content (the `loading: false` code-block prop already removes the
// skeleton, so a not-yet-mounted block simply appears a frame later).
const allowBatchRender = computed(() => true);

// ---------------------------------------------------------------------------
// Local image resolution — rewrite the SOURCE TEXT before markstream sees it.
Expand Down
51 changes: 51 additions & 0 deletions apps/kimi-web/src/components/chat/StreamingBlocks.vue
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<!-- apps/kimi-web/src/components/chat/StreamingBlocks.vue -->
<!--
Renders the live (still-streaming) text/thinking blocks of the active
assistant message. This is the ONLY component that re-renders on each
`assistantDelta`: it subscribes to the fine-grained streaming store, so the
rest of the app (App, sidebar, the turn list) does not move on every token.

Mounted by ChatPane only for the turn that is currently streaming; unmounts
when the turn settles (the committed content in `messagesBySession` takes
over).
-->
<script setup lang="ts">
import { computed } from 'vue';
import Markdown from './Markdown.vue';
import ThinkingBlock from './ThinkingBlock.vue';
import { streamingBySession } from '../../composables/client/streamingStore';
import type { FilePreviewRequest } from '../../types';

// Props supplied by ChatPane for the turn that is currently streaming.
const props = withDefaults(
defineProps<{
// Key into the streaming store; selects which session's live blocks render.
sessionId: string;
// Id of the streaming turn; sent back in the `openThinking` payload.
turnId: string;
// Forwarded to ThinkingBlock as its `mobile` prop; defaults to false.
mobile?: boolean;
}>(),
{ mobile: false },
);

// Events this component re-emits to its parent on behalf of the live blocks.
const emit = defineEmits<{
// Fired with the target handed to Markdown's `open-file` callback.
openFile: [target: FilePreviewRequest];
// Fired when a live ThinkingBlock emits `open`; `blockIndex` carries that
// block's `contentIndex` and `turnId` is this component's `turnId` prop.
openThinking: [target: { turnId: string; blockIndex: number }];
}>();

// Live blocks for this session, read straight from the streaming store. The
// computed depends only on this session's store entry, so a delta appended
// there dirties this component and nothing else. Falls back to an empty list
// when the session has no live entry.
const blocks = computed(() => {
  const live = streamingBySession[props.sessionId];
  return live?.blocks ?? [];
});
</script>

<template>
<template v-for="blk in blocks" :key="`stream-${blk.kind}-${blk.contentIndex}`">
<ThinkingBlock
v-if="blk.kind === 'thinking'"
:text="blk.text"
:mobile="mobile"
:streaming="true"
@open="emit('openThinking', { turnId, blockIndex: blk.contentIndex })"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Resolve streaming thinking from live blocks

When a user clicks a still-streaming thinking block, this emits blockIndex: blk.contentIndex, but the live block is no longer present in client.turns because assistantDelta now bypasses messagesBySession; useDetailPanel still reads turn.blocks?.[blockIndex], so the side panel either stays closed or shows an older committed thinking block. This regresses the existing “click thinking to view full text” behavior for long live thoughts until the final messageUpdated arrives.

Useful? React with 👍 / 👎.

/>
<div v-else-if="blk.kind === 'text' && blk.text" class="msg">
<Markdown :text="blk.text" :streaming="true" :open-file="(target) => emit('openFile', target)" />
Comment on lines +47 to +48

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Merge live deltas with seeded markdown blocks

When a user refreshes or reconnects mid-reply, seedInFlight puts the already-generated assistantText/thinkingText into turn.blocks, and later deltas for the same contentIndex render here as a separate Markdown/ThinkingBlock appended after the seeded block. Markdown constructs spanning the snapshot boundary, such as an open code fence or list, are parsed as two documents and render incorrectly until messageUpdated commits. Seed the streaming store with the snapshot block or render same-index live text through the existing block instead of a separate Markdown instance.

Useful? React with 👍 / 👎.

</div>
</template>
</template>
71 changes: 71 additions & 0 deletions apps/kimi-web/src/composables/client/streamingStore.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// apps/kimi-web/src/composables/client/streamingStore.ts
//
// Fine-grained streaming-text store, kept OUTSIDE `rawState` on purpose.
//
// `assistantDelta` is the only genuinely high-frequency event (dozens to
// hundreds per second). Routing it through the immutable reducer + the coarse
// `rawState` graph makes every delta re-render the whole App and recompute the
// sidebar computeds (see the main-thread-jank investigation). Instead, deltas
// append here and only the single `StreamingBlocks` component subscribed to a
// session re-renders.
//
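// Lifecycle: deltas append; `messageUpdated` (authoritative full content) and
// turn-end (`sessionStatusChanged` idle/aborted) clear the entry so the
// committed content in `messagesBySession` takes over without duplication.
// The entry is also cleared when an authoritative snapshot is installed and
// when a session is forgotten, since both supersede any live text.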

import { reactive } from 'vue';

/** One live block of the in-flight assistant message, built up from `assistantDelta` chunks. */
export interface StreamingBlock {
/** `contentIndex` of the deltas accumulated into this block. */
contentIndex: number;
/** `'text'` when the deltas carried a `text` field, otherwise `'thinking'`. */
kind: 'text' | 'thinking';
/** Concatenation of every chunk appended to this block so far. */
text: string;
}

/** Live streaming entry for one session: the message being streamed and its blocks so far. */
export interface StreamingState {
/**
 * id of the assistant message currently being streamed. A delta carrying a
 * different id replaces the whole entry.
 */
messageId: string;
/** Ordered live text/thinking blocks (always trailing in the message). */
blocks: StreamingBlock[];
}

/**
 * Per-session live streaming state. A session has at most one in-flight
 * assistant message (its trailing one), so a single entry per session suffices.
 *
 * Keyed by session id. Entries are created and extended by
 * `appendStreamingDelta` and removed by `clearStreaming`.
 */
export const streamingBySession = reactive<Record<string, StreamingState>>({});

/**
 * Append one `assistantDelta` to the streaming store. O(1): either mutates the
 * trailing block's text in place (same contentIndex and kind) or pushes a new
 * block (rare). Never touches `rawState`, so no heavy computed (`turns`,
 * sidebar) is dirtied.
 *
 * @param sessionId    Session whose live entry receives the chunk.
 * @param messageId    Assistant message the delta belongs to. A different id
 *                     than the stored one replaces the entry with a fresh one.
 * @param contentIndex Content index the delta targets within the message.
 * @param delta        The chunk; `text` takes precedence over `thinking`.
 *                     An empty or missing chunk appends nothing (but a new
 *                     `messageId` still resets the entry).
 */
export function appendStreamingDelta(
  sessionId: string,
  messageId: string,
  contentIndex: number,
  delta: { text?: string; thinking?: string },
): void {
  // A new assistant message (new step, or text resuming after a tool) starts a
  // fresh entry — the previous message is already committed via messageUpdated.
  const existing = streamingBySession[sessionId];
  if (!existing || existing.messageId !== messageId) {
    streamingBySession[sessionId] = { messageId, blocks: [] };
  }

  // Always read the entry back THROUGH the reactive store. The value of an
  // assignment expression is the raw right-hand-side object, not the proxy, so
  // mutating that value would bypass reactivity for the first block of every
  // message and only render by virtue of the assignment's own trigger.
  const state = streamingBySession[sessionId];
  if (!state) return; // unreachable: the entry was ensured just above

  const kind: 'text' | 'thinking' = delta.text !== undefined ? 'text' : 'thinking';
  const chunk = delta.text ?? delta.thinking ?? '';
  if (chunk.length === 0) return;

  const last = state.blocks.at(-1);
  if (last && last.contentIndex === contentIndex && last.kind === kind) {
    last.text += chunk;
  } else {
    state.blocks.push({ contentIndex, kind, text: chunk });
  }
}

/**
 * Remove a session's live streaming entry, if any (used on commit and on turn
 * end). A no-op when the session has no entry.
 */
export function clearStreaming(sessionId: string): void {
  Reflect.deleteProperty(streamingBySession, sessionId);
}
44 changes: 43 additions & 1 deletion apps/kimi-web/src/composables/useKimiWebClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
STORAGE_KEYS,
} from '../lib/storage';
import { createEventBatcher, isRenderEvent } from './client/eventBatcher';
import { appendStreamingDelta, clearStreaming } from './client/streamingStore';
import { useAppearance } from './client/useAppearance';
import { useNotification } from './client/useNotification';
import { useTaskPoller } from './client/useTaskPoller';
Expand Down Expand Up @@ -486,6 +487,7 @@ function forgetSession(sessionId: string): void {
// That would make hasLoadedMessages() treat the stale empty cache as
// authoritative and skip the next snapshot fetch for this id.
enqueueEvent.flush();
clearStreaming(sessionId);
removeSession(sessionId);
removeSessionMessages(sessionId);
delete rawState.approvalsBySession[sessionId];
Expand Down Expand Up @@ -639,8 +641,28 @@ function nextOptimisticMsgId(): string {
// past the queue check and clobber promptIdBySession (breaking abort).
const inFlightPromptSessions = new Set<string>();

/**
 * Advance the per-session resync cursor for the one event (`assistantDelta`)
 * that bypasses the reducer; mirrors the reducer's `advanceSeq`.
 *
 * `lastSeqBySession` is a resync cursor with no rendering dependencies, so it
 * is mutated in place. The cursor only ever moves forward: a missing session
 * id, a missing or non-positive `seq`, or a `seq` not greater than the stored
 * one leaves it untouched.
 */
function advanceSeqCursor(sessionId: string | undefined, seq: number | undefined): void {
  if (sessionId === undefined || seq === undefined) return;
  if (!(seq > 0)) return;

  const cursors = rawState.lastSeqBySession;
  const current = cursors[sessionId] ?? 0;
  if (seq > current) {
    cursors[sessionId] = seq;
  }
}

// Helper: mutate rawState by applying a reducer on a snapshot then re-assigning fields
function applyEvent(event: ReturnType<typeof toAppEvent>, sessionId: string, seq: number): void {
// Streaming text/thinking deltas bypass the reducer entirely. Appending to the
// fine-grained streaming store is O(1) and dirties only the single
// StreamingBlocks component — instead of cloning all of `rawState` and
// re-rendering the whole App + sidebar on every token.
if (event.type === 'assistantDelta') {
appendStreamingDelta(sessionId, event.messageId, event.contentIndex, event.delta);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Clear the streaming store when applying snapshots

Because this writes deltas outside rawState, the snapshot path no longer overwrites them: syncSessionFromSnapshot flushes pending render events before setSessionMessages, so any queued assistantDelta lands in streamingBySession and then survives the authoritative snapshot/seed. On reconnect or delta-gap resync while a reply is streaming, the old live chunks are rendered in addition to the seeded snapshot (or can leak into the next assistant turn) until another clear event arrives. Please clear streamingBySession[sessionId] when installing a snapshot, before seedSnapshot.

Useful? React with 👍 / 👎.

advanceSeqCursor(sessionId, seq);
return;
}

const snapshot: KimiClientState = {
sessions: rawState.sessions,
activeSessionId: rawState.activeSessionId,
Expand Down Expand Up @@ -670,6 +692,20 @@ function applyEvent(event: ReturnType<typeof toAppEvent>, sessionId: string, seq
rawState.config = next.config ?? null;
rawState.warnings = next.warnings;

// `messageUpdated` carries the authoritative full content of a message (tool
// slot / step end / turn end): drop the live streaming entry so the just-
// committed content takes over without rendering the same text twice.
if (event.type === 'messageUpdated') {
clearStreaming(sessionId);
}
// Turn end: release the streaming entry for the session.
if (
event.type === 'sessionStatusChanged' &&
(event.status === 'idle' || event.status === 'aborted')
) {
clearStreaming(sessionId);
}

if (event.type === 'configChanged') {
rawState.defaultModel = event.config.defaultModel ?? null;
}
Expand Down Expand Up @@ -1028,8 +1064,14 @@ async function syncSessionFromSnapshot(sessionId: string): Promise<SyncSessionRe
// messagesBySession[sessionId]. The snapshot is authoritative (it already
// contains everything up to asOfSeq); applying stale queued deltas on top
// of it would duplicate text / tool output. Flushing here applies them to
// the pre-snapshot array, which the snapshot then overwrites.
// the pre-snapshot state, which the snapshot then overwrites.
enqueueEvent.flush();
// The snapshot is authoritative for the live streaming text too: any deltas
// the flush just landed in the streaming store are superseded by the
// snapshot (and the in-flight seed below), so drop them. Without this, a
// reconnect or delta-gap resync mid-stream would render stale live chunks
// on top of the seeded snapshot.
clearStreaming(sessionId);

updateSession(sessionId, (s) => ({
...snap.session,
Expand Down
41 changes: 41 additions & 0 deletions apps/kimi-web/test/event-reducer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,44 @@ describe('reduceAppEvent taskProgress', () => {
expect(lines?.at(-1)).toBe('line 59');
});
});

describe('reduceAppEvent sessions reference stability', () => {
  // `rawState.sessions` feeds the sidebar computeds (sessionsForView /
  // workspaceGroups / mergedWorkspaces). An event that leaves sessions alone
  // must hand back the SAME array reference so those computeds stay clean; an
  // event that changes sessions must hand back a NEW array.
  const JAN_1 = '2026-01-01T00:00:00.000Z';

  it('reuses the sessions reference for an event that does not touch sessions', () => {
    const before = {
      ...createInitialState(),
      sessions: [makeSession('s1', JAN_1)],
      messagesBySession: { s1: [makeMessage('s1', JAN_1)] },
    };

    const after = reduceAppEvent(
      before,
      {
        type: 'messageUpdated',
        sessionId: 's1',
        messageId: 'msg_2026-01-01T00:00:00.000Z',
        content: [{ type: 'text', text: 'updated' }],
        status: 'completed',
      },
      { sessionId: 's1', seq: 2 },
    );

    expect(after.sessions).toBe(before.sessions);
  });

  it('produces a new sessions array for an event that changes sessions', () => {
    const before = {
      ...createInitialState(),
      sessions: [makeSession('s1', JAN_1)],
    };
    const created = makeSession('s2', '2026-02-01T00:00:00.000Z');

    const after = reduceAppEvent(
      before,
      { type: 'sessionCreated', session: created },
      { sessionId: 's2', seq: 3 },
    );

    expect(after.sessions).not.toBe(before.sessions);
    const ids = after.sessions.map((s) => s.id);
    expect(ids).toEqual(['s2', 's1']);
  });
});
Loading
Loading