diff --git a/.changeset/read-stream-utf8.md b/.changeset/read-stream-utf8.md new file mode 100644 index 00000000000..774ccbcc692 --- /dev/null +++ b/.changeset/read-stream-utf8.md @@ -0,0 +1,5 @@ +--- +"@kilocode/cli": patch +--- + +Speed up reading large files: the `read` tool now streams UTF-8 content from disk and stops once the line/byte cap is reached, instead of loading the whole file into memory first. diff --git a/packages/opencode/src/kilocode/text-stream.ts b/packages/opencode/src/kilocode/text-stream.ts new file mode 100644 index 00000000000..1bb4d55f50e --- /dev/null +++ b/packages/opencode/src/kilocode/text-stream.ts @@ -0,0 +1,70 @@
+import { createReadStream } from "fs"
+import { PassThrough, Readable } from "stream"
+import * as Encoding from "./encoding"
+
+/**
+ * Encoding-aware text streaming for tools that walk a file line by line.
+ * Optimistically stream as UTF-8; fall back to a buffered iconv decode only
+ * when the bytes turn out not to be valid UTF-8.
+ *
+ *   import * as TextStream from "../kilocode/text-stream"
+ */
+
+/**
+ * Signals that a file's bytes are not valid UTF-8. {@link openUtf8} destroys
+ * its stream with this error. It is a distinct class so {@link withFallback}
+ * can tell it apart from real I/O failures with `instanceof`.
+ */
+export class InvalidUtf8Error extends Error {
+  constructor() {
+    // Fixed message; `name` is not overridden, so instances still report as
+    // "Error" and must be matched by class rather than by name.
+    super("invalid utf-8")
+  }
+}
+
+/**
+ * UTF-8 text Readable for `filepath`. A leading UTF-8 BOM passes through as
+ * U+FEFF — same as `createReadStream({ encoding: "utf8" })`.
+ *
+ * Invalid bytes destroy the returned stream with {@link InvalidUtf8Error};
+ * read failures destroy it with the underlying error. Destroying the returned
+ * stream closes the file, so a caller that stops early should destroy it.
+ */
+export function openUtf8(filepath: string): Readable {
+  const out = new PassThrough({ encoding: "utf8" })
+  const raw = createReadStream(filepath)
+  // `fatal` turns malformed input into a throw, which is how non-UTF-8 files
+  // are detected. `ignoreBOM: true` keeps a leading BOM in the output as
+  // U+FEFF; with the default (`false`) the decoder strips it.
+  const decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true })
+  raw.on("data", (chunk) => {
+    try {
+      // `stream: true` carries a multi-byte sequence that is split across
+      // chunks over to the next call instead of rejecting it.
+      const text = decoder.decode(chunk as Buffer, { stream: true })
+      if (!text || out.write(text)) return
+      // `out` is over its high-water mark: stop reading from disk until the
+      // consumer catches up, so a slow reader cannot buffer the whole file.
+      raw.pause()
+      out.once("drain", () => raw.resume())
+    } catch {
+      raw.destroy()
+      out.destroy(new InvalidUtf8Error())
+    }
+  })
+  raw.on("end", () => {
+    try {
+      // Final flush: throws if the file ends inside a multi-byte sequence.
+      const tail = decoder.decode()
+      if (tail) out.write(tail)
+      out.end()
+    } catch {
+      out.destroy(new InvalidUtf8Error())
+    }
+  })
+  raw.on("error", (err) => out.destroy(err))
+  // Propagate consumer-side teardown so early-exit (line / byte cap, fallback)
+  // stops pulling chunks from disk instead of running to EOF.
+  out.on("close", () => raw.destroy())
+  return out
+}
+
+/**
+ * Whole-file UTF-8 Readable via {@link Encoding.read}: awaits the decoded text
+ * and emits it as a single chunk, so the entire decoded file is buffered.
+ */
+export async function openDecoded(filepath: string): Promise<Readable> {
+  const decoded = await Encoding.read(filepath)
+  return Readable.from([decoded.text])
+}
+
+/**
+ * Run `fn` against an optimistic UTF-8 stream; on {@link InvalidUtf8Error}
+ * retry once against {@link openDecoded}. Other errors propagate.
+ *
+ * `fn` may therefore run twice for the same file, each time from the start of
+ * the content, so it must not keep partial results from a failed first pass.
+ *
+ * @param filepath File to read.
+ * @param fn Consumer that reads the stream and resolves to a result.
+ * @returns Whatever `fn` resolves to, from whichever attempt succeeded.
+ */
+export async function withFallback<T>(filepath: string, fn: (input: Readable) => Promise<T>): Promise<T> {
+  try {
+    return await fn(openUtf8(filepath))
+  } catch (err) {
+    // Only the "not valid UTF-8" signal triggers the buffered retry; I/O and
+    // consumer errors are rethrown unchanged.
+    if (!(err instanceof InvalidUtf8Error)) throw err
+  }
+  return fn(await openDecoded(filepath))
+}
diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts index b2b8d643da9..551e3744553 100644 --- a/packages/opencode/src/tool/read.ts +++ b/packages/opencode/src/tool/read.ts @@ -1,9 +1,8 @@ import { lstat } from "fs/promises" // kilocode_change import { Effect, Option, Schema, Scope } from "effect" import { NonNegativeInt } from "@/util/schema" -import { createReadStream } from "fs" import * as path from "path" -import { Readable } from "stream" // kilocode_change +import type { Readable } from "stream" // kilocode_change import { createInterface } from "readline" import * as Tool from "./tool" import { AppFileSystem } from "@opencode-ai/core/filesystem" @@ -15,6 +14,7 @@ import { Instruction } from "../session/instruction" import { isPdfAttachment, sniffAttachmentMime } from "@/util/media" // kilocode_change start import * as Encoding from "../kilocode/encoding" +import * as TextStream from "../kilocode/text-stream" // kilocode_change end const DEFAULT_READ_LIMIT = 2000 @@ -354,12 +354,14 @@ export const ReadTool = Tool.define( }), ) -// kilocode_change start +// kilocode_change start - exported (so readDirectoryFiles can reuse it) and +// routed through TextStream.withFallback so non-UTF-8 files are decoded via +// iconv. The body otherwise matches upstream. 
export async function lines(filepath: string, opts: { limit: number; offset: number }) {
-  // kilocode_change end
-  // kilocode_change start - decode with detected encoding; replaces createReadStream(filepath, { encoding: "utf8" })
-  const encoded = await Encoding.read(filepath)
-  const stream = Readable.from([encoded.text])
+  // Stream the file as UTF-8 first; withFallback re-runs readLines on a fully
+  // buffered decode if the bytes turn out not to be valid UTF-8.
+  return TextStream.withFallback(filepath, (stream) => readLines(stream, opts))
+}
+
+async function readLines(stream: Readable, opts: { limit: number; offset: number }) { // kilocode_change end const rl = createInterface({ input: stream,