diff --git a/cmake/targets/BuildBun.cmake b/cmake/targets/BuildBun.cmake index 057314164ab..ca033f10b38 100644 --- a/cmake/targets/BuildBun.cmake +++ b/cmake/targets/BuildBun.cmake @@ -311,6 +311,7 @@ set(BUN_JAVASCRIPT_OUTPUTS ${CODEGEN_PATH}/InternalModuleRegistry+createInternalModuleById.h ${CODEGEN_PATH}/InternalModuleRegistry+enum.h ${CODEGEN_PATH}/InternalModuleRegistry+numberOfModules.h + ${CODEGEN_PATH}/InternalModuleRegistry+sourceList.h ${CODEGEN_PATH}/NativeModuleImpl.h ${CODEGEN_PATH}/ResolvedSourceTag.zig ${CODEGEN_PATH}/SyntheticModuleType.h diff --git a/cmake/tools/SetupWebKit.cmake b/cmake/tools/SetupWebKit.cmake index a7807f16ccc..9373539768f 100644 --- a/cmake/tools/SetupWebKit.cmake +++ b/cmake/tools/SetupWebKit.cmake @@ -6,7 +6,7 @@ option(WEBKIT_LOCAL "If a local version of WebKit should be used instead of down option(WEBKIT_BUILD_TYPE "The build type for local WebKit (defaults to CMAKE_BUILD_TYPE)") if(NOT WEBKIT_VERSION) - set(WEBKIT_VERSION fc9f2fa7272fec64905df6a9c78e15d7912f14ca) + set(WEBKIT_VERSION autobuild-preview-pr-177-20479c97) endif() diff --git a/scripts/build/codegen.ts b/scripts/build/codegen.ts index 39da1176b16..2169f2d3569 100644 --- a/scripts/build/codegen.ts +++ b/scripts/build/codegen.ts @@ -662,6 +662,7 @@ function emitJsModules({ n, cfg, sources, o, dirStamp }: Ctx): void { resolve(cfg.codegenDir, "InternalModuleRegistry+createInternalModuleById.h"), resolve(cfg.codegenDir, "InternalModuleRegistry+enum.h"), resolve(cfg.codegenDir, "InternalModuleRegistry+numberOfModules.h"), + resolve(cfg.codegenDir, "InternalModuleRegistry+sourceList.h"), resolve(cfg.codegenDir, "NativeModuleImpl.h"), resolve(cfg.codegenDir, "ResolvedSourceTag.zig"), resolve(cfg.codegenDir, "SyntheticModuleType.h"), diff --git a/scripts/build/deps/webkit.ts b/scripts/build/deps/webkit.ts index 9ac555bde5d..d1e93edb5d7 100644 --- a/scripts/build/deps/webkit.ts +++ b/scripts/build/deps/webkit.ts @@ -3,7 +3,7 @@ * for local mode. Override via `--webkit-version=` to test a branch. * From https://github.com/oven-sh/WebKit releases. */ -export const WEBKIT_VERSION = "fc9f2fa7272fec64905df6a9c78e15d7912f14ca"; +export const WEBKIT_VERSION = "autobuild-preview-pr-177-20479c97"; /** * WebKit (JavaScriptCore) — the JS engine. diff --git a/src/StandaloneModuleGraph.zig b/src/StandaloneModuleGraph.zig index de43414935a..2201ba02b13 100644 --- a/src/StandaloneModuleGraph.zig +++ b/src/StandaloneModuleGraph.zig @@ -8,6 +8,7 @@ pub const StandaloneModuleGraph = struct { entry_point_id: u32 = 0, compile_exec_argv: []const u8 = "", flags: Flags = .{}, + builtin_bytecode: []align(1) const BuiltinBytecodeEntry = &.{}, // We never want to hit the filesystem for these files // We use the `/$bunfs/` prefix to indicate that it's a virtual path @@ -317,8 +318,116 @@ pub const StandaloneModuleGraph = struct { entry_point_id: u32 = 0, compile_exec_argv_ptr: bun.StringPointer = .{}, flags: Flags = .{}, + builtin_bytecode_ptr: bun.StringPointer = .{}, }; + pub const BuiltinBytecodeKind = enum(u32) { + internal_module = 0, + builtin_function = 1, + }; + + /// Bytecode for a single builtin (internal module like node:fs, or builtin + /// function like ReadableStream constructor), embedded when building with + /// --compile --bytecode so parsing can be skipped at runtime. + pub const BuiltinBytecodeEntry = extern struct { + kind: BuiltinBytecodeKind, + id: u32, + bytecode: bun.StringPointer, + }; + + /// Mirrors JSC enum values. Keep in sync with ConstructAbility.h etc. + pub const BuiltinMetadata = struct { + pub const visibility_public: u8 = 0; // ImplementationVisibility::Public + pub const ctor_kind_none: u8 = 0; // ConstructorKind::None + pub const ctor_ability_cannot: u8 = 1; // ConstructAbility::CannotConstruct + pub const inline_attr_none: u8 = 0; // InlineAttribute::None + }; + + /// Build-time input for toBytes. + pub const BuiltinBytecodeInput = struct { + kind: BuiltinBytecodeKind, + id: u32, + bytecode: []const u8, + owner: *jsc.CachedBytecode, + }; + + pub fn findBuiltinBytecode(this: *const StandaloneModuleGraph, kind: BuiltinBytecodeKind, id: u32) ?[]const u8 { + for (this.builtin_bytecode) |entry| { + if (entry.kind == kind and entry.id == id) { + return sliceTo(this.bytes, entry.bytecode); + } + } + return null; + } + + extern fn Bun__getInternalModuleSource(id: u32, len: *usize, name: *?[*:0]const u8) ?[*]const u8; + extern fn Bun__getInternalModuleSourceCount() u32; + extern fn Bun__getBuiltinFunctionSource(id: u32, len: *usize, name: *?[*:0]const u8, vis: *u8, ctor_kind: *u8, ctor_ability: *u8, inline_attr: *u8) ?[*]const u8; + extern fn Bun__getBuiltinFunctionSourceCount() u32; + + /// Generate bytecode for all internal JS modules and builtin functions. + /// Caller must deref each entry's owner. + pub fn generateBuiltinBytecodes(allocator: std.mem.Allocator) !std.array_list.Managed(BuiltinBytecodeInput) { + var list = std.array_list.Managed(BuiltinBytecodeInput).init(allocator); + errdefer { + for (list.items) |bb| bb.owner.deref(); + list.deinit(); + } + jsc.initialize(false); + jsc.VirtualMachine.is_bundler_thread_for_bytecode_cache = true; + + const module_count = Bun__getInternalModuleSourceCount(); + const function_count = Bun__getBuiltinFunctionSourceCount(); + try list.ensureTotalCapacity(module_count + function_count); + + // Internal modules (node:fs etc) + { + var id: u32 = 0; + while (id < module_count) : (id += 1) { + var len: usize = 0; + var name_ptr: ?[*:0]const u8 = null; + const src_ptr = Bun__getInternalModuleSource(id, &len, &name_ptr) orelse continue; + // Debug builds use BUN_DYNAMIC_JS_LOAD_PATH and embed only "\n". + // BuiltinExecutables::createExecutable requires "(function (){})" minimum. + if (len < "(function (){})".len) continue; + var name = bun.String.borrowUTF8(bun.sliceTo(name_ptr.?, 0)); + if (jsc.CachedBytecode.generateForBuiltin( + &name, + src_ptr[0..len], + BuiltinMetadata.visibility_public, + BuiltinMetadata.ctor_kind_none, + BuiltinMetadata.ctor_ability_cannot, + BuiltinMetadata.inline_attr_none, + )) |res| { + const bytecode, const owner = res; + list.appendAssumeCapacity(.{ .kind = .internal_module, .id = id, .bytecode = bytecode, .owner = owner }); + } + } + } + + // Builtin functions (ReadableStream etc) + { + var id: u32 = 0; + while (id < function_count) : (id += 1) { + var len: usize = 0; + var name_ptr: ?[*:0]const u8 = null; + var vis: u8 = 0; + var ctor_kind: u8 = 0; + var ctor_ability: u8 = 0; + var inline_attr: u8 = 0; + const src_ptr = Bun__getBuiltinFunctionSource(id, &len, &name_ptr, &vis, &ctor_kind, &ctor_ability, &inline_attr) orelse continue; + if (len < "(function (){})".len) continue; + var name = bun.String.borrowUTF8(bun.sliceTo(name_ptr.?, 0)); + if (jsc.CachedBytecode.generateForBuiltin(&name, src_ptr[0..len], vis, ctor_kind, ctor_ability, inline_attr)) |res| { + const bytecode, const owner = res; + list.appendAssumeCapacity(.{ .kind = .builtin_function, .id = id, .bytecode = bytecode, .owner = owner }); + } + } + } + + return list; + } + pub const Flags = packed struct(u32) { disable_default_env_files: bool = false, disable_autoload_bunfig: bool = false, @@ -367,12 +476,17 @@ pub const StandaloneModuleGraph = struct { modules.lockPointers(); // make the pointers stable forever + const builtin_bytecode_bytes = sliceTo(raw_bytes, offsets.builtin_bytecode_ptr); + const builtin_bytecode: []align(1) const BuiltinBytecodeEntry = + std.mem.bytesAsSlice(BuiltinBytecodeEntry, builtin_bytecode_bytes); + return StandaloneModuleGraph{ .bytes = raw_bytes[0..offsets.byte_count], .files = modules, .entry_point_id = offsets.entry_point_id, .compile_exec_argv = sliceToZ(raw_bytes, offsets.compile_exec_argv_ptr), .flags = offsets.flags, + .builtin_bytecode = builtin_bytecode, }; } @@ -388,7 +502,26 @@ pub const StandaloneModuleGraph = struct { return bytes[ptr.offset..][0..ptr.length :0]; } - pub fn toBytes(allocator: std.mem.Allocator, prefix: []const u8, output_files: []const bun.options.OutputFile, output_format: bun.options.Format, compile_exec_argv: []const u8, flags: Flags) ![]u8 { + /// Write bytes at a 128-byte-aligned offset within the string_builder, + /// accounting for the 8-byte section header on PE/Mach-O. Returns the + /// offset where the bytes were written. See the PLATFORM-SPECIFIC + /// ALIGNMENT comment in toBytes for rationale. + fn appendBytecodeAligned(string_builder: *bun.StringBuilder, bytes: []const u8) usize { + const target_mod: usize = 128 - @sizeOf(u64); + const current_mod = string_builder.len % 128; + const padding = if (current_mod <= target_mod) + target_mod - current_mod + else + 128 - current_mod + target_mod; + @memset(string_builder.writable()[0..padding], 0); + string_builder.len += padding; + const aligned_offset = string_builder.len; + @memcpy(string_builder.writable()[0..bytes.len], bytes); + string_builder.len += bytes.len; + return aligned_offset; + } + + pub fn toBytes(allocator: std.mem.Allocator, prefix: []const u8, output_files: []const bun.options.OutputFile, output_format: bun.options.Format, compile_exec_argv: []const u8, flags: Flags, builtin_bytecodes: []const BuiltinBytecodeInput) ![]u8 { var serialize_trace = bun.perf.trace("StandaloneModuleGraph.serialize"); defer serialize_trace.end(); @@ -433,6 +566,11 @@ pub const StandaloneModuleGraph = struct { string_builder.cap += @sizeOf(Offsets); string_builder.countZ(compile_exec_argv); + for (builtin_bytecodes) |bb| { + string_builder.cap += (bb.bytecode.len + 255) / 256 * 256 + 256; + } + string_builder.cap += @sizeOf(BuiltinBytecodeEntry) * builtin_bytecodes.len; + try string_builder.allocate(allocator); var modules = try std.array_list.Managed(CompiledModuleGraphFile).initCapacity(allocator, module_count); @@ -589,12 +727,25 @@ pub const StandaloneModuleGraph = struct { modules.appendAssumeCapacity(module); } + const builtin_entries = try allocator.alloc(BuiltinBytecodeEntry, builtin_bytecodes.len); + defer allocator.free(builtin_entries); + for (builtin_bytecodes, builtin_entries) |input, *entry| { + const aligned_offset = appendBytecodeAligned(&string_builder, input.bytecode); + entry.* = .{ + .kind = input.kind, + .id = input.id, + .bytecode = .{ .offset = @truncate(aligned_offset), .length = @truncate(input.bytecode.len) }, + }; + } + const builtin_bytecode_ptr = string_builder.appendCount(std.mem.sliceAsBytes(builtin_entries)); + const offsets = Offsets{ .entry_point_id = @as(u32, @truncate(entry_point_id.?)), .modules_ptr = string_builder.appendCount(std.mem.sliceAsBytes(modules.items)), .compile_exec_argv_ptr = string_builder.appendCountZ(compile_exec_argv), .byte_count = string_builder.len, .flags = flags, + .builtin_bytecode_ptr = builtin_bytecode_ptr, }; _ = string_builder.append(std.mem.asBytes(&offsets)); @@ -1127,8 +1278,9 @@ pub const StandaloneModuleGraph = struct { compile_exec_argv: []const u8, self_exe_path: ?[]const u8, flags: Flags, + builtin_bytecodes: []const BuiltinBytecodeInput, ) !CompileResult { - const bytes = toBytes(allocator, module_prefix, output_files, output_format, compile_exec_argv, flags) catch |err| { + const bytes = toBytes(allocator, module_prefix, output_files, output_format, compile_exec_argv, flags, builtin_bytecodes) catch |err| { return CompileResult.failFmt("failed to generate module graph bytes: {s}", .{@errorName(err)}); }; if (bytes.len == 0) return CompileResult.fail(.no_output_files); @@ -1351,6 +1503,9 @@ pub const StandaloneModuleGraph = struct { const graph_ptr = try allocator.create(StandaloneModuleGraph); graph_ptr.* = try StandaloneModuleGraph.fromBytes(allocator, raw_bytes, offsets); graph_ptr.set(); + if (graph_ptr.builtin_bytecode.len > 0) { + Bun__hasEmbeddedBuiltinBytecode = true; + } return graph_ptr; } @@ -1523,6 +1678,28 @@ pub const StandaloneModuleGraph = struct { } }; +/// Set from fromBytesAlloc when a standalone executable was built with +/// --bytecode. Checked on the C++ side before calling into the lookup +/// functions below so that normal Bun runs skip the extern call entirely. +export var Bun__hasEmbeddedBuiltinBytecode: bool = false; + +/// Called from InternalModuleRegistry.cpp to look up pre-generated bytecode +/// for an internal module. Returns null when no bytecode was embedded for +/// this module. +export fn Bun__findInternalModuleBytecode(field_id: u32, out_len: *usize) ?[*]const u8 { + const graph = StandaloneModuleGraph.get() orelse return null; + const bytes = graph.findBuiltinBytecode(.internal_module, field_id) orelse return null; + out_len.* = bytes.len; + return bytes.ptr; +} + +export fn Bun__findBuiltinFunctionBytecode(global_id: u32, out_len: *usize) ?[*]const u8 { + const graph = StandaloneModuleGraph.get() orelse return null; + const bytes = graph.findBuiltinBytecode(.builtin_function, global_id) orelse return null; + out_len.* = bytes.len; + return bytes.ptr; +} + const std = @import("std"); const w = std.os.windows; @@ -1532,6 +1709,7 @@ const Output = bun.Output; const SourceMap = bun.SourceMap; const StringPointer = bun.StringPointer; const Syscall = bun.sys; +const jsc = bun.jsc; const macho = bun.macho; const pe = bun.pe; const strings = bun.strings; diff --git a/src/bun.js/bindings/CachedBytecode.zig b/src/bun.js/bindings/CachedBytecode.zig index 6f0c13488df..9d322df6dc2 100644 --- a/src/bun.js/bindings/CachedBytecode.zig +++ b/src/bun.js/bindings/CachedBytecode.zig @@ -1,6 +1,7 @@ pub const CachedBytecode = opaque { extern fn generateCachedModuleByteCodeFromSourceCode(sourceProviderURL: *bun.String, input_code: [*]const u8, inputSourceCodeSize: usize, outputByteCode: *?[*]u8, outputByteCodeSize: *usize, cached_bytecode: *?*CachedBytecode) bool; extern fn generateCachedCommonJSProgramByteCodeFromSourceCode(sourceProviderURL: *bun.String, input_code: [*]const u8, inputSourceCodeSize: usize, outputByteCode: *?[*]u8, outputByteCodeSize: *usize, cached_bytecode: *?*CachedBytecode) bool; + extern fn generateCachedBuiltinByteCodeFromSourceCode(moduleName: *bun.String, input_code: [*]const u8, inputSourceCodeSize: usize, implementationVisibility: u8, constructorKind: u8, constructAbility: u8, inlineAttribute: u8, outputByteCode: *?[*]u8, outputByteCodeSize: *usize, cached_bytecode: *?*CachedBytecode) bool; pub fn generateForESM(sourceProviderURL: *bun.String, input: []const u8) ?struct { []const u8, *CachedBytecode } { var this: ?*CachedBytecode = null; @@ -25,6 +26,17 @@ pub const CachedBytecode = opaque { return null; } + pub fn generateForBuiltin(moduleName: *bun.String, input: []const u8, vis: u8, ctor_kind: u8, ctor_ability: u8, inline_attr: u8) ?struct { []const u8, *CachedBytecode } { + var this: ?*CachedBytecode = null; + var input_code_size: usize = 0; + var input_code_ptr: ?[*]u8 = null; + if (generateCachedBuiltinByteCodeFromSourceCode(moduleName, input.ptr, input.len, vis, ctor_kind, ctor_ability, inline_attr, &input_code_ptr, &input_code_size, &this)) { + return .{ input_code_ptr.?[0..input_code_size], this.? }; + } + + return null; + } + extern "c" fn CachedBytecode__deref(this: *CachedBytecode) void; pub fn deref(this: *CachedBytecode) void { return CachedBytecode__deref(this); diff --git a/src/bun.js/bindings/InternalModuleRegistry.cpp b/src/bun.js/bindings/InternalModuleRegistry.cpp index 4e4f6fb428b..3f1a4cb9b14 100644 --- a/src/bun.js/bindings/InternalModuleRegistry.cpp +++ b/src/bun.js/bindings/InternalModuleRegistry.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "InternalModuleRegistryConstants.h" @@ -18,6 +19,12 @@ namespace Bun { extern "C" bool BunTest__shouldGenerateCodeCoverage(BunString sourceURL); extern "C" void ByteRangeMapping__generate(BunString sourceURL, BunString code, int sourceID); +// Implemented in StandaloneModuleGraph.zig. The bool is set once at startup +// when a standalone executable was built with --bytecode; checking it lets +// normal Bun runs skip the extern lookup call. +extern "C" bool Bun__hasEmbeddedBuiltinBytecode; +extern "C" const uint8_t* Bun__findInternalModuleBytecode(uint32_t fieldId, size_t* outLen); + static void maybeAddCodeCoverage(JSC::VM& vm, const JSC::SourceCode& code) { #if ASSERT_ENABLED @@ -33,23 +40,41 @@ static void maybeAddCodeCoverage(JSC::VM& vm, const JSC::SourceCode& code) // JS builtin that acts as a module. In debug mode, we use a different implementation that reads // from the developer's filesystem. This allows reloading code without recompiling bindings. -JSC::JSValue generateModule(JSC::JSGlobalObject* globalObject, JSC::VM& vm, const String& SOURCE, const String& moduleName, const String& urlString) +JSC::JSValue generateModule(JSC::JSGlobalObject* globalObject, JSC::VM& vm, const String& SOURCE, const String& moduleName, const String& urlString, uint32_t fieldId) { auto throwScope = DECLARE_THROW_SCOPE(vm); auto&& origin = SourceOrigin(WTF::URL(urlString)); SourceCode source = JSC::makeSource(SOURCE, origin, JSC::SourceTaintedOrigin::Untainted, moduleName); maybeAddCodeCoverage(vm, source); + + UnlinkedFunctionExecutable* unlinked = nullptr; + + // Standalone executables built with --bytecode embed pre-generated bytecode + // for internal modules. Try decoding that first to skip parsing. + if (Bun__hasEmbeddedBuiltinBytecode) [[unlikely]] { + size_t bytecodeLen = 0; + if (const uint8_t* bytecode = Bun__findInternalModuleBytecode(fieldId, &bytecodeLen)) { + auto cached = JSC::CachedBytecode::create( + std::span(const_cast(bytecode), bytecodeLen), + [](const void*) {}, {}); + unlinked = JSC::decodeBuiltinFunctionExecutable(vm, WTF::move(cached)); + } + } + + if (!unlinked) { + unlinked = createBuiltinExecutable( + vm, source, + Identifier::fromString(vm, moduleName), + ImplementationVisibility::Public, + ConstructorKind::None, + ConstructAbility::CannotConstruct, + InlineAttribute::None); + } + JSFunction* func = JSFunction::create( vm, globalObject, - createBuiltinExecutable( - vm, source, - Identifier::fromString(vm, moduleName), - ImplementationVisibility::Public, - ConstructorKind::None, - ConstructAbility::CannotConstruct, - InlineAttribute::None) - ->link(vm, nullptr, source), + unlinked->link(vm, nullptr, source), static_cast(globalObject)); RETURN_IF_EXCEPTION(throwScope, {}); @@ -103,7 +128,7 @@ JSValue initializeInternalModuleFromDisk(JSGlobalObject* globalObject, VM& vm, c WTF::String file = makeString(ASCIILiteral::fromLiteralUnsafe(BUN_DYNAMIC_JS_LOAD_PATH), "/"_s, WTF::move(fileBase)); if (auto contents = WTF::FileSystemImpl::readEntireFile(file)) { auto string = WTF::String::fromUTF8(contents.value()); - return generateModule(globalObject, vm, string, moduleName, urlString); + return generateModule(globalObject, vm, string, moduleName, urlString, UINT32_MAX); } else { printf("\nFATAL: bun-debug failed to load bundled version of \"%s\" at \"%s\" (was it deleted?)\n" "Please re-compile Bun to continue.\n\n", @@ -116,7 +141,7 @@ JSValue initializeInternalModuleFromDisk(JSGlobalObject* globalObject, VM& vm, c #else #define INTERNAL_MODULE_REGISTRY_GENERATE(globalObject, vm, moduleId, filename, SOURCE, urlString) \ - return generateModule(globalObject, vm, SOURCE, moduleId, urlString) + return generateModule(globalObject, vm, SOURCE, moduleId, urlString, (uint32_t)id) #endif const ClassInfo InternalModuleRegistry::s_info = { "InternalModuleRegistry"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(InternalModuleRegistry) }; @@ -191,4 +216,6 @@ JSC_DEFINE_HOST_FUNCTION(InternalModuleRegistry::jsCreateInternalModuleById, (JS } // namespace Bun +#include "InternalModuleRegistry+sourceList.h" + #undef INTERNAL_MODULE_REGISTRY_GENERATE diff --git a/src/bun.js/bindings/ZigSourceProvider.cpp b/src/bun.js/bindings/ZigSourceProvider.cpp index dcb4b32297e..9859ccbcd64 100644 --- a/src/bun.js/bindings/ZigSourceProvider.cpp +++ b/src/bun.js/bindings/ZigSourceProvider.cpp @@ -15,6 +15,7 @@ #include #include #include +#include "BunClientData.h" namespace Zig { @@ -186,6 +187,11 @@ static JSC::VM& getVMForBytecodeCache() vmPtr->refSuppressingSaferCPPChecking(); vmForBytecodeCache = vmPtr.get(); vmPtr->heap.acquireAccess(); + // Builtin bytecode generation needs Bun's private identifiers + // (@isCallable etc) registered in the VM, which JSVMClientData + // sets up. Module/CJS bytecode generation doesn't need it but + // works fine with it, so we always attach it. + WebCore::JSVMClientData::create(vmForBytecodeCache, nullptr); } return *vmForBytecodeCache; } @@ -260,6 +266,64 @@ extern "C" bool generateCachedCommonJSProgramByteCodeFromSourceCode(BunString* s return true; } +extern "C" bool generateCachedBuiltinByteCodeFromSourceCode(BunString* moduleName, const Latin1Character* inputSourceCode, size_t inputSourceCodeSize, uint8_t implementationVisibility, uint8_t constructorKind, uint8_t constructAbility, uint8_t inlineAttribute, const uint8_t** outputByteCode, size_t* outputByteCodeSize, JSC::CachedBytecode** cachedBytecodePtr) +{ + std::span sourceCodeSpan(inputSourceCode, inputSourceCodeSize); + JSC::SourceCode sourceCode = JSC::makeSource(WTF::String(sourceCodeSpan), JSC::SourceOrigin(), JSC::SourceTaintedOrigin::Untainted); + + JSC::VM& vm = getVMForBytecodeCache(); + JSC::JSLockHolder locker(vm); + + auto name = JSC::Identifier::fromString(vm, moduleName->toWTFString()); + ParserError parserError; + UnlinkedFunctionExecutable* executable = JSC::recursivelyGenerateUnlinkedCodeBlockForBuiltinFunction( + vm, sourceCode, name, parserError, + static_cast(implementationVisibility), + static_cast(constructorKind), + static_cast(constructAbility), + static_cast(inlineAttribute)); + if (parserError.isValid() || !executable) + return false; + + RefPtr cachedBytecode = JSC::encodeBuiltinFunctionExecutable(vm, executable); + if (!cachedBytecode) + return false; + + cachedBytecode->ref(); + *cachedBytecodePtr = cachedBytecode.get(); + *outputByteCode = cachedBytecode->span().data(); + *outputByteCodeSize = cachedBytecode->span().size(); + + return true; +} + +extern "C" bool Bun__hasEmbeddedBuiltinBytecode; +extern "C" const uint8_t* Bun__findBuiltinFunctionBytecode(uint32_t globalId, size_t* outLen); + +} // namespace Zig + +namespace Bun { + +JSC::UnlinkedFunctionExecutable* tryDecodeBuiltinFunctionBytecode(JSC::VM& vm, uint32_t globalId) +{ + if (!Bun__hasEmbeddedBuiltinBytecode) [[likely]] + return nullptr; + + size_t len = 0; + const uint8_t* bytecode = Bun__findBuiltinFunctionBytecode(globalId, &len); + if (!bytecode) + return nullptr; + + auto cached = JSC::CachedBytecode::create( + std::span(const_cast(bytecode), len), + [](const void*) {}, {}); + return JSC::decodeBuiltinFunctionExecutable(vm, WTF::move(cached)); +} + +} // namespace Bun + +namespace Zig { + unsigned SourceProvider::hash() const { if (m_hash) { diff --git a/src/bundler/bundle_v2.zig b/src/bundler/bundle_v2.zig index ba274716b92..4b82504e198 100644 --- a/src/bundler/bundle_v2.zig +++ b/src/bundler/bundle_v2.zig @@ -2125,6 +2125,17 @@ pub const BundleV2 = struct { // Use the target-specific base path for compile mode, not the user-configured public_path const module_prefix = bun.StandaloneModuleGraph.targetBasePublicPath(compile_options.compile_target.os, "root/"); + var builtin_bytecodes = if (this.config.bytecode and compile_options.compile_target.isDefault()) + bun.StandaloneModuleGraph.generateBuiltinBytecodes(bun.default_allocator) catch |err| { + return bun.StandaloneModuleGraph.CompileResult.failFmt("Failed to generate builtin bytecode: {s}", .{@errorName(err)}); + } + else + std.array_list.Managed(bun.StandaloneModuleGraph.BuiltinBytecodeInput).init(bun.default_allocator); + defer { + for (builtin_bytecodes.items) |bb| bb.owner.deref(); + builtin_bytecodes.deinit(); + } + const result = bun.StandaloneModuleGraph.toExecutable( &compile_options.compile_target, bun.default_allocator, @@ -2172,6 +2183,7 @@ pub const BundleV2 = struct { .disable_autoload_tsconfig = !compile_options.autoload_tsconfig, .disable_autoload_package_json = !compile_options.autoload_package_json, }, + builtin_bytecodes.items, ) catch |err| { return bun.StandaloneModuleGraph.CompileResult.failFmt("{s}", .{@errorName(err)}); }; diff --git a/src/cli/build_command.zig b/src/cli/build_command.zig index 2f52ea440f0..5db947d1ba8 100644 --- a/src/cli/build_command.zig +++ b/src/cli/build_command.zig @@ -519,6 +519,17 @@ pub const BuildCommand = struct { } } + // Pre-generate bytecode for internal modules (node:fs etc) so + // the standalone executable skips parsing them on first require. + var builtin_bytecodes = if (ctx.bundler_options.bytecode and !is_cross_compile) + try bun.StandaloneModuleGraph.generateBuiltinBytecodes(allocator) + else + std.array_list.Managed(bun.StandaloneModuleGraph.BuiltinBytecodeInput).init(allocator); + defer { + for (builtin_bytecodes.items) |bb| bb.owner.deref(); + builtin_bytecodes.deinit(); + } + const result = bun.StandaloneModuleGraph.toExecutable( compile_target, allocator, @@ -537,6 +548,7 @@ pub const BuildCommand = struct { .disable_autoload_tsconfig = !ctx.bundler_options.compile_autoload_tsconfig, .disable_autoload_package_json = !ctx.bundler_options.compile_autoload_package_json, }, + builtin_bytecodes.items, ) catch |err| { Output.printErrorln("failed to create executable: {s}", .{@errorName(err)}); Global.exit(1); diff --git a/src/codegen/bundle-functions.ts b/src/codegen/bundle-functions.ts index 2354d140194..f48b70b7f03 100644 --- a/src/codegen/bundle-functions.ts +++ b/src/codegen/bundle-functions.ts @@ -63,6 +63,7 @@ interface BundledBuiltin { params: string[]; visibility: string; sourceOffset: number; + globalId: number; } /** @@ -336,6 +337,7 @@ $$capture_start$$(${fn.async ? "async " : ""}${ // Not known yet. sourceOffset: 0, + globalId: 0, overriddenName: fn.directives.getter ? `"get ${fn.name}"_s` @@ -386,10 +388,12 @@ export async function bundleBuiltinFunctions({ requireTransformer }: BundleBuilt let combinedSourceCodeChars = ""; let combinedSourceCodeLength = 0; - // Compute source offsets + let globalBuiltinId = 0; + // Compute source offsets and global IDs { for (const { basename, functions } of files) { for (const fn of functions) { + fn.globalId = globalBuiltinId++; fn.sourceOffset = combinedSourceCodeLength; combinedSourceCodeLength += fn.source.length; if (combinedSourceCodeChars && !combinedSourceCodeChars.endsWith(",")) { @@ -555,6 +559,38 @@ JSBuiltinInternalFunctions::JSBuiltinInternalFunctions(JSC::VM& vm) : m_vm(vm) } // namespace WebCore `; + // Source accessor for bytecode generation at build time + { + const entries: string[] = []; + for (const { basename, functions } of files) { + for (const fn of functions) { + const inlineAttr = fn.directives.alwaysInline ? "Always" : "None"; + entries.push( + `{ ${fn.sourceOffset}, ${fn.source.length}, "${low(basename)}${cap(fn.name)}", ` + + `(uint8_t)JSC::ImplementationVisibility::${fn.visibility}, ` + + `(uint8_t)JSC::ConstructorKind::${fn.constructKind}, ` + + `(uint8_t)JSC::ConstructAbility::${fn.constructAbility}, ` + + `(uint8_t)JSC::InlineAttribute::${inlineAttr} }`, + ); + } + } + bundledCPP += ` +extern "C" const char* Bun__getBuiltinFunctionSource(uint32_t id, size_t* len, const char** name, uint8_t* vis, uint8_t* ctorKind, uint8_t* ctorAbility, uint8_t* inlineAttr) +{ + struct Entry { uint32_t offset; uint32_t length; const char* name; uint8_t vis, ck, ca, ia; }; + static const Entry table[] = { + ${entries.join(",\n ")} + }; + if (id >= ${globalBuiltinId}) return nullptr; + const auto& e = table[id]; + *len = e.length; *name = e.name; *vis = e.vis; *ctorKind = e.ck; *ctorAbility = e.ca; *inlineAttr = e.ia; + return (const char*)WebCore::combinedSourceCodeBuffer + e.offset; +} + +extern "C" uint32_t Bun__getBuiltinFunctionSourceCount() { return ${globalBuiltinId}; } + `; + } + // C++ Header codegen let bundledHeader = `// Generated by ${import.meta.path} // Do not edit by hand. @@ -572,6 +608,10 @@ JSBuiltinInternalFunctions::JSBuiltinInternalFunctions(JSC::VM& vm) : m_vm(vm) class FunctionExecutable; } + namespace Bun { + JSC::UnlinkedFunctionExecutable* tryDecodeBuiltinFunctionBytecode(JSC::VM&, uint32_t globalId); + } + namespace WebCore { `; for (const { basename, functions, internal } of files) { @@ -587,6 +627,7 @@ JSBuiltinInternalFunctions::JSBuiltinInternalFunctions(JSC::VM& vm) : m_vm(vm) static constexpr JSC::InlineAttribute s_${name}InlineAttribute = JSC::InlineAttribute::${fn.directives.alwaysInline ? "Always" : "None"}; static constexpr JSC::ConstructorKind s_${name}ConstructorKind = JSC::ConstructorKind::${fn.constructKind}; static constexpr JSC::ImplementationVisibility s_${name}ImplementationVisibility = JSC::ImplementationVisibility::${fn.visibility}; + static constexpr uint32_t s_${name}GlobalId = ${fn.globalId}; `; } @@ -646,7 +687,10 @@ JSBuiltinInternalFunctions::JSBuiltinInternalFunctions(JSC::VM& vm) : m_vm(vm) JSC::Identifier executableName = functionName##PublicName();\\ if (overriddenName)\\ executableName = JSC::Identifier::fromString(m_vm, overriddenName);\\ - m_##name##Executable = JSC::Weak(JSC::createBuiltinExecutable(m_vm, m_##name##Source, executableName, s_##name##ImplementationVisibility, s_##name##ConstructorKind, s_##name##ConstructAbility, s_##name##InlineAttribute), this, &m_##name##Executable);\\ + auto* unlinked = Bun::tryDecodeBuiltinFunctionBytecode(m_vm, s_##name##GlobalId);\\ + if (!unlinked)\\ + unlinked = JSC::createBuiltinExecutable(m_vm, m_##name##Source, executableName, s_##name##ImplementationVisibility, s_##name##ConstructorKind, s_##name##ConstructAbility, s_##name##InlineAttribute);\\ + m_##name##Executable = JSC::Weak(unlinked, this, &m_##name##Executable);\\ }\\ return m_##name##Executable.get();\\ } diff --git a/src/codegen/bundle-modules.ts b/src/codegen/bundle-modules.ts index c282da4560b..10bddfd2e0c 100644 --- a/src/codegen/bundle-modules.ts +++ b/src/codegen/bundle-modules.ts @@ -361,6 +361,30 @@ JSValue InternalModuleRegistry::createInternalModuleById(JSGlobalObject* globalO `, ); +// Source pointer/length accessor for bytecode generation at build time. +// Only JS modules (not native) are listed. In debug builds, sources are +// empty (loaded from disk), so this returns nullptr. +writeIfNotChanged( + path.join(CODEGEN_DIR, "InternalModuleRegistry+sourceList.h"), + `// clang-format off +extern "C" const char* Bun__getInternalModuleSource(uint32_t id, size_t* len, const char** name) +{ + switch (id) { + ${moduleList + .slice(0, nativeStartIndex) + .map((id, n) => { + const moduleName = idToPublicSpecifierOrEnumName(id); + return `case ${n}: *len = Bun::InternalModuleRegistryConstants::${idToEnumName(id)}Code.length(); *name = "${moduleName}"; return Bun::InternalModuleRegistryConstants::${idToEnumName(id)}Code.characters();`; + }) + .join("\n ")} + default: *len = 0; *name = nullptr; return nullptr; + } +} + +extern "C" uint32_t Bun__getInternalModuleSourceCount() { return ${nativeStartIndex}; } +`, +); + // This header is used by InternalModuleRegistry.cpp, and should only be included in that file. // It inlines all the strings for the module IDs. // diff --git a/test/bundler/compile-builtin-bytecode.test.ts b/test/bundler/compile-builtin-bytecode.test.ts new file mode 100644 index 00000000000..39a8f705535 --- /dev/null +++ b/test/bundler/compile-builtin-bytecode.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, test } from "bun:test"; +import { bunEnv, bunExe, isWindows, tempDir } from "harness"; +import { join } from "path"; + +// --compile --bytecode embeds pre-generated bytecode for internal modules +// (node:fs, node:stream etc) into the standalone executable so they don't +// need to be parsed at runtime. This test verifies the executable still +// works correctly with the embedded builtin bytecode. +describe("compile --bytecode builtin modules", () => { + test("internal modules work in standalone executable with bytecode", async () => { + using dir = tempDir("compile-builtin-bytecode", { + "app.js": ` + const fs = require("node:fs"); + const os = require("node:os"); + const path = require("node:path"); + const stream = require("node:stream"); + const util = require("node:util"); + + // Use functions from each module to ensure bytecode decodes correctly + const tmpFile = path.join(os.tmpdir(), "builtin-bc-test-" + Date.now() + ".txt"); + fs.writeFileSync(tmpFile, "hello"); + const content = fs.readFileSync(tmpFile, "utf8"); + fs.unlinkSync(tmpFile); + + const readable = stream.Readable.from(["a", "b", "c"]); + let chunks = []; + readable.on("data", c => chunks.push(c)); + readable.on("end", () => { + console.log(JSON.stringify({ + content, + chunks: chunks.join(""), + inspected: util.inspect({ x: 1 }), + })); + }); + `, + }); + + const outfile = join(String(dir), isWindows ? "app.exe" : "app"); + + await using buildProc = Bun.spawn({ + cmd: [bunExe(), "build", "--compile", "--bytecode", join(String(dir), "app.js"), "--outfile", outfile], + env: bunEnv, + stderr: "pipe", + stdout: "pipe", + }); + const [, buildStderr, buildExit] = await Promise.all([ + buildProc.stdout.text(), + buildProc.stderr.text(), + buildProc.exited, + ]); + expect(buildStderr).not.toContain("error"); + expect(buildExit).toBe(0); + + await using runProc = Bun.spawn({ + cmd: [outfile], + env: bunEnv, + stderr: "pipe", + stdout: "pipe", + }); + const [runStdout, runStderr, runExit] = await Promise.all([ + runProc.stdout.text(), + runProc.stderr.text(), + runProc.exited, + ]); + + expect(runStderr).toBe(""); + const output = JSON.parse(runStdout.trim()); + expect(output).toEqual({ + content: "hello", + chunks: "abc", + inspected: "{ x: 1 }", + }); + expect(runExit).toBe(0); + }); + + test("executable without --bytecode still works (no builtin bytecode)", async () => { + using dir = tempDir("compile-no-builtin-bytecode", { + "app.js": ` + const fs = require("node:fs"); + console.log(typeof fs.readFileSync); + `, + }); + + const outfile = join(String(dir), isWindows ? "app.exe" : "app"); + + await using buildProc = Bun.spawn({ + cmd: [bunExe(), "build", "--compile", join(String(dir), "app.js"), "--outfile", outfile], + env: bunEnv, + stderr: "pipe", + stdout: "pipe", + }); + await buildProc.exited; + expect(buildProc.exitCode).toBe(0); + + await using runProc = Bun.spawn({ + cmd: [outfile], + env: bunEnv, + stderr: "pipe", + stdout: "pipe", + }); + const [stdout, , exitCode] = await Promise.all([runProc.stdout.text(), runProc.stderr.text(), runProc.exited]); + + expect(stdout.trim()).toBe("function"); + expect(exitCode).toBe(0); + }); +});