Skip to content
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/targets/BuildBun.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ set(BUN_JAVASCRIPT_OUTPUTS
${CODEGEN_PATH}/InternalModuleRegistry+createInternalModuleById.h
${CODEGEN_PATH}/InternalModuleRegistry+enum.h
${CODEGEN_PATH}/InternalModuleRegistry+numberOfModules.h
${CODEGEN_PATH}/InternalModuleRegistry+sourceList.h
${CODEGEN_PATH}/NativeModuleImpl.h
${CODEGEN_PATH}/ResolvedSourceTag.zig
${CODEGEN_PATH}/SyntheticModuleType.h
Expand Down
2 changes: 1 addition & 1 deletion cmake/tools/SetupWebKit.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ option(WEBKIT_LOCAL "If a local version of WebKit should be used instead of down
option(WEBKIT_BUILD_TYPE "The build type for local WebKit (defaults to CMAKE_BUILD_TYPE)")

if(NOT WEBKIT_VERSION)
set(WEBKIT_VERSION fc9f2fa7272fec64905df6a9c78e15d7912f14ca)
set(WEBKIT_VERSION autobuild-preview-pr-177-20479c97)
endif()


Expand Down
1 change: 1 addition & 0 deletions scripts/build/codegen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,7 @@ function emitJsModules({ n, cfg, sources, o, dirStamp }: Ctx): void {
resolve(cfg.codegenDir, "InternalModuleRegistry+createInternalModuleById.h"),
resolve(cfg.codegenDir, "InternalModuleRegistry+enum.h"),
resolve(cfg.codegenDir, "InternalModuleRegistry+numberOfModules.h"),
resolve(cfg.codegenDir, "InternalModuleRegistry+sourceList.h"),
resolve(cfg.codegenDir, "NativeModuleImpl.h"),
resolve(cfg.codegenDir, "ResolvedSourceTag.zig"),
resolve(cfg.codegenDir, "SyntheticModuleType.h"),
Expand Down
2 changes: 1 addition & 1 deletion scripts/build/deps/webkit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* for local mode. Override via `--webkit-version=<hash>` to test a branch.
* From https://github.com/oven-sh/WebKit releases.
*/
export const WEBKIT_VERSION = "fc9f2fa7272fec64905df6a9c78e15d7912f14ca";
export const WEBKIT_VERSION = "autobuild-preview-pr-177-20479c97";

/**
* WebKit (JavaScriptCore) — the JS engine.
Expand Down
174 changes: 172 additions & 2 deletions src/StandaloneModuleGraph.zig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub const StandaloneModuleGraph = struct {
entry_point_id: u32 = 0,
compile_exec_argv: []const u8 = "",
flags: Flags = .{},
builtin_bytecode: []align(1) const BuiltinBytecodeEntry = &.{},

// We never want to hit the filesystem for these files
// We use the `/$bunfs/` prefix to indicate that it's a virtual path
Expand Down Expand Up @@ -317,8 +318,116 @@ pub const StandaloneModuleGraph = struct {
entry_point_id: u32 = 0,
compile_exec_argv_ptr: bun.StringPointer = .{},
flags: Flags = .{},
builtin_bytecode_ptr: bun.StringPointer = .{},
};

pub const BuiltinBytecodeKind = enum(u32) {
internal_module = 0,
builtin_function = 1,
};

/// Bytecode for a single builtin (internal module like node:fs, or builtin
/// function like ReadableStream constructor), embedded when building with
/// --compile --bytecode so parsing can be skipped at runtime.
pub const BuiltinBytecodeEntry = extern struct {
kind: BuiltinBytecodeKind,
id: u32,
bytecode: bun.StringPointer,
};

/// Mirrors JSC enum values. Keep in sync with ConstructAbility.h etc.
pub const BuiltinMetadata = struct {
pub const visibility_public: u8 = 0; // ImplementationVisibility::Public
pub const ctor_kind_none: u8 = 0; // ConstructorKind::None
pub const ctor_ability_cannot: u8 = 1; // ConstructAbility::CannotConstruct
pub const inline_attr_none: u8 = 0; // InlineAttribute::None
};

/// Build-time input for toBytes.
pub const BuiltinBytecodeInput = struct {
kind: BuiltinBytecodeKind,
id: u32,
bytecode: []const u8,
owner: *jsc.CachedBytecode,
};

pub fn findBuiltinBytecode(this: *const StandaloneModuleGraph, kind: BuiltinBytecodeKind, id: u32) ?[]const u8 {
for (this.builtin_bytecode) |entry| {
if (entry.kind == kind and entry.id == id) {
return sliceTo(this.bytes, entry.bytecode);
}
}
return null;
}

extern fn Bun__getInternalModuleSource(id: u32, len: *usize, name: *?[*:0]const u8) ?[*]const u8;
extern fn Bun__getInternalModuleSourceCount() u32;
extern fn Bun__getBuiltinFunctionSource(id: u32, len: *usize, name: *?[*:0]const u8, vis: *u8, ctor_kind: *u8, ctor_ability: *u8, inline_attr: *u8) ?[*]const u8;
extern fn Bun__getBuiltinFunctionSourceCount() u32;

/// Generate bytecode for all internal JS modules and builtin functions.
/// Caller must deref each entry's owner.
pub fn generateBuiltinBytecodes(allocator: std.mem.Allocator) !std.array_list.Managed(BuiltinBytecodeInput) {
var list = std.array_list.Managed(BuiltinBytecodeInput).init(allocator);
errdefer {
for (list.items) |bb| bb.owner.deref();
list.deinit();
}
jsc.initialize(false);
jsc.VirtualMachine.is_bundler_thread_for_bytecode_cache = true;

const module_count = Bun__getInternalModuleSourceCount();
const function_count = Bun__getBuiltinFunctionSourceCount();
try list.ensureTotalCapacity(module_count + function_count);

// Internal modules (node:fs etc)
{
var id: u32 = 0;
while (id < module_count) : (id += 1) {
var len: usize = 0;
var name_ptr: ?[*:0]const u8 = null;
const src_ptr = Bun__getInternalModuleSource(id, &len, &name_ptr) orelse continue;
// Debug builds use BUN_DYNAMIC_JS_LOAD_PATH and embed only "\n".
// BuiltinExecutables::createExecutable requires "(function (){})" minimum.
if (len < "(function (){})".len) continue;
var name = bun.String.borrowUTF8(bun.sliceTo(name_ptr.?, 0));
if (jsc.CachedBytecode.generateForBuiltin(
&name,
src_ptr[0..len],
BuiltinMetadata.visibility_public,
BuiltinMetadata.ctor_kind_none,
BuiltinMetadata.ctor_ability_cannot,
BuiltinMetadata.inline_attr_none,
)) |res| {
const bytecode, const owner = res;
list.appendAssumeCapacity(.{ .kind = .internal_module, .id = id, .bytecode = bytecode, .owner = owner });
}
}
}

// Builtin functions (ReadableStream etc)
{
var id: u32 = 0;
while (id < function_count) : (id += 1) {
var len: usize = 0;
var name_ptr: ?[*:0]const u8 = null;
var vis: u8 = 0;
var ctor_kind: u8 = 0;
var ctor_ability: u8 = 0;
var inline_attr: u8 = 0;
const src_ptr = Bun__getBuiltinFunctionSource(id, &len, &name_ptr, &vis, &ctor_kind, &ctor_ability, &inline_attr) orelse continue;
if (len < "(function (){})".len) continue;
var name = bun.String.borrowUTF8(bun.sliceTo(name_ptr.?, 0));
if (jsc.CachedBytecode.generateForBuiltin(&name, src_ptr[0..len], vis, ctor_kind, ctor_ability, inline_attr)) |res| {
const bytecode, const owner = res;
list.appendAssumeCapacity(.{ .kind = .builtin_function, .id = id, .bytecode = bytecode, .owner = owner });
}
}
}

return list;
}

pub const Flags = packed struct(u32) {
disable_default_env_files: bool = false,
disable_autoload_bunfig: bool = false,
Expand Down Expand Up @@ -367,12 +476,17 @@ pub const StandaloneModuleGraph = struct {

modules.lockPointers(); // make the pointers stable forever

const builtin_bytecode_bytes = sliceTo(raw_bytes, offsets.builtin_bytecode_ptr);
const builtin_bytecode: []align(1) const BuiltinBytecodeEntry =
std.mem.bytesAsSlice(BuiltinBytecodeEntry, builtin_bytecode_bytes);

return StandaloneModuleGraph{
.bytes = raw_bytes[0..offsets.byte_count],
.files = modules,
.entry_point_id = offsets.entry_point_id,
.compile_exec_argv = sliceToZ(raw_bytes, offsets.compile_exec_argv_ptr),
.flags = offsets.flags,
.builtin_bytecode = builtin_bytecode,
};
}

Expand All @@ -388,7 +502,26 @@ pub const StandaloneModuleGraph = struct {
return bytes[ptr.offset..][0..ptr.length :0];
}

pub fn toBytes(allocator: std.mem.Allocator, prefix: []const u8, output_files: []const bun.options.OutputFile, output_format: bun.options.Format, compile_exec_argv: []const u8, flags: Flags) ![]u8 {
/// Write bytes at a 128-byte-aligned offset within the string_builder,
/// accounting for the 8-byte section header on PE/Mach-O. Returns the
/// offset where the bytes were written. See the PLATFORM-SPECIFIC
/// ALIGNMENT comment in toBytes for rationale.
fn appendBytecodeAligned(string_builder: *bun.StringBuilder, bytes: []const u8) usize {
const target_mod: usize = 128 - @sizeOf(u64);
const current_mod = string_builder.len % 128;
const padding = if (current_mod <= target_mod)
target_mod - current_mod
else
128 - current_mod + target_mod;
@memset(string_builder.writable()[0..padding], 0);
string_builder.len += padding;
const aligned_offset = string_builder.len;
@memcpy(string_builder.writable()[0..bytes.len], bytes);
string_builder.len += bytes.len;
return aligned_offset;
}

pub fn toBytes(allocator: std.mem.Allocator, prefix: []const u8, output_files: []const bun.options.OutputFile, output_format: bun.options.Format, compile_exec_argv: []const u8, flags: Flags, builtin_bytecodes: []const BuiltinBytecodeInput) ![]u8 {
var serialize_trace = bun.perf.trace("StandaloneModuleGraph.serialize");
defer serialize_trace.end();

Expand Down Expand Up @@ -433,6 +566,11 @@ pub const StandaloneModuleGraph = struct {
string_builder.cap += @sizeOf(Offsets);
string_builder.countZ(compile_exec_argv);

for (builtin_bytecodes) |bb| {
string_builder.cap += (bb.bytecode.len + 255) / 256 * 256 + 256;
}
string_builder.cap += @sizeOf(BuiltinBytecodeEntry) * builtin_bytecodes.len;

try string_builder.allocate(allocator);

var modules = try std.array_list.Managed(CompiledModuleGraphFile).initCapacity(allocator, module_count);
Expand Down Expand Up @@ -589,12 +727,25 @@ pub const StandaloneModuleGraph = struct {
modules.appendAssumeCapacity(module);
}

const builtin_entries = try allocator.alloc(BuiltinBytecodeEntry, builtin_bytecodes.len);
defer allocator.free(builtin_entries);
for (builtin_bytecodes, builtin_entries) |input, *entry| {
const aligned_offset = appendBytecodeAligned(&string_builder, input.bytecode);
entry.* = .{
.kind = input.kind,
.id = input.id,
.bytecode = .{ .offset = @truncate(aligned_offset), .length = @truncate(input.bytecode.len) },
};
}
const builtin_bytecode_ptr = string_builder.appendCount(std.mem.sliceAsBytes(builtin_entries));

const offsets = Offsets{
.entry_point_id = @as(u32, @truncate(entry_point_id.?)),
.modules_ptr = string_builder.appendCount(std.mem.sliceAsBytes(modules.items)),
.compile_exec_argv_ptr = string_builder.appendCountZ(compile_exec_argv),
.byte_count = string_builder.len,
.flags = flags,
.builtin_bytecode_ptr = builtin_bytecode_ptr,
};

_ = string_builder.append(std.mem.asBytes(&offsets));
Expand Down Expand Up @@ -1127,8 +1278,9 @@ pub const StandaloneModuleGraph = struct {
compile_exec_argv: []const u8,
self_exe_path: ?[]const u8,
flags: Flags,
builtin_bytecodes: []const BuiltinBytecodeInput,
) !CompileResult {
const bytes = toBytes(allocator, module_prefix, output_files, output_format, compile_exec_argv, flags) catch |err| {
const bytes = toBytes(allocator, module_prefix, output_files, output_format, compile_exec_argv, flags, builtin_bytecodes) catch |err| {
return CompileResult.failFmt("failed to generate module graph bytes: {s}", .{@errorName(err)});
};
if (bytes.len == 0) return CompileResult.fail(.no_output_files);
Expand Down Expand Up @@ -1523,6 +1675,23 @@ pub const StandaloneModuleGraph = struct {
}
};

/// Called from InternalModuleRegistry.cpp to look up pre-generated bytecode
/// for an internal module. Returns null when not a standalone executable or
/// when no bytecode was embedded for this module.
export fn Bun__findInternalModuleBytecode(bun_vm: *jsc.VirtualMachine, field_id: u32, out_len: *usize) ?[*]const u8 {
const graph = bun_vm.standalone_module_graph orelse return null;
const bytes = graph.findBuiltinBytecode(.internal_module, field_id) orelse return null;
out_len.* = bytes.len;
return bytes.ptr;
}

export fn Bun__findBuiltinFunctionBytecode(bun_vm: *jsc.VirtualMachine, global_id: u32, out_len: *usize) ?[*]const u8 {
const graph = bun_vm.standalone_module_graph orelse return null;
const bytes = graph.findBuiltinBytecode(.builtin_function, global_id) orelse return null;
out_len.* = bytes.len;
return bytes.ptr;
}

const std = @import("std");
const w = std.os.windows;

Expand All @@ -1532,6 +1701,7 @@ const Output = bun.Output;
const SourceMap = bun.SourceMap;
const StringPointer = bun.StringPointer;
const Syscall = bun.sys;
const jsc = bun.jsc;
const macho = bun.macho;
const pe = bun.pe;
const strings = bun.strings;
Expand Down
12 changes: 12 additions & 0 deletions src/bun.js/bindings/CachedBytecode.zig
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub const CachedBytecode = opaque {
extern fn generateCachedModuleByteCodeFromSourceCode(sourceProviderURL: *bun.String, input_code: [*]const u8, inputSourceCodeSize: usize, outputByteCode: *?[*]u8, outputByteCodeSize: *usize, cached_bytecode: *?*CachedBytecode) bool;
extern fn generateCachedCommonJSProgramByteCodeFromSourceCode(sourceProviderURL: *bun.String, input_code: [*]const u8, inputSourceCodeSize: usize, outputByteCode: *?[*]u8, outputByteCodeSize: *usize, cached_bytecode: *?*CachedBytecode) bool;
extern fn generateCachedBuiltinByteCodeFromSourceCode(moduleName: *bun.String, input_code: [*]const u8, inputSourceCodeSize: usize, implementationVisibility: u8, constructorKind: u8, constructAbility: u8, inlineAttribute: u8, outputByteCode: *?[*]u8, outputByteCodeSize: *usize, cached_bytecode: *?*CachedBytecode) bool;

pub fn generateForESM(sourceProviderURL: *bun.String, input: []const u8) ?struct { []const u8, *CachedBytecode } {
var this: ?*CachedBytecode = null;
Expand All @@ -25,6 +26,17 @@ pub const CachedBytecode = opaque {
return null;
}

pub fn generateForBuiltin(moduleName: *bun.String, input: []const u8, vis: u8, ctor_kind: u8, ctor_ability: u8, inline_attr: u8) ?struct { []const u8, *CachedBytecode } {
var this: ?*CachedBytecode = null;
var input_code_size: usize = 0;
var input_code_ptr: ?[*]u8 = null;
if (generateCachedBuiltinByteCodeFromSourceCode(moduleName, input.ptr, input.len, vis, ctor_kind, ctor_ability, inline_attr, &input_code_ptr, &input_code_size, &this)) {
return .{ input_code_ptr.?[0..input_code_size], this.? };
}

return null;
}

extern "c" fn CachedBytecode__deref(this: *CachedBytecode) void;
pub fn deref(this: *CachedBytecode) void {
return CachedBytecode__deref(this);
Expand Down
Loading
Loading