diff --git a/src/execution/dispatch_loop.rs b/src/execution/dispatch_loop.rs index 3a6db2a..355385a 100644 --- a/src/execution/dispatch_loop.rs +++ b/src/execution/dispatch_loop.rs @@ -23,24 +23,10 @@ pub fn execute_instructions(state: &mut VmState) -> Outcome { return Outcome::Trap; } - // Natural pc-overflow handling: pop nested label or halt at function level. + // pc past the body means the frame is done (function-level `end` and + // function-level branches both halt through here or directly). if state.pc >= state.instrs_len { - if state.current_label_idx > 0 { - let (return_ip, is_loop) = { - let ls = &state.label_stack()[state.current_label_idx]; - (ls.label.return_ip, ls.label.is_loop) - }; - let cur_ip = state.pc; - if return_ip <= cur_ip && !is_loop { - return Outcome::Halt; - } - state.label_stack_mut().pop(); - state.current_label_idx -= 1; - state.pc = if is_loop { cur_ip + 1 } else { return_ip }; - continue; - } else { - return Outcome::Halt; - } + return Outcome::Halt; } #[cfg(feature = "stats")] diff --git a/src/execution/handlers.rs b/src/execution/handlers.rs index 223ed51..d7276fc 100644 --- a/src/execution/handlers.rs +++ b/src/execution/handlers.rs @@ -28,7 +28,7 @@ use crate::execution::ir::{Handler, Outcome, ProcessedInstr, RegOrLocal}; use crate::execution::module::GetInstanceByIdx; use crate::execution::operand; use crate::execution::regs::Reg; -use crate::execution::state::{Label, LabelStack, ModuleLevelInstr, VmState}; +use crate::execution::state::{ModuleLevelInstr, VmState}; use crate::execution::value::Val; use arrayvec::ArrayVec; @@ -292,6 +292,8 @@ pub const HANDLER_IDX_REF_LOCAL_SET: usize = 0x102; // WASI call handler constant pub const HANDLER_IDX_CALL_WASI: usize = 0x103; +/// Function-level `end`: writes the return registers and halts the frame. +pub const HANDLER_IDX_END_FUNC: usize = 0x104; // ============================================================================ // advance! macro — the difference between tco and non-tco mode @@ -1409,7 +1411,6 @@ pub fn unreachable(_state: &mut VmState) -> Outcome { pub fn br(state: &mut VmState) -> Outcome { let instr = unsafe { &*state.instrs.add(state.pc) }; let ProcessedInstr::BrReg { - relative_depth, target_ip, source_regs, target_result_regs, @@ -1418,17 +1419,12 @@ pub fn br(state: &mut VmState) -> Outcome { else { unsafe { std::hint::unreachable_unchecked() } }; - let depth = *relative_depth as usize; let target_ip = *target_ip; if !source_regs.is_empty() && !target_result_regs.is_empty() { state .reg_file_mut() .copy_regs(source_regs, target_result_regs); } - let target_level = state.current_label_idx.saturating_sub(depth); - let keep_count = target_level.max(1); - state.label_stack_mut().truncate(keep_count); - state.current_label_idx = state.label_stack().len() - 1; state.pc = target_ip; advance!(state) } @@ -1436,7 +1432,6 @@ pub fn br(state: &mut VmState) -> Outcome { pub fn br_if(state: &mut VmState) -> Outcome { let instr = unsafe { &*state.instrs.add(state.pc) }; let ProcessedInstr::BrIfReg { - relative_depth, target_ip, cond_reg, source_regs, @@ -1446,7 +1441,6 @@ pub fn br_if(state: &mut VmState) -> Outcome { else { unsafe { std::hint::unreachable_unchecked() } }; - let depth = *relative_depth as usize; let target_ip = *target_ip; let cond_reg = *cond_reg; let cond = state.reg_file().get_i32(cond_reg.index()); @@ -1459,10 +1453,6 @@ pub fn br_if(state: &mut VmState) -> Outcome { .reg_file_mut() .copy_regs(source_regs, target_result_regs); } - let target_level = state.current_label_idx.saturating_sub(depth); - let keep_count = target_level.max(1); - state.label_stack_mut().truncate(keep_count); - state.current_label_idx = state.label_stack().len() - 1; state.pc = target_ip; advance!(state) } @@ -1481,145 +1471,85 @@ pub fn br_table(state: &mut VmState) -> Outcome { let index_reg = *index_reg; let idx = state.reg_file().get_i32(index_reg.index()) as usize; - let (depth, target_ip, target_result_regs_slice): (usize, usize, &[Reg]) = - if idx < targets.len() { - let (d, ip, rs) = &targets[idx]; - (*d as usize, *ip, &rs[..]) - } else { - let (d, ip, rs) = default_target; - (*d as usize, *ip, &rs[..]) - }; + let (target_ip, target_result_regs_slice): (usize, &[Reg]) = if idx < targets.len() { + let (_, ip, rs) = &targets[idx]; + (*ip, &rs[..]) + } else { + let (_, ip, rs) = default_target; + (*ip, &rs[..]) + }; if !source_regs.is_empty() && !target_result_regs_slice.is_empty() { state .reg_file_mut() .copy_regs(source_regs, target_result_regs_slice); } - let target_level = state.current_label_idx.saturating_sub(depth); - let keep_count = target_level.max(1); - state.label_stack_mut().truncate(keep_count); - state.current_label_idx = state.label_stack().len() - 1; state.pc = target_ip; advance!(state) } -pub fn block(state: &mut VmState) -> Outcome { - let is_loop = match state.current_instr() { - ProcessedInstr::BlockReg { is_loop, .. } => *is_loop, - _ => unsafe { std::hint::unreachable_unchecked() }, - }; - let next_ip = state.pc + 1; - let cur_idx = state.current_label_idx; - let label_stack = state.label_stack_mut(); - let pi_rc = label_stack[cur_idx].processed_instrs.clone(); - label_stack.push(LabelStack { - label: Label { - is_loop, - return_ip: next_ip, - }, - processed_instrs: pi_rc, - ip: next_ip, - }); - state.current_label_idx = state.label_stack().len() - 1; - state.pc = next_ip; - advance!(state) -} - pub fn r#if(state: &mut VmState) -> Outcome { - let (cond_reg, else_target_ip, has_else) = match state.current_instr() { + let (cond_reg, else_target_ip) = match state.current_instr() { ProcessedInstr::IfReg { cond_reg, else_target_ip, - has_else, .. - } => (*cond_reg, *else_target_ip, *has_else), + } => (*cond_reg, *else_target_ip), _ => unsafe { std::hint::unreachable_unchecked() }, }; let cond = state.reg_file().get_i32(cond_reg.index()); - // Only clone pi_rc inside branches that need it, so the no-else path - // has zero `Rc` destructors at the tail call. - if cond != 0 { - let next_ip = state.pc + 1; - let cur_idx = state.current_label_idx; - let label_stack = state.label_stack_mut(); - let pi_rc = label_stack[cur_idx].processed_instrs.clone(); - label_stack.push(LabelStack { - label: Label { - is_loop: false, - return_ip: else_target_ip, - }, - processed_instrs: pi_rc, - ip: next_ip, - }); - state.current_label_idx = state.label_stack().len() - 1; - state.pc = next_ip; - } else if has_else { - let cur_idx = state.current_label_idx; - let label_stack = state.label_stack_mut(); - let pi_rc = label_stack[cur_idx].processed_instrs.clone(); - label_stack.push(LabelStack { - label: Label { - is_loop: false, - return_ip: else_target_ip, - }, - processed_instrs: pi_rc, - ip: else_target_ip, - }); - state.current_label_idx = state.label_stack().len() - 1; - state.pc = else_target_ip; + state.pc = if cond != 0 { + state.pc + 1 } else { - state.pc = else_target_ip; - } + else_target_ip + }; advance!(state) } +/// Inner (block-level) `end`: copies block results to the block's result +/// registers and falls through. pub fn end(state: &mut VmState) -> Outcome { let instr = unsafe { &*state.instrs.add(state.pc) }; let ProcessedInstr::EndReg { source_regs, target_result_regs, + .. } = instr else { unsafe { std::hint::unreachable_unchecked() } }; - - let mut halt = false; - if state.label_stack().len() <= 1 { - halt = true; - } else { + if !source_regs.is_empty() && !target_result_regs.is_empty() { state .reg_file_mut() .copy_regs(source_regs, target_result_regs); - state.label_stack_mut().pop(); - state.current_label_idx = state.label_stack().len() - 1; - let next_ip = state.pc + 1; - if next_ip >= state.instrs_len && state.current_label_idx == 0 { - halt = true; - } else { - state.pc = next_ip; - } - } - if halt { - let dst = state.return_result_regs_mut(); - dst.clear(); - for r in source_regs.iter() { - dst.push(*r); - } - state.pc = state.instrs_len; } + state.pc += 1; advance!(state) } +/// Function-level `end`: records the return-value registers and halts the +/// frame. Also the target of function-level `br`/`br_if`/`br_table` (which +/// copy their values into this end's source registers before jumping). +pub fn end_func(state: &mut VmState) -> Outcome { + let instr = unsafe { &*state.instrs.add(state.pc) }; + let ProcessedInstr::EndReg { source_regs, .. } = instr else { + unsafe { std::hint::unreachable_unchecked() } + }; + let dst = state.return_result_regs_mut(); + dst.clear(); + for r in source_regs.iter() { + dst.push(*r); + } + state.pc = state.instrs_len; + Outcome::Halt +} + pub fn jump(state: &mut VmState) -> Outcome { let target_ip = match state.current_instr() { ProcessedInstr::JumpReg { target_ip } => *target_ip, _ => unsafe { std::hint::unreachable_unchecked() }, }; - if state.label_stack().len() > 1 { - state.label_stack_mut().pop(); - state.current_label_idx = state.label_stack().len() - 1; - } state.pc = target_ip; advance!(state) } @@ -2324,9 +2254,17 @@ pub fn select_handler(instr: &ProcessedInstr) -> Handler { ProcessedInstr::CallWasiReg { .. } => call_wasi, ProcessedInstr::ReturnReg { .. } => r#return, ProcessedInstr::JumpReg { .. } => jump, - ProcessedInstr::BlockReg { .. } => block, + // BlockReg never survives compaction (Phase 5); trap if one leaks. + ProcessedInstr::BlockReg { .. } => invalid, ProcessedInstr::IfReg { .. } => r#if, - ProcessedInstr::EndReg { .. } => end, + ProcessedInstr::EndReg { + is_function_end: false, + .. + } => end, + ProcessedInstr::EndReg { + is_function_end: true, + .. + } => end_func, ProcessedInstr::BrReg { .. } => br, ProcessedInstr::BrIfReg { .. } => br_if, ProcessedInstr::BrTableReg { .. } => br_table, diff --git a/src/execution/ir.rs b/src/execution/ir.rs index 9c30c36..856b83f 100644 --- a/src/execution/ir.rs +++ b/src/execution/ir.rs @@ -7,8 +7,8 @@ use crate::execution::handlers::{ HANDLER_IDX_BLOCK, HANDLER_IDX_BR, HANDLER_IDX_BR_IF, HANDLER_IDX_BR_TABLE, HANDLER_IDX_CALL, HANDLER_IDX_CALL_INDIRECT, HANDLER_IDX_CALL_WASI, HANDLER_IDX_DATA_DROP, HANDLER_IDX_ELSE, - HANDLER_IDX_END, HANDLER_IDX_IF, HANDLER_IDX_LOOP, HANDLER_IDX_NOP, HANDLER_IDX_RETURN, - HANDLER_IDX_UNREACHABLE, + HANDLER_IDX_END, HANDLER_IDX_END_FUNC, HANDLER_IDX_IF, HANDLER_IDX_LOOP, HANDLER_IDX_NOP, + HANDLER_IDX_RETURN, HANDLER_IDX_UNREACHABLE, }; use crate::execution::regs::Reg; use crate::execution::state::VmState; @@ -202,29 +202,26 @@ pub enum ProcessedInstr { JumpReg { target_ip: usize, }, + /// Parse-time structural marker for block/loop nesting (fixup passes + /// rebuild the control stack from it). BlockReg { - arity: usize, - param_count: usize, is_loop: bool, }, IfReg { - arity: usize, cond_reg: Reg, else_target_ip: usize, - has_else: bool, }, EndReg { source_regs: RegSlice, target_result_regs: RegSlice, + is_function_end: bool, }, BrReg { - relative_depth: u32, target_ip: usize, source_regs: RegSlice, target_result_regs: RegSlice, }, BrIfReg { - relative_depth: u32, target_ip: usize, cond_reg: Reg, source_regs: RegSlice, @@ -266,7 +263,14 @@ impl ProcessedInstr { ProcessedInstr::BlockReg { is_loop: false, .. } => HANDLER_IDX_BLOCK, ProcessedInstr::BlockReg { is_loop: true, .. } => HANDLER_IDX_LOOP, ProcessedInstr::IfReg { .. } => HANDLER_IDX_IF, - ProcessedInstr::EndReg { .. } => HANDLER_IDX_END, + ProcessedInstr::EndReg { + is_function_end: false, + .. + } => HANDLER_IDX_END, + ProcessedInstr::EndReg { + is_function_end: true, + .. + } => HANDLER_IDX_END_FUNC, ProcessedInstr::BrReg { .. } => HANDLER_IDX_BR, ProcessedInstr::BrIfReg { .. } => HANDLER_IDX_BR_IF, ProcessedInstr::BrTableReg { .. } => HANDLER_IDX_BR_TABLE, diff --git a/src/execution/migration.rs b/src/execution/migration.rs index be1cf8e..c978da0 100644 --- a/src/execution/migration.rs +++ b/src/execution/migration.rs @@ -8,7 +8,7 @@ //! The checkpoint captures: //! - The register file (`RegFile`), which holds both operand-stack values and //! function locals (params + declared locals) -//! - Activation frame stack (label stacks, result registers, frame metadata) +//! - Activation frame stack (per-frame ip, result registers, frame metadata) //! - Linear memory contents (LZ4 compressed) //! - Global variable values //! - Per-frame function indices (used to rebuild skipped `Rc` fields on restore) @@ -230,7 +230,7 @@ pub fn checkpoint>( .activation_frame_stack .iter() .map(|frame_stack| { - let frame_instrs = &frame_stack.label_stack[0].processed_instrs; + let frame_instrs = &frame_stack.processed_instrs; module_inst .func_addrs .iter() @@ -262,12 +262,6 @@ pub fn checkpoint>( .map(|v| v.len()) .unwrap_or(0); let frames_count = state.stacks.activation_frame_stack.len(); - let total_labels: usize = state - .stacks - .activation_frame_stack - .iter() - .map(|f| f.label_stack.len()) - .sum(); let frames_size = bincode::serialize(&state.stacks.activation_frame_stack) .map(|v| v.len()) .unwrap_or(0); @@ -284,8 +278,8 @@ pub fn checkpoint>( println!("Checkpoint component sizes:"); println!(" reg_file: {} bytes", reg_file_size); println!( - " frames: {} bytes ({} frames, {} labels)", - frames_size, frames_count, total_labels + " frames: {} bytes ({} frames)", + frames_size, frames_count ); println!( " memory_data: {} bytes (raw {} bytes, LZ4 compressed)", @@ -369,10 +363,7 @@ pub fn restore>( let func_addr = &module_inst.func_addrs[func_idx as usize]; let func_inst = func_addr.read_lock(); if let FuncInst::RuntimeFunc { code, .. } = func_inst { - let body = code.body.clone(); - for label_stack in frame_stack.label_stack.iter_mut() { - label_stack.processed_instrs = body.clone(); - } + frame_stack.processed_instrs = code.body.clone(); // v2 dispatcher: handler array (function pointers) — Rc> frame_stack.handlers = code.handlers.clone(); // Restored frames default to enable_checkpoint=false (Runtime::run diff --git a/src/execution/runtime.rs b/src/execution/runtime.rs index bd541e1..9446745 100644 --- a/src/execution/runtime.rs +++ b/src/execution/runtime.rs @@ -8,7 +8,7 @@ use crate::execution::migration; use crate::execution::module::ModuleInst; use crate::execution::regs::{Reg, RegFile}; use crate::execution::state::VmState; -use crate::execution::state::{Frame, FrameStack, Label, LabelStack, ModuleLevelInstr, Stacks}; +use crate::execution::state::{Frame, FrameStack, ModuleLevelInstr, Stacks}; use crate::execution::stats::ExecutionStats; #[cfg(feature = "trace")] use crate::execution::trace::{TraceConfig, Tracer}; @@ -136,15 +136,11 @@ impl Runtime { let reg_file_ptr: *mut RegFile = &mut self.stacks.reg_file as *mut RegFile; let frame_stack = &mut self.stacks.activation_frame_stack[frame_stack_idx]; - let current_label_idx = frame_stack.label_stack.len().saturating_sub(1); - let (instrs_ptr, instrs_len, pc) = { - let ls = &frame_stack.label_stack[current_label_idx]; - ( - ls.processed_instrs.as_ptr(), - ls.processed_instrs.len(), - ls.ip, - ) - }; + let (instrs_ptr, instrs_len, pc) = ( + frame_stack.processed_instrs.as_ptr(), + frame_stack.processed_instrs.len(), + frame_stack.ip, + ); let handlers_ptr = { cfg_if::cfg_if! { if #[cfg(all( @@ -163,8 +159,6 @@ impl Runtime { } }; let mem_ptr = frame_stack.cached_mem_ptr.unwrap_or(std::ptr::null_mut()); - let label_stack_ptr: *mut Vec = - &mut frame_stack.label_stack as *mut Vec; let return_result_regs_ptr: *mut ArrayVec = &mut frame_stack.return_result_regs as *mut ArrayVec; let enable_checkpoint = frame_stack.enable_checkpoint; @@ -187,8 +181,6 @@ impl Runtime { instrs: instrs_ptr, instrs_len, handlers: handlers_ptr, - label_stack: label_stack_ptr, - current_label_idx, mem_ptr, module: module_ptr, trap: None, @@ -204,10 +196,7 @@ impl Runtime { let outcome = dispatch::execute_instructions(&mut state); - let idx = state.current_label_idx; - if idx < state.label_stack().len() { - state.label_stack_mut()[idx].ip = state.pc; - } + frame_stack.ip = state.pc; frame_stack.cached_mem_ptr = if state.mem_ptr.is_null() { None } else { @@ -300,15 +289,6 @@ impl Runtime { } Ok(instr_option) => { - let current_frame_stack_mut = - self.stacks.activation_frame_stack.last_mut().unwrap(); - - if current_frame_stack_mut.label_stack.is_empty() { - return Err(RuntimeError::StackError( - "Label stack empty during frame transition", - )); - } - match instr_option { Some(ModuleLevelInstr::InvokeWasiReg { wasi_func_type, @@ -383,14 +363,8 @@ impl Runtime { module: func_module_weak.clone(), n: type_.results.len(), }, - label_stack: vec![LabelStack { - label: Label { - is_loop: false, - return_ip: 0, - }, - processed_instrs: code.body.clone(), - ip: 0, - }], + ip: 0, + processed_instrs: code.body.clone(), enable_checkpoint: self.enable_checkpoint, result_regs: ArrayVec::new(), return_result_regs: ArrayVec::new(), diff --git a/src/execution/state.rs b/src/execution/state.rs index 040e2bf..574b131 100644 --- a/src/execution/state.rs +++ b/src/execution/state.rs @@ -1,7 +1,7 @@ //! v2 dispatcher execution state. //! //! `VmState` aggregates everything a handler needs to execute one instruction: -//! register file, locals, instruction stream, label stack, memory pointer, +//! register file, locals, instruction stream, memory pointer, //! module reference, and outcome channels. Fields are raw pointers so all //! handlers share an identical `fn(&mut VmState) -> Outcome` signature //! (required for `return_call_indirect` type identity in TCO mode). @@ -15,7 +15,7 @@ use crate::execution::regs::{Reg, RegFile}; use crate::execution::value::Val; use crate::structure::module::WasiFuncType; use arrayvec::ArrayVec; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Serialize}; use std::rc::{Rc, Weak}; cfg_if::cfg_if! { @@ -36,17 +36,12 @@ pub struct VmState { // Register file (holds operand-stack registers and locals) pub reg_file: *mut RegFile, - // Active label's instruction stream + cached handler array - // (invariant within a frame because all label stacks share the same Rc) + // Frame's instruction stream + cached handler array pub pc: usize, pub instrs: *const ProcessedInstr, pub instrs_len: usize, pub handlers: *const Handler, - // Label stack management (Br/BrIf/End/Block/If/Jump mutate these) - pub label_stack: *mut Vec, - pub current_label_idx: usize, - // Memory fast path (load/store) pub mem_ptr: *mut u8, @@ -99,18 +94,6 @@ impl VmState { unsafe { &mut *self.reg_file } } - /// Shared reference to the label stack. - #[inline(always)] - pub fn label_stack(&self) -> &Vec { - unsafe { &*self.label_stack } - } - - /// Mutable reference to the label stack. - #[inline(always)] - pub fn label_stack_mut(&mut self) -> &mut Vec { - unsafe { &mut *self.label_stack } - } - /// Reference to the module instance. #[inline(always)] pub fn module(&self) -> &ModuleInst { @@ -181,14 +164,8 @@ impl VMState { module: module.clone(), n: type_.results.len(), }, - label_stack: vec![LabelStack { - label: Label { - is_loop: false, - return_ip: 0, - }, - processed_instrs: code.body.clone(), - ip: 0, - }], + ip: 0, + processed_instrs: code.body.clone(), enable_checkpoint: false, result_regs: ArrayVec::new(), return_result_regs: ArrayVec::new(), @@ -227,11 +204,16 @@ pub struct Frame { pub n: usize, } -/// Activation frame stack with label stacks and execution state. +/// Activation frame with execution state. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct FrameStack { pub frame: Frame, - pub label_stack: Vec, + /// Program counter within this frame's body. Saved when the frame yields + /// (call/checkpoint) and used to resume execution. + pub ip: usize, + /// Function body (invariant per frame). + #[serde(skip)] + pub processed_instrs: Rc>, #[serde(skip)] pub enable_checkpoint: bool, pub result_regs: ArrayVec, @@ -251,49 +233,3 @@ pub struct FrameStack { #[serde(skip)] pub handler_ctrl: Option>, } - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct Label { - pub is_loop: bool, - pub return_ip: usize, -} - -/// Label stack containing instructions and program counter. -#[derive(Clone, Debug)] -pub struct LabelStack { - pub label: Label, - pub processed_instrs: Rc>, - pub ip: usize, -} - -impl Serialize for LabelStack { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - use serde::ser::SerializeStruct; - let mut state = serializer.serialize_struct("LabelStack", 2)?; - state.serialize_field("label", &self.label)?; - state.serialize_field("ip", &self.ip)?; - state.end() - } -} - -impl<'de> Deserialize<'de> for LabelStack { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - #[derive(Deserialize)] - struct LabelStackData { - label: Label, - ip: usize, - } - let data = LabelStackData::deserialize(deserializer)?; - Ok(LabelStack { - label: data.label, - processed_instrs: Rc::new(Vec::new()), - ip: data.ip, - }) - } -} diff --git a/src/execution/stats.rs b/src/execution/stats.rs index 574666a..148021b 100644 --- a/src/execution/stats.rs +++ b/src/execution/stats.rs @@ -3,8 +3,8 @@ use super::handlers::*; /// Number of handler-index slots. Must exceed the largest `HANDLER_IDX_*` -/// (currently 0x103); 0x104 leaves the counter array indexable by every index. -const HANDLER_SLOTS: usize = 0x104; +/// (currently 0x104); 0x105 leaves the counter array indexable by every index. +const HANDLER_SLOTS: usize = 0x105; /// Collects per-instruction execution counts. #[derive(Debug)] @@ -40,6 +40,7 @@ impl ExecutionStats { HANDLER_IDX_IF => "if", HANDLER_IDX_ELSE => "else", HANDLER_IDX_END => "end", + HANDLER_IDX_END_FUNC => "end (function)", HANDLER_IDX_BR => "br", HANDLER_IDX_BR_IF => "br_if", HANDLER_IDX_BR_TABLE => "br_table", diff --git a/src/parser.rs b/src/parser.rs index 24c8492..32c6871 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1281,6 +1281,10 @@ fn decode_code_section( ) .map_err(|e| Box::new(e) as Box)?; + // Phase 5: Strip no-op instructions (block/loop, nop, no-op inner end) + // and remap all branch targets to the compacted indices. + let processed_instrs = compact_instruction_stream(processed_instrs); + let body_rc = Rc::new(processed_instrs); // v2 dispatcher handler array: parallel to body + halt sentinel at end @@ -1307,6 +1311,79 @@ fn decode_code_section( Ok(()) } +/// Returns true for instructions with no runtime effect, removable once all +/// branch targets are resolved to absolute IPs: +/// - `BlockReg` (block/loop): labels are fully static, the handler is a no-op. +/// - `NopReg`: explicit nops and unreachable-code placeholders. +/// - Inner `EndReg` whose register copy does nothing (either side empty or +/// source == target). The function-level end is always kept: it collects +/// the return registers and is the target of function-level branches. +fn is_noop_instr(instr: &ProcessedInstr) -> bool { + match instr { + ProcessedInstr::BlockReg { .. } | ProcessedInstr::NopReg => true, + ProcessedInstr::EndReg { + is_function_end: false, + source_regs, + target_result_regs, + } => { + source_regs.is_empty() + || target_result_regs.is_empty() + || source_regs == target_result_regs + } + _ => false, + } +} + +/// Phase 5: Removes no-op instructions from the stream and remaps all branch +/// targets (`BrReg`, `BrIfReg`, `BrTableReg`, `IfReg`, `JumpReg`) to the +/// compacted indices. +/// +/// A jump to a removed instruction lands on the next kept instruction at or +/// after it, which is semantically identical because removed instructions do +/// nothing. `remap[i]` = number of kept instructions before old index `i`, +/// which is exactly that target. +fn compact_instruction_stream(processed: Vec) -> Vec { + let old_len = processed.len(); + let mut remap: Vec = Vec::with_capacity(old_len + 1); + let mut kept_count = 0usize; + for instr in processed.iter() { + remap.push(kept_count); + if !is_noop_instr(instr) { + kept_count += 1; + } + } + // Defensive entry for targets one past the end. + remap.push(kept_count); + + let mut kept: Vec = Vec::with_capacity(kept_count); + for mut instr in processed.into_iter() { + if is_noop_instr(&instr) { + continue; + } + match &mut instr { + ProcessedInstr::BrReg { target_ip, .. } => *target_ip = remap[*target_ip], + ProcessedInstr::BrIfReg { target_ip, .. } => *target_ip = remap[*target_ip], + ProcessedInstr::IfReg { else_target_ip, .. } => { + *else_target_ip = remap[*else_target_ip] + } + ProcessedInstr::JumpReg { target_ip } => *target_ip = remap[*target_ip], + ProcessedInstr::BrTableReg { + targets, + default_target, + .. + } => { + for (_, target_ip, _) in targets.iter_mut() { + *target_ip = remap[*target_ip]; + } + default_target.1 = remap[default_target.1]; + } + _ => {} + } + kept.push(instr); + } + kept +} + /// Information needed to fix up a branch instruction's target address. /// /// During initial instruction decoding, branch targets are unknown because @@ -1344,7 +1421,7 @@ fn preprocess_instructions( ) -> Result<(), RuntimeError> { // --- Phase 2: Resolve Br, BrIf, If, Else jumps --- - // Control stack stores: (pc, is_loop, block_type, runtime_label_stack_idx) + // Control stack stores: (pc, is_loop, block_type) let mut current_control_stack_pass2: Vec<(usize, bool, wasmparser::BlockType)> = Vec::new(); for fixup_index in 0..fixups.len() { @@ -1389,22 +1466,36 @@ fn preprocess_instructions( } if current_control_stack_pass2.len() <= current_fixup_depth { - // Depth exceeds control stack - this is a branch to function level (return) - // Set target_ip to end of function (processed.len()) - let function_end_ip = processed.len(); + // Depth exceeds control stack - this is a branch to function level + // (return). Target the function-level EndReg (always the last + // instruction): `end_func` collects the return registers and + // halts. The branch copies its values into the end's source regs. + let function_end_ip = processed.len() - 1; + let end_source_regs: RegSlice = match processed.last() { + Some(ProcessedInstr::EndReg { source_regs, .. }) => source_regs.clone(), + _ => { + return Err(RuntimeError::InvalidWasm( + "Internal Error: function body does not terminate with EndReg", + )) + } + }; if let Some(instr_to_patch) = processed.get_mut(current_fixup_pc) { if let ProcessedInstr::BrReg { target_ip: ref mut tip, + target_result_regs: ref mut trr, .. } = instr_to_patch { *tip = function_end_ip; + *trr = end_source_regs; } else if let ProcessedInstr::BrIfReg { target_ip: ref mut tip, + target_result_regs: ref mut trr, .. } = instr_to_patch { *tip = function_end_ip; + *trr = end_source_regs; } else if is_if_false_jump { if !matches!(instr_to_patch, ProcessedInstr::IfReg { .. }) { fixups[fixup_index].original_wasm_depth = usize::MAX; @@ -1467,16 +1558,13 @@ fn preprocess_instructions( // If instruction's jump-on-false // Target is ElseMarker+1 or EndMarker+1 let else_target = *if_else_map.get(&target_start_pc).unwrap_or(&target_ip); - let has_else = else_target != target_ip; if let ProcessedInstr::IfReg { else_target_ip: ref mut tip, - has_else: ref mut he, .. } = instr_to_patch { *tip = else_target; - *he = has_else; } } else if is_else_jump { if let ProcessedInstr::JumpReg { @@ -1558,12 +1646,30 @@ fn preprocess_instructions( let default_depth = default_info.0 as usize; let default_result_regs = default_info.2; + // Function-level targets branch to the function-level + // EndReg (always the last instruction), copying their + // values into its source regs. + let function_end_ip = processed.len() - 1; + let function_end_regs: RegSlice = match processed.last() { + Some(ProcessedInstr::EndReg { source_regs, .. }) => source_regs.clone(), + _ => { + return Err(RuntimeError::InvalidWasm( + "Internal Error: function body does not terminate with EndReg", + )) + } + }; + // Compute target_ip for each target (keeping existing result_regs) let mut resolved_reg_targets: Vec<(u32, usize, RegSlice)> = Vec::new(); for (rel_depth, _, result_regs) in targets_clone.iter() { let depth = *rel_depth as usize; if current_control_stack_pass3.len() <= depth { - resolved_reg_targets.push((*rel_depth, 0, result_regs.clone())); // Invalid + // Function-level target (acts as return) + resolved_reg_targets.push(( + *rel_depth, + function_end_ip, + function_end_regs.clone(), + )); continue; } let target_stack_level = current_control_stack_pass3.len() - 1 - depth; @@ -1578,21 +1684,24 @@ fn preprocess_instructions( resolved_reg_targets.push((*rel_depth, target_ip, result_regs.clone())); } - // Compute target_ip for default target + // Compute target_ip and result regs for default target // Note: block_end_map already stores End + 1 position - let default_target_ip = if current_control_stack_pass3.len() <= default_depth { - 0 // Invalid - } else { - let target_stack_level = - current_control_stack_pass3.len() - 1 - default_depth; - let (target_start_pc, is_loop, _) = - current_control_stack_pass3[target_stack_level]; - if is_loop { - target_start_pc + let (default_target_ip, default_result_regs) = + if current_control_stack_pass3.len() <= default_depth { + // Function-level default target (acts as return) + (function_end_ip, function_end_regs) } else { - *block_end_map.get(&target_start_pc).unwrap_or(&0) - } - }; + let target_stack_level = + current_control_stack_pass3.len() - 1 - default_depth; + let (target_start_pc, is_loop, _) = + current_control_stack_pass3[target_stack_level]; + let ip = if is_loop { + target_start_pc + } else { + *block_end_map.get(&target_start_pc).unwrap_or(&0) + }; + (ip, default_result_regs) + }; // Update BrTableReg if let Some(instr_to_patch) = processed.get_mut(pc) { @@ -5405,6 +5514,8 @@ fn decode_processed_instrs_and_fixups<'a>( ) } wasmparser::Operator::End => { + // Empty control stack means this is the function-level end. + let is_function_end = control_info_stack.is_empty(); let result_type_vec = if let Some(block_info) = control_info_stack.last() { get_block_result_types(&block_info.block_type, module) } else { @@ -5445,6 +5556,7 @@ fn decode_processed_instrs_and_fixups<'a>( let instr = ProcessedInstr::EndReg { source_regs: source_regs.into_boxed_slice(), target_result_regs: target_result_regs.into_boxed_slice(), + is_function_end, }; (Some(instr), None) } @@ -5477,9 +5589,6 @@ fn decode_processed_instrs_and_fixups<'a>( allocator.push(*vtype); } - let arity = result_types.len(); - let param_count = param_types.len(); - // Push to control_info_stack for End to use control_info_stack.push(ControlBlockInfo { block_type: *blockty, @@ -5488,11 +5597,7 @@ fn decode_processed_instrs_and_fixups<'a>( param_regs: Vec::new(), }); - let instr = ProcessedInstr::BlockReg { - arity, - param_count, - is_loop, - }; + let instr = ProcessedInstr::BlockReg { is_loop }; (Some(instr), None) } wasmparser::Operator::If { blockty } => { @@ -5521,8 +5626,6 @@ fn decode_processed_instrs_and_fixups<'a>( allocator.push(*vtype); } - let arity = result_types.len(); - // Push to control_info_stack for End to use control_info_stack.push(ControlBlockInfo { block_type: *blockty, @@ -5532,10 +5635,8 @@ fn decode_processed_instrs_and_fixups<'a>( }); let instr = ProcessedInstr::IfReg { - arity, cond_reg, else_target_ip: usize::MAX, // Will be fixed up - has_else: false, // Will be updated during fixup }; let fixup = Some(FixupInfo { pc: current_processed_pc, @@ -5740,15 +5841,23 @@ fn decode_processed_instrs_and_fixups<'a>( } wasmparser::Operator::Br { relative_depth } => { - // Compute source and target registers for branch - let (source_regs, target_result_regs) = compute_branch_regs( - &control_info_stack, - *relative_depth as usize, - reg_allocator.as_ref(), - ); + // Compute source and target registers for branch. + // A depth at/beyond the control stack targets the function + // level: source regs are the top-of-stack regs matching the + // function result types; target regs (the function-level + // end's source regs) are patched during fixup. + let (source_regs, target_result_regs) = + if *relative_depth as usize >= control_info_stack.len() { + (allocator.peek_regs_for_types(result_types), Vec::new()) + } else { + compute_branch_regs( + &control_info_stack, + *relative_depth as usize, + reg_allocator.as_ref(), + ) + }; let instr = ProcessedInstr::BrReg { - relative_depth: *relative_depth, target_ip: usize::MAX, // Will be set by fixup source_regs: source_regs.clone().into_boxed_slice(), target_result_regs: target_result_regs.into_boxed_slice(), @@ -5771,14 +5880,20 @@ fn decode_processed_instrs_and_fixups<'a>( allocator.pop(&ValueType::NumType(NumType::I32)); // Compute source and target registers for branch - let (source_regs, target_result_regs) = compute_branch_regs( - &control_info_stack, - *relative_depth as usize, - reg_allocator.as_ref(), - ); + // Function-level depth: source regs from the function + // result types; target regs patched during fixup. + let (source_regs, target_result_regs) = + if *relative_depth as usize >= control_info_stack.len() { + (allocator.peek_regs_for_types(result_types), Vec::new()) + } else { + compute_branch_regs( + &control_info_stack, + *relative_depth as usize, + reg_allocator.as_ref(), + ) + }; let instr = ProcessedInstr::BrIfReg { - relative_depth: *relative_depth, target_ip: usize::MAX, // Will be set by fixup cond_reg, source_regs: source_regs.clone().into_boxed_slice(), @@ -5811,7 +5926,7 @@ fn decode_processed_instrs_and_fixups<'a>( let (_, target_result_regs) = compute_branch_regs( &control_info_stack, *depth as usize, - reg_allocator.as_ref(), + Some(&*allocator), ); table_targets.push(( *depth, @@ -5821,12 +5936,19 @@ fn decode_processed_instrs_and_fixups<'a>( // target_ip will be set by fixup } - // Compute source and target registers for default target - let (source_regs, default_result_regs) = compute_branch_regs( - &control_info_stack, - targets.default() as usize, - reg_allocator.as_ref(), - ); + // Compute source and target registers for default target. + // Function-level depth: source regs from the function + // result types; target regs patched during fixup. + let (source_regs, default_result_regs) = + if targets.default() as usize >= control_info_stack.len() { + (allocator.peek_regs_for_types(result_types), Vec::new()) + } else { + compute_branch_regs( + &control_info_stack, + targets.default() as usize, + Some(&*allocator), + ) + }; let default_target = ( targets.default(), usize::MAX,