diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..bf55fc4 Binary files /dev/null and b/.DS_Store differ diff --git a/harm-runtime/Cargo.toml b/harm-runtime/Cargo.toml index aa4a5a1..6a5dbed 100644 --- a/harm-runtime/Cargo.toml +++ b/harm-runtime/Cargo.toml @@ -12,4 +12,12 @@ publish = false [dependencies] harm = { workspace = true } -memmap2 = "0.9.9" +memmap2 = { version = "0.9.9", optional = true } +thiserror = "2.0.18" + +[features] +default = ["memmap2"] +memmap2 = ["dep:memmap2"] + +[dev-dependencies] +clear-cache = "0.1.3" diff --git a/harm-runtime/src/builder.rs b/harm-runtime/src/builder.rs new file mode 100644 index 0000000..4f55010 --- /dev/null +++ b/harm-runtime/src/builder.rs @@ -0,0 +1,159 @@ +/* Copyright (C) 2026 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +use std::collections::HashMap; + +use harm::reloc::{Addr64, LabelId, Offset64, Rel64, Rel64Error}; + +#[derive(Debug, thiserror::Error)] +pub enum BuilderError { + #[error("Address overflow: base 0x{0:016x}, offset 0x{1:016x}")] + AddressOverflow(u64, usize), + #[error("Offset overflow: base {0:016x}, offset {1:016x}")] + OffsetOverflow(u64, i64), + #[error("Relocation error: {nested:?} at offset {offset:016x}")] + Relocation { nested: Rel64Error, offset: usize }, + #[error("Undefined label: {0:?}")] + UndefinedLabel(LabelId), +} + +/// Performs static relocation: recalculates labels and applies relocations, producing memory ready for execution. +/// +/// Please note that the real memory location may differ from the base address: this allows building in some buffer +/// and then moving the data to its real position later. 
+pub struct Builder<'mem> { + mem: &'mem mut [u8], // real memory + base: Addr64, // virtual base, on ARM system usually matches with `mem` start +} + +impl<'mem> Builder<'mem> { + pub fn new(mem: &'mem mut [u8], base: Addr64) -> Self { + Self { mem, base } + } + + pub fn build( + self, + named_labels: impl Iterator, + labels: impl Iterator, + relocations: impl Iterator, + ) -> Result, BuilderError> { + // Recalculate labels. + let labels: HashMap<_, _> = labels + .map(|(label_id, offset)| { + let addr = self + .base + .checked_add_signed(offset) + .ok_or(BuilderError::OffsetOverflow(self.base, offset)); + addr.map(|addr| (label_id, addr)) + }) + .collect::>()?; + + // Calculate label addresses. + let label_addresses = named_labels + .map(|(name, label_id)| { + let label_addr = labels + .get(&label_id) + .copied() + .ok_or(BuilderError::UndefinedLabel(label_id))?; + Ok((name.to_owned(), label_addr)) + }) + .collect::>()?; + + // Apply relocations to the self.mem. + for (offset, rel) in relocations { + let label_addr = labels + .get(&rel.label.id) + .copied() + .ok_or(BuilderError::UndefinedLabel(rel.label.id))?; + // TODO is it wrapping? 
+ let label_ref_addr = label_addr.wrapping_add_signed(rel.label.addend); + + rel.apply(self.base, label_ref_addr, self.mem, offset) + .map_err(|nested| BuilderError::Relocation { nested, offset })?; + } + + Ok(label_addresses) + } +} + +#[cfg(test)] +mod tests { + use harm::reloc::{LabelId, LabelRef, Rel64Tag}; + + use super::*; + + #[test] + fn test_good_offset() { + let mut mem = vec![0u8; 4]; + let builder = Builder::new(&mut mem, 0); + let label_ref = LabelRef { + id: LabelId(0), + addend: 0, + }; + let relocations = [(0, Rel64::new(Rel64Tag::NONE, label_ref))]; + let res = builder.build( + [].into_iter(), + [(LabelId(0), 4)].into_iter(), + relocations.into_iter(), + ); + + assert!(res.is_ok(), "{res:?}"); + } + + #[test] + fn test_bad_offset() { + let mut mem = vec![0u8; 4]; + let builder = Builder::new(&mut mem, 0); + let label_ref = LabelRef { + id: LabelId(0), + addend: 0, + }; + // N.B. NONE relocation is 0 bytes wide, so 4 doesn't fail. Use 5. + let relocations = [(5, Rel64::new(Rel64Tag::NONE, label_ref))]; + let res = builder.build( + [].into_iter(), + [(LabelId(0), 4)].into_iter(), + relocations.into_iter(), + ); + + assert!( + matches!( + res, + Err(BuilderError::Relocation { + nested: _, + offset: _ + }) + ), + "{res:?}" + ); + } + + #[test] + fn test_bad_offset_max() { + let mut mem = vec![0u8; 4]; + let builder = Builder::new(&mut mem, 0); + let label_ref = LabelRef { + id: LabelId(0), + addend: 0, + }; + let relocations = [(usize::MAX, Rel64::new(Rel64Tag::NONE, label_ref))]; + let res = builder.build( + [].into_iter(), + [(LabelId(0), 4)].into_iter(), + relocations.into_iter(), + ); + + assert!( + matches!( + res, + Err(BuilderError::Relocation { + nested: _, + offset: _ + }) + ), + "{res:?}" + ); + } +} diff --git a/harm-runtime/src/labels.rs b/harm-runtime/src/labels.rs index c655256..f71624c 100644 --- a/harm-runtime/src/labels.rs +++ b/harm-runtime/src/labels.rs @@ -3,11 +3,110 @@ * This document is licensed under the BSD 3-clause license. 
*/ -use harm::reloc::Offset; +use std::collections::HashMap; + +use harm::reloc::{LabelId, Offset64}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum LabelInfo { Forward, // TODO segment - Offset(Offset), + Offset(Offset64), +} + +#[derive(Debug, Default)] +pub struct LabelRegistry { + named_labels: HashMap, + labels: HashMap, + next_id: usize, +} + +impl LabelRegistry { + #[inline] + pub fn new() -> Self { + Self::default() + } + + #[inline] + pub fn get_forward_named_label(&mut self, name: &str) -> LabelId { + if let Some(id) = self.named_labels.get(name) { + *id + } else { + let id = self.next_label(); + self.named_labels.insert(name.to_string(), id); + self.labels.insert(id, LabelInfo::Forward); + id + } + } + + #[inline] + pub fn forward_label(&mut self) -> LabelId { + let id = self.next_label(); + self.labels.insert(id, LabelInfo::Forward); + id + } + + /// Define the label so that its address is the base address plus `offset`. + pub fn define_label(&mut self, label_id: LabelId, offset: Offset64) { + if let Some(info) = self.labels.get_mut(&label_id) { + match info { + LabelInfo::Forward => { + *info = LabelInfo::Offset(offset); + } + LabelInfo::Offset(_) => { + todo!("Label {label_id:?} is already defined"); + } + } + } else { + todo!("Label {label_id:?} is not registered"); + } + } + + /// Define the label so that its address is the base address plus `offset`. + #[inline] + pub fn define_named_label(&mut self, name: &str, offset: Offset64) -> LabelId { + if let Some(id) = self.named_labels.get(name).copied() { + self.labels.insert(id, LabelInfo::Offset(offset)); + id + } else { + let id = self.next_label(); + self.named_labels.insert(name.to_string(), id); + self.labels.insert(id, LabelInfo::Offset(offset)); + id + } + } + + /// Turn the label into a named one. 
+ pub fn name_label(&mut self, id: LabelId, name: &str) { + if self.labels.contains_key(&id) { + self.named_labels.insert(name.to_string(), id); + } else { + todo!("Label {id:?} is not registered"); + } + } + + /// Return current label info. + #[inline] + pub fn label_info(&self, id: LabelId) -> Option<&LabelInfo> { + self.labels.get(&id) + } + + pub fn get_named_labels(&self) -> impl Iterator { + self.named_labels + .iter() + .map(|(name, id)| (name.as_str(), *id)) + } + + pub fn get_defined_labels(&self) -> impl Iterator { + self.labels.iter().filter_map(|(id, info)| match info { + LabelInfo::Offset(offset) => Some((*id, *offset)), + LabelInfo::Forward => None, + }) + } + + fn next_label(&mut self) -> LabelId { + let id = LabelId(self.next_id); + self.next_id += 1; + id + } } diff --git a/harm-runtime/src/lib.rs b/harm-runtime/src/lib.rs index cdcc60b..4af3596 100644 --- a/harm-runtime/src/lib.rs +++ b/harm-runtime/src/lib.rs @@ -1,6 +1,9 @@ -/* Copyright (C) 2025 Ivan Boldyrev +/* Copyright (C) 2026 Ivan Boldyrev * * This document is licensed under the BSD 3-clause license. */ +pub mod builder; pub mod labels; +pub mod memory; +pub mod runtime; diff --git a/harm-runtime/src/memory.rs b/harm-runtime/src/memory.rs new file mode 100644 index 0000000..916794b --- /dev/null +++ b/harm-runtime/src/memory.rs @@ -0,0 +1,105 @@ +/* Copyright (C) 2026 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +#[cfg(feature = "memmap2")] +mod memmap2; + +use harm::reloc::Addr64; + +#[cfg(feature = "memmap2")] +pub use self::memmap2::{MmapBuffer, MmapPositionedMemory}; + +pub mod foreign_memory; +pub use self::foreign_memory::ForeignMemoryBuffer; + +pub trait Memory { + type ExtendError; + + /// Current writing position. + fn pos(&self) -> usize; + + /// If the memory has fixed capacity, return it. + /// + /// A `Vec` is not considered a memory of fixed capacity because it can grow indefinitely. 
+ fn capacity(&self) -> Option; + + /// Append data to the memory. + /// + /// Should fail when it reaches memory's capacity. In this case, `self.pos()` must not change, but the memory behind + /// it till the end of the capacity may be modified. + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError>; + + /// Align position. Same guarantees as `try_extend` apply. + fn align(&mut self, alignment: usize) -> Result<(), Self::ExtendError> { + if alignment > 1 { + let pos = self.pos(); + let remn = pos % alignment; + if remn != 0 { + self.try_extend(core::iter::repeat_n(0, alignment - remn))?; + } + } + Ok(()) + } +} + +pub trait IntoPositionedMemory { + type PositionedMemoryError; + + /// Transform into positioned memory. + fn into_positioned_memory(self) -> Result; +} + +/// Memory with fixed location that can be transformed to an executable one after relocations are applied. +pub trait PositionedMemory: AsMut<[u8]> { + fn get_base_address(&self) -> Addr64; +} + +pub trait IntoExecutableMemory { + type ExecutableMemory; + type ExecutableMemoryError; + + fn into_executable_memory(self) -> Result; +} + +#[cfg(test)] +mod tests { + #[test] + fn test_align() { + use super::*; + + let mut data = &mut Vec::::new(); + + Memory::align(&mut data, 8).unwrap(); + assert!(data.is_empty()); + + data.push(1); + Memory::align(&mut data, 8).unwrap(); + assert_eq!(data.len(), 8); + + Memory::align(&mut data, 8).unwrap(); + assert_eq!(data.len(), 8); + + data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7]); + Memory::align(&mut data, 8).unwrap(); + assert_eq!(data.len(), 16); + } + + #[test] + fn test_align_corner_case() { + use super::*; + + let mut data = &mut Vec::::new(); + + Memory::align(&mut data, 0).unwrap(); + assert!(data.is_empty()); + + data.push(1); + Memory::align(&mut data, 0).unwrap(); + assert_eq!(data.len(), 1); + + Memory::align(&mut data, 1).unwrap(); + assert_eq!(data.len(), 1); + } +} diff --git a/harm-runtime/src/memory/foreign_memory.rs 
b/harm-runtime/src/memory/foreign_memory.rs new file mode 100644 index 0000000..e68b5ce --- /dev/null +++ b/harm-runtime/src/memory/foreign_memory.rs @@ -0,0 +1,103 @@ +/* Copyright (C) 2026 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +use harm::reloc::Addr64; + +use crate::memory::PositionedMemory; + +use super::{IntoPositionedMemory, Memory}; + +/// Memory that is not intended to be executed immediately, but stored or transferred. +pub struct ForeignMemoryBuffer { + mem: std::vec::Vec, + base_addr: Addr64, +} + +impl ForeignMemoryBuffer { + pub fn new(base_addr: Addr64) -> Self { + Self { + mem: Vec::new(), + base_addr, + } + } + + pub fn with_capacity(base_addr: Addr64, capacity: usize) -> Self { + Self { + mem: Vec::with_capacity(capacity), + base_addr, + } + } + + pub fn base_addr(&self) -> Addr64 { + self.base_addr + } +} + +impl AsRef<[u8]> for ForeignMemoryBuffer { + fn as_ref(&self) -> &[u8] { + &self.mem + } +} + +impl Memory for ForeignMemoryBuffer { + type ExtendError = core::convert::Infallible; + + fn pos(&self) -> usize { + self.mem.len() + } + + fn capacity(&self) -> Option { + None // unrestricted + } + + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError> { + self.mem.extend(bytes); + Ok(()) + } +} + +impl IntoPositionedMemory for ForeignMemoryBuffer { + type PositionedMemoryError = core::convert::Infallible; + + fn into_positioned_memory(self) -> Result { + Ok(ForeignMemory { + mem: self.mem, + base_addr: self.base_addr, + }) + } +} + +pub struct ForeignMemory { + mem: std::vec::Vec, + base_addr: Addr64, +} + +impl ForeignMemory { + pub fn base_addr(&self) -> Addr64 { + self.base_addr + } + + pub fn into_inner(self) -> (Addr64, std::vec::Vec) { + (self.base_addr, self.mem) + } +} + +impl AsRef<[u8]> for ForeignMemory { + fn as_ref(&self) -> &[u8] { + &self.mem + } +} + +impl AsMut<[u8]> for ForeignMemory { + fn as_mut(&mut self) -> &mut [u8] { + &mut self.mem + } +} + +impl 
PositionedMemory for ForeignMemory { + fn get_base_address(&self) -> Addr64 { + self.base_addr + } +} diff --git a/harm-runtime/src/memory/memmap2.rs b/harm-runtime/src/memory/memmap2.rs new file mode 100644 index 0000000..df0bd5d --- /dev/null +++ b/harm-runtime/src/memory/memmap2.rs @@ -0,0 +1,225 @@ +/* Copyright (C) 2026 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +use std::convert::Infallible; + +use harm::reloc::Addr64; + +use super::{IntoExecutableMemory, IntoPositionedMemory, Memory, PositionedMemory}; + +#[derive(thiserror::Error, Debug)] +pub enum MapBufferError { + #[error("buffer overflow: {0}")] + Overflow(usize), +} + +pub struct MmapBuffer { + pos: usize, + memory: memmap2::MmapMut, +} + +impl MmapBuffer { + #[inline] + pub fn new(mmap_mut: memmap2::MmapMut) -> Self { + Self { + pos: 0, + // N.B. We assume that memory is aligned. + memory: mmap_mut, + } + } + + #[inline] + pub fn allocate(length: usize) -> std::io::Result { + let mmap_mut = memmap2::MmapMut::map_anon(length)?; + Ok(Self::new(mmap_mut)) + } +} + +impl Memory for MmapBuffer { + type ExtendError = MapBufferError; + + #[inline] + fn pos(&self) -> usize { + self.pos + } + + #[inline] + fn capacity(&self) -> Option { + Some(self.memory.len()) + } + + #[inline] + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError> { + let mut pos = self.pos; + for byte in bytes { + if pos >= self.memory.len() { + return Err(MapBufferError::Overflow(self.pos)); + } + + self.memory[pos] = byte; + pos += 1; + } + // Success, update position. 
+ self.pos = pos; + Ok(()) + } +} + +impl IntoPositionedMemory for MmapBuffer { + type PositionedMemoryError = Infallible; + + #[inline] + fn into_positioned_memory(self) -> Result { + Ok(MmapPositionedMemory::new(self.memory)) + } +} + +pub struct MmapPositionedMemory(memmap2::MmapMut); + +impl MmapPositionedMemory { + #[inline] + pub fn new(mmap_mut: memmap2::MmapMut) -> Self { + Self(mmap_mut) + } + + #[inline] + pub fn allocate(length: usize) -> std::io::Result { + let mmap_mut = memmap2::MmapMut::map_anon(length)?; + Ok(Self(mmap_mut)) + } +} + +impl AsRef<[u8]> for MmapPositionedMemory { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl AsMut<[u8]> for MmapPositionedMemory { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +impl PositionedMemory for MmapPositionedMemory { + // TODO makes sense only on AArch64. + fn get_base_address(&self) -> Addr64 { + self.0.as_ptr() as Addr64 + } +} + +impl IntoExecutableMemory for MmapPositionedMemory { + type ExecutableMemory = memmap2::Mmap; + type ExecutableMemoryError = std::io::Error; + + #[inline] + fn into_executable_memory(self) -> Result { + self.0.make_exec() + } +} + +impl Memory for &mut Vec { + type ExtendError = Infallible; + + #[inline] + fn pos(&self) -> usize { + self.len() + } + + #[inline] + fn capacity(&self) -> Option { + None + } + + #[inline] + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError> { + self.extend(bytes); + Ok(()) + } +} + +impl IntoPositionedMemory for &mut Vec { + type PositionedMemoryError = std::io::Error; + + #[inline] + fn into_positioned_memory(self) -> Result { + let mut mem = MmapPositionedMemory::allocate(self.len())?; + // The memmap2 spec doesn't say that the length can be different... 
+ mem.as_mut().copy_from_slice(self); + Ok(mem) + } +} + +#[cfg(test)] +mod tests { + use harm::instructions::InstructionSeq; + + use super::*; + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_mmap_execute() { + use harm::{ + instructions::{arith::add::add, control::ret}, + register::Reg64::*, + }; + let mut buf = MmapBuffer::allocate(8).expect("mmap failed, system problem"); + buf.try_extend(add(X0, X0, X1).bytes()).unwrap(); + buf.try_extend(ret().bytes()).unwrap(); + + let mem = buf.into_positioned_memory().unwrap(); + // Doing relocations... + + let exec = mem.into_executable_memory().unwrap(); + + let res; + unsafe { + clear_cache::clear_cache(exec.as_ptr(), exec.as_ptr().add(exec.len())); + + let func: unsafe extern "C" fn(i64, i64) -> i64 = std::mem::transmute(exec.as_ptr()); + res = func(1, 2); + } + assert_eq!(res, 3); + } + + #[test] + fn test_try_extend_1023() { + let mut buf = MmapBuffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 1023].into_iter()).unwrap(); + } + + #[test] + fn test_try_extend_1024() { + let mut buf = MmapBuffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 1024].into_iter()).unwrap(); + } + + #[test] + fn test_try_extend_1025() { + let mut buf = MmapBuffer::allocate(1024).expect("mmap failed, system problem"); + assert!(buf.try_extend(vec![1; 1025].into_iter()).is_err()); + assert_eq!(buf.pos(), 0); + } + + #[test] + fn test_try_extend_1020_plus_5() { + let mut buf = MmapBuffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 1020].into_iter()).unwrap(); + assert!(buf.try_extend(vec![1; 5].into_iter()).is_err()); + assert_eq!(buf.pos(), 1020); + } + + #[test] + fn test_try_extend_pair() { + let mut buf = MmapBuffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 512].into_iter()).unwrap(); + buf.try_extend(vec![1; 512].into_iter()).unwrap(); + assert_eq!(buf.pos(), 1024); + + 
assert!(buf.try_extend(vec![1; 1].into_iter()).is_err()); + } +} diff --git a/harm-runtime/src/runtime.rs b/harm-runtime/src/runtime.rs new file mode 100644 index 0000000..0336d9f --- /dev/null +++ b/harm-runtime/src/runtime.rs @@ -0,0 +1,221 @@ +/* Copyright (C) 2025 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +use std::collections::HashMap; + +use crate::builder::{Builder, BuilderError}; +use crate::labels::LabelRegistry; +use crate::memory::{IntoExecutableMemory, IntoPositionedMemory, Memory, PositionedMemory}; +use harm::instructions::InstructionSeq; +use harm::reloc::{Addr64, LabelId, Offset64, Rel64}; + +#[derive(Debug, thiserror::Error)] +pub enum AssemblerError { + #[error("builder error: {0}")] + Builder(#[from] BuilderError), + #[error("memory error: {0}")] + Memory(MemErr), + #[error("positioned memory error: {0}")] + PositionedMemory(PMErr), + #[error("executable memory error: {0}")] + ExecutableMemory(EMErr), +} +// N.B. we keep here internal relocation type, and convert it to external on serialization. +#[derive(Default)] +pub struct Assembler { + label_manager: LabelRegistry, + memory: Mem, + relocations: HashMap, +} + +impl Assembler { + #[inline] + pub fn new(mem: Mem) -> Self { + Self { + label_manager: LabelRegistry::new(), + memory: mem, + relocations: HashMap::new(), + } + } + + /// Build the program without making it executable. 
+ pub fn build( + self, + ) -> Result< + (FM, HashMap), + AssemblerError< + Mem::ExtendError, + >::PositionedMemoryError, + E, + >, + > + where + Mem: IntoPositionedMemory, + FM: PositionedMemory, + { + let mut fixed_memory = self + .memory + .into_positioned_memory() + .map_err(AssemblerError::PositionedMemory)?; + let base = fixed_memory.get_base_address(); + let builder = Builder::new(fixed_memory.as_mut(), base); + let labels = builder.build( + self.label_manager.get_named_labels(), + self.label_manager.get_defined_labels(), + self.relocations.into_iter(), + )?; + Ok((fixed_memory, labels)) + } + + /// Build the program and make it executable. + pub fn compile( + self, + ) -> Result< + ( + ::ExecutableMemory, + HashMap, + ), + AssemblerError< + Mem::ExtendError, + >::PositionedMemoryError, + ::ExecutableMemoryError, + >, + > + where + Mem: IntoPositionedMemory, + FM: PositionedMemory + IntoExecutableMemory, + { + let (fixed_memory, labels) = self.build()?; + let exec_memory = fixed_memory + .into_executable_memory() + .map_err(AssemblerError::ExecutableMemory)?; + Ok((exec_memory, labels)) + } + + pub fn append(&mut self, s: InstSeq) -> Result<(), Mem::ExtendError> { + // TODO align by instruction alignment? + for (inst, rel) in s.encode() { + let pos = self.memory.pos(); + self.memory.try_extend(inst.0.iter().cloned())?; + if let Some(rel) = rel { + self.relocations.insert(pos, rel); + } + } + Ok(()) + } + + // TODO the label has to be aligned. Except for data labels?.. + // For an instruction, it is always 4 bytes, but for data it can be different, from 1 to N bytes. 
+ pub fn current_label(&mut self) -> LabelId { + let pos = self.memory.pos(); + + // TODO can be fused + let label_id = self.label_manager.forward_label(); + self.label_manager.define_label(label_id, pos as Offset64); + + label_id + } + + pub fn current_named_label(&mut self, name: &str) -> LabelId { + let id = self.new_forward_named_label(name); + self.assign_forward_label(id); + id + } + + pub fn new_forward_label(&mut self) -> LabelId { + self.label_manager.forward_label() + } + + pub fn new_forward_named_label(&mut self, name: &str) -> LabelId { + self.label_manager.get_forward_named_label(name) + } + + pub fn assign_forward_label(&mut self, label_id: LabelId) { + let pos = self.memory.pos(); + + self.label_manager.define_label(label_id, pos as Offset64); + } +} + +#[cfg(test)] +mod tests { + use harm::{ + instructions::{ + arith::add::add, + control::{b, ret}, + dpimm::movz, + }, + register::Reg64, + reloc::LabelRef, + }; + + use crate::memory::{ForeignMemoryBuffer, foreign_memory::ForeignMemory}; + + use super::*; + + #[test] + fn test_assembler_build() { + let mem = ForeignMemoryBuffer::new(0x1000); + let mut asm = Assembler::new(mem); + + let finish_label = asm.new_forward_label(); + // TODO constructor + let finish_ref = LabelRef { + id: finish_label, + addend: 0, + }; + + asm.append(add(Reg64::X0, Reg64::X0, Reg64::X1)); + asm.append(b(finish_ref)); + asm.append(movz(Reg64::X0, 0)); + asm.assign_forward_label(finish_label); + asm.append(ret()); + + let (fm, _) = asm.build::().unwrap(); + + let mut expected = vec![]; + expected.extend(add(Reg64::X0, Reg64::X0, Reg64::X1).bytes()); + expected.extend(b(8).unwrap().bytes()); + expected.extend(movz(Reg64::X0, 0).bytes()); + expected.extend(ret().bytes()); + + assert_eq!(fm.as_ref(), &*expected); + } + + // Execute the code from the `test_assembler_build`. 
+ #[cfg(all(target_arch = "aarch64", feature = "memmap2"))] + #[test] + fn test_assembler_aarch64_execute() { + use crate::memory::MmapBuffer; + + let mem = MmapBuffer::allocate(16).unwrap(); + let mut asm = Assembler::new(mem); + + let finish_label = asm.new_forward_label(); + // TODO constructor + let finish_ref = LabelRef { + id: finish_label, + addend: 0, + }; + + let _start = asm.current_named_label("_start"); + asm.append(add(Reg64::X0, Reg64::X0, Reg64::X1)).unwrap(); + asm.append(b(finish_ref)).unwrap(); + asm.append(movz(Reg64::X0, 0)).unwrap(); + asm.assign_forward_label(finish_label); + asm.append(ret()).unwrap(); + + let (fm, labels) = asm.compile().unwrap(); + let start_addr = labels.get("_start").cloned().unwrap() as usize; + + let res = unsafe { + clear_cache::clear_cache(fm.as_ptr(), fm.as_ptr().add(fm.len())); + + let start: unsafe extern "C" fn(i64, i64) -> i64 = std::mem::transmute(start_addr); + start(42, 8) + }; + assert_eq!(res, 50); + } +} diff --git a/harm/src/reloc.rs b/harm/src/reloc.rs index f2a8c9b..60c6e06 100644 --- a/harm/src/reloc.rs +++ b/harm/src/reloc.rs @@ -24,8 +24,6 @@ mod control; mod data; mod movs; -use ::core::fmt; - use aarchmrs_types::InstructionCode; pub use self::addr::*; @@ -40,14 +38,44 @@ use crate::bits::BitError; #[repr(transparent)] pub struct LabelId(pub usize); -pub type Offset = i64; +// Every offset in an instruction does fit in i32. +// But "[relocation] is sign-extended to 64 bits". 
+pub type Offset64 = i64; + +pub type Addr64 = u64; + +#[derive(Debug)] +pub enum RelocationError { + CheckedOverflow, + OffsetOverflow, +} + +use ::core::fmt; + +impl fmt::Display for RelocationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use RelocationError::*; + + match self { + CheckedOverflow => write!(f, "Checked relocation overflow"), + OffsetOverflow => write!(f, "Offset overflow"), + } + } +} -pub type Addr = u64; +impl ::core::error::Error for RelocationError {} #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct LabelRef { pub id: LabelId, - pub addend: Offset, + pub addend: Offset64, +} + +// TODO refactor in a separate commit +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Relocation { + pub rel: Rel, + pub label: LabelRef, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -251,6 +279,17 @@ impl Rel64 { pub const fn movw_prel_g3(label: LabelRef) -> Self { Self::new(Rel64Tag::MOVW_PREL_G3, label) } + + #[inline] + pub fn apply( + self, + base: Addr64, + value: Addr64, + memory: &mut [u8], + offset: usize, + ) -> Result<(), Rel64Error> { + self.rel.apply(base, value, memory, offset) + } } #[derive(Debug, PartialEq, Eq, Clone)] @@ -348,8 +387,8 @@ impl Rel64Tag { /// location for flexibility: the memory can be moved to real destination later). pub fn apply( self, - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, memory: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -421,7 +460,7 @@ fn get_bytes_mut( /// A function for calculating PC-relative relocation signed difference S - P, where P is `base + offset` (checked) /// and S is `value`. 
-pub fn calc_delta(base: u64, value: u64, offset: usize) -> Result { +pub fn calc_delta(base: u64, value: u64, offset: usize) -> Result { let offset64 = offset .try_into() .map_err(|_e| Rel64Error::InvalidOffset { offset })?; @@ -437,7 +476,7 @@ pub fn calc_delta(base: u64, value: u64, offset: usize) -> Result Result { +pub fn calc_page_offset(base: u64, value: u64, offset: usize) -> Result { const PAGE_MASK: u64 = !0xfff; let offset64 = offset .try_into() diff --git a/harm/src/reloc/addr.rs b/harm/src/reloc/addr.rs index 7b5f151..490bd56 100644 --- a/harm/src/reloc/addr.rs +++ b/harm/src/reloc/addr.rs @@ -5,7 +5,7 @@ use aarchmrs_types::InstructionCode; -use super::{Addr, Rel64Error, cond_br19_reloc}; +use super::{Addr64, Rel64Error, cond_br19_reloc}; use crate::instructions::dpimm::{AdrOffset, AdrpOffset}; use crate::instructions::ldst::{ScaledOffset16, ScaledOffset32, ScaledOffset64, ScaledOffset128}; use crate::reloc::{calc_delta, calc_page_offset}; @@ -41,8 +41,8 @@ const LDST128_IMM12_WIDTH: u32 = 12u32; #[inline] pub fn ld_prel_lo19_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -51,8 +51,8 @@ pub fn ld_prel_lo19_reloc( #[inline] pub fn adr_prel_lo21_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -67,8 +67,8 @@ pub fn adr_prel_lo21_reloc( #[inline] pub fn adr_prel_pg_hi21_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -85,8 +85,8 @@ pub fn adr_prel_pg_hi21_reloc( #[inline] pub fn adr_prel_pg_hi21_nc_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -102,7 +102,7 @@ pub fn adr_prel_pg_hi21_nc_reloc( #[inline] pub fn add_abs_lo12_nc_reloc( - symbol: Addr, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> 
Result<(), Rel64Error> { @@ -128,7 +128,7 @@ fn patch_adr_adrp(mem: &mut [u8; 4], checked_value: u32) { #[inline] pub fn ldst8_abs_lo12_nc_reloc( - symbol: Addr, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -137,7 +137,7 @@ pub fn ldst8_abs_lo12_nc_reloc( #[inline] pub fn ldst16_abs_lo12_nc_reloc( - symbol: Addr, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -152,7 +152,7 @@ pub fn ldst16_abs_lo12_nc_reloc( #[inline] pub fn ldst32_abs_lo12_nc_reloc( - symbol: Addr, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -167,7 +167,7 @@ pub fn ldst32_abs_lo12_nc_reloc( #[inline] pub fn ldst64_abs_lo12_nc_reloc( - symbol: Addr, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -182,7 +182,7 @@ pub fn ldst64_abs_lo12_nc_reloc( #[inline] pub fn ldst128_abs_lo12_nc_reloc( - symbol: Addr, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { diff --git a/harm/src/reloc/control.rs b/harm/src/reloc/control.rs index b9b36d8..af21c6e 100644 --- a/harm/src/reloc/control.rs +++ b/harm/src/reloc/control.rs @@ -3,7 +3,7 @@ * This document is licensed under the BSD 3-clause license. 
*/ -use super::{Addr, Rel64Error, calc_delta, patch_instruction_bits}; +use super::{Addr64, Rel64Error, calc_delta, patch_instruction_bits}; use crate::instructions::control::{BranchCondOffset, BranchOffset, TestBranchOffset}; use crate::reloc::get_bytes_mut; @@ -18,8 +18,8 @@ const COND_BR_IMM19_WIDTH: u32 = 19u32; #[inline] pub fn jump26_reloc( - base: Addr, - target: Addr, + base: Addr64, + target: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -32,8 +32,8 @@ pub fn jump26_reloc( #[inline] pub fn call26_reloc( - base: Addr, - target: Addr, + base: Addr64, + target: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -42,8 +42,8 @@ pub fn call26_reloc( #[inline] pub fn tst_br14_reloc( - base: Addr, - target: Addr, + base: Addr64, + target: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -56,8 +56,8 @@ pub fn tst_br14_reloc( #[inline] pub fn cond_br19_reloc( - base: Addr, - target: Addr, + base: Addr64, + target: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { diff --git a/harm/src/reloc/data.rs b/harm/src/reloc/data.rs index 319f31e..68892a2 100644 --- a/harm/src/reloc/data.rs +++ b/harm/src/reloc/data.rs @@ -3,7 +3,7 @@ * This document is licensed under the BSD 3-clause license. 
*/ -use super::{Addr, Rel64Error, calc_delta, get_bytes_mut}; +use super::{Addr64, Rel64Error, calc_delta, get_bytes_mut}; #[inline] pub fn abs64_reloc(value: u64, mem: &mut [u8], offset: usize) -> Result<(), Rel64Error> { @@ -30,8 +30,8 @@ pub fn abs16_reloc(value: i64, mem: &mut [u8], offset: usize) -> Result<(), Rel6 #[inline] pub fn prel64_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -43,8 +43,8 @@ pub fn prel64_reloc( #[inline] pub fn prel32_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -58,8 +58,8 @@ pub fn prel32_reloc( #[inline] pub fn prel16_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -73,8 +73,8 @@ pub fn prel16_reloc( #[inline] pub fn plt32_reloc( - base: Addr, - symbol: Addr, + base: Addr64, + symbol: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { diff --git a/harm/src/reloc/movs.rs b/harm/src/reloc/movs.rs index a7b7bb0..f3164ea 100644 --- a/harm/src/reloc/movs.rs +++ b/harm/src/reloc/movs.rs @@ -4,7 +4,7 @@ */ use aarchmrs_types::InstructionCode; -use super::{Addr, Rel64Error, calc_delta, get_bytes_mut}; +use super::{Addr64, Rel64Error, calc_delta, get_bytes_mut}; use crate::bits::SBitValue; const MOV_OPCODE_OFFSET: u32 = 29; @@ -92,8 +92,8 @@ pub fn movw_sabs_g2_reloc(value: i64, mem: &mut [u8], offset: usize) -> Result<( #[inline] pub fn movw_prel_g0_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -103,8 +103,8 @@ pub fn movw_prel_g0_reloc( #[inline] pub fn movw_prel_g0_nc_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -114,8 +114,8 @@ pub fn movw_prel_g0_nc_reloc( #[inline] pub fn 
movw_prel_g1_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -125,8 +125,8 @@ pub fn movw_prel_g1_reloc( #[inline] pub fn movw_prel_g1_nc_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -136,8 +136,8 @@ pub fn movw_prel_g1_nc_reloc( #[inline] pub fn movw_prel_g2_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -147,8 +147,8 @@ pub fn movw_prel_g2_reloc( #[inline] pub fn movw_prel_g2_nc_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> { @@ -158,8 +158,8 @@ pub fn movw_prel_g2_nc_reloc( #[inline] pub fn movw_prel_g3_reloc( - base: Addr, - value: Addr, + base: Addr64, + value: Addr64, mem: &mut [u8], offset: usize, ) -> Result<(), Rel64Error> {