diff --git a/yjit.c b/yjit.c index 03930706ba..fe9098f0b5 100644 --- a/yjit.c +++ b/yjit.c @@ -56,7 +56,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM); // types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on // the Rust side. // -// What's up with the long prefix? The "rb_" part is to apease `make leaked-globals` +// What's up with the long prefix? The "rb_" part is to appease `make leaked-globals` // which runs on upstream CI. The rationale for the check is unclear to Alan as // we build with `-fvisibility=hidden` so only explicitly marked functions end // up as public symbols in libruby.so. Perhaps the check is for the static @@ -66,13 +66,13 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM); // The "_yjit_" part is for trying to be informative. We might want different // suffixes for symbols meant for Rust and symbols meant for broader CRuby. -void +bool rb_yjit_mark_writable(void *mem_block, uint32_t mem_size) { if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) { - rb_bug("Couldn't make JIT page region (%p, %lu bytes) writeable, errno: %s\n", - mem_block, (unsigned long)mem_size, strerror(errno)); + return false; } + return true; } void @@ -209,25 +209,29 @@ align_ptr(uint8_t *ptr, uint32_t multiple) } #endif -// Allocate a block of executable memory +// Address space reservation. Memory pages are mapped on an as needed basis. +// See the Rust mm module for details. uint8_t * -rb_yjit_alloc_exec_mem(uint32_t mem_size) +rb_yjit_reserve_addr_space(uint32_t mem_size) { #ifndef _WIN32 uint8_t *mem_block; // On Linux #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE) + uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE); + uint8_t *const cfunc_sample_addr = (void *)&rb_yjit_reserve_addr_space; + uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX; // Align the requested address to page size - uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE); - uint8_t *req_addr = align_ptr((uint8_t*)&rb_yjit_alloc_exec_mem, page_size); + uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size); + // Probe for addresses close to this function using MAP_FIXED_NOREPLACE + // to improve odds of being in range for 32-bit relative call instructions. 
do { - // Try to map a chunk of memory as executable - mem_block = (uint8_t*)mmap( - (void*)req_addr, + mem_block = mmap( + req_addr, mem_size, - PROT_READ | PROT_EXEC, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0 @@ -240,15 +244,15 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // +4MB req_addr += 4 * 1024 * 1024; - } while (req_addr < (uint8_t*)&rb_yjit_alloc_exec_mem + INT32_MAX); + } while (req_addr < probe_region_end); // On MacOS and other platforms #else // Try to map a chunk of memory as executable - mem_block = (uint8_t*)mmap( - (void*)rb_yjit_alloc_exec_mem, + mem_block = mmap( + (void *)rb_yjit_reserve_addr_space, mem_size, - PROT_READ | PROT_EXEC, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 @@ -258,10 +262,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // Fallback if (mem_block == MAP_FAILED) { // Try again without the address hint (e.g., valgrind) - mem_block = (uint8_t*)mmap( + mem_block = mmap( NULL, mem_size, - PROT_READ | PROT_EXEC, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 @@ -270,17 +274,10 @@ rb_yjit_alloc_exec_mem(uint32_t mem_size) // Check that the memory mapping was successful if (mem_block == MAP_FAILED) { - perror("mmap call failed"); - exit(-1); + perror("ruby: yjit: mmap:"); + rb_bug("mmap failed"); } - // Fill the executable memory with PUSH DS (0x1E) so that - // executing uninitialized memory will fault with #UD in - // 64-bit mode. - rb_yjit_mark_writable(mem_block, mem_size); - memset(mem_block, 0x1E, mem_size); - rb_yjit_mark_executable(mem_block, mem_size); - return mem_block; #else // Windows not supported for now diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 212013d70c..d8f3c98e89 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -239,6 +239,7 @@ fn main() { .allowlist_function("rb_iseq_(get|set)_yjit_payload") .allowlist_function("rb_iseq_pc_at_idx") .allowlist_function("rb_iseq_opcode_at_pc") + .allowlist_function("rb_yjit_reserve_addr_space") .allowlist_function("rb_yjit_mark_writable") .allowlist_function("rb_yjit_mark_executable") .allowlist_function("rb_yjit_get_page_size") diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 1d31facb78..e16e856925 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -3,49 +3,16 @@ use std::mem; #[cfg(feature = "asm_comments")] use std::collections::BTreeMap; +use crate::virtualmem::{VirtualMem, CodePtr}; + // Lots of manual vertical alignment in there that rustfmt doesn't handle well. #[rustfmt::skip] pub mod x86_64; -/// Pointer to a piece of machine code -/// We may later change this to wrap an u32 -/// Note: there is no NULL constant for CodePtr. You should use Option instead. 
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] -#[repr(C)] -pub struct CodePtr(*const u8); - -impl CodePtr { - pub fn raw_ptr(&self) -> *const u8 { - let CodePtr(ptr) = *self; - return ptr; - } - - fn into_i64(&self) -> i64 { - let CodePtr(ptr) = self; - *ptr as i64 - } - - #[allow(unused)] - fn into_usize(&self) -> usize { - let CodePtr(ptr) = self; - *ptr as usize - } -} - -impl From<*mut u8> for CodePtr { - fn from(value: *mut u8) -> Self { - assert!(value as usize != 0); - return CodePtr(value); - } -} - // // TODO: need a field_size_of macro, to compute the size of a struct field in bytes // -// 1 is not aligned so this won't match any pages -const ALIGNED_WRITE_POSITION_NONE: usize = 1; - /// Reference to an ASM label struct LabelRef { // Position in the code block where the label reference exists @@ -57,13 +24,8 @@ struct LabelRef { /// Block of memory into which instructions can be assembled pub struct CodeBlock { - // Block of non-executable memory used for dummy code blocks - // This memory is owned by this block and lives as long as the block - #[allow(unused)] - dummy_block: Vec<u8>, - - // Pointer to memory we are writing into - mem_block: *mut u8, + // Memory for storing the encoded instructions + mem_block: VirtualMem, // Memory block size mem_size: usize, @@ -84,14 +46,6 @@ pub struct CodeBlock { #[cfg(feature = "asm_comments")] asm_comments: BTreeMap<usize, Vec<String>>, - // Keep track of the current aligned write position. - // Used for changing protection when writing to the JIT buffer - current_aligned_write_pos: usize, - - // Memory protection works at page granularity and this is the - // the size of each page. Used to implement W^X. - page_size: usize, - // Set if the CodeBlock is unable to output some instructions, // for example, when there is not enough space or when a jump // target is too far away. 
@@ -99,47 +53,22 @@ pub struct CodeBlock { } impl CodeBlock { - #[cfg(test)] - pub fn new_dummy(mem_size: usize) -> Self { - // Allocate some non-executable memory - let mut dummy_block = vec![0; mem_size]; - let mem_ptr = dummy_block.as_mut_ptr(); - + /// Make a new CodeBlock + pub fn new(mem_block: VirtualMem) -> Self { Self { - dummy_block: dummy_block, - mem_block: mem_ptr, - mem_size: mem_size, + mem_size: mem_block.virtual_region_size(), + mem_block, write_pos: 0, label_addrs: Vec::new(), label_names: Vec::new(), label_refs: Vec::new(), #[cfg(feature = "asm_comments")] asm_comments: BTreeMap::new(), - current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE, - page_size: 4096, dropped_bytes: false, } } - #[cfg(not(test))] - pub fn new(mem_block: *mut u8, mem_size: usize, page_size: usize) -> Self { - Self { - dummy_block: vec![0; 0], - mem_block: mem_block, - mem_size: mem_size, - write_pos: 0, - label_addrs: Vec::new(), - label_names: Vec::new(), - label_refs: Vec::new(), - #[cfg(feature = "asm_comments")] - asm_comments: BTreeMap::new(), - current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE, - page_size, - dropped_bytes: false, - } - } - - // Check if this code block has sufficient remaining capacity + /// Check if this code block has sufficient remaining capacity pub fn has_capacity(&self, num_bytes: usize) -> bool { self.write_pos + num_bytes < self.mem_size } @@ -175,6 +104,10 @@ impl CodeBlock { self.write_pos } + pub fn get_mem(&mut self) -> &mut VirtualMem { + &mut self.mem_block + } + // Set the current write position pub fn set_pos(&mut self, pos: usize) { // Assert here since while CodeBlock functions do bounds checking, there is @@ -204,16 +137,13 @@ impl CodeBlock { // Set the current write position from a pointer pub fn set_write_ptr(&mut self, code_ptr: CodePtr) { - let pos = (code_ptr.raw_ptr() as usize) - (self.mem_block as usize); + let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize(); self.set_pos(pos); } // Get a direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { - unsafe { - let ptr = self.mem_block.add(offset); - CodePtr(ptr) - } + self.mem_block.start_ptr().add_bytes(offset) } // Get a direct pointer to the current write position @@ -223,9 +153,9 @@ impl CodeBlock { // Write a single byte at the current position pub fn write_byte(&mut self, byte: u8) { - if self.write_pos < self.mem_size { - self.mark_position_writable(self.write_pos); - unsafe { self.mem_block.add(self.write_pos).write(byte) }; + let write_ptr = self.get_write_ptr(); + + if self.mem_block.write_byte(write_ptr, byte).is_ok() { self.write_pos += 1; } else { self.dropped_bytes = true; @@ -328,33 +258,23 @@ impl CodeBlock { assert!(self.label_refs.is_empty()); } - pub fn mark_position_writable(&mut self, write_pos: usize) { - let page_size = self.page_size; - let aligned_position = (write_pos / page_size) * page_size; - - if self.current_aligned_write_pos != aligned_position { - self.current_aligned_write_pos = aligned_position; - - #[cfg(not(test))] - unsafe { - use core::ffi::c_void; - let page_ptr = self.get_ptr(aligned_position).raw_ptr() as *mut c_void; - crate::cruby::rb_yjit_mark_writable(page_ptr, page_size.try_into().unwrap()); - } - } - } - pub fn mark_all_executable(&mut self) { - self.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE; + self.mem_block.mark_all_executable(); + } +} - #[cfg(not(test))] - unsafe { - use core::ffi::c_void; - // NOTE(alan): Right now we do allocate one big chunck and give the top 
half to the outlined codeblock - // The start of the top half of the region isn't necessarily a page boundary... - let cb_start = self.get_ptr(0).raw_ptr() as *mut c_void; - crate::cruby::rb_yjit_mark_executable(cb_start, self.mem_size.try_into().unwrap()); - } +#[cfg(test)] +impl CodeBlock { + /// Stubbed CodeBlock for testing. Can't execute generated code. + pub fn new_dummy(mem_size: usize) -> Self { + use crate::virtualmem::*; + use crate::virtualmem::tests::TestingAllocator; + + let alloc = TestingAllocator::new(mem_size); + let mem_start: *const u8 = alloc.mem_start(); + let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size); + + Self::new(virt_mem) } } diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs index a2549faab8..6eb7efaa0a 100644 --- a/yjit/src/asm/x86_64/mod.rs +++ b/yjit/src/asm/x86_64/mod.rs @@ -862,7 +862,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { let end_ptr = cb.get_ptr(cb.write_pos + 4); // Compute the jump offset - let rel64 = (dst_ptr.0 as i64) - (end_ptr.0 as i64); + let rel64 = dst_ptr.into_i64() - end_ptr.into_i64(); if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() { // Write the relative 32-bit jump offset diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs index c77d30e74d..ffcc063420 100644 --- a/yjit/src/asm/x86_64/tests.rs +++ b/yjit/src/asm/x86_64/tests.rs @@ -7,7 +7,7 @@ use std::fmt; impl<'a> fmt::LowerHex for super::CodeBlock { fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { for pos in 0..self.write_pos { - let byte = unsafe { self.mem_block.add(pos).read() }; + let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() }; fmtr.write_fmt(format_args!("{:02x}", byte))?; } Ok(()) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ca2c237e2d..75249658fb 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -21,6 +21,8 @@ use std::os::raw::c_uint; use std::ptr; use std::slice; +pub use crate::virtualmem::CodePtr; + // Callee-saved registers pub const REG_CFP: X86Opnd = R13; pub const REG_EC: X86Opnd = R12; @@ -5982,14 +5984,53 @@ impl CodegenGlobals { #[cfg(not(test))] let (mut cb, mut ocb) = { - let page_size = unsafe { rb_yjit_get_page_size() }.as_usize(); - let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) }; - let cb = CodeBlock::new(mem_block, mem_size / 2, page_size); - let ocb = OutlinedCb::wrap(CodeBlock::new( - unsafe { mem_block.add(mem_size / 2) }, - mem_size / 2, + // TODO(alan): we can error more gracefully when the user gives + // --yjit-exec-mem=absurdly-large-number + // + // 2 GiB. It's likely a bug if we generate this much code. + const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024; + assert!(mem_size <= MAX_BUFFER_SIZE); + let mem_size_u32 = mem_size as u32; + let half_size = mem_size / 2; + + let page_size = unsafe { rb_yjit_get_page_size() }; + let assert_page_aligned = |ptr| assert_eq!( + 0, + ptr as usize % page_size.as_usize(), + "Start of virtual address block should be page-aligned", + ); + + let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) }; + let second_half = virt_block.wrapping_add(half_size); + + // Memory protection syscalls need page-aligned addresses, so check it here. Assuming + // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the + // page size in bytes is a power of two 2¹⁹ or smaller. This is because the user + // requested size is half of mem_option × 2²⁰ as it's in MiB. 
+ // + // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB + // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. + assert_page_aligned(virt_block); + assert_page_aligned(second_half); + + use crate::virtualmem::*; + + let first_half = VirtualMem::new( + SystemAllocator {}, page_size, - )); + virt_block, + half_size + ); + let second_half = VirtualMem::new( + SystemAllocator {}, + page_size, + second_half, + half_size + ); + + let cb = CodeBlock::new(first_half); + let ocb = OutlinedCb::wrap(CodeBlock::new(second_half)); + (cb, ocb) }; diff --git a/yjit/src/core.rs b/yjit/src/core.rs index bbc5e800c0..6d6877f273 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1,6 +1,7 @@ use crate::asm::x86_64::*; use crate::asm::*; use crate::codegen::*; +use crate::virtualmem::CodePtr; use crate::cruby::*; use crate::options::*; use crate::stats::*; @@ -9,7 +10,6 @@ use core::ffi::c_void; use std::cell::*; use std::hash::{Hash, Hasher}; use std::mem; -use std::mem::size_of; use std::rc::{Rc}; use InsnOpnd::*; use TempMapping::*; @@ -573,24 +573,22 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { // Walk over references to objects in generated code. for offset in &block.gc_object_offsets { let offset_to_value = offset.as_usize(); - let value_address: *const u8 = cb.get_ptr(offset_to_value).raw_ptr(); + let value_code_ptr = cb.get_ptr(offset_to_value); + let value_ptr: *const u8 = value_code_ptr.raw_ptr(); // Creating an unaligned pointer is well defined unlike in C. - let value_address = value_address as *mut VALUE; + let value_ptr = value_ptr as *mut VALUE; // SAFETY: these point to YJIT's code buffer - let object = unsafe { value_address.read_unaligned() }; + let object = unsafe { value_ptr.read_unaligned() }; let new_addr = unsafe { rb_gc_location(object) }; - // Only write when the VALUE moves, to be CoW friendly. + // Only write when the VALUE moves, to be copy-on-write friendly. if new_addr != object { - // Possibly unlock the page we need to update - cb.mark_position_writable(offset_to_value); - - // Object could cross a page boundary, so unlock there as well - cb.mark_position_writable(offset_to_value + size_of::() - 1); - - // SAFETY: we just made this address writable - unsafe { value_address.write_unaligned(new_addr) }; + for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() { + let byte_code_ptr = value_code_ptr.add_bytes(byte_idx); + cb.get_mem().write_byte(byte_code_ptr, byte) + .expect("patching existing code should be within bounds"); + } } } } @@ -599,8 +597,6 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { // Note that we would have returned already if YJIT is off. cb.mark_all_executable(); - // I guess we need to make the outlined block executable as well because - // we don't split the two at exact page boundaries. 
CodegenGlobals::get_outlined_cb() .unwrap() .mark_all_executable(); diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index da9a84a160..51ba9c1531 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -111,9 +111,6 @@ pub use autogened::*; // and textually included in this file #[cfg_attr(test, allow(unused))] // We don't link against C code when testing extern "C" { - #[link_name = "rb_yjit_alloc_exec_mem"] // we can rename functions with this attribute - pub fn alloc_exec_mem(mem_size: u32) -> *mut u8; - #[link_name = "rb_insn_name"] pub fn raw_insn_name(insn: VALUE) -> *const c_char; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index b5dd356aef..44f87a8482 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -976,7 +976,7 @@ extern "C" { ) -> ::std::os::raw::c_int; } extern "C" { - pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); + pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; } extern "C" { pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); @@ -991,6 +991,9 @@ extern "C" { extern "C" { pub fn rb_yjit_get_page_size() -> u32; } +extern "C" { + pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8; +} extern "C" { pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool; } diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs index 2313fdbce6..6772f551a8 100644 --- a/yjit/src/lib.rs +++ b/yjit/src/lib.rs @@ -13,3 +13,4 @@ mod options; mod stats; mod utils; mod yjit; +mod virtualmem; diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index e129cc2811..6bad8db7e7 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -141,7 +141,7 @@ macro_rules! incr_counter { ($counter_name:ident) => { #[allow(unused_unsafe)] { - unsafe { COUNTERS.$counter_name += 1 } + unsafe { $crate::stats::COUNTERS.$counter_name += 1 } } }; } @@ -244,6 +244,10 @@ make_counters! { gbpp_block_param_modified, gbpp_block_handler_not_iseq, + + // Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in + // executable memory, so this should be 0. + exec_mem_non_bump_alloc, } //=========================================================================== diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs new file mode 100644 index 0000000000..6a8e27447e --- /dev/null +++ b/yjit/src/virtualmem.rs @@ -0,0 +1,376 @@ +//! Memory management stuff for YJIT's code storage. Deals with virtual memory. +// I'm aware that there is an experiment in Rust Nightly right now to see if banning +// usize->pointer casts is viable. It seems like a lot of work for us to participate for not much +// benefit. + +use crate::utils::IntoUsize; + +#[cfg(not(test))] +pub type VirtualMem = VirtualMemory<SystemAllocator>; + +#[cfg(test)] +pub type VirtualMem = VirtualMemory<tests::TestingAllocator>; + +/// Memory for generated executable machine code. When not testing, we reserve address space for +/// the entire region upfront and map physical memory into the reserved address space as needed. On +/// Linux, this is basically done using an `mmap` with `PROT_NONE` upfront and gradually using +/// `mprotect` with `PROT_READ|PROT_WRITE` as needed. The WIN32 equivalent seems to be +/// `VirtualAlloc` with `MEM_RESERVE` then later with `MEM_COMMIT`. +/// +/// This handles ["W^X"](https://en.wikipedia.org/wiki/W%5EX) semi-automatically. 
Writes +/// are always accepted and once writes are done a call to [Self::mark_all_executable] makes +/// the code in the region executable. +pub struct VirtualMemory { + /// Location of the virtual memory region. + region_start: *mut u8, + + /// Size of the region in bytes. + region_size_bytes: usize, + + /// Number of bytes per "page", memory protection permission can only be controlled at this + /// granularity. + page_size_bytes: usize, + + /// Number of bytes that have we have allocated physical memory for starting at + /// [Self::region_start]. + mapped_region_bytes: usize, + + /// Keep track of the address of the last written to page. + /// Used for changing protection to implement W^X. + current_write_page: Option, + + /// Zero size member for making syscalls to get physical memory during normal operation. + /// When testing this owns some memory. + allocator: A, +} + +/// Groups together the two syscalls to get get new physical memory and to change +/// memory protection. See [VirtualMemory] for details. +pub trait Allocator { + #[must_use] + fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool; + + fn mark_executable(&mut self, ptr: *const u8, size: u32); +} + +/// Pointer into a [VirtualMemory]. +/// We may later change this to wrap an u32. +/// Note: there is no NULL constant for CodePtr. You should use Option instead. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] +#[repr(C)] +pub struct CodePtr(*const u8); + +/// Errors that can happen when writing to [VirtualMemory] +#[derive(Debug, PartialEq)] +pub enum WriteError { + OutOfBounds, + FailedPageMapping, +} + +use WriteError::*; + +impl VirtualMemory { + /// Bring a part of the address space under management. + pub fn new(allocator: A, page_size: u32, virt_region_start: *mut u8, size_bytes: usize) -> Self { + assert_ne!(0, page_size); + let page_size_bytes = page_size.as_usize(); + + Self { + region_start: virt_region_start, + region_size_bytes: size_bytes, + page_size_bytes, + mapped_region_bytes: 0, + current_write_page: None, + allocator, + } + } + + /// Return the start of the region as a raw pointer. Note that it could be a dangling + /// pointer so be careful dereferencing it. + pub fn start_ptr(&self) -> CodePtr { + CodePtr(self.region_start) + } + + /// Size of the region in bytes where writes could be attempted. + pub fn virtual_region_size(&self) -> usize { + self.region_size_bytes + } + + /// Write a single byte. The first write to a page makes it readable. + pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + let page_size = self.page_size_bytes; + let raw: *mut u8 = write_ptr.raw_ptr() as *mut u8; + let page_addr = (raw as usize / page_size) * page_size; + + if self.current_write_page == Some(page_addr) { + // Writing within the last written to page, nothing to do + } else { + // Switching to a different and potentially new page + let start = self.region_start; + let mapped_region_end = start.wrapping_add(self.mapped_region_bytes); + let whole_region_end = start.wrapping_add(self.region_size_bytes); + let alloc = &mut self.allocator; + + assert!((start..=whole_region_end).contains(&mapped_region_end)); + + if (start..mapped_region_end).contains(&raw) { + // Writing to a previously written to page. + // Need to make page writable, but no need to fill. 
+ let page_size: u32 = page_size.try_into().unwrap(); + if !alloc.mark_writable(page_addr as *const _, page_size) { + return Err(FailedPageMapping); + } + + self.current_write_page = Some(page_addr); + } else if (start..whole_region_end).contains(&raw) { + // Writing to a brand new page + let mapped_region_end_addr = mapped_region_end as usize; + let alloc_size = page_addr - mapped_region_end_addr + page_size; + + assert_eq!(0, alloc_size % page_size, "allocation size should be page aligned"); + assert_eq!(0, mapped_region_end_addr % page_size, "pointer should be page aligned"); + + if alloc_size > page_size { + // This is unusual for the current setup, so keep track of it. + crate::stats::incr_counter!(exec_mem_non_bump_alloc); + } + + // Allocate new chunk + let alloc_size_u32: u32 = alloc_size.try_into().unwrap(); + unsafe { + if !alloc.mark_writable(mapped_region_end.cast(), alloc_size_u32) { + return Err(FailedPageMapping); + } + // Fill new memory with PUSH DS (0x1E) so that executing uninitialized memory + // will fault with #UD in 64-bit mode. On Linux it becomes SIGILL and uses the + // usual Ruby crash reporter. + std::slice::from_raw_parts_mut(mapped_region_end, alloc_size).fill(0x1E); + } + self.mapped_region_bytes = self.mapped_region_bytes + alloc_size; + + self.current_write_page = Some(page_addr); + } else { + return Err(OutOfBounds); + } + } + + // We have permission to write if we get here + unsafe { raw.write(byte) }; + + Ok(()) + } + + /// Make all the code in the region executable. Call this at the end of a write session. + /// See [Self] for usual usage flow. + pub fn mark_all_executable(&mut self) { + self.current_write_page = None; + + let region_start = self.region_start; + let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap(); + + // Make mapped region executable + self.allocator.mark_executable(region_start, mapped_region_bytes); + } +} + +impl CodePtr { + /// Note that the raw pointer might be dangling if there haven't + /// been any writes to it through the [VirtualMemory] yet. + pub fn raw_ptr(self) -> *const u8 { + let CodePtr(ptr) = self; + return ptr; + } + + /// Advance the CodePtr. Can return a dangling pointer. + pub fn add_bytes(self, bytes: usize) -> Self { + let CodePtr(raw) = self; + CodePtr(raw.wrapping_add(bytes)) + } + + pub fn into_i64(self) -> i64 { + let CodePtr(ptr) = self; + ptr as i64 + } + + pub fn into_usize(self) -> usize { + let CodePtr(ptr) = self; + ptr as usize + } +} + +impl From<*mut u8> for CodePtr { + fn from(value: *mut u8) -> Self { + assert!(value as usize != 0); + return CodePtr(value); + } +} + +/// Requires linking with CRuby to work +#[cfg(not(test))] +mod sys { + use crate::cruby::*; + + /// Zero size! This just groups together syscalls that require linking with CRuby. + pub struct SystemAllocator; + + type VoidPtr = *mut std::os::raw::c_void; + + impl super::Allocator for SystemAllocator { + fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool { + unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) } + } + + fn mark_executable(&mut self, ptr: *const u8, size: u32) { + unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) } + } + } +} + +#[cfg(not(test))] +pub(crate) use sys::*; + + +#[cfg(test)] +pub mod tests { + use crate::utils::IntoUsize; + use super::*; + + // Tracks allocation requests and owns some fixed-size backing memory for requests. + // While testing we don't execute generated code. 
+ pub struct TestingAllocator { + requests: Vec<AllocRequest>, + memory: Vec<u8>, + } + + #[derive(Debug)] + enum AllocRequest { + MarkWritable{ start_idx: usize, length: usize }, + MarkExecutable{ start_idx: usize, length: usize }, + } + use AllocRequest::*; + + impl TestingAllocator { + pub fn new(mem_size: usize) -> Self { + Self { requests: Vec::default(), memory: vec![0; mem_size] } + } + + pub fn mem_start(&self) -> *const u8 { + self.memory.as_ptr() + } + + // Verify that write_byte() bounds checks. Return `ptr` as an index. + fn bounds_check_request(&self, ptr: *const u8, size: u32) -> usize { + let mem_start = self.memory.as_ptr() as usize; + let index = ptr as usize - mem_start; + + assert!(index < self.memory.len()); + assert!(index + size.as_usize() <= self.memory.len()); + + index + } + } + + // Bounds check and then record the request + impl super::Allocator for TestingAllocator { + fn mark_writable(&mut self, ptr: *const u8, length: u32) -> bool { + let index = self.bounds_check_request(ptr, length); + self.requests.push(MarkWritable { start_idx: index, length: length.as_usize() }); + + true + } + + fn mark_executable(&mut self, ptr: *const u8, length: u32) { + let index = self.bounds_check_request(ptr, length); + self.requests.push(MarkExecutable { start_idx: index, length: length.as_usize() }); + + // We don't try to execute generated code in cfg(test) + // so no need to actually request executable memory. + } + } + + // Fictional architecture where each page is 4 bytes long + const PAGE_SIZE: usize = 4; + fn new_dummy_virt_mem() -> VirtualMemory<TestingAllocator> { + let mem_size = PAGE_SIZE * 10; + let alloc = TestingAllocator::new(mem_size); + let mem_start: *const u8 = alloc.mem_start(); + + VirtualMemory::new( + alloc, + PAGE_SIZE.try_into().unwrap(), + mem_start as *mut u8, + mem_size, + ) + } + + #[test] + fn new_memory_is_initialized() { + let mut virt = new_dummy_virt_mem(); + + virt.write_byte(virt.start_ptr(), 1).unwrap(); + assert!( + virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0), + "Entire page should be initialized", + ); + + // Skip a few pages + let three_pages = 3 * PAGE_SIZE; + virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap(); + assert!( + virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0), + "Gaps between write requests should be filled", + ); + } + + #[test] + fn no_redundant_syscalls_when_writing_to_the_same_page() { + let mut virt = new_dummy_virt_mem(); + + virt.write_byte(virt.start_ptr(), 1).unwrap(); + virt.write_byte(virt.start_ptr(), 0).unwrap(); + + assert!( + matches!( + virt.allocator.requests[..], + [MarkWritable { start_idx: 0, length: PAGE_SIZE }], + ) + ); + } + + #[test] + fn bounds_checking() { + use super::WriteError::*; + use std::ptr; + let mut virt = new_dummy_virt_mem(); + + let null = CodePtr(ptr::null()); + assert_eq!(Err(OutOfBounds), virt.write_byte(null, 0)); + + let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size()); + assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0)); + + let end_of_addr_space = CodePtr(usize::MAX as _); + assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0)); + } + + #[test] + fn only_written_to_regions_become_executable() { + // ... 
so we catch attempts to read/write/execute never-written-to regions + const THREE_PAGES: usize = PAGE_SIZE * 3; + let mut virt = new_dummy_virt_mem(); + let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2); + virt.write_byte(page_two_start, 1).unwrap(); + virt.mark_all_executable(); + + assert!(virt.virtual_region_size() > THREE_PAGES); + assert!( + matches!( + virt.allocator.requests[..], + [ + MarkWritable { start_idx: 0, length: THREE_PAGES }, + MarkExecutable { start_idx: 0, length: THREE_PAGES }, + ] + ), + ); + } +}
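The VirtualMemory doc comment above promises a semi-automatic W^X flow: writes are accepted as pages get mapped writable, and a final mark_all_executable() flips the mapped part of the region to executable. As a rough illustration (not part of the patch), here is a minimal sketch of that session flow written against the cfg(test) TestingAllocator from this file; the function name, page size constant, and byte values are made up for the example, and in a real build the SystemAllocator backs the same calls with rb_yjit_mark_writable / rb_yjit_mark_executable.

#[cfg(test)]
fn wx_session_sketch() {
    use crate::virtualmem::VirtualMemory;
    use crate::virtualmem::tests::TestingAllocator;

    const PAGE_SIZE: usize = 4096; // illustrative; YJIT actually queries rb_yjit_get_page_size()
    let mem_size = PAGE_SIZE * 8;

    let alloc = TestingAllocator::new(mem_size);
    let start = alloc.mem_start() as *mut u8;
    let mut virt = VirtualMemory::new(alloc, PAGE_SIZE as u32, start, mem_size);

    // The first write to a page triggers a mark_writable request for that page.
    virt.write_byte(virt.start_ptr(), 0x90).unwrap();
    virt.write_byte(virt.start_ptr().add_bytes(1), 0xc3).unwrap();

    // Writes outside the reserved region fail instead of crashing the process.
    let past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
    assert!(virt.write_byte(past_end, 0).is_err());

    // End of the write session: everything mapped so far becomes executable.
    virt.mark_all_executable();
}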
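The page-alignment comment added in codegen.rs argues that, because the user's mem option is given in MiB, splitting the reserved block in half keeps second_half page-aligned for any power-of-two page size up to 2¹⁹ bytes. A small self-contained check of that arithmetic (illustrative only; the function name and sampled sizes are not from the patch):

fn half_size_stays_page_aligned() {
    // mem_size = n MiB = n * 2^20 bytes, so half_size = n * 2^19 bytes.
    // Any power-of-two page size up to 2^19 divides 2^19, hence divides half_size,
    // so virt_block + half_size is page-aligned whenever virt_block is.
    for n_mib in [1usize, 16, 64, 256] {
        let mem_size = n_mib << 20;
        let half_size = mem_size / 2;
        for page_shift in 12..=19 {
            let page_size = 1usize << page_shift; // 4 KiB up to the 512 KiB (2^19) limit
            assert_eq!(0, half_size % page_size);
        }
    }
}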