YJIT: GC and recompile all code pages (#6406)

GC and recompile all code pages when YJIT fails to allocate a new page.

Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>

parent 1d2d25dcad
commit b7644a2311
Notes (git, 2022-10-25 16:07:34 +00:00): Merged-By: k0kubun <takashikkbn@gmail.com>

12 changed files with 454 additions and 32 deletions
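The observable effect of the change is a set of new counters in RubyVM::YJIT.runtime_stats and in the --yjit-stats report: code_gc_count, compiled_page_count, freed_page_count, and freed_code_size. A minimal sketch of watching them, assuming a YJIT-enabled build started as `ruby --yjit --yjit-call-threshold=1 --yjit-exec-mem-size=1 --yjit-stats script.rb` (the script body below is illustrative, and some counters may be nil on builds without stats support):

    # Compile many distinct tiny ISEQs so YJIT exhausts its 1 MiB of executable
    # memory and has to code-GC and recompile instead of giving up on new code.
    2000.times { eval("proc { nil.to_i }.call") }

    stats = RubyVM::YJIT.runtime_stats
    puts "code_gc_count:       #{stats[:code_gc_count].inspect}"
    puts "compiled_page_count: #{stats[:compiled_page_count].inspect}"
    puts "freed_page_count:    #{stats[:freed_page_count].inspect}"
    puts "freed_code_size:     #{stats[:freed_code_size].inspect}"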
cont.c (2 changed lines)
@@ -69,7 +69,7 @@ static VALUE rb_cFiberPool;
 #define FIBER_POOL_ALLOCATION_FREE
 #endif

-#define jit_cont_enabled mjit_enabled // To be used by YJIT later
+#define jit_cont_enabled (mjit_enabled || rb_yjit_enabled_p())

 enum context_type {
     CONTINUATION_CONTEXT = 0,
@@ -825,12 +825,126 @@ class TestYJIT < Test::Unit::TestCase
     RUBY
   end

+  def test_code_gc
+    assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
+      return :not_paged unless add_pages(100) # prepare freeable pages
+      code_gc # first code GC
+      return :not_compiled1 unless compiles { nil } # should be JITable again
+
+      code_gc # second code GC
+      return :not_compiled2 unless compiles { nil } # should be JITable again
+
+      code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+      return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 2
+
+      :ok
+    RUBY
+  end
+
+  def test_on_stack_code_gc_call
+    assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
+      fiber = Fiber.new {
+        # Loop to call the same basic block again after Fiber.yield
+        while true
+          Fiber.yield(nil.to_i)
+        end
+      }
+
+      return :not_paged1 unless add_pages(400) # go to a page without initial ocb code
+      return :broken_resume1 if fiber.resume != 0 # JIT the fiber
+      code_gc # first code GC, which should not free the fiber page
+      return :broken_resume2 if fiber.resume != 0 # the code should still be callable
+
+      code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+      return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 1
+
+      :ok
+    RUBY
+  end
+
+  def test_on_stack_code_gc_twice
+    assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok)
+      fiber = Fiber.new {
+        # Loop to call the same basic block again after Fiber.yield
+        while Fiber.yield(nil.to_i); end
+      }
+
+      return :not_paged1 unless add_pages(400) # go to a page without initial ocb code
+      return :broken_resume1 if fiber.resume(true) != 0 # JIT the fiber
+      code_gc # first code GC, which should not free the fiber page
+
+      return :not_paged2 unless add_pages(300) # add some stuff to be freed
+      # Not calling fiber.resume here to test the case where the YJIT payload loses some
+      # information at the previous code GC. The payload should still be there, and
+      # thus we can know the fiber ISEQ is still on stack on this second code GC.
+      code_gc # second code GC, which should still not free the fiber page
+
+      return :not_paged3 unless add_pages(200) # attempt to overwrite the fiber page (it shouldn't)
+      return :broken_resume2 if fiber.resume(true) != 0 # the fiber code should still be fine
+
+      return :broken_resume3 if fiber.resume(false) != nil # terminate the fiber
+      code_gc # third code GC, freeing a page that used to be on stack
+
+      return :not_paged4 unless add_pages(100) # check everything still works
+
+      code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+      return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count != 3
+
+      :ok
+    RUBY
+  end
+
+  def test_code_gc_with_many_iseqs
+    assert_compiles(code_gc_helpers + <<~'RUBY', exits: :any, result: :ok, mem_size: 1)
+      fiber = Fiber.new {
+        # Loop to call the same basic block again after Fiber.yield
+        while true
+          Fiber.yield(nil.to_i)
+        end
+      }
+
+      return :not_paged1 unless add_pages(500) # use some pages
+      return :broken_resume1 if fiber.resume != 0 # leave on-stack code as well
+
+      add_pages(2000) # use a whole lot of pages to run out of 1MiB
+      return :broken_resume2 if fiber.resume != 0 # on-stack code should be callable
+
+      code_gc_count = RubyVM::YJIT.runtime_stats[:code_gc_count]
+      return :"code_gc_#{code_gc_count}" if code_gc_count && code_gc_count == 0
+
+      :ok
+    RUBY
+  end
+
   private

+  def code_gc_helpers
+    <<~'RUBY'
+      def compiles(&block)
+        failures = RubyVM::YJIT.runtime_stats[:compilation_failure]
+        block.call
+        failures == RubyVM::YJIT.runtime_stats[:compilation_failure]
+      end
+
+      def add_pages(num_jits)
+        pages = RubyVM::YJIT.runtime_stats[:compiled_page_count]
+        num_jits.times { return false unless eval('compiles { nil.to_i }') }
+        pages.nil? || pages < RubyVM::YJIT.runtime_stats[:compiled_page_count]
+      end
+
+      def code_gc
+        RubyVM::YJIT.simulate_oom! # bump write_pos
+        eval('proc { nil }.call') # trigger code GC
+      end
+    RUBY
+  end
+
   def assert_no_exits(script)
     assert_compiles(script)
   end

   ANY = Object.new
-  def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil)
+  def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil, mem_size: nil)
     reset_stats = <<~RUBY
       RubyVM::YJIT.runtime_stats
       RubyVM::YJIT.reset_stats!
@@ -864,7 +978,7 @@ class TestYJIT < Test::Unit::TestCase
       #{write_results}
     RUBY

-    status, out, err, stats = eval_with_jit(script, call_threshold: call_threshold)
+    status, out, err, stats = eval_with_jit(script, call_threshold:, mem_size:)

     assert status.success?, "exited with status #{status.to_i}, stderr:\n#{err}"

@@ -918,12 +1032,13 @@ class TestYJIT < Test::Unit::TestCase
     s.chars.map { |c| c.ascii_only? ? c : "\\u%x" % c.codepoints[0] }.join
   end

-  def eval_with_jit(script, call_threshold: 1, timeout: 1000)
+  def eval_with_jit(script, call_threshold: 1, timeout: 1000, mem_size: nil)
     args = [
       "--disable-gems",
       "--yjit-call-threshold=#{call_threshold}",
       "--yjit-stats"
     ]
+    args << "--yjit-exec-mem-size=#{mem_size}" if mem_size
     args << "-e" << script_shell_encode(script)
     stats_r, stats_w = IO.pipe
     out, err, status = EnvUtil.invoke_ruby(args,
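The code_gc helper above is the trigger mechanism these tests rely on: RubyVM::YJIT.simulate_oom! bumps write_pos to the end of the code region, so the next compilation cannot take a fresh page and has to run the new code GC instead. A rough interactive sketch of the same pattern outside the test suite, assuming a build run with `--yjit --yjit-call-threshold=1 --yjit-stats` where simulate_oom! and the counters are available:

    before = RubyVM::YJIT.runtime_stats[:code_gc_count].to_i

    RubyVM::YJIT.simulate_oom!   # pretend the executable region is full
    eval('proc { nil }.call')    # compiling this block now has to code-GC first

    after = RubyVM::YJIT.runtime_stats[:code_gc_count].to_i
    puts "code GC ran #{after - before} time(s)"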
yjit.c (23 changed lines)
@@ -27,6 +27,7 @@
 #include "probes_helper.h"
 #include "iseq.h"
 #include "ruby/debug.h"
+#include "internal/cont.h"

 // For mmap(), sysconf()
 #ifndef _WIN32
@@ -65,10 +66,7 @@ STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);
 bool
 rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
 {
-    if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
-        return false;
-    }
-    return true;
+    return mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE) == 0;
 }

 void
@@ -85,6 +83,20 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
     }
 }

+// Free the specified memory block.
+bool
+rb_yjit_mark_unused(void *mem_block, uint32_t mem_size)
+{
+    // On Linux, you need to use madvise MADV_DONTNEED to free memory.
+    // We might not need to call this on macOS, but it's not really documented.
+    // We generally prefer to do the same thing on both to ease testing too.
+    madvise(mem_block, mem_size, MADV_DONTNEED);
+
+    // On macOS, mprotect PROT_NONE seems to reduce RSS.
+    // We also call this on Linux to avoid executing unused pages.
+    return mprotect(mem_block, mem_size, PROT_NONE) == 0;
+}
+
 // `start` is inclusive and `end` is exclusive.
 void
 rb_yjit_icache_invalidate(void *start, void *end)
@@ -387,6 +399,9 @@ rb_iseq_reset_jit_func(const rb_iseq_t *iseq)
 {
     RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
     iseq->body->jit_func = NULL;
+    // Enable re-compiling this ISEQ. Even when it's invalidated for TracePoint,
+    // we'd like to re-compile ISEQs that haven't been converted to trace_* insns.
+    iseq->body->total_calls = 0;
 }

 // Get the PC for a given index in an iseq
yjit.rb (6 changed lines)
@@ -212,13 +212,17 @@ module RubyVM::YJIT
     $stderr.puts "bindings_allocations: " + ("%10d" % stats[:binding_allocations])
     $stderr.puts "bindings_set: " + ("%10d" % stats[:binding_set])
     $stderr.puts "compilation_failure: " + ("%10d" % compilation_failure) if compilation_failure != 0
-    $stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
     $stderr.puts "compiled_block_count: " + ("%10d" % stats[:compiled_block_count])
+    $stderr.puts "compiled_iseq_count: " + ("%10d" % stats[:compiled_iseq_count])
+    $stderr.puts "compiled_page_count: " + ("%10d" % stats[:compiled_page_count])
     $stderr.puts "freed_iseq_count: " + ("%10d" % stats[:freed_iseq_count])
+    $stderr.puts "freed_page_count: " + ("%10d" % stats[:freed_page_count])
     $stderr.puts "invalidation_count: " + ("%10d" % stats[:invalidation_count])
     $stderr.puts "constant_state_bumps: " + ("%10d" % stats[:constant_state_bumps])
     $stderr.puts "inline_code_size: " + ("%10d" % stats[:inline_code_size])
     $stderr.puts "outlined_code_size: " + ("%10d" % stats[:outlined_code_size])
+    $stderr.puts "freed_code_size: " + ("%10d" % stats[:freed_code_size])
+    $stderr.puts "code_gc_count: " + ("%10d" % stats[:code_gc_count])
     $stderr.puts "num_gc_obj_refs: " + ("%10d" % stats[:num_gc_obj_refs])

     $stderr.puts "total_exit_count: " + ("%10d" % total_exits)
@@ -263,6 +263,7 @@ fn main() {
         .allowlist_function("rb_yjit_reserve_addr_space")
         .allowlist_function("rb_yjit_mark_writable")
         .allowlist_function("rb_yjit_mark_executable")
+        .allowlist_function("rb_yjit_mark_unused")
         .allowlist_function("rb_yjit_get_page_size")
         .allowlist_function("rb_leaf_invokebuiltin_iseq_p")
         .allowlist_function("rb_leaf_builtin_function")
@@ -297,6 +298,9 @@ fn main() {
         // From internal/compile.h
         .allowlist_function("rb_vm_insn_decode")

+        // from internal/cont.h
+        .allowlist_function("rb_jit_cont_each_iseq")
+
        // From iseq.h
        .allowlist_function("rb_vm_insn_addr2opcode")
        .allowlist_function("rb_iseqw_to_iseq")
@@ -6,6 +6,9 @@ use std::rc::Rc;
 use crate::backend::x86_64::JMP_PTR_BYTES;
 #[cfg(target_arch = "aarch64")]
 use crate::backend::arm64::JMP_PTR_BYTES;
+use crate::core::for_each_on_stack_iseq_payload;
+use crate::invariants::rb_yjit_tracing_invalidate_all;
+use crate::stats::incr_counter;
 use crate::virtualmem::WriteError;

 #[cfg(feature = "disasm")]
@@ -115,17 +118,23 @@ impl CodeBlock {
     pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
         let old_write_ptr = self.get_write_ptr();
         self.set_write_ptr(base_ptr);
-        self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));

+        // Use the freed_pages list if code GC has been used. Otherwise use the next page.
+        let next_page_idx = if let Some(freed_pages) = CodegenGlobals::get_freed_pages() {
+            let current_page = self.write_pos / self.page_size;
+            freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
+        } else {
+            Some(self.write_pos / self.page_size + 1)
+        };
+
         // Move self to the next page
-        let next_page_idx = self.write_pos / self.page_size + 1;
-        if !self.set_page(next_page_idx, &jmp_ptr) {
+        if next_page_idx.is_none() || !self.set_page(next_page_idx.unwrap(), &jmp_ptr) {
             self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
             return false;
         }

         // Move the other CodeBlock to the same page if it's on the furthest page
-        self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
+        self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);

         return !self.dropped_bytes;
     }
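The page-selection change above is the heart of the recompilation path: once a code GC has produced a freed_pages list, next_page takes the first freed page whose index is greater than the current page instead of blindly advancing, and it returns false when nothing is left, which is what lets the callers added later in this commit trigger a code GC and retry. A standalone Ruby sketch of that selection rule, with illustrative names rather than YJIT's API:

    # Pick the next page to write to. freed_pages is nil until the first code GC.
    def next_page_idx(current_page, freed_pages)
      if freed_pages
        freed_pages.find { |page| page > current_page } # may be nil: nothing left
      else
        current_page + 1
      end
    end

    p next_page_idx(3, nil)        # => 4   (no code GC yet: just take the next page)
    p next_page_idx(3, [1, 5, 9])  # => 5   (reuse the first freed page after page 3)
    p next_page_idx(9, [1, 5, 9])  # => nil (no freed page left; caller falls back to code GC)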
@@ -151,7 +160,7 @@ impl CodeBlock {
         // We could remember the last write_pos in page2 and let set_page use that position,
         // but you need to waste some space for keeping write_pos for every single page.
         // It doesn't seem necessary for performance either. So we're currently not doing it.
-        let dst_pos = self.page_size * page_idx + self.page_start();
+        let dst_pos = self.get_page_pos(page_idx);
         if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
             // Reset dropped_bytes
             self.dropped_bytes = false;
@@ -161,6 +170,7 @@ impl CodeBlock {
             self.write_pos = dst_pos;
             let dst_ptr = self.get_write_ptr();
             self.write_pos = src_pos;
+            self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));

             // Generate jmp_ptr from src_pos to dst_pos
             self.without_page_end_reserve(|cb| {
@@ -175,6 +185,53 @@ impl CodeBlock {
         !self.dropped_bytes
     }

+    /// Free the memory pages of given code page indexes
+    fn free_pages(&mut self, page_idxs: &Vec<usize>) {
+        let mut page_idxs = page_idxs.clone();
+        page_idxs.reverse(); // to loop with pop()
+
+        // Group adjacent page indexes and free them in batches to reduce the # of syscalls.
+        while let Some(page_idx) = page_idxs.pop() {
+            // Group first adjacent page indexes
+            let mut batch_idxs = vec![page_idx];
+            while page_idxs.last() == Some(&(batch_idxs.last().unwrap() + 1)) {
+                batch_idxs.push(page_idxs.pop().unwrap());
+            }
+
+            // Free the grouped pages at once
+            let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size);
+            let batch_size = self.page_size * batch_idxs.len();
+            self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32);
+        }
+    }
+
+    pub fn page_size(&self) -> usize {
+        self.page_size
+    }
+
+    /// Return the number of code pages that have been allocated by the VirtualMemory.
+    pub fn num_pages(&self) -> usize {
+        let mapped_region_size = self.mem_block.borrow().mapped_region_size();
+        // CodeBlock's page size != VirtualMem's page size on Linux,
+        // so mapped_region_size % self.page_size may not be 0
+        ((mapped_region_size - 1) / self.page_size) + 1
+    }
+
+    /// Return the number of code pages that have been freed and not used yet.
+    pub fn num_freed_pages(&self) -> usize {
+        (0..self.num_pages()).filter(|&page_idx| self.has_freed_page(page_idx)).count()
+    }
+
+    pub fn has_freed_page(&self, page_idx: usize) -> bool {
+        CodegenGlobals::get_freed_pages().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
+            self.write_pos < page_idx * self.page_size // and not written yet
+    }
+
+    /// Convert a page index to the write_pos for the page start.
+    fn get_page_pos(&self, page_idx: usize) -> usize {
+        self.page_size * page_idx + self.page_start()
+    }
+
     /// write_pos of the current page start
     pub fn page_start_pos(&self) -> usize {
         self.get_write_pos() / self.page_size * self.page_size + self.page_start()
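free_pages batches adjacent page indexes so a contiguous run of freed pages is released with one free_bytes call (one madvise/mprotect round trip) instead of one per page. A standalone Ruby sketch of the same grouping, with illustrative names:

    # Group a sorted list of page indexes into [start_index, page_count] batches
    # of adjacent pages, mirroring the batching done in CodeBlock::free_pages.
    def batch_adjacent(page_idxs)
      batches = []
      page_idxs.each do |idx|
        last = batches.last
        if last && last[0] + last[1] == idx
          last[1] += 1          # extends the current contiguous run
        else
          batches << [idx, 1]   # starts a new run
        end
      end
      batches
    end

    p batch_adjacent([2, 3, 4, 7, 9, 10])
    # => [[2, 3], [7, 1], [9, 2]]  i.e. three free calls instead of six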
@@ -216,21 +273,48 @@ impl CodeBlock {
     /// Return the address ranges of a given address range that this CodeBlock can write.
     #[cfg(any(feature = "disasm", target_arch = "aarch64"))]
     pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
-        let mut addrs = vec![];
-        let mut start = start_ptr.into_usize();
+        // CodegenGlobals is not initialized when we write initial ocb code
+        let freed_pages = if CodegenGlobals::has_instance() {
+            CodegenGlobals::get_freed_pages().as_ref()
+        } else {
+            None
+        };
+
         let region_start = self.get_ptr(0).into_usize();
         let region_end = self.get_ptr(self.get_mem_size()).into_usize();
+        let mut start = start_ptr.into_usize();
         let end = std::cmp::min(end_ptr.into_usize(), region_end);
+
+        let mut addrs = vec![];
         while start < end {
-            let current_page = region_start +
-                (start.saturating_sub(region_start) / self.page_size * self.page_size);
+            let page_idx = start.saturating_sub(region_start) / self.page_size;
+            let current_page = region_start + (page_idx * self.page_size);
             let page_end = std::cmp::min(end, current_page + self.page_end());
+            // If code GC has been used, skip pages that are used by past on-stack code
+            if freed_pages.map_or(true, |pages| pages.contains(&page_idx)) {
                 addrs.push((start, page_end));
+            }
             start = current_page + self.page_size + self.page_start();
         }
         addrs
     }

+    /// Return the code size that has been used by this CodeBlock.
+    pub fn code_size(&self) -> usize {
+        let mut size = 0;
+        let current_page_idx = self.write_pos / self.page_size;
+        for page_idx in 0..self.num_pages() {
+            if page_idx == current_page_idx {
+                // Count only actually used bytes for the current page.
+                size += (self.write_pos % self.page_size).saturating_sub(self.page_start());
+            } else if !self.has_freed_page(page_idx) {
+                // Count an entire range for any non-freed pages that have been used.
+                size += self.page_end() - self.page_start() + self.page_end_reserve;
+            }
+        }
+        size
+    }
+
     /// Check if this code block has sufficient remaining capacity
     pub fn has_capacity(&self, num_bytes: usize) -> bool {
         let page_offset = self.write_pos % self.page_size;
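code_size replaces the raw write position that the stats previously reported: it counts only the used part of the page currently being written plus the usable range of every live page, so freed pages no longer inflate inline_code_size and outlined_code_size. A simplified Ruby sketch of that accounting (it ignores the page-end reserve the real method adds back for full pages, and all layout numbers are made up):

    # Sum code size across pages: partial size for the page currently being
    # written, full usable size for other live pages, nothing for freed pages.
    def code_size(write_pos, num_pages, freed_pages, page_size:, page_start:, page_end:)
      current_page = write_pos / page_size
      (0...num_pages).sum do |page_idx|
        if page_idx == current_page
          [(write_pos % page_size) - page_start, 0].max
        elsif freed_pages.include?(page_idx)
          0
        else
          page_end - page_start
        end
      end
    end

    # 4 pages of 4096 bytes, page 1 freed, currently writing at byte 300 of page 3.
    p code_size(3 * 4096 + 300, 4, [1], page_size: 4096, page_start: 32, page_end: 4064)
    # => 8332, i.e. (4064 - 32) * 2 + (300 - 32)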
@@ -261,6 +345,11 @@ impl CodeBlock {
         self.asm_comments.get(&pos)
     }

+    pub fn clear_comments(&mut self) {
+        #[cfg(feature = "disasm")]
+        self.asm_comments.clear();
+    }
+
     pub fn get_mem_size(&self) -> usize {
         self.mem_size
     }
@@ -293,6 +382,24 @@ impl CodeBlock {
         self.mem_block.borrow().start_ptr().add_bytes(offset)
     }

+    /// Convert an address range to memory page indexes against a num_pages()-sized array.
+    pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
+        let mem_start = self.mem_block.borrow().start_ptr().into_usize();
+        let mem_end = self.mem_block.borrow().end_ptr().into_usize();
+        assert!(mem_start <= start_addr.into_usize());
+        assert!(start_addr.into_usize() <= end_addr.into_usize());
+        assert!(end_addr.into_usize() <= mem_end);
+
+        // Ignore empty code ranges
+        if start_addr == end_addr {
+            return vec![];
+        }
+
+        let start_page = (start_addr.into_usize() - mem_start) / self.page_size;
+        let end_page = (end_addr.into_usize() - mem_start - 1) / self.page_size;
+        (start_page..=end_page).collect() // TODO: consider returning an iterator
+    }
+
     /// Get a (possibly dangling) direct pointer to the current write position
     pub fn get_write_ptr(&self) -> CodePtr {
         self.get_ptr(self.write_pos)
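addrs_to_pages is what lets each ISEQ payload record the code pages its generated code occupies: the start address maps to a page by integer division, and the exclusive end address is pulled back by one byte so a block ending exactly on a page boundary does not claim the next page. The same arithmetic in Ruby, with illustrative addresses:

    # Map a [start, end) address range to the code page indexes it occupies.
    def addrs_to_pages(mem_start, page_size, start_addr, end_addr)
      return [] if start_addr == end_addr # ignore empty code ranges

      start_page = (start_addr - mem_start) / page_size
      end_page   = (end_addr - mem_start - 1) / page_size # end is exclusive
      (start_page..end_page).to_a
    end

    mem_start = 0x10000
    p addrs_to_pages(mem_start, 4096, mem_start + 100,  mem_start + 200)   # => [0]
    p addrs_to_pages(mem_start, 4096, mem_start + 4000, mem_start + 8192)  # => [0, 1]
    # A block ending exactly at a page boundary does not spill into the next page:
    p addrs_to_pages(mem_start, 4096, mem_start + 4096, mem_start + 8192)  # => [1]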
@@ -431,6 +538,58 @@ impl CodeBlock {
         self.mem_block.borrow_mut().mark_all_executable();
     }

+    /// Code GC. Free code pages that are not on stack and reuse them.
+    pub fn code_gc(&mut self) {
+        // The previous code GC failed to free any pages. Give up.
+        if CodegenGlobals::get_freed_pages() == &Some(vec![]) {
+            return;
+        }
+
+        // Check which pages are still in use
+        let mut pages_in_use = vec![false; self.num_pages()];
+        // For each ISEQ, we currently assume that only code pages used by inline code
+        // are used by outlined code, so we mark only code pages used by inlined code.
+        for_each_on_stack_iseq_payload(|iseq_payload| {
+            for page in &iseq_payload.pages {
+                pages_in_use[*page] = true;
+            }
+        });
+        // Outlined code generated by CodegenGlobals::init() should also be kept.
+        for page in CodegenGlobals::get_ocb_pages() {
+            pages_in_use[*page] = true;
+        }
+
+        // Let VirtualMem free the pages
+        let freed_pages: Vec<usize> = pages_in_use.iter().enumerate()
+            .filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect();
+        self.free_pages(&freed_pages);
+
+        // Invalidate everything to have more compact code after code GC.
+        // This currently patches every ISEQ, which works, but in the future,
+        // we could limit that to patch only on-stack ISEQs for optimizing code GC.
+        rb_yjit_tracing_invalidate_all();
+        // When code GC runs next time, we could have reused pages in between
+        // invalidated pages. To invalidate them, we skip freezing them here.
+        // The bytes frozen by any past invalidation are either freed or never reused,
+        // so this can be safely reset to pass the frozen-bytes check on invalidation.
+        CodegenGlobals::set_inline_frozen_bytes(0);
+
+        if let Some(&first_page) = freed_pages.first() {
+            let mut cb = CodegenGlobals::get_inline_cb();
+            cb.write_pos = cb.get_page_pos(first_page);
+            cb.dropped_bytes = false;
+            cb.clear_comments();
+
+            let mut ocb = CodegenGlobals::get_outlined_cb().unwrap();
+            ocb.write_pos = ocb.get_page_pos(first_page);
+            ocb.dropped_bytes = false;
+            ocb.clear_comments();
+        }
+
+        CodegenGlobals::set_freed_pages(freed_pages);
+        incr_counter!(code_gc_count);
+    }
+
     pub fn inline(&self) -> bool {
         !self.outlined
     }
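code_gc is effectively a mark-and-sweep over code pages: pages referenced by any on-stack ISEQ payload or by the outlined code generated at startup stay live, every other page goes to free_pages, and everything is invalidated so surviving ISEQs recompile into the freed space. A compact Ruby sketch of the marking step, with made-up inputs standing in for the ISEQ payloads and ocb pages:

    # Mark pages used by on-stack ISEQs and by startup outlined code, then sweep.
    def plan_code_gc(num_pages, on_stack_iseq_pages, ocb_pages)
      in_use = Array.new(num_pages, false)
      on_stack_iseq_pages.each { |pages| pages.each { |p| in_use[p] = true } }
      ocb_pages.each { |p| in_use[p] = true }

      # Pages not marked in use are the ones code GC can free and reuse.
      (0...num_pages).reject { |p| in_use[p] }
    end

    p plan_code_gc(8, [[0, 1], [5]], [2])
    # => [3, 4, 6, 7]  these page indexes become the freed_pages list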
@@ -643,6 +643,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O
     if cb.has_dropped_bytes() {
         None
     } else {
+        // Mark code pages for code GC
+        let iseq_payload = get_or_create_iseq_payload(iseq);
+        for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) {
+            iseq_payload.pages.insert(page);
+        }
         Some(code_ptr)
     }
 }
@@ -6504,6 +6509,12 @@ pub struct CodegenGlobals {

     // Methods for generating code for hardcoded (usually C) methods
     method_codegen_table: HashMap<usize, MethodGenFn>,
+
+    /// Page indexes for outlined code that are not associated to any ISEQ.
+    ocb_pages: Vec<usize>,
+
+    /// Freed page indexes. None if code GC has not been used.
+    freed_pages: Option<Vec<usize>>,
 }

 /// For implementing global code invalidation. A position in the inline
@@ -6570,6 +6581,7 @@ impl CodegenGlobals {
         #[cfg(test)]
         let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));

+        let ocb_start_addr = ocb.unwrap().get_write_ptr();
         let leave_exit_code = gen_leave_exit(&mut ocb);

         let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
@@ -6577,6 +6589,9 @@ impl CodegenGlobals {
         // Generate full exit code for C func
         let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);

+        let ocb_end_addr = ocb.unwrap().get_write_ptr();
+        let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr);
+
         // Mark all code memory as executable
         cb.mark_all_executable();
         ocb.unwrap().mark_all_executable();
@@ -6590,6 +6605,8 @@ impl CodegenGlobals {
             global_inval_patches: Vec::new(),
             inline_frozen_bytes: 0,
             method_codegen_table: HashMap::new(),
+            ocb_pages,
+            freed_pages: None,
         };

         // Register the method codegen functions
@@ -6725,6 +6742,18 @@ impl CodegenGlobals {
             Some(&mgf) => Some(mgf), // Deref
         }
     }
+
+    pub fn get_ocb_pages() -> &'static Vec<usize> {
+        &CodegenGlobals::get_instance().ocb_pages
+    }
+
+    pub fn get_freed_pages() -> &'static mut Option<Vec<usize>> {
+        &mut CodegenGlobals::get_instance().freed_pages
+    }
+
+    pub fn set_freed_pages(freed_pages: Vec<usize>) {
+        CodegenGlobals::get_instance().freed_pages = Some(freed_pages)
+    }
 }

 #[cfg(test)]
@@ -11,6 +11,7 @@ use crate::utils::*;
 use crate::disasm::*;
 use core::ffi::c_void;
 use std::cell::*;
+use std::collections::HashSet;
 use std::hash::{Hash, Hasher};
 use std::mem;
 use std::rc::{Rc};
@@ -321,7 +322,7 @@ struct Branch {

     // Positions where the generated code starts and ends
     start_addr: Option<CodePtr>,
-    end_addr: Option<CodePtr>,
+    end_addr: Option<CodePtr>, // exclusive

     // Context right after the branch instruction
     #[allow(unused)] // set but not read at the moment
@@ -475,7 +476,11 @@ impl Eq for BlockRef {}
 /// when calling into YJIT
 #[derive(Default)]
 pub struct IseqPayload {
+    // Basic block versions
     version_map: VersionMap,
+
+    // Indexes of code pages used by this ISEQ
+    pub pages: HashSet<usize>,
 }

 impl IseqPayload {
@@ -498,7 +503,7 @@ pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
 }

 /// Get the payload object associated with an iseq. Create one if none exists.
-fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
+pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
     type VoidPtr = *mut c_void;

     let payload_non_null = unsafe {
@@ -537,6 +542,21 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
     unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
 }

+/// Iterate over all on-stack ISEQ payloads
+#[cfg(not(test))]
+pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
+    unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
+        let callback: &mut &mut dyn FnMut(&IseqPayload) -> bool = std::mem::transmute(&mut *data);
+        if let Some(iseq_payload) = get_iseq_payload(iseq) {
+            callback(iseq_payload);
+        }
+    }
+    let mut data: &mut dyn FnMut(&IseqPayload) = &mut callback;
+    unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
+}
+#[cfg(test)]
+pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut _callback: F) {}
+
 /// Free the per-iseq payload
 #[no_mangle]
 pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
@@ -854,6 +874,12 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
     }

     incr_counter!(compiled_block_count);
+
+    // Mark code pages for code GC
+    let iseq_payload = get_iseq_payload(block.blockid.iseq).unwrap();
+    for page in cb.addrs_to_pages(block.start_addr.unwrap(), block.end_addr.unwrap()) {
+        iseq_payload.pages.insert(page);
+    }
 }

 /// Remove a block version from the version map of its parent ISEQ
@@ -1526,7 +1552,11 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {

     match block {
         // Compilation failed
-        None => return None,
+        None => {
+            // Trigger code GC. This entry point will be recompiled later.
+            cb.code_gc();
+            return None;
+        }

         // If the block contains no Ruby instructions
         Some(block) => {
@@ -1776,6 +1806,18 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
             block_rc.borrow().start_addr.unwrap()
         }
         None => {
+            // Code GC needs to borrow blocks for invalidation, so their mutable
+            // borrows must be dropped first.
+            drop(block);
+            drop(branch);
+            // Trigger code GC. The whole ISEQ will be recompiled later.
+            // We shouldn't trigger it in the middle of compilation in branch_stub_hit
+            // because incomplete code could be used when cb.dropped_bytes is flipped
+            // by code GC. So this place, after all compilation, is the safest place
+            // to hook code GC on branch_stub_hit.
+            cb.code_gc();
+            branch = branch_rc.borrow_mut();
+
             // Failed to service the stub by generating a new block so now we
             // need to exit to the interpreter at the stubbed location. We are
             // intentionally *not* restoring original_interp_sp. At the time of
@@ -1793,7 +1835,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
     let new_branch_size = branch.code_size();
     assert!(
         new_branch_size <= branch_size_on_entry,
-        "branch stubs should never enlarge branches"
+        "branch stubs should never enlarge branches: (old_size: {}, new_size: {})",
+        branch_size_on_entry, new_branch_size,
     );

     // Return a pointer to the compiled block version
@@ -1904,7 +1947,10 @@ pub fn gen_branch(
     // Get the branch targets or stubs
     let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb);
     let dst_addr1 = if let Some(ctx) = ctx1 {
-        get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb)
+        match get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb) {
+            Some(dst_addr) => Some(dst_addr),
+            None => return, // avoid unwrap() in gen_fn()
+        }
     } else {
         None
     };
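The changes above add the bookkeeping that the mark phase consumes: every IseqPayload now carries a pages set, filled via addrs_to_pages whenever an entry prologue or block version is generated for that ISEQ. A toy Ruby model of that bookkeeping, with invented structure and method names:

    require "set"

    # Toy model: each ISEQ payload remembers which code pages hold its generated code.
    IseqPayload = Struct.new(:version_map, :pages)

    def record_block!(payload, pages_for_block)
      pages_for_block.each { |page| payload.pages << page }
    end

    payload = IseqPayload.new({}, Set.new)
    record_block!(payload, [0])      # entry prologue landed on page 0
    record_block!(payload, [0, 1])   # a later block spilled over onto page 1
    p payload.pages.to_a             # => [0, 1]: pages code GC must keep while this ISEQ is on stack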
@@ -1278,12 +1278,18 @@ extern "C" {
         lines: *mut ::std::os::raw::c_int,
     ) -> ::std::os::raw::c_int;
 }
+extern "C" {
+    pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
+}
 extern "C" {
     pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
 }
 extern "C" {
     pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
 }
+extern "C" {
+    pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
+}
 extern "C" {
     pub fn rb_yjit_icache_invalidate(
         start: *mut ::std::os::raw::c_void,
@@ -91,7 +91,7 @@ macro_rules! get_option_ref {
     // Unsafe is ok here because options are initialized
     // once before any Ruby code executes
     ($option_name:ident) => {
-        unsafe { &(OPTIONS.$option_name) }
+        unsafe { &($crate::options::OPTIONS.$option_name) }
     };
 }
 pub(crate) use get_option_ref;
@@ -253,6 +253,7 @@ make_counters! {
     compiled_block_count,
     compilation_failure,
     freed_iseq_count,
+    code_gc_count,

     exit_from_branch_stub,

@@ -351,23 +352,37 @@ fn rb_yjit_gen_stats_dict() -> VALUE {
         return Qnil;
     }

+    macro_rules! hash_aset_usize {
+        ($hash:ident, $counter_name:expr, $value:expr) => {
+            let key = rust_str_to_sym($counter_name);
+            let value = VALUE::fixnum_from_usize($value);
+            rb_hash_aset($hash, key, value);
+        }
+    }
+
     let hash = unsafe { rb_hash_new() };

-    // Inline and outlined code size
+    // CodeBlock stats
     unsafe {
         // Get the inline and outlined code blocks
         let cb = CodegenGlobals::get_inline_cb();
         let ocb = CodegenGlobals::get_outlined_cb();

         // Inline code size
-        let key = rust_str_to_sym("inline_code_size");
-        let value = VALUE::fixnum_from_usize(cb.get_write_pos());
-        rb_hash_aset(hash, key, value);
+        hash_aset_usize!(hash, "inline_code_size", cb.code_size());

         // Outlined code size
-        let key = rust_str_to_sym("outlined_code_size");
-        let value = VALUE::fixnum_from_usize(ocb.unwrap().get_write_pos());
-        rb_hash_aset(hash, key, value);
+        hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size());
+
+        // GCed pages
+        let freed_page_count = cb.num_freed_pages();
+        hash_aset_usize!(hash, "freed_page_count", freed_page_count);
+
+        // GCed code size
+        hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size());
+
+        // Compiled pages
+        hash_aset_usize!(hash, "compiled_page_count", cb.num_pages() - freed_page_count);
     }

     // If we're not generating stats, the hash is done
@@ -51,6 +51,8 @@ pub trait Allocator {
     fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;

     fn mark_executable(&mut self, ptr: *const u8, size: u32);
+
+    fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool;
 }

 /// Pointer into a [VirtualMemory].
@@ -91,6 +93,15 @@ impl<A: Allocator> VirtualMemory<A> {
         CodePtr(self.region_start)
     }

+    pub fn end_ptr(&self) -> CodePtr {
+        CodePtr(self.region_start.wrapping_add(self.mapped_region_bytes))
+    }
+
+    /// Size of the region in bytes that we have allocated physical memory for.
+    pub fn mapped_region_size(&self) -> usize {
+        self.mapped_region_bytes
+    }
+
     /// Size of the region in bytes where writes could be attempted.
     pub fn virtual_region_size(&self) -> usize {
         self.region_size_bytes
@@ -177,6 +188,12 @@ impl<A: Allocator> VirtualMemory<A> {
         // Make mapped region executable
         self.allocator.mark_executable(region_start, mapped_region_bytes);
     }
+
+    /// Free a range of bytes. start_ptr must be memory page-aligned.
+    pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
+        assert_eq!(start_ptr.into_usize() % self.page_size_bytes, 0);
+        self.allocator.mark_unused(start_ptr.0, size);
+    }
 }

 impl CodePtr {
@@ -235,6 +252,10 @@ mod sys {
         fn mark_executable(&mut self, ptr: *const u8, size: u32) {
             unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
         }
+
+        fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool {
+            unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) }
+        }
     }
 }

@@ -258,6 +279,7 @@ pub mod tests {
     enum AllocRequest {
         MarkWritable{ start_idx: usize, length: usize },
         MarkExecutable{ start_idx: usize, length: usize },
+        MarkUnused{ start_idx: usize, length: usize },
     }
     use AllocRequest::*;

@@ -298,6 +320,13 @@ pub mod tests {
         // We don't try to execute generated code in cfg(test)
         // so no need to actually request executable memory.
     }
+
+    fn mark_unused(&mut self, ptr: *const u8, length: u32) -> bool {
+        let index = self.bounds_check_request(ptr, length);
+        self.requests.push(MarkUnused { start_idx: index, length: length.as_usize() });
+
+        true
+    }
 }

 // Fictional architecture where each page is 4 bytes long