mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
add --yjit-dump-iseqs param (https://github.com/Shopify/ruby/pull/332)
This commit is contained in:
parent
0ad9cc1696
commit
b4be3c00c5
Notes:
git
2022-08-25 02:43:13 +09:00
7 changed files with 167 additions and 38 deletions
12
yjit.c
12
yjit.c
|
@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str)
|
|||
return LONG2NUM(RSTRING_LEN(str));
|
||||
}
|
||||
|
||||
unsigned long
|
||||
rb_RSTRING_LEN(VALUE str)
|
||||
{
|
||||
return RSTRING_LEN(str);
|
||||
}
|
||||
|
||||
char *
|
||||
rb_RSTRING_PTR(VALUE str)
|
||||
{
|
||||
return RSTRING_PTR(str);
|
||||
}
|
||||
|
||||
// This is defined only as a named struct inside rb_iseq_constant_body.
|
||||
// By giving it a separate typedef, we make it nameable by rust-bindgen.
|
||||
// Bindgen's temp/anon name isn't guaranteed stable.
|
||||
|
|
|
@ -70,6 +70,9 @@ fn main() {
|
|||
.allowlist_function("rb_str_buf_append")
|
||||
.allowlist_function("rb_str_dup")
|
||||
|
||||
// From encindex.h
|
||||
.allowlist_type("ruby_preserved_encindex")
|
||||
|
||||
// This struct is public to Ruby C extensions
|
||||
// From include/ruby/internal/core/rbasic.h
|
||||
.allowlist_type("RBasic")
|
||||
|
@ -240,6 +243,7 @@ fn main() {
|
|||
.allowlist_var("VM_ENV_DATA_INDEX_SPECVAL")
|
||||
.allowlist_var("VM_ENV_DATA_INDEX_FLAGS")
|
||||
.allowlist_var("VM_ENV_DATA_SIZE")
|
||||
.allowlist_function("rb_iseq_path")
|
||||
|
||||
// From yjit.c
|
||||
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
|
||||
|
@ -265,6 +269,8 @@ fn main() {
|
|||
.allowlist_function("rb_yjit_for_each_iseq")
|
||||
.allowlist_function("rb_yjit_obj_written")
|
||||
.allowlist_function("rb_yjit_str_simple_append")
|
||||
.allowlist_function("rb_RSTRING_PTR")
|
||||
.allowlist_function("rb_RSTRING_LEN")
|
||||
.allowlist_function("rb_ENCODING_GET")
|
||||
.allowlist_function("rb_yjit_exit_locations_dict")
|
||||
|
||||
|
@ -282,6 +288,7 @@ fn main() {
|
|||
.allowlist_function("rb_vm_insn_addr2opcode")
|
||||
.allowlist_function("rb_iseqw_to_iseq")
|
||||
.allowlist_function("rb_iseq_each")
|
||||
.allowlist_function("rb_iseq_method_name")
|
||||
|
||||
// From builtin.h
|
||||
.allowlist_type("rb_builtin_function.*")
|
||||
|
|
|
@ -6,6 +6,8 @@ use crate::cruby::*;
|
|||
use crate::options::*;
|
||||
use crate::stats::*;
|
||||
use crate::utils::*;
|
||||
#[cfg(feature="disasm")]
|
||||
use crate::disasm::*;
|
||||
use core::ffi::c_void;
|
||||
use std::cell::*;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
@ -1426,6 +1428,20 @@ fn gen_block_series_body(
|
|||
last_blockref = new_blockref;
|
||||
}
|
||||
|
||||
#[cfg(feature = "disasm")]
|
||||
{
|
||||
// If dump_iseq_disasm is active, see if this iseq's location matches the given substring.
|
||||
// If so, we print the new blocks to the console.
|
||||
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
|
||||
let iseq_location = iseq_get_location(blockid.iseq);
|
||||
if iseq_location.contains(substr) {
|
||||
let last_block = last_blockref.borrow();
|
||||
println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx);
|
||||
println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(first_block)
|
||||
}
|
||||
|
||||
|
@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
|
|||
|
||||
verify_blockid(block.blockid);
|
||||
|
||||
#[cfg(feature = "disasm")]
|
||||
{
|
||||
// If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated.
|
||||
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
|
||||
let iseq_location = iseq_get_location(block.blockid.iseq);
|
||||
if iseq_location.contains(substr) {
|
||||
println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove this block from the version array
|
||||
remove_block_version(blockref);
|
||||
|
||||
|
|
|
@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22;
|
|||
pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608;
|
||||
pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42;
|
||||
pub type ruby_encoding_consts = u32;
|
||||
pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0;
|
||||
pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1;
|
||||
pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2;
|
||||
pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3;
|
||||
pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4;
|
||||
pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5;
|
||||
pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6;
|
||||
pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7;
|
||||
pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8;
|
||||
pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9;
|
||||
pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10;
|
||||
pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11;
|
||||
pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12;
|
||||
pub type ruby_preserved_encindex = u32;
|
||||
extern "C" {
|
||||
pub fn rb_obj_info_dump(obj: VALUE);
|
||||
}
|
||||
|
@ -649,6 +663,9 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
|
|||
pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
|
||||
pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
|
||||
pub type vm_frame_env_flags = u32;
|
||||
extern "C" {
|
||||
pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE;
|
||||
}
|
||||
|
@ -969,6 +986,9 @@ extern "C" {
|
|||
extern "C" {
|
||||
pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rb_vm_barrier();
|
||||
}
|
||||
|
@ -1020,6 +1040,12 @@ extern "C" {
|
|||
extern "C" {
|
||||
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
|
||||
}
|
||||
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
|
||||
extern "C" {
|
||||
pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;
|
||||
|
|
|
@ -26,15 +26,17 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU
|
|||
// Get the iseq pointer from the wrapper
|
||||
let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
|
||||
|
||||
let out_string = disasm_iseq(iseq);
|
||||
// This will truncate disassembly of methods with 10k+ bytecodes.
|
||||
// That's a good thing - this prints to console.
|
||||
let out_string = disasm_iseq_insn_range(iseq, 0, 9999);
|
||||
|
||||
return rust_str_to_ruby(&out_string);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "disasm")]
|
||||
fn disasm_iseq(iseq: IseqPtr) -> String {
|
||||
let mut out = String::from("");
|
||||
pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String {
|
||||
let mut out = String::from("");
|
||||
|
||||
// Get a list of block versions generated for this iseq
|
||||
let mut block_list = get_iseq_block_list(iseq);
|
||||
|
@ -84,47 +86,49 @@ fn disasm_iseq(iseq: IseqPtr) -> String {
|
|||
for block_idx in 0..block_list.len() {
|
||||
let block = block_list[block_idx].borrow();
|
||||
let blockid = block.get_blockid();
|
||||
let end_idx = block.get_end_idx();
|
||||
let start_addr = block.get_start_addr().unwrap().raw_ptr();
|
||||
let end_addr = block.get_end_addr().unwrap().raw_ptr();
|
||||
let code_size = block.code_size();
|
||||
if blockid.idx >= start_idx && blockid.idx < end_idx {
|
||||
let end_idx = block.get_end_idx();
|
||||
let start_addr = block.get_start_addr().unwrap().raw_ptr();
|
||||
let end_addr = block.get_end_addr().unwrap().raw_ptr();
|
||||
let code_size = block.code_size();
|
||||
|
||||
// Write some info about the current block
|
||||
let block_ident = format!(
|
||||
"BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
|
||||
block_idx + 1,
|
||||
block_list.len(),
|
||||
blockid.idx,
|
||||
end_idx,
|
||||
code_size
|
||||
);
|
||||
out.push_str(&format!("== {:=<60}\n", block_ident));
|
||||
// Write some info about the current block
|
||||
let block_ident = format!(
|
||||
"BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
|
||||
block_idx + 1,
|
||||
block_list.len(),
|
||||
blockid.idx,
|
||||
end_idx,
|
||||
code_size
|
||||
);
|
||||
out.push_str(&format!("== {:=<60}\n", block_ident));
|
||||
|
||||
// Disassemble the instructions
|
||||
let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
|
||||
let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
|
||||
// Disassemble the instructions
|
||||
let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
|
||||
let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
|
||||
|
||||
// For each instruction in this block
|
||||
for insn in insns.as_ref() {
|
||||
// Comments for this block
|
||||
if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
|
||||
for comment in comment_list {
|
||||
out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
|
||||
// For each instruction in this block
|
||||
for insn in insns.as_ref() {
|
||||
// Comments for this block
|
||||
if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
|
||||
for comment in comment_list {
|
||||
out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
|
||||
}
|
||||
}
|
||||
out.push_str(&format!(" {}\n", insn));
|
||||
}
|
||||
out.push_str(&format!(" {}\n", insn));
|
||||
}
|
||||
|
||||
// If this is not the last block
|
||||
if block_idx < block_list.len() - 1 {
|
||||
// Compute the size of the gap between this block and the next
|
||||
let next_block = block_list[block_idx + 1].borrow();
|
||||
let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
|
||||
let gap_size = (next_start_addr as usize) - (end_addr as usize);
|
||||
// If this is not the last block
|
||||
if block_idx < block_list.len() - 1 {
|
||||
// Compute the size of the gap between this block and the next
|
||||
let next_block = block_list[block_idx + 1].borrow();
|
||||
let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
|
||||
let gap_size = (next_start_addr as usize) - (end_addr as usize);
|
||||
|
||||
// Log the size of the gap between the blocks if nonzero
|
||||
if gap_size > 0 {
|
||||
out.push_str(&format!("... {} byte gap ...\n", gap_size));
|
||||
// Log the size of the gap between the blocks if nonzero
|
||||
if gap_size > 0 {
|
||||
out.push_str(&format!("... {} byte gap ...\n", gap_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::ffi::CStr;
|
||||
|
||||
// Command-line options
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
#[repr(C)]
|
||||
pub struct Options {
|
||||
// Size of the executable memory block to allocate in MiB
|
||||
|
@ -30,6 +30,9 @@ pub struct Options {
|
|||
/// Dump compiled and executed instructions for debugging
|
||||
pub dump_insns: bool,
|
||||
|
||||
/// Print when specific ISEQ items are compiled or invalidated
|
||||
pub dump_iseq_disasm: Option<String>,
|
||||
|
||||
/// Verify context objects (debug mode only)
|
||||
pub verify_ctx: bool,
|
||||
|
||||
|
@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options {
|
|||
dump_insns: false,
|
||||
verify_ctx: false,
|
||||
global_constant_state: false,
|
||||
dump_iseq_disasm: None,
|
||||
};
|
||||
|
||||
/// Macro to get an option value by name
|
||||
|
@ -64,6 +68,16 @@ macro_rules! get_option {
|
|||
}
|
||||
pub(crate) use get_option;
|
||||
|
||||
/// Macro to borrow an option value by name.
/// Expands to a shared reference into the global `OPTIONS`, so it is the
/// accessor to use for fields such as `String` or `Option<String>`: the value
/// is neither copied nor cloned.
macro_rules! get_option_ref {
    ($option_name:ident) => {
        // Reading the mutable static is acceptable here: options are
        // initialized once before any Ruby code executes.
        unsafe { &OPTIONS.$option_name }
    };
}
|
||||
pub(crate) use get_option_ref;
|
||||
|
||||
/// Expected to receive what comes after the third dash in "--yjit-*".
|
||||
/// Empty string means user passed only "--yjit". C code rejects when
|
||||
/// they pass exact "--yjit-".
|
||||
|
@ -105,6 +119,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
|
|||
}
|
||||
},
|
||||
|
||||
("dump-iseq-disasm", _) => unsafe {
|
||||
OPTIONS.dump_iseq_disasm = Some(opt_val.to_string());
|
||||
},
|
||||
|
||||
("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
|
||||
("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
|
||||
("stats", "") => unsafe { OPTIONS.gen_stats = true },
|
||||
|
|
|
@ -71,6 +71,41 @@ macro_rules! offset_of {
|
|||
#[allow(unused)]
|
||||
pub(crate) use offset_of;
|
||||
|
||||
// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
|
||||
// This should work fine on ASCII strings and anything else
|
||||
// that is considered legal UTF-8, including embedded nulls.
|
||||
fn ruby_str_to_rust(v: VALUE) -> String {
|
||||
// Make sure the CRuby encoding is UTF-8 compatible
|
||||
let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
|
||||
assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);
|
||||
|
||||
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
|
||||
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
|
||||
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
|
||||
String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation
|
||||
}
|
||||
|
||||
// Location is the file defining the method, colon, method name.
|
||||
// Filenames are sometimes internal strings supplied to eval,
|
||||
// so be careful with them.
|
||||
pub fn iseq_get_location(iseq: IseqPtr) -> String {
|
||||
let iseq_path = unsafe { rb_iseq_path(iseq) };
|
||||
let iseq_method = unsafe { rb_iseq_method_name(iseq) };
|
||||
|
||||
let mut s = if iseq_path == Qnil {
|
||||
"None".to_string()
|
||||
} else {
|
||||
ruby_str_to_rust(iseq_path)
|
||||
};
|
||||
s.push_str(":");
|
||||
if iseq_method == Qnil {
|
||||
s.push_str("None");
|
||||
} else {
|
||||
s.push_str(& ruby_str_to_rust(iseq_method));
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue