1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
This commit is contained in:
Noah Gibbs 2022-07-28 16:45:08 +01:00 committed by Takashi Kokubun
parent 0ad9cc1696
commit b4be3c00c5
Notes: git 2022-08-25 02:43:13 +09:00
7 changed files with 167 additions and 38 deletions

12
yjit.c
View file

@ -399,6 +399,18 @@ rb_str_bytesize(VALUE str)
return LONG2NUM(RSTRING_LEN(str));
}
// Function wrapper around the RSTRING_LEN() macro: returns the length of
// `str` as reported by that macro, converted to unsigned long.
// Exposing it as a real function gives it a linkable symbol, which a macro
// does not have.
unsigned long
rb_RSTRING_LEN(VALUE str)
{
return RSTRING_LEN(str);
}
// Function wrapper around the RSTRING_PTR() macro: returns the pointer that
// macro yields for `str`.
// Exposing it as a real function gives it a linkable symbol, which a macro
// does not have.
// NOTE(review): the returned pointer presumably aliases the string's own
// buffer rather than a copy -- confirm before holding it across calls that
// may modify or free the string.
char *
rb_RSTRING_PTR(VALUE str)
{
return RSTRING_PTR(str);
}
// This is defined only as a named struct inside rb_iseq_constant_body.
// By giving it a separate typedef, we make it nameable by rust-bindgen.
// Bindgen's temp/anon name isn't guaranteed stable.

View file

@ -70,6 +70,9 @@ fn main() {
.allowlist_function("rb_str_buf_append")
.allowlist_function("rb_str_dup")
// From encindex.h
.allowlist_type("ruby_preserved_encindex")
// This struct is public to Ruby C extensions
// From include/ruby/internal/core/rbasic.h
.allowlist_type("RBasic")
@ -240,6 +243,7 @@ fn main() {
.allowlist_var("VM_ENV_DATA_INDEX_SPECVAL")
.allowlist_var("VM_ENV_DATA_INDEX_FLAGS")
.allowlist_var("VM_ENV_DATA_SIZE")
.allowlist_function("rb_iseq_path")
// From yjit.c
.allowlist_function("rb_iseq_(get|set)_yjit_payload")
@ -265,6 +269,8 @@ fn main() {
.allowlist_function("rb_yjit_for_each_iseq")
.allowlist_function("rb_yjit_obj_written")
.allowlist_function("rb_yjit_str_simple_append")
.allowlist_function("rb_RSTRING_PTR")
.allowlist_function("rb_RSTRING_LEN")
.allowlist_function("rb_ENCODING_GET")
.allowlist_function("rb_yjit_exit_locations_dict")
@ -282,6 +288,7 @@ fn main() {
.allowlist_function("rb_vm_insn_addr2opcode")
.allowlist_function("rb_iseqw_to_iseq")
.allowlist_function("rb_iseq_each")
.allowlist_function("rb_iseq_method_name")
// From builtin.h
.allowlist_type("rb_builtin_function.*")

View file

@ -6,6 +6,8 @@ use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
use crate::utils::*;
#[cfg(feature="disasm")]
use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
use std::hash::{Hash, Hasher};
@ -1426,6 +1428,20 @@ fn gen_block_series_body(
last_blockref = new_blockref;
}
#[cfg(feature = "disasm")]
{
// If dump_iseq_disasm is active, see if this iseq's location matches the given substring.
// If so, we print the new blocks to the console.
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
let iseq_location = iseq_get_location(blockid.iseq);
if iseq_location.contains(substr) {
let last_block = last_blockref.borrow();
println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, blockid.idx, last_block.end_idx);
println!("{}", disasm_iseq_insn_range(blockid.iseq, blockid.idx, last_block.end_idx));
}
}
}
Some(first_block)
}
@ -1956,6 +1972,17 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
verify_blockid(block.blockid);
#[cfg(feature = "disasm")]
{
// If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated.
if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
let iseq_location = iseq_get_location(block.blockid.iseq);
if iseq_location.contains(substr) {
println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, block.blockid.idx, block.end_idx);
}
}
}
// Remove this block from the version array
remove_block_version(blockref);

View file

@ -246,6 +246,20 @@ pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22;
pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608;
pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42;
pub type ruby_encoding_consts = u32;
pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0;
pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1;
pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2;
pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3;
pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4;
pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5;
pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6;
pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7;
pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8;
pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9;
pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10;
pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11;
pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12;
pub type ruby_preserved_encindex = u32;
extern "C" {
pub fn rb_obj_info_dump(obj: VALUE);
}
@ -649,6 +663,9 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
pub type vm_frame_env_flags = u32;
extern "C" {
pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE;
}
extern "C" {
pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE;
}
@ -969,6 +986,9 @@ extern "C" {
extern "C" {
pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
}
extern "C" {
pub fn rb_iseq_method_name(iseq: *const rb_iseq_t) -> VALUE;
}
extern "C" {
pub fn rb_vm_barrier();
}
@ -1020,6 +1040,12 @@ extern "C" {
extern "C" {
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
}
extern "C" {
pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
}
extern "C" {
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
}
pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {
pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;

View file

@ -26,14 +26,16 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU
// Get the iseq pointer from the wrapper
let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
let out_string = disasm_iseq(iseq);
// This will truncate disassembly of methods with 10k+ bytecodes.
// That's a good thing - this prints to console.
let out_string = disasm_iseq_insn_range(iseq, 0, 9999);
return rust_str_to_ruby(&out_string);
}
}
#[cfg(feature = "disasm")]
fn disasm_iseq(iseq: IseqPtr) -> String {
pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u32, end_idx: u32) -> String {
let mut out = String::from("");
// Get a list of block versions generated for this iseq
@ -84,6 +86,7 @@ fn disasm_iseq(iseq: IseqPtr) -> String {
for block_idx in 0..block_list.len() {
let block = block_list[block_idx].borrow();
let blockid = block.get_blockid();
if blockid.idx >= start_idx && blockid.idx < end_idx {
let end_idx = block.get_end_idx();
let start_addr = block.get_start_addr().unwrap().raw_ptr();
let end_addr = block.get_end_addr().unwrap().raw_ptr();
@ -128,6 +131,7 @@ fn disasm_iseq(iseq: IseqPtr) -> String {
}
}
}
}
return out;
}

View file

@ -1,7 +1,7 @@
use std::ffi::CStr;
// Command-line options
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
// Size of the executable memory block to allocate in MiB
@ -30,6 +30,9 @@ pub struct Options {
/// Dump compiled and executed instructions for debugging
pub dump_insns: bool,
/// Print when specific ISEQ items are compiled or invalidated
pub dump_iseq_disasm: Option<String>,
/// Verify context objects (debug mode only)
pub verify_ctx: bool,
@ -52,6 +55,7 @@ pub static mut OPTIONS: Options = Options {
dump_insns: false,
verify_ctx: false,
global_constant_state: false,
dump_iseq_disasm: None,
};
/// Macro to get an option value by name
@ -64,6 +68,16 @@ macro_rules! get_option {
}
pub(crate) use get_option;
/// Borrow an option from the global `OPTIONS` by field name.
///
/// Expands to a shared reference to the named field, so it suits values
/// that are not cheap to copy out (for example a `String` or an
/// `Option<String>`).
macro_rules! get_option_ref {
    // The unsafe access is acceptable because options are initialized
    // once, before any Ruby code executes, and only read afterwards.
    ($field:ident) => {
        unsafe { &OPTIONS.$field }
    };
}
pub(crate) use get_option_ref;
/// Expected to receive what comes after the third dash in "--yjit-*".
/// Empty string means user passed only "--yjit". C code rejects when
/// they pass exact "--yjit-".
@ -105,6 +119,10 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
("dump-iseq-disasm", _) => unsafe {
OPTIONS.dump_iseq_disasm = Some(opt_val.to_string());
},
("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
("stats", "") => unsafe { OPTIONS.gen_stats = true },

View file

@ -71,6 +71,41 @@ macro_rules! offset_of {
#[allow(unused)]
pub(crate) use offset_of;
// Copy the bytes of a CRuby string into an owned Rust String.
// Only strings whose encoding index is ASCII-8BIT, UTF-8 or US-ASCII are
// accepted, and the bytes must validate as UTF-8 (embedded nulls are fine).
// Panics if either condition does not hold.
fn ruby_str_to_rust(v: VALUE) -> String {
    // Refuse any encoding other than the three listed above
    let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
    assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);

    // View the string's buffer as a byte slice of its reported length
    let byte_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
    let data = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
    let bytes: &[u8] = unsafe { slice::from_raw_parts(data, byte_len) };

    // Take a private copy, then let from_utf8 do the UTF-8 validation
    let owned = bytes.to_vec();
    String::from_utf8(owned).unwrap()
}
// Build a "<path>:<method name>" description of an ISEQ.
// Either half is rendered as "None" when CRuby reports nil for it.
// Paths are not always real files (eval can supply its own string),
// so treat the result as a label rather than something to open.
pub fn iseq_get_location(iseq: IseqPtr) -> String {
    let path_val = unsafe { rb_iseq_path(iseq) };
    let method_val = unsafe { rb_iseq_method_name(iseq) };

    let path_part = if path_val == Qnil {
        "None".to_string()
    } else {
        ruby_str_to_rust(path_val)
    };

    let method_part = if method_val == Qnil {
        "None".to_string()
    } else {
        ruby_str_to_rust(method_val)
    };

    format!("{}:{}", path_part, method_part)
}
#[cfg(test)]
mod tests {
#[test]