mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
223 lines
8.1 KiB
Ruby
223 lines
8.1 KiB
Ruby
#! /your/favourite/path/to/ruby
|
|
# -*- Ruby -*-
|
|
# -*- frozen_string_literal: true; -*-
|
|
# -*- warn_indent: true; -*-
|
|
#
|
|
# Copyright (c) 2020 Wu, Alan. All rights reserved.
|
|
#
|
|
# This file is a part of the programming language Ruby. Permission is hereby
|
|
# granted, to either redistribute and/or modify this file, provided that the
|
|
# conditions mentioned in the file COPYING are met. Consult the file for
|
|
# details.
|
|
|
|
module RubyVM::MicroJIT
|
|
ScrapeResult = Struct.new(:pre_call_bytes, :post_call_bytes, :disassembly_lines)
|
|
|
|
class << self
|
|
def target_platform
|
|
# Note, checking RUBY_PLATRFORM doesn't work when cross compiling
|
|
@platform ||= if RUBY_PLATFORM.include?('darwin')
|
|
:darwin
|
|
elsif RUBY_PLATFORM.include?('linux')
|
|
:linux
|
|
else
|
|
:unknown
|
|
end
|
|
end
|
|
|
|
def get_fileoff
|
|
# use the load command to figure out the offset to the start of the content of vm.o
|
|
`otool -l vm.o`.each_line do |line|
|
|
if (fileoff = line[/fileoff (\d+)/, 1])
|
|
p [__method__, line] if $DEBUG
|
|
return fileoff.to_i
|
|
end
|
|
end
|
|
raise
|
|
end
|
|
|
|
def get_symbol_offset(symbol)
|
|
`nm vm.o`.each_line do |line|
|
|
if (offset = line[Regexp.compile('(\h+).+' + Regexp.escape(symbol) + '\Z'), 1])
|
|
p [__method__, line] if $DEBUG
|
|
return Integer(offset, 16)
|
|
end
|
|
end
|
|
raise
|
|
end
|
|
|
|
def readint8b(offset)
|
|
bytes = IO.binread('vm.o', 8, offset)
|
|
bytes.unpack('q').first # this is native endian but we want little endian. it's fine if the host moachine is x86
|
|
end
|
|
|
|
def get_symbol_section_and_offset(name)
|
|
`objdump -w -t vm.o`.each_line do |line|
|
|
split_line = line.split
|
|
next unless split_line.size >= 6
|
|
# the table should go into a data section
|
|
if split_line[5].include?('insns_address_table') && split_line[3].include?('data')
|
|
p line if $DEBUG
|
|
return [split_line[3], Integer(split_line[0], 16)]
|
|
end
|
|
end
|
|
raise 'Failed to find section and offset for the the instruction address table'
|
|
end
|
|
|
|
def get_handler_offset(table_section, table_offset, insn_id)
|
|
target_offset = insn_id * 8 + table_offset
|
|
reloc_start_message = "RELOCATION RECORDS FOR [#{table_section}]:"
|
|
`objdump -w -r vm.o`.each_line do |line|
|
|
line.strip!
|
|
if (line == reloc_start_message)...(line.empty?)
|
|
split_line = line.split
|
|
next if split_line.first == 'RELOCATION'
|
|
next if split_line == ['OFFSET', 'TYPE', 'VALUE']
|
|
if Integer(split_line.first, 16) == target_offset
|
|
section, offset = split_line[2].split('+')
|
|
p line if $DEBUG
|
|
return section, Integer(offset, 16)
|
|
end
|
|
end
|
|
end
|
|
raise 'Failed to find relocation info for the target instruction'
|
|
end
|
|
|
|
def objdump_disassemble_command(offset)
|
|
case target_platform
|
|
when :darwin
|
|
"objdump --x86-asm-syntax=intel --start-address=#{offset} --stop-address=#{offset+50} -d vm.o"
|
|
when :linux
|
|
"objdump -M intel --start-address=#{offset} --stop-address=#{offset+50} -d vm.o"
|
|
else
|
|
raise "unkown platform"
|
|
end
|
|
end
|
|
|
|
def disassemble(offset)
|
|
command = objdump_disassemble_command(offset)
|
|
puts "Running: #{command}"
|
|
disassembly = `#{command}`
|
|
instructions = []
|
|
puts disassembly if $DEBUG
|
|
disassembly.each_line do |line|
|
|
line = line.strip
|
|
match_data = /\s*\h+:\s*((?:\h\h\s)+)\s+(\w+)/.match(line)
|
|
if match_data
|
|
bytes = match_data[1]
|
|
mnemonic = match_data[2]
|
|
instructions << [bytes, mnemonic, line]
|
|
break if mnemonic == 'jmp'
|
|
elsif !instructions.empty?
|
|
p line
|
|
raise "expected a continuous sequence of disassembly lines"
|
|
end
|
|
end
|
|
|
|
jmp_idx = instructions.find_index { |_, mnemonic, _| mnemonic == 'jmp' }
|
|
raise 'failed to find jmp' unless jmp_idx
|
|
raise 'generated code for example too long' unless jmp_idx < 10
|
|
handler_instructions = instructions[(0..jmp_idx)]
|
|
|
|
disassembly_lines = handler_instructions.map {|_, _, line| line}
|
|
puts "Disassembly for the example handler:"
|
|
puts disassembly_lines
|
|
|
|
|
|
raise 'rip reference in example makes copying unsafe' if handler_instructions.any? { |_, _, full_line| full_line.downcase.include?('rip') }
|
|
acceptable_mnemonics = %w(mov jmp lea call endbr64)
|
|
unrecognized = nil
|
|
handler_instructions.each { |i| unrecognized = i unless acceptable_mnemonics.include?(i[1]) }
|
|
raise "found an unrecognized \"#{unrecognized[1]}\" instruction in the example. List of recognized instructions: #{acceptable_mnemonics.join(', ')}" if unrecognized
|
|
raise 'found multiple jmp instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'jmp' } > 1
|
|
raise "the jmp instruction seems to be relative which isn't copiable" if instructions[jmp_idx][0].split.size > 4
|
|
raise 'no call instructions found' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' } == 0
|
|
raise 'found multiple call instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' } > 1
|
|
call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' }
|
|
|
|
|
|
pre_call_bytes = []
|
|
post_call_bytes = []
|
|
|
|
handler_instructions.take(call_idx).each do |bytes, mnemonic, _|
|
|
pre_call_bytes += bytes.split
|
|
end
|
|
|
|
handler_instructions[call_idx + 1, handler_instructions.size].each do |bytes, _, _|
|
|
post_call_bytes += bytes.split
|
|
end
|
|
|
|
ScrapeResult.new(
|
|
comma_separated_hex_string(pre_call_bytes),
|
|
comma_separated_hex_string(post_call_bytes),
|
|
disassembly_lines
|
|
)
|
|
end
|
|
|
|
def darwin_scrape(instruction_id)
|
|
fileoff = get_fileoff
|
|
tc_table_offset = get_symbol_offset('vm_exec_core.insns_address_table')
|
|
vm_exec_core_offset = get_symbol_offset('vm_exec_core')
|
|
p instruction_id if $DEBUG
|
|
p fileoff if $DEBUG
|
|
p tc_table_offset.to_s(16) if $DEBUG
|
|
offset_to_insn_in_tc_table = fileoff + tc_table_offset + 8 * instruction_id
|
|
p offset_to_insn_in_tc_table if $DEBUG
|
|
offset_to_handler_code_from_vm_exec_core = readint8b(offset_to_insn_in_tc_table)
|
|
p offset_to_handler_code_from_vm_exec_core if $DEBUG
|
|
disassemble(vm_exec_core_offset + offset_to_handler_code_from_vm_exec_core)
|
|
end
|
|
|
|
def linux_scrape(instruction_id)
|
|
table_section, table_offset = get_symbol_section_and_offset('vm_exec_core.insns_address_table')
|
|
p [table_section, table_offset] if $DEBUG
|
|
handler_section, handler_offset = get_handler_offset(table_section, table_offset, instruction_id)
|
|
p [handler_section, handler_offset] if $DEBUG
|
|
disassemble(handler_offset)
|
|
end
|
|
|
|
def make_result(success, without_pc, with_pc)
|
|
[success ? 1 : 0,
|
|
[
|
|
['ujit_without_ec', without_pc],
|
|
['ujit_with_ec', with_pc],
|
|
]
|
|
]
|
|
end
|
|
|
|
def scrape_instruction(instruction_id)
|
|
raise unless instruction_id.is_a?(Integer)
|
|
case target_platform
|
|
when :darwin
|
|
darwin_scrape(instruction_id)
|
|
when :linux
|
|
linux_scrape(instruction_id)
|
|
else
|
|
raise 'Unkonwn platform. Only Mach-O on macOS and ELF on Linux are supported'
|
|
end
|
|
end
|
|
|
|
def scrape
|
|
without_ec = scrape_instruction(RubyVM::Instructions.find_index { |insn| insn.name == 'ujit_call_example' })
|
|
with_ec = scrape_instruction(RubyVM::Instructions.find_index { |insn| insn.name == 'ujit_call_example_with_ec' })
|
|
make_result(true, without_ec, with_ec)
|
|
rescue => e
|
|
print_warning("scrape failed: #{e.message}")
|
|
int3 = '0xcc'
|
|
failure_result = ScrapeResult.new(int3, int3, ['int3'])
|
|
make_result(false, failure_result, failure_result)
|
|
end
|
|
|
|
def print_warning(text)
|
|
text = "ujit warning: #{text}"
|
|
text = "\x1b[1m#{text}\x1b[0m" if STDOUT.tty?
|
|
STDOUT.puts(text)
|
|
end
|
|
|
|
def comma_separated_hex_string(nums)
|
|
nums.map{ |byte| '0x'+byte}.join(', ')
|
|
end
|
|
end
|
|
end
|
|
|
|
require_relative 'micro_jit/example_instructions'
|