From 3d87eadf16a086d2f3bec0b556760c0ebfe1e7cd Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Wed, 23 Sep 2020 03:02:01 -0400 Subject: [PATCH] Refactor ujit_examples.h generator. Remove dwarfdump dependency --- common.mk | 8 +- gen_ujit_examples.rb | 110 --------------- template/Makefile.in | 4 +- tool/ruby_vm/models/instructions.rb | 63 +-------- tool/ruby_vm/models/micro_jit.rb | 125 ++++++++++++++++++ .../models/micro_jit/example_instructions.rb | 71 ++++++++++ tool/ruby_vm/views/ujit_examples.inc.erb | 16 +++ tool/ruby_vm/views/vm.inc.erb | 2 +- ujit_compile.c | 4 +- 9 files changed, 223 insertions(+), 180 deletions(-) delete mode 100644 gen_ujit_examples.rb create mode 100644 tool/ruby_vm/models/micro_jit.rb create mode 100644 tool/ruby_vm/models/micro_jit/example_instructions.rb create mode 100644 tool/ruby_vm/views/ujit_examples.inc.erb diff --git a/common.mk b/common.mk index e32038b851..1dc7525b92 100644 --- a/common.mk +++ b/common.mk @@ -1104,13 +1104,11 @@ incs: $(INSNS) {$(VPATH)}node_name.inc {$(VPATH)}known_errors.inc \ {$(VPATH)}vm_call_iseq_optimized.inc $(srcdir)/revision.h \ $(REVISION_H) \ $(UNICODE_DATA_HEADERS) $(ENC_HEADERS) \ - {$(VPATH)}id.h {$(VPATH)}probes.dmyh {$(VPATH)}ujit_examples.h + {$(VPATH)}id.h {$(VPATH)}probes.dmyh insns: $(INSNS) -ujit_examples.h: gen_ujit_examples.rb vm.$(OBJEXT) - $(ECHO) generating $@ - $(Q) $(BASERUBY) $(srcdir)/gen_ujit_examples.rb +ujit_examples.inc: vm.$(OBJEXT) id.h: $(tooldir)/generic_erb.rb $(srcdir)/template/id.h.tmpl $(srcdir)/defs/id.def $(ECHO) generating $@ @@ -15040,7 +15038,7 @@ transient_heap.$(OBJEXT): {$(VPATH)}vm_core.h transient_heap.$(OBJEXT): {$(VPATH)}vm_debug.h transient_heap.$(OBJEXT): {$(VPATH)}vm_opts.h transient_heap.$(OBJEXT): {$(VPATH)}vm_sync.h -ujit_compile.$(OBJEXT): {$(VPATH)}ujit_examples.h +ujit_compile.$(OBJEXT): {$(VPATH)}ujit_examples.inc util.$(OBJEXT): $(hdrdir)/ruby.h util.$(OBJEXT): $(hdrdir)/ruby/ruby.h util.$(OBJEXT): $(top_srcdir)/internal/compilers.h diff --git a/gen_ujit_examples.rb b/gen_ujit_examples.rb deleted file mode 100644 index dc400bc920..0000000000 --- a/gen_ujit_examples.rb +++ /dev/null @@ -1,110 +0,0 @@ -def get_example_instruction_id - # TODO we could get this from the script that generates vm.inc instead of doing this song and dance - `dwarfdump --name='YARVINSN_ujit_call_example' vm.o`.each_line do |line| - if (id = line[/DW_AT_const_value\s\((\d+\))/, 1]) - p [__method__, line] if $DEBUG - return id.to_i - end - end - raise -end - -def get_fileoff - # use the load command to figure out the offset to the start of the content of vm.o - `otool -l vm.o`.each_line do |line| - if (fileoff = line[/fileoff (\d+)/, 1]) - p [__method__, line] if $DEBUG - return fileoff.to_i - end - end - raise -end - -def get_symbol_offset(symbol) - `nm vm.o`.each_line do |line| - if (offset = line[Regexp.compile('(\h+).+' + Regexp.escape(symbol) + '\Z'), 1]) - p [__method__, line] if $DEBUG - return Integer(offset, 16) - end - end - raise -end - -def readint8b(offset) - bytes = IO.binread('vm.o', 8, offset) - bytes.unpack('q').first # this is native endian but we want little endian. it's fine if the host moachine is x86 -end - - -def disassemble(offset) - command = "objdump --x86-asm-syntax=intel --start-address=#{offset} --stop-address=#{offset+50} -d vm.o" - puts "Running: #{command}" - puts "feel free to verify with --reloc" - disassembly = `#{command}` - instructions = [] - puts disassembly if $DEBUG - disassembly.each_line do |line| - line = line.strip - match_data = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line) - if match_data - bytes = match_data[1] - mnemonic = match_data[2] - instructions << [bytes, mnemonic, line] - break if mnemonic == 'jmp' - elsif !instructions.empty? - p line - raise "expected a continuous sequence of disassembly lines" - end - end - - jmp_idx = instructions.find_index { |_, mnemonic, _| mnemonic == 'jmp' } - raise 'failed to find jmp' unless jmp_idx - raise 'generated code for example too long' unless jmp_idx < 10 - handler_instructions = instructions[(0..jmp_idx)] - - puts "Disassembly for the example handler:" - puts handler_instructions.map {|_, _, line| line} - - - raise 'rip reference in example makes copying unsafe' if handler_instructions.any? { |_, _, full_line| full_line.downcase.include?('rip') } - acceptable_mnemonics = %w(mov jmp lea call) - unrecognized = nil - handler_instructions.each { |i| unrecognized = i unless acceptable_mnemonics.include?(i[1]) } - raise "found an unrecognized \"#{unrecognized[1]}\" instruction in the example. List of recognized instructions: #{acceptable_mnemonics.join(', ')}" if unrecognized - raise 'found multiple jmp instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'jmp' } > 1 - raise "the jmp instruction seems to be relative which isn't copiable" if instructions[jmp_idx][0].split.size > 4 - raise 'found multiple call instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' } > 1 - call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' } - - - pre_call_bytes = [] - post_call_bytes = [] - handler_instructions.take(call_idx).each do |bytes, mnemonic, _| - pre_call_bytes += bytes.split - end - handler_instructions[call_idx + 1, handler_instructions.size].each do |bytes, _, _| - post_call_bytes += bytes.split - end - - File.write("ujit_examples.h", <<-EOF) -static const uint8_t ujit_pre_call_bytes[] = { #{pre_call_bytes.map{ |byte| '0x'+byte}.join(', ')} }; -static const uint8_t ujit_post_call_bytes[] = { #{post_call_bytes.map{ |byte| '0x'+byte}.join(', ')} }; - EOF - if $DEBUG - puts "file:" - puts File.binread("ujit_examples.h") - end -end - -instruction_id = get_example_instruction_id -fileoff = get_fileoff -tc_table_offset = get_symbol_offset('vm_exec_core.insns_address_table') -vm_exec_core_offset = get_symbol_offset('vm_exec_core') -p instruction_id if $DEBUG -p fileoff if $DEBUG -p tc_table_offset.to_s(16) if $DEBUG -offset_to_insn_in_tc_table = fileoff + tc_table_offset + 8 * instruction_id -p offset_to_insn_in_tc_table if $DEBUG -offset_to_handler_code_from_vm_exec_core = readint8b(offset_to_insn_in_tc_table) -p offset_to_handler_code_from_vm_exec_core if $DEBUG -disassemble(vm_exec_core_offset + offset_to_handler_code_from_vm_exec_core) diff --git a/template/Makefile.in b/template/Makefile.in index b0e987bba0..557c9922f2 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -590,7 +590,7 @@ update-known-errors: $(IFCHANGE) $(srcdir)/defs/known_errors.def - INSNS = opt_sc.inc optinsn.inc optunifs.inc insns.inc insns_info.inc \ - vmtc.inc vm.inc mjit_compile.inc + vmtc.inc vm.inc mjit_compile.inc ujit_examples.inc $(INSNS): $(srcdir)/insns.def vm_opts.h \ $(srcdir)/defs/opt_operand.def $(srcdir)/defs/opt_insn_unif.def \ @@ -610,6 +610,8 @@ $(INSNS): $(srcdir)/insns.def vm_opts.h \ $(tooldir)/ruby_vm/models/instructions_unifications.rb \ $(tooldir)/ruby_vm/models/operands_unifications.rb \ $(tooldir)/ruby_vm/models/trace_instructions.rb \ + $(tooldir)/ruby_vm/models/micro_jit.rb \ + $(tooldir)/ruby_vm/models/micro_jit/example_instructions.rb \ $(tooldir)/ruby_vm/models/typemap.rb \ $(tooldir)/ruby_vm/scripts/converter.rb \ $(tooldir)/ruby_vm/scripts/insns2vm.rb \ diff --git a/tool/ruby_vm/models/instructions.rb b/tool/ruby_vm/models/instructions.rb index 83dff9c5b0..065ac7dbf2 100644 --- a/tool/ruby_vm/models/instructions.rb +++ b/tool/ruby_vm/models/instructions.rb @@ -13,71 +13,12 @@ require_relative 'bare_instructions' require_relative 'operands_unifications' require_relative 'instructions_unifications' - -class RubyVM::UJITExampleInstructions - include RubyVM::CEscape - - attr_reader :name - - def initialize name - @name = name - end - - def pretty_name - return sprintf "%s(...)(...)(...)", @name - end - - def jump_destination - return @orig.name - end - - def bin - return sprintf "BIN(%s)", @name - end - - def width - 1 - end - - def operands_info - "" - end - - def rets - return ['...'] - end - - def pops - return ['...'] - end - - def attributes - return [] - end - - def has_attribute? *; - return false - end - - def handles_sp? - false - end - - def always_leaf? - false - end - - @all_examples = [new('ujit_call_example')] - - def self.to_a - @all_examples - end -end +require_relative 'micro_jit' RubyVM::Instructions = RubyVM::BareInstructions.to_a + \ RubyVM::OperandsUnifications.to_a + \ RubyVM::InstructionsUnifications.to_a + \ - RubyVM::UJITExampleInstructions.to_a + RubyVM::MicroJIT::ExampleInstructions.to_a diff --git a/tool/ruby_vm/models/micro_jit.rb b/tool/ruby_vm/models/micro_jit.rb new file mode 100644 index 0000000000..eee829a2e6 --- /dev/null +++ b/tool/ruby_vm/models/micro_jit.rb @@ -0,0 +1,125 @@ +#! /your/favourite/path/to/ruby +# -*- Ruby -*- +# -*- frozen_string_literal: true; -*- +# -*- warn_indent: true; -*- +# +# Copyright (c) 2020 Wu, Alan. All rights reserved. +# +# This file is a part of the programming language Ruby. Permission is hereby +# granted, to either redistribute and/or modify this file, provided that the +# conditions mentioned in the file COPYING are met. Consult the file for +# details. + +module RubyVM::MicroJIT + class << self + def get_fileoff + # use the load command to figure out the offset to the start of the content of vm.o + `otool -l vm.o`.each_line do |line| + if (fileoff = line[/fileoff (\d+)/, 1]) + p [__method__, line] if $DEBUG + return fileoff.to_i + end + end + raise + end + + def get_symbol_offset(symbol) + `nm vm.o`.each_line do |line| + if (offset = line[Regexp.compile('(\h+).+' + Regexp.escape(symbol) + '\Z'), 1]) + p [__method__, line] if $DEBUG + return Integer(offset, 16) + end + end + raise + end + + def readint8b(offset) + bytes = IO.binread('vm.o', 8, offset) + bytes.unpack('q').first # this is native endian but we want little endian. it's fine if the host moachine is x86 + end + + def disassemble(offset) + command = "objdump --x86-asm-syntax=intel --start-address=#{offset} --stop-address=#{offset+50} -d vm.o" + puts "Running: #{command}" + puts "feel free to verify with --reloc" + disassembly = `#{command}` + instructions = [] + puts disassembly if $DEBUG + disassembly.each_line do |line| + line = line.strip + match_data = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line) + if match_data + bytes = match_data[1] + mnemonic = match_data[2] + instructions << [bytes, mnemonic, line] + break if mnemonic == 'jmp' + elsif !instructions.empty? + p line + raise "expected a continuous sequence of disassembly lines" + end + end + + jmp_idx = instructions.find_index { |_, mnemonic, _| mnemonic == 'jmp' } + raise 'failed to find jmp' unless jmp_idx + raise 'generated code for example too long' unless jmp_idx < 10 + handler_instructions = instructions[(0..jmp_idx)] + + puts "Disassembly for the example handler:" + puts handler_instructions.map {|_, _, line| line} + + + raise 'rip reference in example makes copying unsafe' if handler_instructions.any? { |_, _, full_line| full_line.downcase.include?('rip') } + acceptable_mnemonics = %w(mov jmp lea call) + unrecognized = nil + handler_instructions.each { |i| unrecognized = i unless acceptable_mnemonics.include?(i[1]) } + raise "found an unrecognized \"#{unrecognized[1]}\" instruction in the example. List of recognized instructions: #{acceptable_mnemonics.join(', ')}" if unrecognized + raise 'found multiple jmp instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'jmp' } > 1 + raise "the jmp instruction seems to be relative which isn't copiable" if instructions[jmp_idx][0].split.size > 4 + raise 'found multiple call instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' } > 1 + call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' } + + + @pre_call_bytes = [] + @post_call_bytes = [] + + handler_instructions.take(call_idx).each do |bytes, mnemonic, _| + @pre_call_bytes += bytes.split + end + + handler_instructions[call_idx + 1, handler_instructions.size].each do |bytes, _, _| + @post_call_bytes += bytes.split + end + end + + def scrape + instruction_id = RubyVM::Instructions.find_index { |insn| insn.name == 'ujit_call_example' } + fileoff = get_fileoff + tc_table_offset = get_symbol_offset('vm_exec_core.insns_address_table') + vm_exec_core_offset = get_symbol_offset('vm_exec_core') + p instruction_id if $DEBUG + p fileoff if $DEBUG + p tc_table_offset.to_s(16) if $DEBUG + offset_to_insn_in_tc_table = fileoff + tc_table_offset + 8 * instruction_id + p offset_to_insn_in_tc_table if $DEBUG + offset_to_handler_code_from_vm_exec_core = readint8b(offset_to_insn_in_tc_table) + p offset_to_handler_code_from_vm_exec_core if $DEBUG + disassemble(vm_exec_core_offset + offset_to_handler_code_from_vm_exec_core) + end + + def comma_separated_hex_string(nums) + nums.map{ |byte| '0x'+byte}.join(', ') + end + + def pre_call_bytes + scrape unless @pre_call_bytes + comma_separated_hex_string(@pre_call_bytes) + end + + def post_call_bytes + scrape unless @post_call_bytes + comma_separated_hex_string(@post_call_bytes) + end + end +end + +require_relative 'micro_jit/example_instructions' diff --git a/tool/ruby_vm/models/micro_jit/example_instructions.rb b/tool/ruby_vm/models/micro_jit/example_instructions.rb new file mode 100644 index 0000000000..5117d1c519 --- /dev/null +++ b/tool/ruby_vm/models/micro_jit/example_instructions.rb @@ -0,0 +1,71 @@ +#! /your/favourite/path/to/ruby +# -*- Ruby -*- +# -*- frozen_string_literal: true; -*- +# -*- warn_indent: true; -*- +# +# Copyright (c) 2020 Wu, Alan. All rights reserved. +# +# This file is a part of the programming language Ruby. Permission is hereby +# granted, to either redistribute and/or modify this file, provided that the +# conditions mentioned in the file COPYING are met. Consult the file for +# details. + +class RubyVM::MicroJIT::ExampleInstructions + include RubyVM::CEscape + + attr_reader :name + + def initialize name + @name = name + end + + def pretty_name + return sprintf "%s(...)(...)(...)", @name + end + + def jump_destination + return @orig.name + end + + def bin + return sprintf "BIN(%s)", @name + end + + def width + 1 + end + + def operands_info + "" + end + + def rets + return ['...'] + end + + def pops + return ['...'] + end + + def attributes + return [] + end + + def has_attribute? *; + return false + end + + def handles_sp? + false + end + + def always_leaf? + false + end + + @all_examples = [new('ujit_call_example')] + + def self.to_a + @all_examples + end +end diff --git a/tool/ruby_vm/views/ujit_examples.inc.erb b/tool/ruby_vm/views/ujit_examples.inc.erb new file mode 100644 index 0000000000..a301c5ca74 --- /dev/null +++ b/tool/ruby_vm/views/ujit_examples.inc.erb @@ -0,0 +1,16 @@ +/* -*- C -*- */ + +%# Copyright (c) 2020 Wu, Alan. All rights reserved. +%# +%# This file is a part of the programming language Ruby. Permission is hereby +%# granted, to either redistribute and/or modify this file, provided that the +%# conditions mentioned in the file COPYING are met. Consult the file for +%# details. +<%= render 'copyright' %> +<%= render 'notice', locals: { + this_file: 'contains raw instruction bytes that helps MicroJIT generate code', + edit: __FILE__, +} -%> + +static const uint8_t ujit_pre_call_bytes[] = { <%= RubyVM::MicroJIT.pre_call_bytes %> }; +static const uint8_t ujit_post_call_bytes[] = { <%= RubyVM::MicroJIT.post_call_bytes %> }; diff --git a/tool/ruby_vm/views/vm.inc.erb b/tool/ruby_vm/views/vm.inc.erb index 7942a3ef87..3c7b602859 100644 --- a/tool/ruby_vm/views/vm.inc.erb +++ b/tool/ruby_vm/views/vm.inc.erb @@ -28,7 +28,7 @@ % RubyVM::TraceInstructions.to_a.each do |insn| <%= render 'trace_instruction', locals: { insn: insn } -%> % end -% RubyVM::UJITExampleInstructions.to_a.each do |insn| +% RubyVM::MicroJIT::ExampleInstructions.to_a.each do |insn| INSN_ENTRY(<%= insn.name %>) { START_OF_ORIGINAL_INSN(<%= insn.name %>); diff --git a/ujit_compile.c b/ujit_compile.c index fae676db3b..9d7a8b2ca0 100644 --- a/ujit_compile.c +++ b/ujit_compile.c @@ -9,9 +9,9 @@ #include "ujit_asm.h" #include "ujit_utils.h" -// TODO: give ujit_examples.h some more meaningful file name +// TODO: give ujit_examples.inc some more meaningful file name // eg ujit_hook.h -#include "ujit_examples.h" +#include "ujit_examples.inc" // Code generation context typedef struct ctx_struct