# Looks up the numeric value associated with YARVINSN_ujit_call_example.
#
# Runs `dwarfdump` against vm.o, asking for the debug-info entry with that
# name, and parses the first DW_AT_const_value attribute found in the output.
#
# Returns the value as an Integer.
# Raises RuntimeError when the output has no DW_AT_const_value line.
def get_example_instruction_id
  # TODO: we could get this from the script that generates vm.inc instead of doing this song and dance
  `dwarfdump --name='YARVINSN_ujit_call_example' vm.o`.each_line do |line|
    # Capture only the digits; the surrounding parentheses are matched but not captured.
    id = line[/DW_AT_const_value\s\((\d+)\)/, 1]
    next unless id

    p [__method__, line] if $DEBUG
    return Integer(id, 10)
  end
  raise 'failed to find DW_AT_const_value for YARVINSN_ujit_call_example in the dwarfdump output for vm.o'
end
# Finds the file offset at which the content of vm.o starts.
#
# Runs `otool -l vm.o` (which lists the load commands) and returns the value
# of the first "fileoff <number>" entry in its output.
#
# Returns the offset as an Integer.
# Raises RuntimeError when the output has no fileoff entry.
def get_fileoff
  # use the load command to figure out the offset to the start of the content of vm.o
  `otool -l vm.o`.each_line do |line|
    fileoff = line[/fileoff (\d+)/, 1]
    next unless fileoff

    p [__method__, line] if $DEBUG
    return Integer(fileoff, 10)
  end
  raise 'failed to find a fileoff entry in the `otool -l` output for vm.o'
end
# Looks up the address `nm` reports for a symbol in vm.o.
#
# symbol - the symbol name to search for; a line matches when it starts with
#          a hexadecimal address and ends with this name (anything may sit in
#          between, e.g. the symbol type letter and a leading underscore).
#
# Returns the address from the first matching line, parsed as a hexadecimal
# Integer.
# Raises RuntimeError when no line of the nm output matches.
def get_symbol_offset(symbol)
  # Build the pattern once instead of once per line. It is anchored at the
  # start of the line so the address capture cannot land on hex-looking
  # letters inside the name of a symbol that has no address column.
  pattern = Regexp.compile('\A(\h+).+' + Regexp.escape(symbol) + '\Z')
  `nm vm.o`.each_line do |line|
    offset = line[pattern, 1]
    next unless offset

    p [__method__, line] if $DEBUG
    return Integer(offset, 16)
  end
  raise "failed to find symbol #{symbol} in the nm output for vm.o"
end
# Reads a signed 64-bit little-endian integer out of vm.o.
#
# offset - byte position in vm.o at which the 8-byte value starts.
#
# Returns the decoded Integer.
# Raises RuntimeError when fewer than 8 bytes are available at that offset.
def readint8b(offset)
  bytes = File.binread('vm.o', 8, offset)
  unless bytes && bytes.bytesize == 8
    raise "failed to read 8 bytes at offset #{offset} of vm.o"
  end

  # 'q<' forces little endian, so the result does not depend on the byte
  # order of the machine running this script.
  bytes.unpack('q<').first
end
# Disassembles the example handler in vm.o and writes its machine code bytes
# to ujit_examples.h.
#
# offset - start address handed to objdump; the 50 bytes from there on are
#          disassembled (Intel syntax).
#
# The disassembly is read up to and including the first `jmp`. That run of
# instructions is validated (short enough, no rip references, only mov / jmp /
# lea / call, a jmp encoded in at most 4 bytes, exactly one call) and then
# split around the call: the bytes of the instructions before it become
# ujit_pre_call_bytes and the bytes of the instructions after it become
# ujit_post_call_bytes. The bytes of the call itself are not emitted.
#
# Prints the command and the selected disassembly to stdout.
# Raises RuntimeError when any of the validations above fails.
def disassemble(offset)
  command = "objdump --x86-asm-syntax=intel --start-address=#{offset} --stop-address=#{offset + 50} -d vm.o"
  puts "Running: #{command}"
  puts "feel free to verify with --reloc"
  disassembly = `#{command}`
  puts disassembly if $DEBUG

  # Each entry is [byte string, mnemonic, full stripped line].
  instructions = []
  disassembly.each_line do |line|
    line = line.strip
    match_data = /\h+: ((?:\h\h\s?)+)\s+(\w+)/.match(line)
    if match_data
      mnemonic = match_data[2]
      instructions << [match_data[1], mnemonic, line]
      break if mnemonic == 'jmp'
    elsif !instructions.empty?
      # Header lines before the first instruction are skipped, but once the
      # listing has started every line must be an instruction.
      p line
      raise "expected a continuous sequence of disassembly lines"
    end
  end

  jmp_idx = instructions.find_index { |_, mnemonic, _| mnemonic == 'jmp' }
  raise 'failed to find jmp' unless jmp_idx
  raise 'generated code for example too long' unless jmp_idx < 10
  handler_instructions = instructions[0..jmp_idx]

  puts "Disassembly for the example handler:"
  puts handler_instructions.map { |_, _, line| line }

  if handler_instructions.any? { |_, _, full_line| full_line.downcase.include?('rip') }
    raise 'rip reference in example makes copying unsafe'
  end

  acceptable_mnemonics = %w(mov jmp lea call)
  # When several instructions are unrecognized, the last one is reported.
  unrecognized = handler_instructions.reject { |_, mnemonic, _| acceptable_mnemonics.include?(mnemonic) }.last
  raise "found an unrecognized \"#{unrecognized[1]}\" instruction in the example. List of recognized instructions: #{acceptable_mnemonics.join(', ')}" if unrecognized
  raise 'found multiple jmp instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'jmp' } > 1
  raise "the jmp instruction seems to be relative which isn't copiable" if instructions[jmp_idx][0].split.size > 4
  raise 'found multiple call instructions' if handler_instructions.count { |_, mnemonic, _| mnemonic == 'call' } > 1

  call_idx = handler_instructions.find_index { |_, mnemonic, _| mnemonic == 'call' }
  # Without this check a missing call would surface as an unrelated TypeError below.
  raise 'failed to find call' unless call_idx

  pre_call_bytes = handler_instructions.take(call_idx).flat_map { |bytes, _, _| bytes.split }
  post_call_bytes = handler_instructions.drop(call_idx + 1).flat_map { |bytes, _, _| bytes.split }

  File.write("ujit_examples.h", <<-EOF)
static const uint8_t ujit_pre_call_bytes[] = { #{pre_call_bytes.map{ |byte| '0x'+byte}.join(', ')} };
static const uint8_t ujit_post_call_bytes[] = { #{post_call_bytes.map{ |byte| '0x'+byte}.join(', ')} };
  EOF

  if $DEBUG
    puts "file:"
    puts File.binread("ujit_examples.h")
  end
end
# Script body: locate the example instruction's handler inside vm.o and hand
# its address to disassemble, which writes ujit_examples.h.
instruction_id = get_example_instruction_id
fileoff = get_fileoff
tc_table_offset = get_symbol_offset('vm_exec_core.insns_address_table')
vm_exec_core_offset = get_symbol_offset('vm_exec_core')

if $DEBUG
  p instruction_id
  p fileoff
  p tc_table_offset.to_s(16)
end

# Position in the vm.o file of the table slot for the example instruction;
# slots are 8 bytes apart, matching the 8 bytes readint8b reads.
offset_to_insn_in_tc_table = fileoff + tc_table_offset + 8 * instruction_id
p offset_to_insn_in_tc_table if $DEBUG

# The value stored in the slot is added to the address of vm_exec_core to get
# the start address for the disassembly.
offset_to_handler_code_from_vm_exec_core = readint8b(offset_to_insn_in_tc_table)
p offset_to_handler_code_from_vm_exec_core if $DEBUG

disassemble(vm_exec_core_offset + offset_to_handler_code_from_vm_exec_core)