1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/tool/ruby_vm/models/bare_instructions.rb

241 lines
6.9 KiB
Ruby
Raw Normal View History

#! /your/favourite/path/to/ruby
# -*- Ruby -*-
# -*- frozen_string_literal: true; -*-
# -*- warn_indent: true; -*-
#
# Copyright (c) 2017 Urabe, Shyouhei. All rights reserved.
#
# This file is a part of the programming language Ruby. Permission is hereby
# granted, to either redistribute and/or modify this file, provided that the
# conditions mentioned in the file COPYING are met. Consult the file for
# details.
require_relative '../loaders/insns_def'
require_relative 'c_expr'
require_relative 'typemap'
require_relative 'attribute'
class RubyVM::BareInstructions
attr_reader :template, :name, :opes, :pops, :rets, :decls, :expr
def initialize opts = {}
@template = opts[:template]
@name = opts[:name]
@loc = opts[:location]
@sig = opts[:signature]
@expr = RubyVM::CExpr.new opts[:expr]
@opes = typesplit @sig[:ope]
@pops = typesplit @sig[:pop].reject {|i| i == '...' }
@rets = typesplit @sig[:ret].reject {|i| i == '...' }
@attrs = opts[:attributes].map {|i|
RubyVM::Attribute.new i.merge(:insn => self)
}.each_with_object({}) {|a, h|
h[a.key] = a
}
@attrs_orig = @attrs.dup
check_attribute_consistency
move ADD_PC around to optimize PC manipluiations This commit introduces new attribute handles_flame and if that is _not_ the case, places ADD_PC right after INC_SP. This improves locality of PC manipulations to prevents unnecessary register spill- outs. As a result, it reduces the size of vm_exec_core from 32,688 bytes to 32,384 bytes on my machine. Speedup is very faint, but certain. ----------------------------------------------------------- benchmark results: minimum results in each 3 measurements. Execution time (sec) name before after so_ackermann 0.476 0.464 so_array 0.742 0.728 so_binary_trees 5.493 5.466 so_concatenate 3.619 3.395 so_count_words 0.190 0.184 so_exception 0.249 0.239 so_fannkuch 0.994 0.953 so_fasta 1.369 1.374 so_k_nucleotide 1.111 1.111 so_lists 0.470 0.481 so_mandelbrot 2.059 2.050 so_matrix 0.466 0.465 so_meteor_contest 2.712 2.781 so_nbody 1.154 1.204 so_nested_loop 0.852 0.846 so_nsieve 1.636 1.623 so_nsieve_bits 2.073 2.039 so_object 0.616 0.584 so_partial_sums 1.464 1.481 so_pidigits 1.075 1.082 so_random 0.321 0.317 so_reverse_complement 0.555 0.558 so_sieve 0.495 0.490 so_spectralnorm 1.634 1.627 Speedup ratio: compare with the result of `before' (greater is better) name after so_ackermann 1.025 so_array 1.019 so_binary_trees 1.005 so_concatenate 1.066 so_count_words 1.030 so_exception 1.040 so_fannkuch 1.043 so_fasta 0.996 so_k_nucleotide 1.000 so_lists 0.978 so_mandelbrot 1.004 so_matrix 1.001 so_meteor_contest 0.975 so_nbody 0.959 so_nested_loop 1.007 so_nsieve 1.008 so_nsieve_bits 1.017 so_object 1.056 so_partial_sums 0.989 so_pidigits 0.994 so_random 1.014 so_reverse_complement 0.996 so_sieve 1.010 so_spectralnorm 1.004 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62051 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-01-26 01:30:58 -05:00
predefine_attributes
end
def pretty_name
n = @sig[:name]
o = @sig[:ope].map{|i| i[/\S+$/] }.join ', '
p = @sig[:pop].map{|i| i[/\S+$/] }.join ', '
r = @sig[:ret].map{|i| i[/\S+$/] }.join ', '
return sprintf "%s(%s)(%s)(%s)", n, o, p, r
end
def bin
return "BIN(#{name})"
end
def call_attribute name
return sprintf 'attr_%s_%s(%s)', name, @name, \
@opes.map {|i| i[:name] }.compact.join(', ')
end
def has_attribute? k
@attrs_orig.has_key? k
end
def attributes
return @attrs \
. sort_by {|k, _| k } \
. map {|_, v| v }
end
def width
return 1 + opes.size
end
def declarations
return @variables \
. values \
. group_by {|h| h[:type] } \
s/CALL_SIMPLE_METHOD/DISPATCH_ORIGINAL_INSN/ Now that DISPATCH_ORIGINAL_INSN is introduced, we can replace CALL_SIMPLE_METHOD with DISPATCH_ORIGINAL_INSN. These two macros differ in size very much and results in this big difference in compiled binary size. This changeset reduces the size of vm_exec_core from 32,352 bytes to 27,008 bytes on my machine. As a result it yields slightly better performance. Closes [GH-1779]. ----------------------------------------------------------- benchmark results: minimum results in each 3 measurements. Execution time (sec) name before after so_ackermann 0.484 0.454 so_array 0.837 0.779 so_binary_trees 5.928 5.801 so_concatenate 3.473 3.543 so_count_words 0.201 0.222 so_exception 0.255 0.252 so_fannkuch 1.080 1.019 so_fasta 1.459 1.463 so_k_nucleotide 1.218 1.180 so_lists 0.499 0.484 so_mandelbrot 2.189 2.324 so_matrix 0.510 0.496 so_meteor_contest 3.025 2.925 so_nbody 1.319 1.273 so_nested_loop 0.941 0.932 so_nsieve 1.806 1.647 so_nsieve_bits 2.151 2.078 so_object 0.632 0.621 so_partial_sums 1.560 1.632 so_pidigits 1.190 1.183 so_random 0.333 0.353 so_reverse_complement 0.604 0.586 so_sieve 0.521 0.481 so_spectralnorm 1.774 1.722 Speedup ratio: compare with the result of `before' (greater is better) name after so_ackermann 1.065 so_array 1.075 so_binary_trees 1.022 so_concatenate 0.980 so_count_words 0.903 so_exception 1.009 so_fannkuch 1.059 so_fasta 0.997 so_k_nucleotide 1.032 so_lists 1.032 so_mandelbrot 0.942 so_matrix 1.028 so_meteor_contest 1.034 so_nbody 1.036 so_nested_loop 1.009 so_nsieve 1.097 so_nsieve_bits 1.035 so_object 1.018 so_partial_sums 0.956 so_pidigits 1.006 so_random 0.943 so_reverse_complement 1.032 so_sieve 1.083 so_spectralnorm 1.030 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62088 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-01-29 02:04:50 -05:00
. sort_by {|t, v| t } \
. map {|t, v| [t, v.map {|i| i[:name] }.sort ] } \
s/CALL_SIMPLE_METHOD/DISPATCH_ORIGINAL_INSN/ Now that DISPATCH_ORIGINAL_INSN is introduced, we can replace CALL_SIMPLE_METHOD with DISPATCH_ORIGINAL_INSN. These two macros differ in size very much and results in this big difference in compiled binary size. This changeset reduces the size of vm_exec_core from 32,352 bytes to 27,008 bytes on my machine. As a result it yields slightly better performance. Closes [GH-1779]. ----------------------------------------------------------- benchmark results: minimum results in each 3 measurements. Execution time (sec) name before after so_ackermann 0.484 0.454 so_array 0.837 0.779 so_binary_trees 5.928 5.801 so_concatenate 3.473 3.543 so_count_words 0.201 0.222 so_exception 0.255 0.252 so_fannkuch 1.080 1.019 so_fasta 1.459 1.463 so_k_nucleotide 1.218 1.180 so_lists 0.499 0.484 so_mandelbrot 2.189 2.324 so_matrix 0.510 0.496 so_meteor_contest 3.025 2.925 so_nbody 1.319 1.273 so_nested_loop 0.941 0.932 so_nsieve 1.806 1.647 so_nsieve_bits 2.151 2.078 so_object 0.632 0.621 so_partial_sums 1.560 1.632 so_pidigits 1.190 1.183 so_random 0.333 0.353 so_reverse_complement 0.604 0.586 so_sieve 0.521 0.481 so_spectralnorm 1.774 1.722 Speedup ratio: compare with the result of `before' (greater is better) name after so_ackermann 1.065 so_array 1.075 so_binary_trees 1.022 so_concatenate 0.980 so_count_words 0.903 so_exception 1.009 so_fannkuch 1.059 so_fasta 0.997 so_k_nucleotide 1.032 so_lists 1.032 so_mandelbrot 0.942 so_matrix 1.028 so_meteor_contest 1.034 so_nbody 1.036 so_nested_loop 1.009 so_nsieve 1.097 so_nsieve_bits 1.035 so_object 1.018 so_partial_sums 0.956 so_pidigits 1.006 so_random 0.943 so_reverse_complement 1.032 so_sieve 1.083 so_spectralnorm 1.030 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62088 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-01-29 02:04:50 -05:00
. map {|t, v|
sprintf("MAYBE_UNUSED(%s) %s", t, v.join(', '))
}
end
def preamble
# preamble makes sense for operand unifications
return []
end
def sc?
# sc stands for stack caching.
return false
end
def cast_to_VALUE var, expr = var[:name]
RubyVM::Typemap.typecast_to_VALUE var[:type], expr
end
def cast_from_VALUE var, expr = var[:name]
RubyVM::Typemap.typecast_from_VALUE var[:type], expr
end
def operands_info
opes.map {|o|
c, _ = RubyVM::Typemap.fetch o[:type]
next c
}.join
end
def handles_sp?
/\b(false|0)\b/ !~ @attrs.fetch('handles_sp').expr.expr
mjit_compile.c: reduce sp motion on JIT This retries r62655, which was reverted at r63863 for r63763. tool/ruby_vm/views/_mjit_compile_insn.erb: revert the revert. tool/ruby_vm/views/_mjit_compile_insn_body.erb: ditto. tool/ruby_vm/views/_mjit_compile_pc_and_sp.erb: ditto. tool/ruby_vm/views/_mjit_compile_send.erb: ditto. tool/ruby_vm/views/mjit_compile.inc.erb: ditto. tool/ruby_vm/views/_insn_entry.erb: revert half of r63763. The commit was originally reverted since changing pc motion was bad for tracing, but changing sp motion was totally fine. For JIT, I wanna resurrect the sp motion change in r62051. tool/ruby_vm/models/bare_instructions.rb: ditto. insns.def: ditto. vm_insnhelper.c: ditto. vm_insnhelper.h: ditto. * benchmark $ benchmark-driver benchmark.yml --rbenv 'before;after;before --jit;after --jit' --repeat-count 12 -v before: ruby 2.6.0dev (2018-07-19 trunk 63998) [x86_64-linux] after: ruby 2.6.0dev (2018-07-19 add-sp 63998) [x86_64-linux] last_commit=mjit_compile.c: reduce sp motion on JIT before --jit: ruby 2.6.0dev (2018-07-19 trunk 63998) +JIT [x86_64-linux] after --jit: ruby 2.6.0dev (2018-07-19 add-sp 63998) +JIT [x86_64-linux] last_commit=mjit_compile.c: reduce sp motion on JIT Calculating ------------------------------------- before after before --jit after --jit Optcarrot Lan_Master.nes 51.354 50.238 70.010 72.139 fps Comparison: Optcarrot Lan_Master.nes after --jit: 72.1 fps before --jit: 70.0 fps - 1.03x slower before: 51.4 fps - 1.40x slower after: 50.2 fps - 1.44x slower git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63999 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-07-19 09:25:22 -04:00
end
def always_leaf?
@attrs.fetch('leaf').expr.expr == 'true;'
end
def leaf_without_check_ints?
@attrs.fetch('leaf').expr.expr == 'leafness_of_check_ints;'
end
def handle_canary stmt
# Stack canary is basically a good thing that we want to add, however:
#
# - When the instruction returns variadic number of return values,
# it is not easy to tell where is the stack top. We can't but
# skip it.
#
# - When the instruction body is empty (like putobject), we can
# say for 100% sure that canary is a waste of time.
#
# So we skip canary for those cases.
return '' if @sig[:ret].any? {|i| i == '...' }
return '' if @expr.blank?
return " #{stmt};\n"
end
def inspect
sprintf "#<%s %s@%s:%d>", self.class.name, @name, @loc[0], @loc[1]
end
def has_ope? var
return @opes.any? {|i| i[:name] == var[:name] }
end
def has_pop? var
return @pops.any? {|i| i[:name] == var[:name] }
end
def use_call_data?
@use_call_data ||=
@variables.find { |_, var_info| var_info[:type] == 'CALL_DATA' }
end
private
def check_attribute_consistency
if has_attribute?('sp_inc') \
&& use_call_data? \
&& !has_attribute?('comptime_sp_inc')
# As the call cache caches information that can only be obtained at
# runtime, we do not need it when compiling from AST to bytecode. This
# attribute defines an expression that computes the stack pointer
# increase based on just the call info to avoid reserving space for the
# call cache at compile time. In the expression, all call data operands
# are mapped to their call info counterpart. Additionally, all mentions
# of `cd` in the operand name are replaced with `ci`.
raise "Please define attribute `comptime_sp_inc` for `#{@name}`"
end
end
def generate_attribute t, k, v
@attrs[k] ||= RubyVM::Attribute.new \
insn: self, \
name: k, \
type: t, \
location: [], \
expr: v.to_s + ';'
move ADD_PC around to optimize PC manipluiations This commit introduces new attribute handles_flame and if that is _not_ the case, places ADD_PC right after INC_SP. This improves locality of PC manipulations to prevents unnecessary register spill- outs. As a result, it reduces the size of vm_exec_core from 32,688 bytes to 32,384 bytes on my machine. Speedup is very faint, but certain. ----------------------------------------------------------- benchmark results: minimum results in each 3 measurements. Execution time (sec) name before after so_ackermann 0.476 0.464 so_array 0.742 0.728 so_binary_trees 5.493 5.466 so_concatenate 3.619 3.395 so_count_words 0.190 0.184 so_exception 0.249 0.239 so_fannkuch 0.994 0.953 so_fasta 1.369 1.374 so_k_nucleotide 1.111 1.111 so_lists 0.470 0.481 so_mandelbrot 2.059 2.050 so_matrix 0.466 0.465 so_meteor_contest 2.712 2.781 so_nbody 1.154 1.204 so_nested_loop 0.852 0.846 so_nsieve 1.636 1.623 so_nsieve_bits 2.073 2.039 so_object 0.616 0.584 so_partial_sums 1.464 1.481 so_pidigits 1.075 1.082 so_random 0.321 0.317 so_reverse_complement 0.555 0.558 so_sieve 0.495 0.490 so_spectralnorm 1.634 1.627 Speedup ratio: compare with the result of `before' (greater is better) name after so_ackermann 1.025 so_array 1.019 so_binary_trees 1.005 so_concatenate 1.066 so_count_words 1.030 so_exception 1.040 so_fannkuch 1.043 so_fasta 0.996 so_k_nucleotide 1.000 so_lists 0.978 so_mandelbrot 1.004 so_matrix 1.001 so_meteor_contest 0.975 so_nbody 0.959 so_nested_loop 1.007 so_nsieve 1.008 so_nsieve_bits 1.017 so_object 1.056 so_partial_sums 0.989 so_pidigits 0.994 so_random 1.014 so_reverse_complement 0.996 so_sieve 1.010 so_spectralnorm 1.004 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62051 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2018-01-26 01:30:58 -05:00
return @attrs[k] ||= attr
end
def predefine_attributes
# Beware: order matters here because some attribute depends another.
generate_attribute 'const char*', 'name', "insn_name(#{bin})"
generate_attribute 'enum ruby_vminsn_type', 'bin', bin
generate_attribute 'rb_num_t', 'open', opes.size
generate_attribute 'rb_num_t', 'popn', pops.size
generate_attribute 'rb_num_t', 'retn', rets.size
generate_attribute 'rb_num_t', 'width', width
generate_attribute 'rb_snum_t', 'sp_inc', rets.size - pops.size
generate_attribute 'bool', 'handles_sp', default_definition_of_handles_sp
generate_attribute 'bool', 'leaf', default_definition_of_leaf
end
def default_definition_of_handles_sp
# Insn with ISEQ should yield it; can handle sp.
return opes.any? {|o| o[:type] == 'ISEQ' }
end
def default_definition_of_leaf
# Insn that handles SP can never be a leaf.
if not has_attribute? 'handles_sp' then
return ! default_definition_of_handles_sp
elsif handles_sp? then
return "! #{call_attribute 'handles_sp'}"
else
return true
end
end
def typesplit a
@variables ||= {}
a.map do |decl|
md = %r'
(?<comment> /[*] [^*]* [*]+ (?: [^*/] [^*]* [*]+ )* / ){0}
(?<ws> \g<comment> | \s ){0}
(?<ident> [_a-zA-Z] [0-9_a-zA-Z]* ){0}
(?<type> (?: \g<ident> \g<ws>+ )* \g<ident> ){0}
(?<var> \g<ident> ){0}
\G \g<ws>* \g<type> \g<ws>+ \g<var>
'x.match(decl)
@variables[md['var']] ||= {
decl: decl,
type: md['type'],
name: md['var'],
}
end
end
@instances = RubyVM::InsnsDef.map {|h|
new h.merge(:template => h)
}
def self.fetch name
@instances.find do |insn|
insn.name == name
end or raise IndexError, "instruction not found: #{name}"
end
def self.to_a
@instances
end
end