1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/tool/mjit/bindgen.rb
Jemma Issroff d594a5a8bd
This commit implements the Object Shapes technique in CRuby.
Object Shapes is used for accessing instance variables and representing the
"frozenness" of objects.  Object instances have a "shape" and the shape
represents some attributes of the object (currently which instance variables are
set and the "frozenness").  Shapes form a tree data structure, and when a new
instance variable is set on an object, that object "transitions" to a new shape
in the shape tree.  Each shape has an ID that is used for caching. The shape
structure is independent of class, so objects of different types can have the
same shape.

For example:

```ruby
class Foo
  def initialize
    # Starts with shape id 0
    @a = 1 # transitions to shape id 1
    @b = 1 # transitions to shape id 2
  end
end

class Bar
  def initialize
    # Starts with shape id 0
    @a = 1 # transitions to shape id 1
    @b = 1 # transitions to shape id 2
  end
end

foo = Foo.new # `foo` has shape id 2
bar = Bar.new # `bar` has shape id 2
```

Both `foo` and `bar` instances have the same shape because they both set
instance variables of the same name in the same order.

This technique can help to improve inline cache hits as well as generate more
efficient machine code in JIT compilers.

This commit also adds some methods for debugging shapes on objects.  See
`RubyVM::Shape` for more details.

For more context on Object Shapes, see [Feature: #18776]

Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org>
Co-Authored-By: Eileen M. Uchitelle <eileencodes@gmail.com>
Co-Authored-By: John Hawthorn <john@hawthorn.email>
2022-09-28 08:26:21 -07:00

404 lines
12 KiB
Ruby
Executable file

#!/usr/bin/env ruby
# frozen_string_literal: true
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('./Gemfile', __dir__)
require 'bundler/setup'
require 'etc'
require 'fiddle/import'
require 'set'
unless build_dir = ARGV.first
abort "Usage: #{$0} BUILD_DIR"
end
# Help ffi-clang find libclang
# Hint: apt install libclang1
ENV['LIBCLANG'] ||= Dir.glob("/lib/#{RUBY_PLATFORM}-gnu/libclang-*.so*").grep_v(/-cpp/).sort.last
require 'ffi/clang'
class Node < Struct.new(
:kind,
:spelling,
:type,
:typedef_type,
:bitwidth,
:sizeof_type,
:offsetof,
:enum_value,
:children,
keyword_init: true,
)
end
# Parse a C header with ffi-clang and return Node objects.
# To ease the maintenance, ffi-clang should be used only inside this class.
class HeaderParser
def initialize(header, cflags:)
@translation_unit = FFI::Clang::Index.new.parse_translation_unit(header, cflags, [], {})
end
def parse
parse_children(@translation_unit.cursor)
end
private
def parse_children(cursor)
children = []
cursor.visit_children do |cursor, _parent|
children << parse_cursor(cursor)
next :continue
end
children
end
def parse_cursor(cursor)
unless cursor.kind.start_with?('cursor_')
raise "unexpected cursor kind: #{cursor.kind}"
end
kind = cursor.kind.to_s.delete_prefix('cursor_').to_sym
children = parse_children(cursor)
offsetof = {}
if kind == :struct
children.select { |c| c.kind == :field_decl }.each do |child|
offsetof[child.spelling] = cursor.type.offsetof(child.spelling)
end
end
sizeof_type = nil
if %i[struct union].include?(kind)
sizeof_type = cursor.type.sizeof
end
enum_value = nil
if kind == :enum_constant_decl
enum_value = cursor.enum_value
end
Node.new(
kind: kind,
spelling: cursor.spelling,
type: cursor.type.spelling,
typedef_type: cursor.typedef_type.spelling,
bitwidth: cursor.bitwidth,
sizeof_type: sizeof_type,
offsetof: offsetof,
enum_value: enum_value,
children: children,
)
end
end
# Convert Node objects to a Ruby binding source.
class BindingGenerator
BINDGEN_BEG = '### MJIT bindgen begin ###'
BINDGEN_END = '### MJIT bindgen end ###'
DEFAULTS = { '_Bool' => 'CType::Bool.new' }
DEFAULTS.default_proc = proc { |_h, k| "CType::Stub.new(:#{k})" }
attr_reader :src
# @param src_path [String]
# @param uses [Array<String>]
# @param values [Hash{ Symbol => Array<String> }]
# @param types [Array<String>]
# @param dynamic_types [Array<String>] #ifdef-dependent immediate types, which need Primitive.cexpr! for type detection
# @param skip_fields [Hash{ Symbol => Array<String> }] Struct fields that are skipped from bindgen
# @param ruby_fields [Hash{ Symbol => Array<String> }] Struct VALUE fields that are considered Ruby objects
def initialize(src_path:, uses:, values:, types:, dynamic_types:, skip_fields:, ruby_fields:)
@preamble, @postamble = split_ambles(src_path)
@src = String.new
@uses = uses.sort
@values = values.transform_values(&:sort)
@types = types.sort
@dynamic_types = dynamic_types.sort
@skip_fields = skip_fields.transform_keys(&:to_s)
@ruby_fields = ruby_fields.transform_keys(&:to_s)
@references = Set.new
end
def generate(nodes)
println @preamble
# Define USE_* macros
@uses.each do |use|
println " def C.#{use}"
println " Primitive.cexpr! %q{ RBOOL(#{use} != 0) }"
println " end"
println
end
# Define macros/enums
@values.each do |type, values|
values.each do |value|
println " def C.#{value}"
println " Primitive.cexpr! %q{ #{type}2NUM(#{value}) }"
println " end"
println
end
end
# TODO: Support nested declarations
nodes_index = nodes.group_by(&:spelling).transform_values(&:last)
# Define types
@types.each do |type|
unless definition = generate_node(nodes_index[type])
raise "Failed to generate type: #{type}"
end
println " def C.#{type}"
println "@#{type} ||= #{definition}".gsub(/^/, " ").chomp
println " end"
println
end
# Define dynamic types
@dynamic_types.each do |type|
unless generate_node(nodes_index[type])&.start_with?('CType::Immediate')
raise "Non-immediate type is given to dynamic_types: #{type}"
end
println " def C.#{type}"
println " @#{type} ||= CType::Immediate.find(Primitive.cexpr!(\"SIZEOF(#{type})\"), Primitive.cexpr!(\"SIGNED_TYPE_P(#{type})\"))"
println " end"
println
end
# Leave a stub for types that are referenced but not targeted
(@references - @types - @dynamic_types).each do |type|
println " def C.#{type}"
println " #{DEFAULTS[type]}"
println " end"
println
end
print @postamble
end
private
# Return code before BINDGEN_BEG and code after BINDGEN_END
def split_ambles(src_path)
lines = File.read(src_path).lines
preamble_end = lines.index { |l| l.include?(BINDGEN_BEG) }
raise "`#{BINDGEN_BEG}` was not found in '#{src_path}'" if preamble_end.nil?
postamble_beg = lines.index { |l| l.include?(BINDGEN_END) }
raise "`#{BINDGEN_END}` was not found in '#{src_path}'" if postamble_beg.nil?
raise "`#{BINDGEN_BEG}` was found after `#{BINDGEN_END}`" if preamble_end >= postamble_beg
return lines[0..preamble_end].join, lines[postamble_beg..-1].join
end
# Generate code from a node. Used for constructing a complex nested node.
# @param node [Node]
def generate_node(node, sizeof_type: nil)
case node&.kind
when :struct, :union
# node.spelling is often empty for union, but we'd like to give it a name when it has one.
buf = +"CType::#{node.kind.to_s.sub(/\A[a-z]/, &:upcase)}.new(\n"
buf << " \"#{node.spelling}\", Primitive.cexpr!(\"SIZEOF(#{sizeof_type || node.type})\"),\n"
bit_fields_end = node.children.index { |c| c.bitwidth == -1 } || node.children.size # first non-bit field index
node.children.each_with_index do |child, i|
skip_type = sizeof_type&.gsub(/\(\(struct ([^\)]+) \*\)NULL\)->/, '\1.') || node.spelling
next if @skip_fields.fetch(skip_type, []).include?(child.spelling)
field_builder = proc do |field, type|
if node.kind == :struct
to_ruby = @ruby_fields.fetch(node.spelling, []).include?(field)
if child.bitwidth > 0
if bit_fields_end <= i # give up offsetof calculation for non-leading bit fields
raise "non-leading bit fields are not supported. consider including '#{field}' in skip_fields."
end
offsetof = node.offsetof.fetch(field)
else
off_type = sizeof_type || "(*((#{node.type} *)NULL))"
offsetof = "Primitive.cexpr!(\"OFFSETOF(#{off_type}, #{field})\")"
end
" #{field}: [#{type}, #{offsetof}#{', true' if to_ruby}],\n"
else
" #{field}: #{type},\n"
end
end
case child
# BitField is struct-specific. So it must be handled here.
in Node[kind: :field_decl, spelling:, bitwidth:, children: [_grandchild]] if bitwidth > 0
buf << field_builder.call(spelling, "CType::BitField.new(#{bitwidth}, #{node.offsetof.fetch(spelling) % 8})")
# "(unnamed ...)" struct and union are handled here, which are also struct-specific.
in Node[kind: :field_decl, spelling:, type:, children: [grandchild]] if type.match?(/\((unnamed|anonymous) [^)]+\)\z/)
if sizeof_type
child_type = "#{sizeof_type}.#{child.spelling}"
else
child_type = "((#{node.type} *)NULL)->#{child.spelling}"
end
buf << field_builder.call(spelling, generate_node(grandchild, sizeof_type: child_type).gsub(/^/, ' ').sub(/\A +/, ''))
# In most cases, we'd like to let generate_type handle the type unless it's "(unnamed ...)".
in Node[kind: :field_decl, spelling:, type:] if !type.empty?
buf << field_builder.call(spelling, generate_type(type))
else # forward declarations are ignored
end
end
buf << ")"
when :typedef_decl
case node.children
in [child]
generate_node(child)
in [child, Node[kind: :integer_literal]]
generate_node(child)
in _ unless node.typedef_type.empty?
generate_type(node.typedef_type)
end
when :enum_decl
generate_type('int')
when :type_ref
generate_type(node.spelling)
end
end
# Generate code from a type name. Used for resolving the name of a simple leaf node.
# @param type [String]
def generate_type(type)
if type.match?(/\[\d+\]\z/)
return "CType::Pointer.new { #{generate_type(type.sub!(/\[\d+\]\z/, ''))} }"
end
type = type.delete_suffix('const')
if type.end_with?('*')
return "CType::Pointer.new { #{generate_type(type.delete_suffix('*').rstrip)} }"
end
type = type.gsub(/((const|volatile) )+/, '').rstrip
if type.start_with?(/(struct|union|enum) /)
target = type.split(' ', 2).last
push_target(target)
"self.#{target}"
else
begin
ctype = Fiddle::Importer.parse_ctype(type)
rescue Fiddle::DLError
push_target(type)
"self.#{type}"
else
# Convert any function pointers to void* to workaround FILE* vs int*
if ctype == Fiddle::TYPE_VOIDP
"CType::Immediate.parse(\"void *\")"
else
"CType::Immediate.parse(#{type.dump})"
end
end
end
end
def print(str)
@src << str
end
def println(str = "")
@src << str << "\n"
end
def chomp
@src.delete_suffix!("\n")
end
def rstrip!
@src.rstrip!
end
def push_target(target)
unless target.match?(/\A\w+\z/)
raise "invalid target: #{target}"
end
@references << target
end
end
src_dir = File.expand_path('../..', __dir__)
src_path = File.join(src_dir, 'mjit_c.rb')
build_dir = File.expand_path(build_dir)
cflags = [
src_dir,
build_dir,
File.join(src_dir, 'include'),
File.join(build_dir, ".ext/include/#{RUBY_PLATFORM}"),
].map { |dir| "-I#{dir}" }
nodes = HeaderParser.new(File.join(src_dir, 'mjit_compiler.h'), cflags: cflags).parse
generator = BindingGenerator.new(
src_path: src_path,
uses: %w[
USE_LAZY_LOAD
USE_RVARGC
],
values: {
INT: %w[
NOT_COMPILED_STACK_SIZE
VM_CALL_KW_SPLAT
VM_CALL_KW_SPLAT_bit
VM_CALL_TAILCALL
VM_CALL_TAILCALL_bit
VM_METHOD_TYPE_CFUNC
VM_METHOD_TYPE_ISEQ
],
ULONG: %w[
INVALID_SHAPE_ID
SHAPE_MASK
],
},
types: %w[
CALL_DATA
IC
IVC
RB_BUILTIN
attr_index_t
compile_branch
compile_status
inlined_call_context
iseq_inline_constant_cache
iseq_inline_constant_cache_entry
iseq_inline_iv_cache_entry
iseq_inline_storage_entry
mjit_options
rb_builtin_function
rb_call_data
rb_callable_method_entry_struct
rb_callcache
rb_callinfo
rb_control_frame_t
rb_cref_t
rb_execution_context_struct
rb_execution_context_t
rb_iseq_constant_body
rb_iseq_location_t
rb_iseq_struct
rb_iseq_t
rb_iv_index_tbl_entry
rb_method_definition_struct
rb_method_iseq_t
rb_method_type_t
rb_mjit_compile_info
rb_mjit_unit
rb_serial_t
],
dynamic_types: %w[
VALUE
shape_id_t
],
skip_fields: {
'rb_execution_context_struct.machine': %w[regs], # differs between macOS and Linux
rb_execution_context_struct: %w[method_missing_reason], # non-leading bit fields not supported
rb_iseq_constant_body: %w[yjit_payload], # conditionally defined
},
ruby_fields: {
rb_iseq_location_struct: %w[
base_label
first_lineno
label
pathobj
]
},
)
generator.generate(nodes)
File.write(src_path, generator.src)