1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/tool/mjit/bindgen.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

399 lines
12 KiB
Ruby
Raw Normal View History

2022-09-05 00:53:46 -04:00
#!/usr/bin/env ruby
# frozen_string_literal: true
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('./Gemfile', __dir__)
require 'bundler/setup'
2022-09-05 00:53:46 -04:00
require 'etc'
require 'fiddle/import'
require 'set'
2022-09-05 04:06:37 -04:00
# The build directory is the first command-line argument; without it the
# script prints the usage line and exits.
build_dir = ARGV.first
abort "Usage: #{$0} BUILD_DIR" unless build_dir
2022-09-05 00:53:46 -04:00
# Help ffi-clang find libclang
# Hint: apt install libclang1
#
# An explicitly exported LIBCLANG always wins. Otherwise candidates are ranked
# by the integers embedded in their paths, so that versions compare numerically
# (libclang-14 beats libclang-9); a plain string sort would rank "9" above "14".
# The "-cpp" variants are excluded. When nothing matches, the value is nil and
# LIBCLANG stays unset.
ENV['LIBCLANG'] ||= Dir.glob("/lib/#{RUBY_PLATFORM}-gnu/libclang-*.so*")
                       .grep_v(/-cpp/)
                       .max_by { |path| path.scan(/\d+/).map(&:to_i) }
2022-09-05 00:53:46 -04:00
require 'ffi/clang'
# Plain-data snapshot of one parsed declaration, built by HeaderParser and
# consumed by BindingGenerator. All members are passed by keyword.
#
# Members:
# * kind         - Symbol: the cursor kind with its "cursor_" prefix removed
#                  (e.g. :struct, :union, :field_decl, :typedef_decl)
# * spelling     - name of the declaration
# * type         - spelling of the declaration's type
# * typedef_type - spelling of the underlying typedef type
# * bitwidth     - bit-field width as reported by the parser; consumers treat
#                  values > 0 as bit fields and -1 as "not a bit field"
# * sizeof_type  - size of the type; only filled in for struct and union nodes
# * offsetof     - Hash of field name => offset; only populated for struct nodes
# * enum_value   - value of the constant; only filled in for :enum_constant_decl
# * children     - Array<Node> of nested declarations
#
# The struct is assigned to the constant directly rather than subclassed, which
# avoids an extra anonymous class in the ancestor chain.
Node = Struct.new(
  :kind,
  :spelling,
  :type,
  :typedef_type,
  :bitwidth,
  :sizeof_type,
  :offsetof,
  :enum_value,
  :children,
  keyword_init: true,
)
# Parse a C header with ffi-clang and return Node objects.
# To ease the maintenance, ffi-clang should be used only inside this class.
class HeaderParser
  # @param header [String] path of the header handed to libclang
  # @param cflags [Array<String>] compiler arguments used while parsing
  def initialize(header, cflags:)
    index = FFI::Clang::Index.new
    @translation_unit = index.parse_translation_unit(header, cflags, [], {})
  end

  # @return [Array<Node>] one Node per child of the translation unit's root cursor
  def parse
    parse_children(@translation_unit.cursor)
  end

  private

  # Convert every direct child of +cursor+ into a Node, in visiting order.
  # @return [Array<Node>]
  def parse_children(cursor)
    nodes = []
    cursor.visit_children do |child_cursor, _parent|
      nodes << parse_cursor(child_cursor)
      # Only step over siblings here; descendants are handled by the recursion
      # in parse_cursor.
      next :continue
    end
    nodes
  end

  # Convert a single cursor (and, recursively, its children) into a Node.
  # @raise [RuntimeError] when the cursor kind does not start with "cursor_"
  # @return [Node]
  def parse_cursor(cursor)
    raw_kind = cursor.kind
    raise "unexpected cursor kind: #{raw_kind}" unless raw_kind.start_with?('cursor_')

    kind = raw_kind.to_s.delete_prefix('cursor_').to_sym
    children = parse_children(cursor)

    # Field offsets are only collected for structs.
    field_offsets = {}
    if kind == :struct
      children.each_with_object(field_offsets) do |child, offsets|
        next unless child.kind == :field_decl
        offsets[child.spelling] = cursor.type.offsetof(child.spelling)
      end
    end

    # Sizes are only recorded for aggregate types.
    size = %i[struct union].include?(kind) ? cursor.type.sizeof : nil
    constant_value = kind == :enum_constant_decl ? cursor.enum_value : nil

    Node.new(
      kind: kind,
      spelling: cursor.spelling,
      type: cursor.type.spelling,
      typedef_type: cursor.typedef_type.spelling,
      bitwidth: cursor.bitwidth,
      sizeof_type: size,
      offsetof: field_offsets,
      enum_value: constant_value,
      children: children,
    )
  end
end
# Convert Node objects to a Ruby binding source.
#
# The generator keeps everything outside the BINDGEN_BEG..BINDGEN_END region of
# the file at +src_path+ and regenerates what lies between the two markers.
# The result is accumulated in #src; writing it back is the caller's job.
class BindingGenerator
  # Marker lines delimiting the generated region of the target file.
  BINDGEN_BEG = '### MJIT bindgen begin ###'
  BINDGEN_END = '### MJIT bindgen end ###'

  # Expression emitted for a type that is referenced but not targeted.
  # Unknown names fall back to a CType::Stub carrying the type name.
  DEFAULTS = { '_Bool' => 'CType::Bool.new' }
  DEFAULTS.default_proc = proc { |_h, k| "CType::Stub.new(:#{k})" }

  # @return [String] the source generated so far
  attr_reader :src

  # @param src_path [String]
  # @param uses [Array<String>]
  # @param values [Hash{ Symbol => Array<String> }]
  # @param types [Array<String>]
  # @param dynamic_types [Array<String>] #ifdef-dependent immediate types, which need Primitive.cexpr! for type detection
  # @param skip_fields [Hash{ Symbol => Array<String> }] Struct fields that are skipped from bindgen
  # @param ruby_fields [Hash{ Symbol => Array<String> }] Struct VALUE fields that are considered Ruby objects
  # @raise [RuntimeError] when the markers are missing from, or misordered in, +src_path+
  def initialize(src_path:, uses:, values:, types:, dynamic_types:, skip_fields:, ruby_fields:)
    @preamble, @postamble = split_ambles(src_path)
    @src = String.new
    @uses = uses.sort
    @values = values.transform_values(&:sort)
    @types = types.sort
    @dynamic_types = dynamic_types.sort
    # Node spellings are Strings, so the lookup tables are keyed by String too.
    @skip_fields = skip_fields.transform_keys(&:to_s)
    @ruby_fields = ruby_fields.transform_keys(&:to_s)
    # Names of types met while generating; used to emit stubs at the end.
    @references = Set.new
  end

  # Append the complete regenerated file content to #src.
  # @param nodes [Array<Node>] top-level nodes of the parsed header
  # @raise [RuntimeError] when a requested type cannot be generated, or when a
  #   dynamic type does not resolve to an immediate type
  def generate(nodes)
    println @preamble

    # Define USE_* macros
    @uses.each do |use|
      println " def C.#{use}"
      println " Primitive.cexpr! %q{ RBOOL(#{use} != 0) }"
      println " end"
      println
    end

    # Define macros/enums
    @values.each do |type, values|
      values.each do |value|
        println " def C.#{value}"
        println " Primitive.cexpr! %q{ #{type}2NUM(#{value}) }"
        println " end"
        println
      end
    end

    # TODO: Support nested declarations
    # When several nodes share a spelling, the last one wins.
    nodes_index = nodes.group_by(&:spelling).transform_values(&:last)

    # Define types
    @types.each do |type|
      unless definition = generate_node(nodes_index[type])
        raise "Failed to generate type: #{type}"
      end
      println " def C.#{type}"
      println "@#{type} ||= #{definition}".gsub(/^/, " ").chomp
      println " end"
      println
    end

    # Define dynamic types
    @dynamic_types.each do |type|
      unless generate_node(nodes_index[type])&.start_with?('CType::Immediate')
        raise "Non-immediate type is given to dynamic_types: #{type}"
      end
      println " def C.#{type}"
      println " @#{type} ||= CType::Immediate.find(Primitive.cexpr!(\"SIZEOF(#{type})\"), Primitive.cexpr!(\"SIGNED_TYPE_P(#{type})\"))"
      println " end"
      println
    end

    # Leave a stub for types that are referenced but not targeted
    (@references - @types - @dynamic_types).each do |type|
      println " def C.#{type}"
      println " #{DEFAULTS[type]}"
      println " end"
      println
    end

    print @postamble
  end

  private

  # Return code before BINDGEN_BEG and code after BINDGEN_END
  # Both marker lines themselves are kept (the first ends the preamble, the
  # second starts the postamble).
  # @return [Array(String, String)] preamble and postamble
  def split_ambles(src_path)
    lines = File.read(src_path).lines

    preamble_end = lines.index { |l| l.include?(BINDGEN_BEG) }
    raise "`#{BINDGEN_BEG}` was not found in '#{src_path}'" if preamble_end.nil?

    postamble_beg = lines.index { |l| l.include?(BINDGEN_END) }
    raise "`#{BINDGEN_END}` was not found in '#{src_path}'" if postamble_beg.nil?
    raise "`#{BINDGEN_BEG}` was found after `#{BINDGEN_END}`" if preamble_end >= postamble_beg

    return lines[0..preamble_end].join, lines[postamble_beg..-1].join
  end

  # Generate code from a node. Used for constructing a complex nested node.
  # @param node [Node]
  # @param sizeof_type [String, nil] C expression naming this node when it is an
  #   unnamed member nested in another struct/union; nil for top-level nodes
  # @return [String, nil] generated expression, or nil for a nil/unsupported node
  def generate_node(node, sizeof_type: nil)
    case node&.kind
    when :struct, :union
      # node.spelling is often empty for union, but we'd like to give it a name when it has one.
      buf = +"CType::#{node.kind.to_s.sub(/\A[a-z]/, &:upcase)}.new(\n"
      buf << " \"#{node.spelling}\", Primitive.cexpr!(\"SIZEOF(#{sizeof_type || node.type})\"),\n"
      bit_fields_end = node.children.index { |c| c.bitwidth == -1 } || node.children.size # first non-bit field index

      # Key into @skip_fields: a nested "((struct X *)NULL)->member" expression
      # is rewritten to "X.member"; top-level nodes use their own spelling.
      # Neither input changes per child, so it is resolved once up front.
      skip_type = sizeof_type&.gsub(/\(\(struct ([^\)]+) \*\)NULL\)->/, '\1.') || node.spelling
      skipped_fields = @skip_fields.fetch(skip_type, [])

      node.children.each_with_index do |child, i|
        next if skipped_fields.include?(child.spelling)

        # Builds the line for one member. Struct members carry an offset (and an
        # optional "true" flag for Ruby-object fields); union members do not.
        field_builder = proc do |field, type|
          if node.kind == :struct
            to_ruby = @ruby_fields.fetch(node.spelling, []).include?(field)
            if child.bitwidth > 0
              if bit_fields_end <= i # give up offsetof calculation for non-leading bit fields
                raise "non-leading bit fields are not supported. consider including '#{field}' in skip_fields."
              end
              # Bit fields use the offset recorded on the node at parse time.
              offsetof = node.offsetof.fetch(field)
            else
              off_type = sizeof_type || "(*((#{node.type} *)NULL))"
              offsetof = "Primitive.cexpr!(\"OFFSETOF(#{off_type}, #{field})\")"
            end
            " #{field}: [#{type}, #{offsetof}#{', true' if to_ruby}],\n"
          else
            " #{field}: #{type},\n"
          end
        end

        case child
        # BitField is struct-specific. So it must be handled here.
        in Node[kind: :field_decl, spelling:, bitwidth:, children: [_grandchild]] if bitwidth > 0
          buf << field_builder.call(spelling, "CType::BitField.new(#{bitwidth}, #{node.offsetof.fetch(spelling) % 8})")
        # "(unnamed ...)" struct and union are handled here, which are also struct-specific.
        in Node[kind: :field_decl, spelling:, type:, children: [grandchild]] if type.match?(/\((unnamed|anonymous) [^)]+\)\z/)
          if sizeof_type
            child_type = "#{sizeof_type}.#{child.spelling}"
          else
            child_type = "((#{node.type} *)NULL)->#{child.spelling}"
          end
          buf << field_builder.call(spelling, generate_node(grandchild, sizeof_type: child_type).gsub(/^/, ' ').sub(/\A +/, ''))
        # In most cases, we'd like to let generate_type handle the type unless it's "(unnamed ...)".
        in Node[kind: :field_decl, spelling:, type:] if !type.empty?
          buf << field_builder.call(spelling, generate_type(type))
        else # forward declarations are ignored
        end
      end
      buf << ")"
    when :typedef_decl
      case node.children
      in [child]
        generate_node(child)
      in [child, Node[kind: :integer_literal]]
        generate_node(child)
      in _ unless node.typedef_type.empty?
        generate_type(node.typedef_type)
      end
    when :enum_decl
      generate_type('int')
    when :type_ref
      generate_type(node.spelling)
    end
  end

  # Generate code from a type name. Used for resolving the name of a simple leaf node.
  # The argument is never modified, so frozen strings and strings owned by a
  # Node are safe to pass.
  # @param type [String]
  # @return [String] generated expression
  # @raise [RuntimeError] when a referenced type name is not a plain identifier
  def generate_type(type)
    # Arrays are modelled as pointers to their element type.
    if type.match?(/\[\d+\]\z/)
      return "CType::Pointer.new { #{generate_type(type.sub(/\[\d+\]\z/, ''))} }"
    end
    type = type.delete_suffix('const')
    if type.end_with?('*')
      return "CType::Pointer.new { #{generate_type(type.delete_suffix('*').rstrip)} }"
    end

    type = type.gsub(/((const|volatile) )+/, '').rstrip
    if type.start_with?(/(struct|union|enum) /)
      target = type.split(' ', 2).last
      push_target(target)
      "self.#{target}"
    else
      begin
        ctype = Fiddle::Importer.parse_ctype(type)
      rescue Fiddle::DLError
        # Not a type Fiddle knows about: refer to another generated definition.
        push_target(type)
        "self.#{type}"
      else
        # Convert any function pointers to void* to workaround FILE* vs int*
        if ctype == Fiddle::TYPE_VOIDP
          "CType::Immediate.parse(\"void *\")"
        else
          "CType::Immediate.parse(#{type.dump})"
        end
      end
    end
  end

  # Append +str+ to the output as is.
  def print(str)
    @src << str
  end

  # Append +str+ followed by a newline to the output.
  def println(str = "")
    @src << str << "\n"
  end

  # Drop one trailing newline from the output, if present.
  def chomp
    @src.delete_suffix!("\n")
  end

  # Strip trailing whitespace from the output in place.
  def rstrip!
    @src.rstrip!
  end

  # Remember +target+ as a referenced type name.
  # @raise [RuntimeError] unless +target+ consists solely of word characters
  def push_target(target)
    unless target.match?(/\A\w+\z/)
      raise "invalid target: #{target}"
    end
    @references << target
  end
end
# Driver: parse mjit_compiler.h from the source tree and rewrite the generated
# region of mjit_c.rb in place.
src_dir = File.expand_path('../..', __dir__)
src_path = File.join(src_dir, 'mjit_c.rb')
build_dir = File.expand_path(build_dir)

# Directories handed to the parser as -I flags.
include_dirs = [
  src_dir,
  build_dir,
  File.join(src_dir, 'include'),
  File.join(build_dir, ".ext/include/#{RUBY_PLATFORM}"),
]
cflags = include_dirs.map { |dir| "-I#{dir}" }

header_path = File.join(src_dir, 'mjit_compiler.h')
nodes = HeaderParser.new(header_path, cflags: cflags).parse

generator = BindingGenerator.new(
  src_path: src_path,
  uses: %w[
    USE_LAZY_LOAD
    USE_RVARGC
  ],
  values: {
    INT: %w[
      NOT_COMPILED_STACK_SIZE
      VM_CALL_KW_SPLAT
      VM_CALL_KW_SPLAT_bit
      VM_CALL_TAILCALL
      VM_CALL_TAILCALL_bit
      VM_METHOD_TYPE_CFUNC
      VM_METHOD_TYPE_ISEQ
    ],
  },
  types: %w[
    CALL_DATA
    IC
    IVC
    RB_BUILTIN
    compile_branch
    compile_status
    inlined_call_context
    iseq_inline_constant_cache
    iseq_inline_constant_cache_entry
    iseq_inline_iv_cache_entry
    iseq_inline_storage_entry
    mjit_options
    rb_builtin_function
    rb_call_data
    rb_callable_method_entry_struct
    rb_callcache
    rb_callinfo
    rb_cref_t
    rb_control_frame_t
    rb_execution_context_t
    rb_execution_context_struct
    rb_iseq_constant_body
    rb_iseq_location_t
    rb_iseq_struct
    rb_iseq_t
    rb_iv_index_tbl_entry
    rb_method_definition_struct
    rb_method_iseq_t
    rb_method_type_t
    rb_mjit_compile_info
    rb_mjit_unit
    rb_serial_t
  ],
  dynamic_types: %w[
    VALUE
  ],
  skip_fields: {
    'rb_execution_context_struct.machine': %w[regs], # differs between macOS and Linux
    rb_execution_context_struct: %w[method_missing_reason], # non-leading bit fields not supported
    rb_iseq_constant_body: %w[yjit_payload], # conditionally defined
  },
  ruby_fields: {
    rb_iseq_location_struct: %w[
      base_label
      first_lineno
      label
      pathobj
    ]
  },
)

generator.generate(nodes)

# Overwrite the target file with the regenerated source.
File.write(src_path, generator.src)