mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
156ea41fa2
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
523 lines
17 KiB
Ruby
523 lines
17 KiB
Ruby
# We attempt to parse C extension files. Basically we look for
|
|
# the standard patterns that you find in extensions: <tt>rb_define_class,
|
|
# rb_define_method</tt> and so on. We also try to find the corresponding
|
|
# C source for the methods and extract comments, but if we fail
|
|
# we don't worry too much.
|
|
#
|
|
# The comments associated with a Ruby method are extracted from the C
|
|
# comment block associated with the routine that _implements_ that
|
|
# method, that is to say the method whose name is given in the
|
|
# <tt>rb_define_method</tt> call. For example, you might write:
|
|
#
|
|
# /*
|
|
# * Returns a new array that is a one-dimensional flattening of this
|
|
# * array (recursively). That is, for every element that is an array,
|
|
# * extract its elements into the new array.
|
|
# *
|
|
# * s = [ 1, 2, 3 ] #=> [1, 2, 3]
|
|
# * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
|
|
# * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
|
|
# * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
|
# */
|
|
# static VALUE
|
|
# rb_ary_flatten(ary)
|
|
# VALUE ary;
|
|
# {
|
|
# ary = rb_obj_dup(ary);
|
|
# rb_ary_flatten_bang(ary);
|
|
# return ary;
|
|
# }
|
|
#
|
|
# ...
|
|
#
|
|
# void
|
|
# Init_Array()
|
|
# {
|
|
# ...
|
|
# rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
|
|
#
|
|
# Here RDoc will determine from the rb_define_method line that there's a
|
|
# method called "flatten" in class Array, and will look for the implementation
|
|
# in the method rb_ary_flatten. It will then use the comment from that
|
|
# method in the HTML output. This method must be in the same source file
|
|
# as the rb_define_method.
|
|
#
|
|
# C classes can be diagramed (see /tc/dl/ruby/ruby/error.c), and RDoc
|
|
# integrates C and Ruby source into one tree
|
|
#
|
|
# The comment blocks may include special direcives:
|
|
#
|
|
# [Document-class: <i>name</i>]
|
|
# This comment block is documentation for the given class. Use this
|
|
# when the <tt>Init_xxx</tt> method is not named after the class.
|
|
#
|
|
# [Document-method: <i>name</i>]
|
|
# This comment documents the named method. Use when RDoc cannot outomatically
|
|
# find the method from it's declaration
|
|
#
|
|
# [call-seq: <i>text up to an empty line</i>]
|
|
# Because C source doesn't give descripive names to Ruby-level parameters,
|
|
# you need to document the calling sequence explicitly
|
|
#
|
|
# In additon, RDoc assumes by default that the C method implementing a
|
|
# Ruby function is in the same source file as the rb_define_method call.
|
|
# If this isn't the case, add the comment
|
|
#
|
|
# rb_define_method(....); // in: filename
|
|
#
|
|
# As an example, we might have an extension that defines multiple classes
|
|
# in its Init_xxx method. We could document them using
|
|
#
|
|
#
|
|
# /*
|
|
# * Document-class: MyClass
|
|
# *
|
|
# * Encapsulate the writing and reading of the configuration
|
|
# * file. ...
|
|
# */
|
|
#
|
|
# /*
|
|
# * Document-method: read_value
|
|
# *
|
|
# * call-seq:
|
|
# * cfg.read_value(key) -> value
|
|
# * cfg.read_value(key} { |key| } -> value
|
|
# *
|
|
# * Return the value corresponding to +key+ from the configuration.
|
|
# * In the second form, if the key isn't found, invoke the
|
|
# * block and return its value.
|
|
# */
|
|
#
|
|
|
|
|
|
# Classes and modules built in to the interpreter. We need
|
|
# these to define superclasses of user objects
|
|
|
|
require "rdoc/code_objects"
|
|
require "rdoc/parsers/parserfactory"
|
|
|
|
|
|
module RDoc
|
|
|
|
KNOWN_CLASSES = {
|
|
"rb_cObject" => "Object",
|
|
"rb_cArray" => "Array",
|
|
"rb_cBignum" => "Bignum",
|
|
"rb_cClass" => "Class",
|
|
"rb_cDir" => "Dir",
|
|
"rb_cData" => "Data",
|
|
"rb_cFalseClass" => "FalseClass",
|
|
"rb_cFile" => "File",
|
|
"rb_cFixnum" => "Fixnum",
|
|
"rb_cFloat" => "Float",
|
|
"rb_cHash" => "Hash",
|
|
"rb_cInteger" => "Integer",
|
|
"rb_cIO" => "IO",
|
|
"rb_cModule" => "Module",
|
|
"rb_cNilClass" => "NilClass",
|
|
"rb_cNumeric" => "Numeric",
|
|
"rb_cProc" => "Proc",
|
|
"rb_cRange" => "Range",
|
|
"rb_cRegexp" => "Regexp",
|
|
"rb_cString" => "String",
|
|
"rb_cSymbol" => "Symbol",
|
|
"rb_cThread" => "Thread",
|
|
"rb_cTime" => "Time",
|
|
"rb_cTrueClass" => "TrueClass",
|
|
"rb_cStruct" => "Struct",
|
|
"rb_eException" => "Exception",
|
|
"rb_eStandardError" => "StandardError",
|
|
"rb_eSystemExit" => "SystemExit",
|
|
"rb_eInterrupt" => "Interrupt",
|
|
"rb_eSignal" => "Signal",
|
|
"rb_eFatal" => "Fatal",
|
|
"rb_eArgError" => "ArgError",
|
|
"rb_eEOFError" => "EOFError",
|
|
"rb_eIndexError" => "IndexError",
|
|
"rb_eRangeError" => "RangeError",
|
|
"rb_eIOError" => "IOError",
|
|
"rb_eRuntimeError" => "RuntimeError",
|
|
"rb_eSecurityError" => "SecurityError",
|
|
"rb_eSystemCallError" => "SystemCallError",
|
|
"rb_eTypeError" => "TypeError",
|
|
"rb_eZeroDivError" => "ZeroDivError",
|
|
"rb_eNotImpError" => "NotImpError",
|
|
"rb_eNoMemError" => "NoMemError",
|
|
"rb_eFloatDomainError" => "FloatDomainError",
|
|
"rb_eScriptError" => "ScriptError",
|
|
"rb_eNameError" => "NameError",
|
|
"rb_eSyntaxError" => "SyntaxError",
|
|
"rb_eLoadError" => "LoadError",
|
|
|
|
"rb_mKernel" => "Kernel",
|
|
"rb_mComparable" => "Comparable",
|
|
"rb_mEnumerable" => "Enumerable",
|
|
"rb_mPrecision" => "Precision",
|
|
"rb_mErrno" => "Errno",
|
|
"rb_mFileTest" => "FileTest",
|
|
"rb_mGC" => "GC",
|
|
"rb_mMath" => "Math",
|
|
"rb_mProcess" => "Process"
|
|
|
|
}
|
|
|
|
# See rdoc/c_parse.rb
|
|
|
|
class C_Parser
|
|
|
|
|
|
extend ParserFactory
|
|
parse_files_matching(/\.(c|cc|cpp|CC)$/)
|
|
|
|
@@known_bodies = {}
|
|
|
|
# prepare to parse a C file
|
|
def initialize(top_level, file_name, body, options, stats)
|
|
@known_classes = KNOWN_CLASSES.dup
|
|
@body = handle_ifdefs_in(body)
|
|
@options = options
|
|
@stats = stats
|
|
@top_level = top_level
|
|
@classes = Hash.new
|
|
@file_dir = File.dirname(file_name)
|
|
end
|
|
|
|
# Extract the classes/modules and methods from a C file
|
|
# and return the corresponding top-level object
|
|
def scan
|
|
remove_commented_out_lines
|
|
do_classes
|
|
do_methods
|
|
do_includes
|
|
do_aliases
|
|
@top_level
|
|
end
|
|
|
|
#######
|
|
private
|
|
#######
|
|
|
|
# remove lines that are commented out that might otherwise get
|
|
# picked up when scanning for classes and methods
|
|
|
|
def remove_commented_out_lines
|
|
@body.gsub!(%r{//.*rb_define_}, '//')
|
|
end
|
|
|
|
def handle_class_module(var_name, class_mod, class_name, parent, in_module)
|
|
parent_name = @known_classes[parent] || parent
|
|
|
|
if in_module
|
|
enclosure = @classes[in_module]
|
|
unless enclosure
|
|
$stderr.puts("Enclosing class/module '#{in_module}' for " +
|
|
"#{class_mod} #{class_name} not known")
|
|
return
|
|
end
|
|
else
|
|
enclosure = @top_level
|
|
end
|
|
|
|
if class_mod == "class"
|
|
cm = enclosure.add_class(NormalClass, class_name, parent_name)
|
|
@stats.num_classes += 1
|
|
else
|
|
cm = enclosure.add_module(NormalModule, class_name)
|
|
@stats.num_modules += 1
|
|
end
|
|
cm.record_location(enclosure.toplevel)
|
|
|
|
find_class_comment(cm.full_name, cm)
|
|
@classes[var_name] = cm
|
|
@known_classes[var_name] = cm.full_name
|
|
end
|
|
|
|
|
|
############################################################
|
|
|
|
def find_class_comment(class_name, class_meth)
|
|
comment = nil
|
|
if @body =~ %r{((?>/\*.*?\*/\s+))
|
|
(static\s+)?void\s+Init_#{class_name}\s*\(\)}xmi
|
|
comment = $1
|
|
elsif @body =~ %r{Document-(class|module):\s#{class_name}.*?\n((?>.*?\*/))}m
|
|
comment = $2
|
|
end
|
|
class_meth.comment = mangle_comment(comment) if comment
|
|
end
|
|
|
|
############################################################
|
|
|
|
def do_classes
|
|
@body.scan(/(\w+)\s* = \s*rb_define_module\(\s*"(\w+)"\s*\)/mx) do
|
|
|var_name, class_name|
|
|
handle_class_module(var_name, "module", class_name, nil, nil)
|
|
end
|
|
|
|
# The '.' lets us handle SWIG-generated files
|
|
@body.scan(/([\w\.]+)\s* = \s*rb_define_class
|
|
\(
|
|
\s*"(\w+)",
|
|
\s*(\w+)\s*
|
|
\)/mx) do
|
|
|
|
|var_name, class_name, parent|
|
|
handle_class_module(var_name, "class", class_name, parent, nil)
|
|
end
|
|
|
|
@body.scan(/(\w+)\s*=\s*boot_defclass\(\s*"(\w+?)",\s*(\w+?)\)/) do
|
|
|var_name, class_name, parent|
|
|
parent = nil if parent == "0"
|
|
handle_class_module(var_name, "class", class_name, parent, nil)
|
|
end
|
|
|
|
@body.scan(/(\w+)\s* = \s*rb_define_module_under\s*
|
|
\(
|
|
\s*(\w+),
|
|
\s*"(\w+)"
|
|
\s*\)/mx) do
|
|
|
|
|var_name, in_module, class_name|
|
|
handle_class_module(var_name, "module", class_name, nil, in_module)
|
|
end
|
|
|
|
@body.scan(/([\w\.]+)\s* = \s*rb_define_class_under
|
|
\(
|
|
\s*(\w+),
|
|
\s*"(\w+)",
|
|
\s*(\w+)\s*
|
|
\)/mx) do
|
|
|
|
|var_name, in_module, class_name, parent|
|
|
handle_class_module(var_name, "class", class_name, parent, in_module)
|
|
end
|
|
|
|
end
|
|
|
|
############################################################
|
|
|
|
def do_methods
|
|
|
|
@body.scan(%r{rb_define_
|
|
(
|
|
singleton_method |
|
|
method |
|
|
module_function |
|
|
private_method
|
|
)
|
|
\(\s*([\w\.]+),
|
|
\s*"([^"]+)",
|
|
\s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
|
|
\s*(-?\w+)\s*\)
|
|
(?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
|
|
}xm) do
|
|
|type, var_name, meth_name, meth_body, param_count, source_file|
|
|
#"
|
|
|
|
# Ignore top-object and weird struct.c dynamic stuff
|
|
next if var_name == "ruby_top_self"
|
|
next if var_name == "nstr"
|
|
next if var_name == "envtbl"
|
|
next if var_name == "argf" # it'd be nice to handle this one
|
|
|
|
var_name = "rb_cObject" if var_name == "rb_mKernel"
|
|
handle_method(type, var_name, meth_name,
|
|
meth_body, param_count, source_file)
|
|
end
|
|
|
|
@body.scan(%r{rb_define_global_function\(
|
|
\s*"([^"]+)",
|
|
\s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
|
|
\s*(-?\w+)\s*\)
|
|
(?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
|
|
}xm) do #"
|
|
|meth_name, meth_body, param_count, source_file|
|
|
handle_method("method", "rb_mKernel", meth_name,
|
|
meth_body, param_count, source_file)
|
|
end
|
|
|
|
@body.scan(/define_filetest_function\(
|
|
\s*"([^"]+)",
|
|
\s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
|
|
\s*(-?\w+)\s*\)/xm) do #"
|
|
|meth_name, meth_body, param_count|
|
|
|
|
handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count)
|
|
handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count)
|
|
end
|
|
end
|
|
|
|
############################################################
|
|
|
|
def do_aliases
|
|
@body.scan(%r{rb_define_alias\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m) do
|
|
|var_name, new_name, old_name|
|
|
@stats.num_methods += 1
|
|
class_name = @known_classes[var_name] || var_name
|
|
class_obj = find_class(var_name, class_name)
|
|
|
|
class_obj.add_alias(Alias.new("", old_name, new_name, ""))
|
|
end
|
|
end
|
|
|
|
############################################################
|
|
|
|
def handle_method(type, var_name, meth_name,
|
|
meth_body, param_count, source_file = nil)
|
|
@stats.num_methods += 1
|
|
class_name = @known_classes[var_name]
|
|
|
|
return unless class_name
|
|
|
|
class_obj = find_class(var_name, class_name)
|
|
|
|
if class_obj
|
|
if meth_name == "initialize"
|
|
meth_name = "new"
|
|
type = "singleton_method"
|
|
end
|
|
meth_obj = AnyMethod.new("", meth_name)
|
|
meth_obj.singleton = type == "singleton_method"
|
|
|
|
p_count = (Integer(param_count) rescue -1)
|
|
|
|
if p_count < 0
|
|
meth_obj.params = "(...)"
|
|
elsif p_count == 0
|
|
meth_obj.params = "()"
|
|
else
|
|
meth_obj.params = "(" +
|
|
(1..p_count).map{|i| "p#{i}"}.join(", ") +
|
|
")"
|
|
end
|
|
|
|
if source_file
|
|
file_name = File.join(@file_dir, source_file)
|
|
body = (@@known_bodies[source_file] ||= File.read(file_name))
|
|
else
|
|
body = @body
|
|
end
|
|
find_body(meth_body, meth_obj, body)
|
|
class_obj.add_method(meth_obj)
|
|
end
|
|
end
|
|
|
|
############################################################
|
|
|
|
# Find the C code corresponding to a Ruby method
|
|
def find_body(meth_name, meth_obj, body)
|
|
if body =~ %r{((?>/\*.*?\*/\s*))(static\s+)?VALUE\s+#{meth_name}
|
|
\s*(\(.*?\)).*?^}xm
|
|
|
|
comment, params = $1, $3
|
|
body_text = $&
|
|
|
|
# see if we can find the whole body
|
|
|
|
re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
|
|
if Regexp.new(re, Regexp::MULTILINE).match(body)
|
|
body_text = $&
|
|
end
|
|
|
|
# The comment block may have been overridden with a
|
|
# 'Document-method' block. This happens in the interpreter
|
|
# when multiple methods are vectored through to the same
|
|
# C method but those methods are logically distinct (for
|
|
# example Kernel.hash and Kernel.object_id share the same
|
|
# implementation
|
|
|
|
override_comment = find_override_comment(meth_obj.name)
|
|
comment = override_comment if override_comment
|
|
|
|
find_call_seq(comment, meth_obj) if comment
|
|
|
|
# meth_obj.params = params
|
|
meth_obj.start_collecting_tokens
|
|
meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text))
|
|
meth_obj.comment = mangle_comment(comment)
|
|
|
|
else
|
|
|
|
# No body, but might still have an override comment
|
|
comment = find_override_comment(meth_obj.name)
|
|
|
|
if comment
|
|
find_call_seq(comment, meth_obj)
|
|
meth_obj.comment = mangle_comment(comment)
|
|
else
|
|
$stderr.puts "No definition for #{meth_name}"
|
|
end
|
|
end
|
|
end
|
|
|
|
|
|
##################################################
|
|
#
|
|
# If the comment block contains a section that looks like
|
|
# call-seq:
|
|
# Array.new
|
|
# Array.new(10)
|
|
# use it for the parameters
|
|
def find_call_seq(comment, meth_obj)
|
|
if comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '')
|
|
seq = $1
|
|
seq.gsub!(/^\s*\*\s*/, '')
|
|
meth_obj.call_seq = seq
|
|
end
|
|
end
|
|
|
|
############################################################
|
|
|
|
def find_override_comment(meth_name)
|
|
comment = nil
|
|
name = Regexp.escape(meth_name)
|
|
if @body =~ %r{Document-method:\s#{name}.*?\n((?>.*?\*/))}m
|
|
comment = $1
|
|
end
|
|
comment
|
|
end
|
|
|
|
############################################################
|
|
|
|
# Look for includes of the form
|
|
# rb_include_module(rb_cArray, rb_mEnumerable);
|
|
def do_includes
|
|
@body.scan(/rb_include_module\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m|
|
|
if cls = @classes[c]
|
|
m = KNOWN_CLASSES[m] || m
|
|
cls.add_include(Include.new(m, ""))
|
|
end
|
|
end
|
|
end
|
|
|
|
############################################################
|
|
|
|
# Remove the /*'s and leading asterisks from C comments
|
|
|
|
def mangle_comment(comment)
|
|
comment.sub!(%r{/\*+}) { " " * $&.length }
|
|
comment.sub!(%r{\*+/}) { " " * $&.length }
|
|
comment.gsub!(/^[ \t]*\*/m) { " " * $&.length }
|
|
comment
|
|
end
|
|
|
|
def find_class(raw_name, name)
|
|
unless @classes[raw_name]
|
|
if raw_name =~ /^rb_m/
|
|
@classes[raw_name] = @top_level.add_module(NormalModule, name)
|
|
else
|
|
@classes[raw_name] = @top_level.add_class(NormalClass, name, nil)
|
|
end
|
|
end
|
|
@classes[raw_name]
|
|
end
|
|
|
|
# Remove #ifdefs that would otherwise confuse us
|
|
|
|
def handle_ifdefs_in(body)
|
|
body.gsub(/^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m) { $1 }
|
|
end
|
|
|
|
end
|
|
|
|
end
|