ruby--ruby/lib/rdoc/parser.rb

require 'rdoc'
require 'rdoc/code_objects'
require 'rdoc/markup/preprocess'
require 'rdoc/stats'

##
# A parser is simply a class that implements
#
#   #initialize(top_level, file_name, body, options, stats)
#
# and
#
#   #scan
#
# The initialize method takes a top-level code object, a file name to be
# used, the body of the file, an RDoc::Options object and a stats object. The
# scan method is then called to return an appropriately parsed TopLevel code
# object.
#
# The ParseFactory is used to redirect to the correct parser given a
# filename extension. This magic works because individual parsers have to
# register themselves with us as they are loaded in. They do this using the
# following incantation
#
#   require "rdoc/parser"
#
#   class RDoc::Parser::Xyz < RDoc::Parser
#     parse_files_matching /\.xyz$/ # <<<<
#
#     def initialize(top_level, file_name, body, options, stats)
#       ...
#     end
#
#     def scan
#       ...
#     end
#   end
#
# Just to make life interesting, if we suspect a plain text file, we also
# look for a shebang line just in case it's a potential shell script

class RDoc::Parser

  @parsers = []

  class << self

    ##
    # Array of [regexp, parser_class] pairs.  New registrations are added to
    # the front and the list is searched front to back, so the most recently
    # registered matching entry wins.

    attr_reader :parsers

  end

  ##
  # Alias an extension to another extension. After this call, files ending
  # "new_ext" will be parsed using the same parser as "old_ext"
  #
  # Either extension may be given with or without its leading dot.  Returns
  # true when the alias was registered and false when no parser handles
  # +old_ext+.

  def self.alias_extension(old_ext, new_ext)
    old_ext = old_ext.sub(/^\.(.*)/, '\1')
    new_ext = new_ext.sub(/^\.(.*)/, '\1')

    # "xxx" is only a placeholder base name; the file does not need to exist.
    parser = can_parse "xxx.#{old_ext}"
    return false unless parser

    # Escape the extension so regexp metacharacters in it (as in "c++") are
    # matched literally instead of being interpreted as part of the pattern.
    RDoc::Parser.parsers.unshift [/\.#{Regexp.escape new_ext}$/, parser]

    true
  end

  ##
  # Determines if the file is a "binary" file which basically means it has
  # content that an RDoc parser shouldn't try to consume.
  #
  # Only the first 1024 bytes of +file+ are examined.  When nothing can be
  # read (File.read returns nil) the file is not considered binary.

  def self.binary?(file)
    s = File.read(file, 1024) or return false

    if s[0, 2] == Marshal.dump('')[0, 2] then
      # starts with the Marshal format header
      true
    elsif file =~ /erb\.rb$/ then
      false
    elsif s.scan(/<%|%>/).length >= 4 || s.index("\x00") then
      # four or more ERB delimiters, or a NUL byte
      true
    else
      # More than 30% of the sample being ASCII control characters (other
      # than tab, carriage return and newline) counts as binary.
      s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size > 0.3
    end
  end

  ##
  # Checks if +file+ is a zip file in disguise.  Signatures from
  # http://www.garykessler.net/library/file_sigs.html
  #
  # Returns false when +file+ cannot be read (for example when it does not
  # exist), since can_parse is also called with made-up file names.

  def self.zip? file
    zip_signature = File.read file, 4

    zip_signature == "PK\x03\x04" or
      zip_signature == "PK\x05\x06" or
      zip_signature == "PK\x07\x08"
  rescue SystemCallError
    false
  end

  ##
  # Return a parser that can handle a particular extension
  #
  # Returns the parser class registered for +file_name+, or nil when no
  # registered pattern matches or when the match is RDoc::Parser::Simple but
  # the file looks like a zip archive or has an extension other than txt or
  # rdoc.

  def self.can_parse(file_name)
    # find returns nil when nothing matches, leaving parser nil
    _, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }
    return unless parser

    # HACK Selenium hides a jar file using a .txt extension
    return if parser == RDoc::Parser::Simple and zip? file_name

    # The default parser must not parse binary files
    ext_name = File.extname file_name
    return parser if ext_name.empty?
    return if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/

    parser
  end

  ##
  # Find the correct parser for a particular file name and return a new
  # instance of it.  Returns nil when the file is binary or no parser can
  # handle it.
  #
  # A file without an extension whose +body+ starts with a ruby shebang line
  # is treated as a ruby file when choosing the parser; the parser is still
  # created with the real +file_name+.

  def self.for(top_level, file_name, body, options, stats)
    return if binary? file_name

    # Name used only for parser lookup, so the real name is not lost.
    parse_name = file_name

    # If no extension, look for shebang
    if file_name !~ /\.\w+$/ && body =~ %r{\A#!(.+)} then
      shebang = $1
      case shebang
      when %r{env\s+ruby}, %r{/ruby}
        parse_name = "dummy.rb"
      end
    end

    parser = can_parse parse_name

    return unless parser

    parser.new top_level, file_name, body, options, stats
  end

  ##
  # Record which file types this parser can understand.
  #
  # It is ok to call this multiple times.

  def self.parse_files_matching(regexp)
    RDoc::Parser.parsers.unshift [regexp, self]
  end

  ##
  # Stores the arguments given by RDoc::Parser.for in instance variables for
  # use by subclasses.

  def initialize(top_level, file_name, content, options, stats)
    @top_level = top_level
    @file_name = file_name
    @content = content
    @options = options
    @stats = stats
  end

end

require 'rdoc/parser/simple'
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`require 'rdoc'`
			`require 'rdoc/code_objects'`
			`require 'rdoc/markup/preprocess'`
			`require 'rdoc/stats'`

			`##`
			`# A parser is simple a class that implements`
			`#`
			`# #initialize(file_name, body, options)`
			`#`
			`# and`
			`#`
			`# #scan`
			`#`
			`# The initialize method takes a file name to be used, the body of the file,`
			`# and an RDoc::Options object. The scan method is then called to return an`
			`# appropriately parsed TopLevel code object.`
			`#`
			`# The ParseFactory is used to redirect to the correct parser given a`
			`# filename extension. This magic works because individual parsers have to`
			`# register themselves with us as they are loaded in. The do this using the`
			`# following incantation`
			`#`
			`# require "rdoc/parser"`
* {lib,test}/rdoc: removed trailing spaces. reapplied r22784. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27176 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-02 00:40:47 -04:00			`#`
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`# class RDoc::Parser::Xyz < RDoc::Parser`
			`# parse_files_matching /\.xyz$/ # <<<<`
* {lib,test}/rdoc: removed trailing spaces. reapplied r22784. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27176 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-02 00:40:47 -04:00			`#`
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`# def initialize(file_name, body, options)`
			`# ...`
			`# end`
* {lib,test}/rdoc: removed trailing spaces. reapplied r22784. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27176 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-02 00:40:47 -04:00			`#`
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`# def scan`
			`# ...`
			`# end`
			`# end`
			`#`
			`# Just to make life interesting, if we suspect a plain text file, we also`
			`# look for a shebang line just in case it's a potential shell script`

			`class RDoc::Parser`

			`@parsers = []`

			`class << self`
			`attr_reader :parsers`
			`end`

			`##`
			`# Alias an extension to another extension. After this call, files ending`
			`# "new_ext" will be parsed using the same parser as "old_ext"`

			`def self.alias_extension(old_ext, new_ext)`
Import RDoc r104. Various test fixes. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-18 17:10:47 -04:00			`old_ext = old_ext.sub(/^\.(.*)/, '\1')`
			`new_ext = new_ext.sub(/^\.(.*)/, '\1')`

Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`parser = can_parse "xxx.#{old_ext}"`
			`return false unless parser`

Import RDoc r104. Various test fixes. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18131 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-18 17:10:47 -04:00			`RDoc::Parser.parsers.unshift [/\.#{new_ext}$/, parser]`
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00
			`true`
			`end`

Import RDoc 2.2.1 r185 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19537 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-24 22:43:03 -04:00			`##`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`# Determines if the file is a "binary" file which basically means it has`
			`# content that an RDoc parser shouldn't try to consume.`
Import RDoc 2.2.1 r185 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19537 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-24 22:43:03 -04:00
			`def self.binary?(file)`
* lib/rdoc/parser.rb (RDoc::Parser.binary?): blksize may be nil and is irrelevant to whether a file is binary. TAB and newlines would be usually considered to be included in text data. reapplied r23071 and r24297. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27180 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-02 01:12:31 -04:00			`s = File.read(file, 1024) or return false`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00
			`if s[0, 2] == Marshal.dump('')[0, 2] then`
			`true`
			`elsif file =~ /erb\.rb$/ then`
			`false`
* lib/rdoc: Import RDoc 2.5.2 * lib/rdoc/parser/ruby.rb (RDoc::Parser::Ruby): Don't parse rdoc files, reverts r24976 in favor of include directive support in C parser. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27283 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-10 02:36:13 -04:00			`elsif s.scan(/<%\|%>/).length >= 4 \|\| s.index("\x00") then`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`true`
Import RDoc 2.5.4 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27396 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-19 01:08:28 -04:00			`elsif 0.respond_to? :fdiv then`
* string.c (tr_setup_table): fix bug in r29146. Initialize table even if cflag is 0; tr_find see whether del is empty or not. * string.c (tr_find): nodel can't be NULL; if NULL, it means it is not specified. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29148 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-08-30 15:45:30 -04:00			`s.count("\x00-\x7F", "^ -~\t\r\n").fdiv(s.size) > 0.3`
Import RDoc 2.5.4 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27396 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-19 01:08:28 -04:00			`else # HACK 1.8.6`
* string.c (tr_setup_table): fix bug in r29146. Initialize table even if cflag is 0; tr_find see whether del is empty or not. * string.c (tr_find): nodel can't be NULL; if NULL, it means it is not specified. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29148 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-08-30 15:45:30 -04:00			`(s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size) > 0.3`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`end`
			`end`

			`##`
			`# Checks if +file+ is a zip file in disguise. Signatures from`
			`# http://www.garykessler.net/library/file_sigs.html`

			`def self.zip? file`
			`zip_signature = File.read file, 4`

			`zip_signature == "PK\x03\x04" or`
			`zip_signature == "PK\x05\x06" or`
			`zip_signature == "PK\x07\x08"`
Import RDoc 2.2.1 r185 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19537 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-24 22:43:03 -04:00			`end`

Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`##`
			`# Return a parser that can handle a particular extension`

			`def self.can_parse(file_name)`
Import RDoc 2.2.1 r185 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19537 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-24 22:43:03 -04:00			`parser = RDoc::Parser.parsers.find { \|regexp,\| regexp =~ file_name }.last`

Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`# HACK Selenium hides a jar file using a .txt extension`
			`return if parser == RDoc::Parser::Simple and zip? file_name`
Import RDoc 2.2.1 r185 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19537 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-09-24 22:43:03 -04:00
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`# The default parser must not parse binary files`
* lib/rdoc: Import RDoc 2.5.2 * lib/rdoc/parser/ruby.rb (RDoc::Parser::Ruby): Don't parse rdoc files, reverts r24976 in favor of include directive support in C parser. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27283 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-10 02:36:13 -04:00			`ext_name = File.extname file_name`
			`return parser if ext_name.empty?`
			`return if parser == RDoc::Parser::Simple and ext_name !~ /txt\|rdoc/`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00
			`parser`
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`end`

			`##`
			`# Find the correct parser for a particular file name. Return a SimpleParser`
			`# for ones that we don't know`

			`def self.for(top_level, file_name, body, options, stats)`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`return if binary? file_name`

Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`# If no extension, look for shebang`
			`if file_name !~ /\.\w+$/ && body =~ %r{\A#!(.+)} then`
			`shebang = $1`
			`case shebang`
			`when %r{env\s+ruby}, %r{/ruby}`
			`file_name = "dummy.rb"`
			`end`
			`end`

			`parser = can_parse file_name`

Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`return unless parser`
Import RDoc 2.2.2 r192 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19930 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-10-24 19:05:28 -04:00
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00			`parser.new top_level, file_name, body, options, stats`
			`end`

			`##`
			`# Record which file types this parser can understand.`
Import RDoc 2.5 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27147 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2010-04-01 03:45:16 -04:00			`#`
			`# It is ok to call this multiple times.`
Import RDoc r101. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18121 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2008-07-17 20:46:16 -04:00
			`def self.parse_files_matching(regexp)`
			`RDoc::Parser.parsers.unshift [regexp, self]`
			`end`

			`def initialize(top_level, file_name, content, options, stats)`
			`@top_level = top_level`
			`@file_name = file_name`
			`@content = content`
			`@options = options`
			`@stats = stats`
			`end`

			`end`

			`require 'rdoc/parser/simple'`