2008-07-17 20:46:16 -04:00
|
|
|
require 'rdoc'
|
|
|
|
require 'rdoc/code_objects'
|
|
|
|
require 'rdoc/markup/preprocess'
|
|
|
|
require 'rdoc/stats'
|
|
|
|
|
|
|
|
##
|
|
|
|
# A parser is simply a class that implements
|
|
|
|
#
|
|
|
|
# #initialize(file_name, body, options)
|
|
|
|
#
|
|
|
|
# and
|
|
|
|
#
|
|
|
|
# #scan
|
|
|
|
#
|
|
|
|
# The initialize method takes a file name to be used, the body of the file,
|
|
|
|
# and an RDoc::Options object. The scan method is then called to return an
|
|
|
|
# appropriately parsed TopLevel code object.
|
|
|
|
#
|
|
|
|
# The ParseFactory is used to redirect to the correct parser given a
|
|
|
|
# filename extension. This magic works because individual parsers have to
|
|
|
|
# register themselves with us as they are loaded in. They do this using the
|
|
|
|
# following incantation
|
|
|
|
#
|
|
|
|
# require "rdoc/parser"
|
2010-04-02 00:40:47 -04:00
|
|
|
#
|
2008-07-17 20:46:16 -04:00
|
|
|
# class RDoc::Parser::Xyz < RDoc::Parser
|
|
|
|
# parse_files_matching /\.xyz$/ # <<<<
|
2010-04-02 00:40:47 -04:00
|
|
|
#
|
2008-07-17 20:46:16 -04:00
|
|
|
# def initialize(file_name, body, options)
|
|
|
|
# ...
|
|
|
|
# end
|
2010-04-02 00:40:47 -04:00
|
|
|
#
|
2008-07-17 20:46:16 -04:00
|
|
|
# def scan
|
|
|
|
# ...
|
|
|
|
# end
|
|
|
|
# end
|
|
|
|
#
|
|
|
|
# Just to make life interesting, if we suspect a plain text file, we also
|
|
|
|
# look for a shebang line just in case it's a potential shell script
|
|
|
|
|
|
|
|
class RDoc::Parser
|
|
|
|
|
|
|
|
# Registry of known parsers. Each entry is a two-element array of
# [regexp, parser]. Entries are added with Array#unshift, so the most
# recently registered entry is consulted first.

@parsers = []

class << self
  ##
  # The list of registered [regexp, parser] pairs.

  attr_reader :parsers
end
|
|
|
|
|
|
|
|
##
# Alias an extension to another extension. After this call, files ending
# "new_ext" will be parsed using the same parser as "old_ext".
#
# Both extensions may be given with or without a leading dot.
#
# Returns false (and registers nothing) when no parser handles +old_ext+,
# true otherwise.

def self.alias_extension(old_ext, new_ext)
  old_ext = old_ext.sub(/^\.(.*)/, '\1')
  new_ext = new_ext.sub(/^\.(.*)/, '\1')

  # Probe with a made-up file name carrying the old extension.
  parser = can_parse "xxx.#{old_ext}"
  return false unless parser

  # Escape the extension so characters such as "+" are matched literally
  # instead of being interpreted as regexp syntax.
  RDoc::Parser.parsers.unshift [/\.#{Regexp.escape new_ext}$/, parser]

  true
end
|
|
|
|
|
2008-09-24 22:43:03 -04:00
|
|
|
##
# Determines if the file is a "binary" file which basically means it has
# content that an RDoc parser shouldn't try to consume.
#
# Only the first 1024 bytes of +file+ are inspected. Returns false when
# nothing could be read from the file.

def self.binary?(file)
  s = File.read(file, 1024)
  return false unless s

  # Marshal dumps begin with a two-byte version header.
  return true if s[0, 2] == Marshal.dump('')[0, 2]

  # Files named like "*erb.rb" are never treated as binary past this point.
  return false if file =~ /erb\.rb$/

  # Several ERb tags or a NUL byte mark content parsers should skip.
  return true if s.scan(/<%|%>/).length >= 4 || s.index("\x00")

  # Otherwise treat the file as binary when more than 30% of the sample is
  # ASCII bytes other than printable characters, tab, CR and LF.
  unprintable = s.count("\x00-\x7F", "^ -~\t\r\n")

  (unprintable.to_f / s.size) > 0.3
end
|
|
|
|
|
|
|
|
##
# Checks if +file+ is a zip file in disguise. Signatures from
# http://www.garykessler.net/library/file_sigs.html
#
# Returns false when +file+ cannot be read (for example when it does not
# exist).

def self.zip? file
  zip_signature = File.read file, 4

  ["PK\x03\x04", "PK\x05\x06", "PK\x07\x08"].include? zip_signature
rescue SystemCallError
  # Missing or unreadable files cannot be zip files.
  false
end
|
|
|
|
|
2008-07-17 20:46:16 -04:00
|
|
|
##
# Return a parser that can handle a particular extension.
#
# The registered patterns are tried in order and the first match wins.
# Returns nil when no registered pattern matches +file_name+, or when the
# match is RDoc::Parser::Simple and the file is a disguised zip file or has
# an extension that does not look like text.

def self.can_parse(file_name)
  _, parser = RDoc::Parser.parsers.find { |regexp, _| regexp =~ file_name }

  # Nothing registered for this name; callers already handle nil.
  return unless parser

  # HACK Selenium hides a jar file using a .txt extension
  return if parser == RDoc::Parser::Simple and zip? file_name

  # The default parser must not parse binary files
  ext_name = File.extname file_name
  return parser if ext_name.empty?
  return if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/

  parser
end
|
|
|
|
|
|
|
|
##
# Finds the parser for +file_name+ and returns a new instance of it built
# from the given arguments.
#
# Returns nil when the file is binary or when no parser accepts the file
# name. A file name without an extension whose +body+ starts with a ruby
# shebang line is looked up as if it were named "dummy.rb".

def self.for(top_level, file_name, body, options, stats)
  return if binary? file_name

  # If no extension, look for shebang
  if file_name !~ /\.\w+$/ && body =~ %r{\A#!(.+)} then
    shebang = $1

    # A ruby interpreter in the shebang selects whatever parser handles .rb
    file_name = "dummy.rb" if shebang =~ %r{env\s+ruby} || shebang =~ %r{/ruby}
  end

  parser = can_parse file_name

  return unless parser

  parser.new top_level, file_name, body, options, stats
end
|
|
|
|
|
|
|
|
##
# Record which file types this parser can understand.
#
# The [regexp, parser] pair is placed at the front of RDoc::Parser.parsers,
# and that list is returned.
#
# It is ok to call this multiple times.

def self.parse_files_matching(regexp)
  RDoc::Parser.parsers.unshift [regexp, self]
end
|
|
|
|
|
|
|
|
##
# Stores the parser's inputs in instance variables of the same names
# (+content+ is kept as @content).

def initialize(top_level, file_name, content, options, stats)
  @top_level = top_level
  @file_name = file_name
  @content   = content
  @options   = options
  @stats     = stats
end
|
|
|
|
|
|
|
|
end
|
|
|
|
|
|
|
|
require 'rdoc/parser/simple'
|
|
|
|
|