2012-12-02 17:52:57 -05:00
|
|
|
# -*- coding: us-ascii -*-
|
|
|
|
|
2008-07-17 20:46:16 -04:00
|
|
|
##
|
2012-11-26 23:28:14 -05:00
|
|
|
# A parser is simply a class that subclasses RDoc::Parser and implements #scan
|
|
|
|
# to fill in an RDoc::TopLevel with parsed data.
|
2008-07-17 20:46:16 -04:00
|
|
|
#
|
2012-11-26 23:28:14 -05:00
|
|
|
# The initialize method takes an RDoc::TopLevel to fill with parsed content,
|
|
|
|
# the name of the file to be parsed, the content of the file, an RDoc::Options
|
|
|
|
# object and an RDoc::Stats object to inform the user of parsed items. The
|
|
|
|
# scan method is then called to parse the file and must return the
|
|
|
|
# RDoc::TopLevel object. By calling super these items will be set for you.
|
2008-07-17 20:46:16 -04:00
|
|
|
#
|
2012-11-26 23:28:14 -05:00
|
|
|
# In order to be used by RDoc the parser needs to register the file extensions
|
|
|
|
# it can parse. Use ::parse_files_matching to register extensions.
|
2008-07-17 20:46:16 -04:00
|
|
|
#
|
2012-11-26 23:28:14 -05:00
|
|
|
# require 'rdoc'
|
2010-04-02 00:40:47 -04:00
|
|
|
#
|
2008-07-17 20:46:16 -04:00
|
|
|
# class RDoc::Parser::Xyz < RDoc::Parser
|
2012-11-26 23:28:14 -05:00
|
|
|
# parse_files_matching /\.xyz$/
|
2010-04-02 00:40:47 -04:00
|
|
|
#
|
2012-11-26 23:28:14 -05:00
|
|
|
# def initialize top_level, file_name, content, options, stats
|
|
|
|
# super
|
|
|
|
#
|
|
|
|
# # extra initialization if needed
|
2008-07-17 20:46:16 -04:00
|
|
|
# end
|
2010-04-02 00:40:47 -04:00
|
|
|
#
|
2008-07-17 20:46:16 -04:00
|
|
|
# def scan
|
2012-11-26 23:28:14 -05:00
|
|
|
# # parse file and fill in @top_level
|
2008-07-17 20:46:16 -04:00
|
|
|
# end
|
|
|
|
# end
|
|
|
|
|
|
|
|
class RDoc::Parser
|
|
|
|
|
|
|
|
@parsers = []

class << self

  ##
  # The registry of known parsers: an Array of two-element Arrays, each
  # pairing a regular expression for a file name (usually its extension)
  # with the parser class used for files that match it.
  #
  # Entries are added at the front of the list by parse_files_matching and
  # alias_extension; the list is searched from the front.

  attr_reader :parsers

end
|
|
|
|
|
2012-12-18 03:24:57 -05:00
|
|
|
##
|
|
|
|
# The name of the file being parsed
|
|
|
|
|
|
|
|
attr_reader :file_name
|
|
|
|
|
2008-07-17 20:46:16 -04:00
|
|
|
##
# Registers +new_ext+ as an alias of +old_ext+.  After this call, files
# ending in ".new_ext" are parsed with the parser that handles ".old_ext".
#
# Both extensions may be given with or without the leading dot.
#
# Returns false (and registers nothing) when no parser handles +old_ext+,
# true otherwise.

def self.alias_extension(old_ext, new_ext)
  old_ext = old_ext.sub(/^\./, '')
  new_ext = new_ext.sub(/^\./, '')

  parser = can_parse_by_name "xxx.#{old_ext}"
  return false unless parser

  # The extension is literal text, not a pattern: escape it so that
  # extensions such as "c++" or "h.in" match exactly those file names.
  RDoc::Parser.parsers.unshift [/\.#{Regexp.escape new_ext}$/, parser]

  true
end
|
|
|
|
|
2008-09-24 22:43:03 -04:00
|
|
|
##
# Returns true when +file+ looks like a "binary" file, i.e. one whose
# content an RDoc parser should not try to consume.
#
# Files named *.rdoc or *.txt are never considered binary, and neither is an
# empty file.  Otherwise the first 1024 bytes are sampled: a Marshal header
# or a NUL byte marks the file as binary.  Failing that, the sample is read
# again using the encoding named in a magic comment (if any) and the file is
# binary when the bytes are not valid in that encoding.

def self.binary?(file)
  return false if file =~ /\.(rdoc|txt)$/

  sample = File.read(file, 1024)
  return false unless sample

  marshal_header = Marshal.dump('')[0, 2]
  return true if sample[0, 2] == marshal_header || sample.index("\x00")

  unless sample.respond_to? :encoding then
    # Strings without encodings (ruby 1.8): call the file binary when more
    # than 30% of the sample is 7-bit bytes other than printable ASCII, tab,
    # CR and LF.
    unprintable = sample.count("\x00-\x7F", "^ -~\t\r\n")
    return (unprintable.to_f / sample.size) > 0.3
  end

  sample.sub!(/\A#!.*\n/, '') # assume shebang line isn't longer than 1024.
  declared = sample[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
  open_mode = declared ? "r:#{declared}" : "r"

  decoded = File.open(file, open_mode) { |io| io.gets(nil, 1024) }

  !decoded.valid_encoding?
end
|
|
|
|
|
|
|
|
##
# Applies the common directive +directive+ with +value+ to +code_object+.
# Shared by the C and Ruby parsers.
#
# Deprecated: a warning is printed when warnings are enabled.
#
# nodoc::         clears document_self; children stay documented unless
#                 +value+ is "all" (case-insensitive)
# doc::           sets document_self and force_documentation
# yield, yields:: removes a trailing &block parameter from params and
#                 stores +value+ as block_params
# arg, args::     replaces params with +value+
#
# Any other directive is ignored.

def self.process_directive code_object, directive, value
  warn "RDoc::Parser::process_directive is deprecated and wil be removed in RDoc 4. Use RDoc::Markup::PreProcess#handle_directive instead" if $-w

  if 'nodoc' == directive then
    code_object.document_self = nil # notify nodoc
    code_object.document_children = value.downcase != 'all'
  elsif 'doc' == directive then
    code_object.document_self = true
    code_object.force_documentation = true
  elsif %w[yield yields].include? directive then
    # remove parameter &block
    params = code_object.params
    params.sub!(/,?\s*&\w+/, '') if params

    code_object.block_params = value
  elsif %w[arg args].include? directive then
    code_object.params = value
  end
end
|
|
|
|
|
|
|
|
##
# Returns true when the first four bytes of +file+ are one of the zip
# signatures listed at http://www.garykessler.net/library/file_sigs.html
#
# Any error while reading +file+ is treated as "not a zip file".

def self.zip? file
  signature = File.read file, 4

  ["PK\x03\x04", "PK\x05\x06", "PK\x07\x08"].include? signature
rescue
  false
end
|
|
|
|
|
2008-07-17 20:46:16 -04:00
|
|
|
##
# Returns the parser class that can handle +file_name+, or nil when there is
# none.
#
# The lookup is done by name (see can_parse_by_name).  A file that would go
# to RDoc::Parser::Simple is rejected when its content is really a zip
# archive.

def self.can_parse file_name
  candidate = can_parse_by_name file_name

  # HACK Selenium hides a jar file using a .txt extension
  disguised_zip = candidate == RDoc::Parser::Simple && zip?(file_name)

  disguised_zip ? nil : candidate
end
|
|
|
|
|
|
|
|
##
# Returns the parser class registered for the name of +file_name+, or nil.
#
# The first registered regular expression that matches +file_name+ wins.
# When that selects RDoc::Parser::Simple for a file whose extension does not
# contain "txt" or "rdoc", the file's modeline is consulted: any type other
# than "rdoc" means the file is not for the default parser and nil is
# returned.
#
# Returns nil when the file cannot be accessed (Errno::EACCES).

def self.can_parse_by_name file_name
  entry = RDoc::Parser.parsers.find { |pattern, _| pattern =~ file_name }
  parser = entry && entry[1]

  # The default parser must not parse binary files
  extension = File.extname file_name

  return parser if extension.empty?
  return parser unless parser == RDoc::Parser::Simple
  return parser if extension =~ /txt|rdoc/

  type = check_modeline file_name

  return parser if type.nil? || type == 'rdoc'

  nil
rescue Errno::EACCES
  nil
end
|
|
|
|
|
2012-12-01 17:24:53 -05:00
|
|
|
##
# Returns the file type named in the emacs-style modeline
# (<tt>-*- ... -*-</tt>) on the first line of +file_name+, downcased, or nil.
#
#   -*- ruby -*-                      # => "ruby"
#   -*- mode: Ruby; coding: utf-8 -*- # => "ruby"
#   -*- mode: rdoc -*-                # => "rdoc"
#   -*- coding: utf-8 -*-             # => nil
#
# nil is returned when the first line has no modeline, when a
# variable list has no +mode+ entry, and when the line cannot be matched
# (ArgumentError, e.g. an invalid byte sequence).
#
# Errors from opening the file (SystemCallError) are not rescued here.

def self.check_modeline file_name
  # File.open, not Kernel#open: the latter runs a subprocess when the name
  # starts with "|", and file names are not trusted input.
  line = File.open file_name do |io|
    io.gets
  end

  type = line && line[/-\*-\s*(.*?\S)\s*-\*-/, 1]

  return nil unless type

  # A variable list ("mode: ruby; coding: utf-8") or a lone "mode: ruby"
  # entry: the type is the value of the mode variable.
  if /;/ =~ type or /\Amode:/i =~ type then
    return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
    type = $1
  end

  return nil if /coding:/i =~ type

  type.downcase
rescue ArgumentError # invalid byte sequence, etc.
  nil
end
|
|
|
|
|
2008-07-17 20:46:16 -04:00
|
|
|
##
# Finds the parser class for +file_name+ and +content+ and returns a new
# instance of it, or nil when the file should not be parsed.
#
# The parser is chosen as follows:
#
# * binary files are never parsed
# * a <tt>markup:</tt> comment in +content+ (see use_markup) wins
# * otherwise the parser is looked up by file name; a file without an
#   extension whose shebang line mentions ruby is treated as a ruby file
#
# The modeline is removed from +content+ before it is handed to the parser.
# A SystemCallError raised along the way results in nil.

def self.for top_level, file_name, content, options, stats
  return if binary? file_name

  parser = use_markup content

  unless parser then
    # If no extension, look for shebang
    shebang = content[%r{\A#!(.+)}, 1] if file_name !~ /\.\w+$/
    ruby_script = shebang && (shebang =~ %r{env\s+ruby} || shebang =~ %r{/ruby})

    parser = can_parse(ruby_script ? 'dummy.rb' : file_name)
  end

  return unless parser

  parser.new top_level, file_name, remove_modeline(content), options, stats
rescue SystemCallError
  nil
end
|
|
|
|
|
|
|
|
##
# Registers this parser class for file names matching +regexp+.
#
# The registration is added to the front of RDoc::Parser.parsers.
#
# It is ok to call this multiple times.

def self.parse_files_matching(regexp)
  registration = [regexp, self]

  RDoc::Parser.parsers.unshift registration
end
|
|
|
|
|
2013-09-18 19:33:36 -04:00
|
|
|
##
# Returns a copy of +content+ without its emacs-style modeline
# (<tt>-*- ... -*-</tt>).  Only a modeline at the very start of the document
# is removed, together with the line ending that follows it; +content+
# itself is not modified.

def self.remove_modeline content
  modeline = /\A.*-\*-\s*(.*?\S)\s*-\*-.*\r?\n/

  content.sub modeline, ''
end
|
|
|
|
|
2010-12-19 22:22:49 -05:00
|
|
|
##
# Returns the parser class selected by a <tt>markup: parser_name</tt>
# comment in +content+, or nil when there is no such comment or no parser
# of that name.  For example:
#
#   # markup: rdoc
#   # Class comment can go here
#
#   class C
#   end
#
# The comment is looked for in the first three lines of +content+, which
# leaves room for a shebang and an editor modeline before it.  Any comment
# style may be used to hide the markup comment.
#
# The names "tomdoc" and "markdown" select RDoc::Parser::Ruby.  Any other
# name is compared, ignoring case, with the last component of each
# registered parser's class name.

def self.use_markup content
  head = content.lines.first 3
  markup = head.map { |line| line[/markup:\s+(\w+)/, 1] }.compact.first

  return unless markup

  # TODO Ruby should be returned only when the filename is correct
  return RDoc::Parser::Ruby if %w[tomdoc markdown].include? markup

  name_pattern = /^#{Regexp.escape markup}$/i

  entry = RDoc::Parser.parsers.find do |_, parser|
    name_pattern =~ parser.name.sub(/.*:/, '')
  end

  entry && entry[1]
end
|
|
|
|
|
|
|
|
##
# Creates a new Parser for the file +file_name+.
#
# +top_level+, +file_name+, +content+, +options+ and +stats+ are stored in
# instance variables of the same names.  The parser's class is recorded on
# +top_level+ and the store of +top_level+ is kept in +@store+.
#
# An RDoc::Markup::PreProcess for +file_name+, using the include paths and
# options from +options+, is created in +@preprocess+ to allow processing of
# directives.

def initialize top_level, file_name, content, options, stats
  @top_level = top_level
  top_level.parser = self.class
  @store = top_level.store

  @file_name = file_name
  @content   = content
  @options   = options
  @stats     = stats

  @preprocess = RDoc::Markup::PreProcess.new file_name, options.rdoc_include
  @preprocess.options = options
end
|
|
|
|
|
2012-11-26 23:28:14 -05:00
|
|
|
autoload :RubyTools, 'rdoc/parser/ruby_tools'
|
|
|
|
autoload :Text, 'rdoc/parser/text'
|
|
|
|
|
2008-07-17 20:46:16 -04:00
|
|
|
end
|
|
|
|
|
2012-11-26 23:28:14 -05:00
|
|
|
# simple must come first in order to show up last in the parsers list
|
2008-07-17 20:46:16 -04:00
|
|
|
require 'rdoc/parser/simple'
|
2012-11-26 23:28:14 -05:00
|
|
|
require 'rdoc/parser/c'
|
2012-12-04 02:50:48 -05:00
|
|
|
require 'rdoc/parser/changelog'
|
2012-11-26 23:28:14 -05:00
|
|
|
require 'rdoc/parser/markdown'
|
|
|
|
require 'rdoc/parser/rd'
|
|
|
|
require 'rdoc/parser/ruby'
|
2008-07-17 20:46:16 -04:00
|
|
|
|