1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Import RDoc 3

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
drbrain 2010-12-20 03:22:49 +00:00
parent d7effd506f
commit 2ef9c50c6e
106 changed files with 8878 additions and 4179 deletions

View file

@ -52,13 +52,13 @@ class RDoc::Markup::Parser
attr_reader :tokens
##
# Parsers +str+ into a Document
# Parses +str+ into a Document
def self.parse str
parser = new
#parser.debug = true
parser.tokenize str
RDoc::Markup::Document.new(*parser.parse)
doc = RDoc::Markup::Document.new
parser.parse doc
end
##
@ -86,6 +86,7 @@ class RDoc::Markup::Parser
# Builds a Heading of +level+
def build_heading level
_, text, = get # TEXT
heading = RDoc::Markup::Heading.new level, text
skip :NEWLINE
@ -105,38 +106,69 @@ class RDoc::Markup::Parser
case type
when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
list_type = type
if column < margin then
if column < margin || (list.type && list.type != type) then
unget
break
end
if list.type and list.type != list_type then
unget
break
end
list.type = list_type
list.type = type
peek_type, _, column, = peek_token
case type
when :NOTE, :LABEL then
_, indent, = get # SPACE
if :NEWLINE == peek_token.first then
get
peek_type, new_indent, peek_column, = peek_token
indent = new_indent if
peek_type == :INDENT and peek_column >= column
unget
if peek_type == :NEWLINE then
# description not on the same line as LABEL/NOTE
# skip the trailing newline & any blank lines below
while peek_type == :NEWLINE
get
peek_type, _, column, = peek_token
end
# we may be:
# - at end of stream
# - at a column < margin:
# [text]
# blah blah blah
# - at the same column, but with a different type of list item
# [text]
# * blah blah
# - at the same column, with the same type of list item
# [one]
# [two]
# In all cases, we have an empty description.
# In the last case only, we continue.
if peek_type.nil? || column < margin then
empty = 1
elsif column == margin then
case peek_type
when type
empty = 2 # continue
when *LIST_TOKENS
empty = 1
else
empty = 0
end
else
empty = 0
end
if empty > 0 then
item = RDoc::Markup::ListItem.new(data)
item << RDoc::Markup::BlankLine.new
list << item
break if empty == 1
next
end
end
else
data = nil
_, indent, = get
end
list_item = build_list_item(margin + indent, data)
list_item = RDoc::Markup::ListItem.new data
parse list_item, column
list << list_item
list << list_item if list_item
else
unget
break
@ -150,54 +182,6 @@ class RDoc::Markup::Parser
list
end
##
# Builds a ListItem that is flush to +indent+ with type +item_type+
def build_list_item indent, item_type = nil
p :list_item_start => [indent, item_type] if @debug
list_item = RDoc::Markup::ListItem.new item_type
until @tokens.empty? do
type, data, column = get
if column < indent and
not type == :NEWLINE and
(type != :INDENT or data < indent) then
unget
break
end
case type
when :INDENT then
unget
list_item.push(*parse(indent))
when :TEXT then
unget
list_item << build_paragraph(indent)
when :HEADER then
list_item << build_heading(data)
when :NEWLINE then
list_item << RDoc::Markup::BlankLine.new
when *LIST_TOKENS then
unget
list_item << build_list(column)
else
raise ParseError, "Unhandled token #{@current_token.inspect}"
end
end
p :list_item_end => [indent, item_type] if @debug
return nil if list_item.empty?
list_item.parts.shift if
RDoc::Markup::BlankLine === list_item.parts.first and
list_item.length > 1
list_item
end
##
# Builds a Paragraph that is flush to +margin+
@ -209,18 +193,7 @@ class RDoc::Markup::Parser
until @tokens.empty? do
type, data, column, = get
case type
when :INDENT then
next if data == margin and peek_token[0] == :TEXT
unget
break
when :TEXT then
if column != margin then
unget
break
end
if type == :TEXT && column == margin then
paragraph << data
skip :NEWLINE
else
@ -235,67 +208,81 @@ class RDoc::Markup::Parser
end
##
# Builds a Verbatim that is flush to +margin+
# Builds a Verbatim that is indented from +margin+.
#
# The verbatim block is shifted left (the least indented lines start in
# column 0). Each part of the verbatim is one line of text, always
# terminated by a newline. Blank lines always consist of a single newline
# character, and there is never a single newline at the end of the verbatim.
def build_verbatim margin
p :verbatim_begin => margin if @debug
verbatim = RDoc::Markup::Verbatim.new
min_indent = nil
generate_leading_spaces = true
line = ''
until @tokens.empty? do
type, data, column, = get
case type
when :INDENT then
if margin >= data then
unget
break
end
if type == :NEWLINE then
line << data
verbatim << line
line = ''
generate_leading_spaces = true
next
end
indent = data - margin
verbatim << ' ' * indent
when :HEADER then
verbatim << '=' * data
_, _, peek_column, = peek_token
peek_column ||= column + data
verbatim << ' ' * (peek_column - column - data)
when :RULE then
width = 2 + data
verbatim << '-' * width
_, _, peek_column, = peek_token
peek_column ||= column + data + 2
verbatim << ' ' * (peek_column - column - width)
when :TEXT then
verbatim << data
when *LIST_TOKENS then
if column <= margin then
unget
break
end
list_marker = case type
when :BULLET then '*'
when :LABEL then "[#{data}]"
when :LALPHA, :NUMBER, :UALPHA then "#{data}."
when :NOTE then "#{data}::"
end
verbatim << list_marker
_, data, = get
verbatim << ' ' * (data - list_marker.length)
when :NEWLINE then
verbatim << data
break unless [:INDENT, :NEWLINE].include? peek_token[0]
else
if column <= margin
unget
break
end
if generate_leading_spaces then
indent = column - margin
line << ' ' * indent
min_indent = indent if min_indent.nil? || indent < min_indent
generate_leading_spaces = false
end
case type
when :HEADER then
line << '=' * data
_, _, peek_column, = peek_token
peek_column ||= column + data
indent = peek_column - column - data
line << ' ' * indent
when :RULE then
width = 2 + data
line << '-' * width
_, _, peek_column, = peek_token
peek_column ||= column + width
indent = peek_column - column - width
line << ' ' * indent
when :TEXT then
line << data
else # *LIST_TOKENS
list_marker = case type
when :BULLET then data
when :LABEL then "[#{data}]"
when :NOTE then "#{data}::"
else # :LALPHA, :NUMBER, :UALPHA
"#{data}."
end
line << list_marker
peek_type, _, peek_column = peek_token
unless peek_type == :NEWLINE then
peek_column ||= column + list_marker.length
indent = peek_column - column - list_marker.length
line << ' ' * indent
end
end
end
verbatim << line << "\n" unless line.empty?
verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
verbatim.normalize
p :verbatim_end => margin if @debug
@ -313,65 +300,60 @@ class RDoc::Markup::Parser
end
##
# Parses the tokens into a Document
# Parses the tokens into an array of RDoc::Markup::XXX objects,
# and appends them to the passed +parent+ RDoc::Markup::YYY object.
#
# Exits at the end of the token stream, or when it encounters a token
# in a column less than +indent+ (unless it is a NEWLINE).
#
# Returns +parent+.
def parse indent = 0
def parse parent, indent = 0
p :parse_start => indent if @debug
document = []
until @tokens.empty? do
type, data, column, = get
if type != :INDENT and column < indent then
unget
break
if type == :NEWLINE then
# trailing newlines are skipped below, so this is a blank line
parent << RDoc::Markup::BlankLine.new
skip :NEWLINE, false
next
end
# indentation change: break or verbattim
if column < indent then
unget
break
elsif column > indent then
unget
parent << build_verbatim(indent)
next
end
# indentation is the same
case type
when :HEADER then
document << build_heading(data)
when :INDENT then
if indent > data then
unget
break
elsif indent == data then
next
end
unget
document << build_verbatim(indent)
when :NEWLINE then
document << RDoc::Markup::BlankLine.new
skip :NEWLINE, false
parent << build_heading(data)
when :RULE then
document << RDoc::Markup::Rule.new(data)
parent << RDoc::Markup::Rule.new(data)
skip :NEWLINE
when :TEXT then
unget
document << build_paragraph(indent)
# we're done with this paragraph (indent mismatch)
break if peek_token[0] == :TEXT
parent << build_paragraph(indent)
when *LIST_TOKENS then
unget
list = build_list(indent)
document << list if list
# we're done with this list (indent mismatch)
break if LIST_TOKENS.include? peek_token.first and indent > 0
parent << build_list(indent)
else
type, data, column, line = @current_token
raise ParseError,
"Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
end
end
p :parse_end => indent if @debug
document
parent
end
##
@ -384,63 +366,16 @@ class RDoc::Markup::Parser
end
##
# Skips a token of +token_type+, optionally raising an error.
# Skips the next token if its type is +token_type+.
#
# Optionally raises an error if the next token is not of the expected type.
def skip token_type, error = true
type, = get
return unless type # end of stream
return @current_token if token_type == type
unget
raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if
error
end
##
# Consumes tokens until NEWLINE and turns them back into text
def text
text = ''
loop do
type, data, = get
text << case type
when :BULLET then
_, space, = get # SPACE
"*#{' ' * (space - 1)}"
when :LABEL then
_, space, = get # SPACE
"[#{data}]#{' ' * (space - data.length - 2)}"
when :LALPHA, :NUMBER, :UALPHA then
_, space, = get # SPACE
"#{data}.#{' ' * (space - 2)}"
when :NOTE then
_, space = get # SPACE
"#{data}::#{' ' * (space - data.length - 2)}"
when :TEXT then
data
when :NEWLINE then
unget
break
when nil then
break
else
raise ParseError, "unhandled token #{@current_token.inspect}"
end
end
text
end
##
# Calculates the column and line of the current token based on +offset+.
def token_pos offset
[offset - @line_pos, @line]
raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
end
##
@ -455,51 +390,62 @@ class RDoc::Markup::Parser
until s.eos? do
pos = s.pos
# leading spaces will be reflected by the column of the next token
# the only thing we loose are trailing spaces at the end of the file
next if s.scan(/ +/)
# note: after BULLET, LABEL, etc.,
# indent will be the column of the next non-newline token
@tokens << case
# [CR]LF => :NEWLINE
when s.scan(/\r?\n/) then
token = [:NEWLINE, s.matched, *token_pos(pos)]
@line_pos = s.pos
@line += 1
token
when s.scan(/ +/) then
[:INDENT, s.matched_size, *token_pos(pos)]
# === text => :HEADER then :TEXT
when s.scan(/(=+)\s*/) then
level = s[1].length
level = 6 if level > 6
@tokens << [:HEADER, level, *token_pos(pos)]
pos = s.pos
s.scan(/.*/)
[:TEXT, s.matched, *token_pos(pos)]
when s.scan(/^(-{3,}) *$/) then
[:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
# --- (at least 3) and nothing else on the line => :RULE
when s.scan(/(-{3,}) *$/) then
[:RULE, s[1].length - 2, *token_pos(pos)]
when s.scan(/([*-])\s+/) then
@tokens << [:BULLET, :BULLET, *token_pos(pos)]
[:SPACE, s.matched_size, *token_pos(pos)]
when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
# * or - followed by white space and text => :BULLET
when s.scan(/([*-]) +(\S)/) then
s.pos -= s[2].bytesize # unget \S
[:BULLET, s[1], *token_pos(pos)]
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
when s.scan(/([a-z]|\d+)\. +(\S)/i) then
# FIXME if tab(s), the column will be wrong
# either support tabs everywhere by first expanding them to
# spaces, or assume that they will have been replaced
# before (and provide a check for that at least in debug
# mode)
list_label = s[1]
width = s.matched_size - 1
s.pos -= 1 # unget \S
list_type = case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
@tokens << [list_type, list_label, *token_pos(pos)]
[:SPACE, width, *token_pos(pos)]
s.pos -= s[2].bytesize # unget \S
list_type =
case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
[list_type, list_label, *token_pos(pos)]
# [text] followed by spaces or end of line => :LABEL
when s.scan(/\[(.*?)\]( +|$)/) then
@tokens << [:LABEL, s[1], *token_pos(pos)]
[:SPACE, s.matched_size, *token_pos(pos)]
[:LABEL, s[1], *token_pos(pos)]
# text:: followed by spaces or end of line => :NOTE
when s.scan(/(.*?)::( +|$)/) then
@tokens << [:NOTE, s[1], *token_pos(pos)]
[:SPACE, s.matched_size, *token_pos(pos)]
[:NOTE, s[1], *token_pos(pos)]
# anything else: :TEXT
else s.scan(/.*/)
[:TEXT, s.matched, *token_pos(pos)]
[:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
end
end
@ -507,9 +453,17 @@ class RDoc::Markup::Parser
end
##
# Returns the current token or +token+ to the token stream
# Calculates the column and line of the current token based on +offset+.
def unget token = @current_token
def token_pos offset
[offset - @line_pos, @line]
end
##
# Returns the current token to the token stream
def unget
token = @current_token
p :unget => token if @debug
raise Error, 'too many #ungets' if token == @tokens.first
@tokens.unshift token if token