mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
Import RDoc 3
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
d7effd506f
commit
2ef9c50c6e
106 changed files with 8878 additions and 4179 deletions
|
@ -52,13 +52,13 @@ class RDoc::Markup::Parser
|
|||
attr_reader :tokens
|
||||
|
||||
##
|
||||
# Parsers +str+ into a Document
|
||||
# Parses +str+ into a Document
|
||||
|
||||
def self.parse str
|
||||
parser = new
|
||||
#parser.debug = true
|
||||
parser.tokenize str
|
||||
RDoc::Markup::Document.new(*parser.parse)
|
||||
doc = RDoc::Markup::Document.new
|
||||
parser.parse doc
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -86,6 +86,7 @@ class RDoc::Markup::Parser
|
|||
# Builds a Heading of +level+
|
||||
|
||||
def build_heading level
|
||||
_, text, = get # TEXT
|
||||
heading = RDoc::Markup::Heading.new level, text
|
||||
skip :NEWLINE
|
||||
|
||||
|
@ -105,38 +106,69 @@ class RDoc::Markup::Parser
|
|||
|
||||
case type
|
||||
when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
|
||||
list_type = type
|
||||
|
||||
if column < margin then
|
||||
if column < margin || (list.type && list.type != type) then
|
||||
unget
|
||||
break
|
||||
end
|
||||
|
||||
if list.type and list.type != list_type then
|
||||
unget
|
||||
break
|
||||
end
|
||||
|
||||
list.type = list_type
|
||||
list.type = type
|
||||
peek_type, _, column, = peek_token
|
||||
|
||||
case type
|
||||
when :NOTE, :LABEL then
|
||||
_, indent, = get # SPACE
|
||||
if :NEWLINE == peek_token.first then
|
||||
get
|
||||
peek_type, new_indent, peek_column, = peek_token
|
||||
indent = new_indent if
|
||||
peek_type == :INDENT and peek_column >= column
|
||||
unget
|
||||
if peek_type == :NEWLINE then
|
||||
# description not on the same line as LABEL/NOTE
|
||||
# skip the trailing newline & any blank lines below
|
||||
while peek_type == :NEWLINE
|
||||
get
|
||||
peek_type, _, column, = peek_token
|
||||
end
|
||||
|
||||
# we may be:
|
||||
# - at end of stream
|
||||
# - at a column < margin:
|
||||
# [text]
|
||||
# blah blah blah
|
||||
# - at the same column, but with a different type of list item
|
||||
# [text]
|
||||
# * blah blah
|
||||
# - at the same column, with the same type of list item
|
||||
# [one]
|
||||
# [two]
|
||||
# In all cases, we have an empty description.
|
||||
# In the last case only, we continue.
|
||||
if peek_type.nil? || column < margin then
|
||||
empty = 1
|
||||
elsif column == margin then
|
||||
case peek_type
|
||||
when type
|
||||
empty = 2 # continue
|
||||
when *LIST_TOKENS
|
||||
empty = 1
|
||||
else
|
||||
empty = 0
|
||||
end
|
||||
else
|
||||
empty = 0
|
||||
end
|
||||
|
||||
if empty > 0 then
|
||||
item = RDoc::Markup::ListItem.new(data)
|
||||
item << RDoc::Markup::BlankLine.new
|
||||
list << item
|
||||
break if empty == 1
|
||||
next
|
||||
end
|
||||
end
|
||||
else
|
||||
data = nil
|
||||
_, indent, = get
|
||||
end
|
||||
|
||||
list_item = build_list_item(margin + indent, data)
|
||||
list_item = RDoc::Markup::ListItem.new data
|
||||
parse list_item, column
|
||||
list << list_item
|
||||
|
||||
list << list_item if list_item
|
||||
else
|
||||
unget
|
||||
break
|
||||
|
@ -150,54 +182,6 @@ class RDoc::Markup::Parser
|
|||
list
|
||||
end
|
||||
|
||||
##
|
||||
# Builds a ListItem that is flush to +indent+ with type +item_type+
|
||||
|
||||
def build_list_item indent, item_type = nil
|
||||
p :list_item_start => [indent, item_type] if @debug
|
||||
|
||||
list_item = RDoc::Markup::ListItem.new item_type
|
||||
|
||||
until @tokens.empty? do
|
||||
type, data, column = get
|
||||
|
||||
if column < indent and
|
||||
not type == :NEWLINE and
|
||||
(type != :INDENT or data < indent) then
|
||||
unget
|
||||
break
|
||||
end
|
||||
|
||||
case type
|
||||
when :INDENT then
|
||||
unget
|
||||
list_item.push(*parse(indent))
|
||||
when :TEXT then
|
||||
unget
|
||||
list_item << build_paragraph(indent)
|
||||
when :HEADER then
|
||||
list_item << build_heading(data)
|
||||
when :NEWLINE then
|
||||
list_item << RDoc::Markup::BlankLine.new
|
||||
when *LIST_TOKENS then
|
||||
unget
|
||||
list_item << build_list(column)
|
||||
else
|
||||
raise ParseError, "Unhandled token #{@current_token.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
p :list_item_end => [indent, item_type] if @debug
|
||||
|
||||
return nil if list_item.empty?
|
||||
|
||||
list_item.parts.shift if
|
||||
RDoc::Markup::BlankLine === list_item.parts.first and
|
||||
list_item.length > 1
|
||||
|
||||
list_item
|
||||
end
|
||||
|
||||
##
|
||||
# Builds a Paragraph that is flush to +margin+
|
||||
|
||||
|
@ -209,18 +193,7 @@ class RDoc::Markup::Parser
|
|||
until @tokens.empty? do
|
||||
type, data, column, = get
|
||||
|
||||
case type
|
||||
when :INDENT then
|
||||
next if data == margin and peek_token[0] == :TEXT
|
||||
|
||||
unget
|
||||
break
|
||||
when :TEXT then
|
||||
if column != margin then
|
||||
unget
|
||||
break
|
||||
end
|
||||
|
||||
if type == :TEXT && column == margin then
|
||||
paragraph << data
|
||||
skip :NEWLINE
|
||||
else
|
||||
|
@ -235,67 +208,81 @@ class RDoc::Markup::Parser
|
|||
end
|
||||
|
||||
##
|
||||
# Builds a Verbatim that is flush to +margin+
|
||||
# Builds a Verbatim that is indented from +margin+.
|
||||
#
|
||||
# The verbatim block is shifted left (the least indented lines start in
|
||||
# column 0). Each part of the verbatim is one line of text, always
|
||||
# terminated by a newline. Blank lines always consist of a single newline
|
||||
# character, and there is never a single newline at the end of the verbatim.
|
||||
|
||||
def build_verbatim margin
|
||||
p :verbatim_begin => margin if @debug
|
||||
verbatim = RDoc::Markup::Verbatim.new
|
||||
|
||||
min_indent = nil
|
||||
generate_leading_spaces = true
|
||||
line = ''
|
||||
|
||||
until @tokens.empty? do
|
||||
type, data, column, = get
|
||||
|
||||
case type
|
||||
when :INDENT then
|
||||
if margin >= data then
|
||||
unget
|
||||
break
|
||||
end
|
||||
if type == :NEWLINE then
|
||||
line << data
|
||||
verbatim << line
|
||||
line = ''
|
||||
generate_leading_spaces = true
|
||||
next
|
||||
end
|
||||
|
||||
indent = data - margin
|
||||
|
||||
verbatim << ' ' * indent
|
||||
when :HEADER then
|
||||
verbatim << '=' * data
|
||||
|
||||
_, _, peek_column, = peek_token
|
||||
peek_column ||= column + data
|
||||
verbatim << ' ' * (peek_column - column - data)
|
||||
when :RULE then
|
||||
width = 2 + data
|
||||
verbatim << '-' * width
|
||||
|
||||
_, _, peek_column, = peek_token
|
||||
peek_column ||= column + data + 2
|
||||
verbatim << ' ' * (peek_column - column - width)
|
||||
when :TEXT then
|
||||
verbatim << data
|
||||
when *LIST_TOKENS then
|
||||
if column <= margin then
|
||||
unget
|
||||
break
|
||||
end
|
||||
|
||||
list_marker = case type
|
||||
when :BULLET then '*'
|
||||
when :LABEL then "[#{data}]"
|
||||
when :LALPHA, :NUMBER, :UALPHA then "#{data}."
|
||||
when :NOTE then "#{data}::"
|
||||
end
|
||||
|
||||
verbatim << list_marker
|
||||
|
||||
_, data, = get
|
||||
|
||||
verbatim << ' ' * (data - list_marker.length)
|
||||
when :NEWLINE then
|
||||
verbatim << data
|
||||
break unless [:INDENT, :NEWLINE].include? peek_token[0]
|
||||
else
|
||||
if column <= margin
|
||||
unget
|
||||
break
|
||||
end
|
||||
|
||||
if generate_leading_spaces then
|
||||
indent = column - margin
|
||||
line << ' ' * indent
|
||||
min_indent = indent if min_indent.nil? || indent < min_indent
|
||||
generate_leading_spaces = false
|
||||
end
|
||||
|
||||
case type
|
||||
when :HEADER then
|
||||
line << '=' * data
|
||||
_, _, peek_column, = peek_token
|
||||
peek_column ||= column + data
|
||||
indent = peek_column - column - data
|
||||
line << ' ' * indent
|
||||
when :RULE then
|
||||
width = 2 + data
|
||||
line << '-' * width
|
||||
_, _, peek_column, = peek_token
|
||||
peek_column ||= column + width
|
||||
indent = peek_column - column - width
|
||||
line << ' ' * indent
|
||||
when :TEXT then
|
||||
line << data
|
||||
else # *LIST_TOKENS
|
||||
list_marker = case type
|
||||
when :BULLET then data
|
||||
when :LABEL then "[#{data}]"
|
||||
when :NOTE then "#{data}::"
|
||||
else # :LALPHA, :NUMBER, :UALPHA
|
||||
"#{data}."
|
||||
end
|
||||
line << list_marker
|
||||
peek_type, _, peek_column = peek_token
|
||||
unless peek_type == :NEWLINE then
|
||||
peek_column ||= column + list_marker.length
|
||||
indent = peek_column - column - list_marker.length
|
||||
line << ' ' * indent
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
verbatim << line << "\n" unless line.empty?
|
||||
verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
|
||||
verbatim.normalize
|
||||
|
||||
p :verbatim_end => margin if @debug
|
||||
|
@ -313,65 +300,60 @@ class RDoc::Markup::Parser
|
|||
end
|
||||
|
||||
##
|
||||
# Parses the tokens into a Document
|
||||
# Parses the tokens into an array of RDoc::Markup::XXX objects,
|
||||
# and appends them to the passed +parent+ RDoc::Markup::YYY object.
|
||||
#
|
||||
# Exits at the end of the token stream, or when it encounters a token
|
||||
# in a column less than +indent+ (unless it is a NEWLINE).
|
||||
#
|
||||
# Returns +parent+.
|
||||
|
||||
def parse indent = 0
|
||||
def parse parent, indent = 0
|
||||
p :parse_start => indent if @debug
|
||||
|
||||
document = []
|
||||
|
||||
until @tokens.empty? do
|
||||
type, data, column, = get
|
||||
|
||||
if type != :INDENT and column < indent then
|
||||
unget
|
||||
break
|
||||
if type == :NEWLINE then
|
||||
# trailing newlines are skipped below, so this is a blank line
|
||||
parent << RDoc::Markup::BlankLine.new
|
||||
skip :NEWLINE, false
|
||||
next
|
||||
end
|
||||
|
||||
# indentation change: break or verbattim
|
||||
if column < indent then
|
||||
unget
|
||||
break
|
||||
elsif column > indent then
|
||||
unget
|
||||
parent << build_verbatim(indent)
|
||||
next
|
||||
end
|
||||
|
||||
# indentation is the same
|
||||
case type
|
||||
when :HEADER then
|
||||
document << build_heading(data)
|
||||
when :INDENT then
|
||||
if indent > data then
|
||||
unget
|
||||
break
|
||||
elsif indent == data then
|
||||
next
|
||||
end
|
||||
|
||||
unget
|
||||
document << build_verbatim(indent)
|
||||
when :NEWLINE then
|
||||
document << RDoc::Markup::BlankLine.new
|
||||
skip :NEWLINE, false
|
||||
parent << build_heading(data)
|
||||
when :RULE then
|
||||
document << RDoc::Markup::Rule.new(data)
|
||||
parent << RDoc::Markup::Rule.new(data)
|
||||
skip :NEWLINE
|
||||
when :TEXT then
|
||||
unget
|
||||
document << build_paragraph(indent)
|
||||
|
||||
# we're done with this paragraph (indent mismatch)
|
||||
break if peek_token[0] == :TEXT
|
||||
parent << build_paragraph(indent)
|
||||
when *LIST_TOKENS then
|
||||
unget
|
||||
|
||||
list = build_list(indent)
|
||||
|
||||
document << list if list
|
||||
|
||||
# we're done with this list (indent mismatch)
|
||||
break if LIST_TOKENS.include? peek_token.first and indent > 0
|
||||
parent << build_list(indent)
|
||||
else
|
||||
type, data, column, line = @current_token
|
||||
raise ParseError,
|
||||
"Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
|
||||
raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
|
||||
end
|
||||
end
|
||||
|
||||
p :parse_end => indent if @debug
|
||||
|
||||
document
|
||||
parent
|
||||
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -384,63 +366,16 @@ class RDoc::Markup::Parser
|
|||
end
|
||||
|
||||
##
|
||||
# Skips a token of +token_type+, optionally raising an error.
|
||||
# Skips the next token if its type is +token_type+.
|
||||
#
|
||||
# Optionally raises an error if the next token is not of the expected type.
|
||||
|
||||
def skip token_type, error = true
|
||||
type, = get
|
||||
|
||||
return unless type # end of stream
|
||||
|
||||
return @current_token if token_type == type
|
||||
|
||||
unget
|
||||
|
||||
raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if
|
||||
error
|
||||
end
|
||||
|
||||
##
|
||||
# Consumes tokens until NEWLINE and turns them back into text
|
||||
|
||||
def text
|
||||
text = ''
|
||||
|
||||
loop do
|
||||
type, data, = get
|
||||
|
||||
text << case type
|
||||
when :BULLET then
|
||||
_, space, = get # SPACE
|
||||
"*#{' ' * (space - 1)}"
|
||||
when :LABEL then
|
||||
_, space, = get # SPACE
|
||||
"[#{data}]#{' ' * (space - data.length - 2)}"
|
||||
when :LALPHA, :NUMBER, :UALPHA then
|
||||
_, space, = get # SPACE
|
||||
"#{data}.#{' ' * (space - 2)}"
|
||||
when :NOTE then
|
||||
_, space = get # SPACE
|
||||
"#{data}::#{' ' * (space - data.length - 2)}"
|
||||
when :TEXT then
|
||||
data
|
||||
when :NEWLINE then
|
||||
unget
|
||||
break
|
||||
when nil then
|
||||
break
|
||||
else
|
||||
raise ParseError, "unhandled token #{@current_token.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
text
|
||||
end
|
||||
|
||||
##
|
||||
# Calculates the column and line of the current token based on +offset+.
|
||||
|
||||
def token_pos offset
|
||||
[offset - @line_pos, @line]
|
||||
raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
|
||||
end
|
||||
|
||||
##
|
||||
|
@ -455,51 +390,62 @@ class RDoc::Markup::Parser
|
|||
until s.eos? do
|
||||
pos = s.pos
|
||||
|
||||
# leading spaces will be reflected by the column of the next token
|
||||
# the only thing we loose are trailing spaces at the end of the file
|
||||
next if s.scan(/ +/)
|
||||
|
||||
# note: after BULLET, LABEL, etc.,
|
||||
# indent will be the column of the next non-newline token
|
||||
|
||||
@tokens << case
|
||||
# [CR]LF => :NEWLINE
|
||||
when s.scan(/\r?\n/) then
|
||||
token = [:NEWLINE, s.matched, *token_pos(pos)]
|
||||
@line_pos = s.pos
|
||||
@line += 1
|
||||
token
|
||||
when s.scan(/ +/) then
|
||||
[:INDENT, s.matched_size, *token_pos(pos)]
|
||||
# === text => :HEADER then :TEXT
|
||||
when s.scan(/(=+)\s*/) then
|
||||
level = s[1].length
|
||||
level = 6 if level > 6
|
||||
@tokens << [:HEADER, level, *token_pos(pos)]
|
||||
|
||||
pos = s.pos
|
||||
s.scan(/.*/)
|
||||
[:TEXT, s.matched, *token_pos(pos)]
|
||||
when s.scan(/^(-{3,}) *$/) then
|
||||
[:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
|
||||
# --- (at least 3) and nothing else on the line => :RULE
|
||||
when s.scan(/(-{3,}) *$/) then
|
||||
[:RULE, s[1].length - 2, *token_pos(pos)]
|
||||
when s.scan(/([*-])\s+/) then
|
||||
@tokens << [:BULLET, :BULLET, *token_pos(pos)]
|
||||
[:SPACE, s.matched_size, *token_pos(pos)]
|
||||
when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
|
||||
# * or - followed by white space and text => :BULLET
|
||||
when s.scan(/([*-]) +(\S)/) then
|
||||
s.pos -= s[2].bytesize # unget \S
|
||||
[:BULLET, s[1], *token_pos(pos)]
|
||||
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
|
||||
when s.scan(/([a-z]|\d+)\. +(\S)/i) then
|
||||
# FIXME if tab(s), the column will be wrong
|
||||
# either support tabs everywhere by first expanding them to
|
||||
# spaces, or assume that they will have been replaced
|
||||
# before (and provide a check for that at least in debug
|
||||
# mode)
|
||||
list_label = s[1]
|
||||
width = s.matched_size - 1
|
||||
|
||||
s.pos -= 1 # unget \S
|
||||
|
||||
list_type = case list_label
|
||||
when /[a-z]/ then :LALPHA
|
||||
when /[A-Z]/ then :UALPHA
|
||||
when /\d/ then :NUMBER
|
||||
else
|
||||
raise ParseError, "BUG token #{list_label}"
|
||||
end
|
||||
|
||||
@tokens << [list_type, list_label, *token_pos(pos)]
|
||||
[:SPACE, width, *token_pos(pos)]
|
||||
s.pos -= s[2].bytesize # unget \S
|
||||
list_type =
|
||||
case list_label
|
||||
when /[a-z]/ then :LALPHA
|
||||
when /[A-Z]/ then :UALPHA
|
||||
when /\d/ then :NUMBER
|
||||
else
|
||||
raise ParseError, "BUG token #{list_label}"
|
||||
end
|
||||
[list_type, list_label, *token_pos(pos)]
|
||||
# [text] followed by spaces or end of line => :LABEL
|
||||
when s.scan(/\[(.*?)\]( +|$)/) then
|
||||
@tokens << [:LABEL, s[1], *token_pos(pos)]
|
||||
[:SPACE, s.matched_size, *token_pos(pos)]
|
||||
[:LABEL, s[1], *token_pos(pos)]
|
||||
# text:: followed by spaces or end of line => :NOTE
|
||||
when s.scan(/(.*?)::( +|$)/) then
|
||||
@tokens << [:NOTE, s[1], *token_pos(pos)]
|
||||
[:SPACE, s.matched_size, *token_pos(pos)]
|
||||
[:NOTE, s[1], *token_pos(pos)]
|
||||
# anything else: :TEXT
|
||||
else s.scan(/.*/)
|
||||
[:TEXT, s.matched, *token_pos(pos)]
|
||||
[:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -507,9 +453,17 @@ class RDoc::Markup::Parser
|
|||
end
|
||||
|
||||
##
|
||||
# Returns the current token or +token+ to the token stream
|
||||
# Calculates the column and line of the current token based on +offset+.
|
||||
|
||||
def unget token = @current_token
|
||||
def token_pos offset
|
||||
[offset - @line_pos, @line]
|
||||
end
|
||||
|
||||
##
|
||||
# Returns the current token to the token stream
|
||||
|
||||
def unget
|
||||
token = @current_token
|
||||
p :unget => token if @debug
|
||||
raise Error, 'too many #ungets' if token == @tokens.first
|
||||
@tokens.unshift token if token
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue