Import RDoc 3

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2022-11-09 12:17:21 -05:00 · 2010-12-20 03:22:49 +00:00 · 2010-12-20 03:22:49 +00:00 · 2ef9c50c6e
commit 2ef9c50c6e
parent d7effd506f
106 changed files with 8878 additions and 4179 deletions
--- a/lib/rdoc/markup/parser.rb
+++ b/lib/rdoc/markup/parser.rb
@ -52,13 +52,13 @@ class RDoc::Markup::Parser
  attr_reader :tokens

  ##
-  # Parsers +str+ into a Document
+  # Parses +str+ into a Document

  def self.parse str
    parser = new
-    #parser.debug = true
    parser.tokenize str
-    RDoc::Markup::Document.new(*parser.parse)
+    doc = RDoc::Markup::Document.new
+    parser.parse doc
  end

  ##
@ -86,6 +86,7 @@ class RDoc::Markup::Parser
  # Builds a Heading of +level+

  def build_heading level
+    _, text, = get  # TEXT
    heading = RDoc::Markup::Heading.new level, text
    skip :NEWLINE

@ -105,38 +106,69 @@ class RDoc::Markup::Parser

      case type
      when :BULLET, :LABEL, :LALPHA, :NOTE, :NUMBER, :UALPHA then
-        list_type = type

-        if column < margin then
+        if column < margin || (list.type && list.type != type) then
          unget
          break
        end

-        if list.type and list.type != list_type then
-          unget
-          break
-        end
-
-        list.type = list_type
+        list.type = type
+        peek_type, _, column, = peek_token

        case type
        when :NOTE, :LABEL then
-          _, indent, = get # SPACE
-          if :NEWLINE == peek_token.first then
-            get
-            peek_type, new_indent, peek_column, = peek_token
-            indent = new_indent if
-              peek_type == :INDENT and peek_column >= column
-            unget
+          if peek_type == :NEWLINE then
+            # description not on the same line as LABEL/NOTE
+            # skip the trailing newline & any blank lines below
+            while peek_type == :NEWLINE
+              get
+              peek_type, _, column, = peek_token
+            end
+
+            # we may be:
+            #   - at end of stream
+            #   - at a column < margin:
+            #         [text]
+            #       blah blah blah
+            #   - at the same column, but with a different type of list item
+            #       [text]
+            #       * blah blah
+            #   - at the same column, with the same type of list item
+            #       [one]
+            #       [two]
+            # In all cases, we have an empty description.
+            # In the last case only, we continue.
+            if peek_type.nil? || column < margin then
+              empty = 1
+            elsif column == margin then
+              case peek_type
+              when type
+                empty = 2 # continue
+              when *LIST_TOKENS
+                empty = 1
+              else
+                empty = 0
+              end
+            else
+              empty = 0
+            end
+
+            if empty > 0 then
+              item = RDoc::Markup::ListItem.new(data)
+              item << RDoc::Markup::BlankLine.new
+              list << item
+              break if empty == 1
+              next
+            end
          end
        else
          data = nil
-          _, indent, = get
        end

-        list_item = build_list_item(margin + indent, data)
+        list_item = RDoc::Markup::ListItem.new data
+        parse list_item, column
+        list << list_item

-        list << list_item if list_item
      else
        unget
        break
@ -150,54 +182,6 @@ class RDoc::Markup::Parser
    list
  end

-  ##
-  # Builds a ListItem that is flush to +indent+ with type +item_type+
-
-  def build_list_item indent, item_type = nil
-    p :list_item_start => [indent, item_type] if @debug
-
-    list_item = RDoc::Markup::ListItem.new item_type
-
-    until @tokens.empty? do
-      type, data, column = get
-
-      if column < indent and
-         not type == :NEWLINE and
-         (type != :INDENT or data < indent) then
-        unget
-        break
-      end
-
-      case type
-      when :INDENT then
-        unget
-        list_item.push(*parse(indent))
-      when :TEXT then
-        unget
-        list_item << build_paragraph(indent)
-      when :HEADER then
-        list_item << build_heading(data)
-      when :NEWLINE then
-        list_item << RDoc::Markup::BlankLine.new
-      when *LIST_TOKENS then
-        unget
-        list_item << build_list(column)
-      else
-        raise ParseError, "Unhandled token #{@current_token.inspect}"
-      end
-    end
-
-    p :list_item_end => [indent, item_type] if @debug
-
-    return nil if list_item.empty?
-
-    list_item.parts.shift if
-      RDoc::Markup::BlankLine === list_item.parts.first and
-      list_item.length > 1
-
-    list_item
-  end
-
  ##
  # Builds a Paragraph that is flush to +margin+

@ -209,18 +193,7 @@ class RDoc::Markup::Parser
    until @tokens.empty? do
      type, data, column, = get

-      case type
-      when :INDENT then
-        next if data == margin and peek_token[0] == :TEXT
-
-        unget
-        break
-      when :TEXT then
-        if column != margin then
-          unget
-          break
-        end
-
+      if type == :TEXT && column == margin then
        paragraph << data
        skip :NEWLINE
      else
@ -235,67 +208,81 @@ class RDoc::Markup::Parser
  end

  ##
-  # Builds a Verbatim that is flush to +margin+
+  # Builds a Verbatim that is indented from +margin+.
+  #
+  # The verbatim block is shifted left (the least indented lines start in
+  # column 0).  Each part of the verbatim is one line of text, always
+  # terminated by a newline.  Blank lines always consist of a single newline
+  # character, and there is never a single newline at the end of the verbatim.

  def build_verbatim margin
    p :verbatim_begin => margin if @debug
    verbatim = RDoc::Markup::Verbatim.new

+    min_indent = nil
+    generate_leading_spaces = true
+    line = ''
+
    until @tokens.empty? do
      type, data, column, = get

-      case type
-      when :INDENT then
-        if margin >= data then
-          unget
-          break
-        end
+      if type == :NEWLINE then
+        line << data
+        verbatim << line
+        line = ''
+        generate_leading_spaces = true
+        next
+      end

-        indent = data - margin
-
-        verbatim << ' ' * indent
-      when :HEADER then
-        verbatim << '=' * data
-
-        _, _, peek_column, = peek_token
-        peek_column ||= column + data
-        verbatim << ' ' * (peek_column - column - data)
-      when :RULE then
-        width = 2 + data
-        verbatim << '-' * width
-
-        _, _, peek_column, = peek_token
-        peek_column ||= column + data + 2
-        verbatim << ' ' * (peek_column - column - width)
-      when :TEXT then
-        verbatim << data
-      when *LIST_TOKENS then
-        if column <= margin then
-          unget
-          break
-        end
-
-        list_marker = case type
-                      when :BULLET                   then '*'
-                      when :LABEL                    then "[#{data}]"
-                      when :LALPHA, :NUMBER, :UALPHA then "#{data}."
-                      when :NOTE                     then "#{data}::"
-                      end
-
-        verbatim << list_marker
-
-        _, data, = get
-
-        verbatim << ' ' * (data - list_marker.length)
-      when :NEWLINE then
-        verbatim << data
-        break unless [:INDENT, :NEWLINE].include? peek_token[0]
-      else
+      if column <= margin
        unget
        break
      end
+
+      if generate_leading_spaces then
+        indent = column - margin
+        line << ' ' * indent
+        min_indent = indent if min_indent.nil? || indent < min_indent
+        generate_leading_spaces = false
+      end
+
+      case type
+      when :HEADER then
+        line << '=' * data
+        _, _, peek_column, = peek_token
+        peek_column ||= column + data
+        indent = peek_column - column - data
+        line << ' ' * indent
+      when :RULE then
+        width = 2 + data
+        line << '-' * width
+        _, _, peek_column, = peek_token
+        peek_column ||= column + width
+        indent = peek_column - column - width
+        line << ' ' * indent
+      when :TEXT then
+        line << data
+      else # *LIST_TOKENS
+        list_marker = case type
+                      when :BULLET then data
+                      when :LABEL  then "[#{data}]"
+                      when :NOTE   then "#{data}::"
+                      else # :LALPHA, :NUMBER, :UALPHA
+                        "#{data}."
+                      end
+        line << list_marker
+        peek_type, _, peek_column = peek_token
+        unless peek_type == :NEWLINE then
+          peek_column ||= column + list_marker.length
+          indent = peek_column - column - list_marker.length
+          line << ' ' * indent
+        end
+      end
+
    end

+    verbatim << line << "\n" unless line.empty?
+    verbatim.parts.each { |p| p.slice!(0, min_indent) unless p == "\n" } if min_indent > 0
    verbatim.normalize

    p :verbatim_end => margin if @debug
@ -313,65 +300,60 @@ class RDoc::Markup::Parser
  end

  ##
-  # Parses the tokens into a Document
+  # Parses the tokens into an array of RDoc::Markup::XXX objects,
+  # and appends them to the passed +parent+ RDoc::Markup::YYY object.
+  #
+  # Exits at the end of the token stream, or when it encounters a token
+  # in a column less than +indent+ (unless it is a NEWLINE).
+  #
+  # Returns +parent+.

-  def parse indent = 0
+  def parse parent, indent = 0
    p :parse_start => indent if @debug

-    document = []
-
    until @tokens.empty? do
      type, data, column, = get

-      if type != :INDENT and column < indent then
-        unget
-        break
+      if type == :NEWLINE then
+        # trailing newlines are skipped below, so this is a blank line
+        parent << RDoc::Markup::BlankLine.new
+        skip :NEWLINE, false
+        next
      end

+      # indentation change: break or verbatim
+      if column < indent then
+        unget
+        break
+      elsif column > indent then
+        unget
+        parent << build_verbatim(indent)
+        next
+      end
+
+      # indentation is the same
      case type
      when :HEADER then
-        document << build_heading(data)
-      when :INDENT then
-        if indent > data then
-          unget
-          break
-        elsif indent == data then
-          next
-        end
-
-        unget
-        document << build_verbatim(indent)
-      when :NEWLINE then
-        document << RDoc::Markup::BlankLine.new
-        skip :NEWLINE, false
+        parent << build_heading(data)
      when :RULE then
-        document << RDoc::Markup::Rule.new(data)
+        parent << RDoc::Markup::Rule.new(data)
        skip :NEWLINE
      when :TEXT then
        unget
-        document << build_paragraph(indent)
-
-        # we're done with this paragraph (indent mismatch)
-        break if peek_token[0] == :TEXT
+        parent << build_paragraph(indent)
      when *LIST_TOKENS then
        unget
-
-        list = build_list(indent)
-
-        document << list if list
-
-        # we're done with this list (indent mismatch)
-        break if LIST_TOKENS.include? peek_token.first and indent > 0
+        parent << build_list(indent)
      else
        type, data, column, line = @current_token
-        raise ParseError,
-              "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
+        raise ParseError, "Unhandled token #{type} (#{data.inspect}) at #{line}:#{column}"
      end
    end

    p :parse_end => indent if @debug

-    document
+    parent
+
  end

  ##
@ -384,63 +366,16 @@ class RDoc::Markup::Parser
  end

  ##
-  # Skips a token of +token_type+, optionally raising an error.
+  # Skips the next token if its type is +token_type+.
+  #
+  # Optionally raises an error if the next token is not of the expected type.

  def skip token_type, error = true
    type, = get
-
    return unless type # end of stream
-
    return @current_token if token_type == type
-
    unget
-
-    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if
-      error
-  end
-
-  ##
-  # Consumes tokens until NEWLINE and turns them back into text
-
-  def text
-    text = ''
-
-    loop do
-      type, data, = get
-
-      text << case type
-              when :BULLET then
-                _, space, = get # SPACE
-                "*#{' ' * (space - 1)}"
-              when :LABEL then
-                _, space, = get # SPACE
-                "[#{data}]#{' ' * (space - data.length - 2)}"
-              when :LALPHA, :NUMBER, :UALPHA then
-                _, space, = get # SPACE
-                "#{data}.#{' ' * (space - 2)}"
-              when :NOTE then
-                _, space = get # SPACE
-                "#{data}::#{' ' * (space - data.length - 2)}"
-              when :TEXT then
-                data
-              when :NEWLINE then
-                unget
-                break
-              when nil then
-                break
-              else
-                raise ParseError, "unhandled token #{@current_token.inspect}"
-              end
-    end
-
-    text
-  end
-
-  ##
-  # Calculates the column and line of the current token based on +offset+.
-
-  def token_pos offset
-    [offset - @line_pos, @line]
+    raise ParseError, "expected #{token_type} got #{@current_token.inspect}" if error
  end

  ##
@ -455,51 +390,62 @@ class RDoc::Markup::Parser
    until s.eos? do
      pos = s.pos

+      # leading spaces will be reflected by the column of the next token;
+      # the only thing we lose is trailing spaces at the end of the file
+      next if s.scan(/ +/)
+
+      # note: after BULLET, LABEL, etc.,
+      # indent will be the column of the next non-newline token
+
      @tokens << case
+                 # [CR]LF => :NEWLINE
                 when s.scan(/\r?\n/) then
                   token = [:NEWLINE, s.matched, *token_pos(pos)]
                   @line_pos = s.pos
                   @line += 1
                   token
-                 when s.scan(/ +/) then
-                   [:INDENT, s.matched_size, *token_pos(pos)]
+                 # === text => :HEADER then :TEXT
                 when s.scan(/(=+)\s*/) then
                   level = s[1].length
                   level = 6 if level > 6
                   @tokens << [:HEADER, level, *token_pos(pos)]
-
                   pos = s.pos
                   s.scan(/.*/)
-                   [:TEXT, s.matched, *token_pos(pos)]
-                 when s.scan(/^(-{3,}) *$/) then
+                   [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
+                 # --- (at least 3) and nothing else on the line => :RULE
+                 when s.scan(/(-{3,}) *$/) then
                   [:RULE, s[1].length - 2, *token_pos(pos)]
-                 when s.scan(/([*-])\s+/) then
-                   @tokens << [:BULLET, :BULLET, *token_pos(pos)]
-                   [:SPACE, s.matched_size, *token_pos(pos)]
-                 when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
+                 # * or - followed by white space and text => :BULLET
+                 when s.scan(/([*-]) +(\S)/) then
+                   s.pos -= s[2].bytesize # unget \S
+                   [:BULLET, s[1], *token_pos(pos)]
+                 # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
+                 when s.scan(/([a-z]|\d+)\. +(\S)/i) then
+                   # FIXME if tab(s), the column will be wrong
+                   # either support tabs everywhere by first expanding them to
+                   # spaces, or assume that they will have been replaced
+                   # before (and provide a check for that at least in debug
+                   # mode)
                   list_label = s[1]
-                   width      = s.matched_size - 1
-
-                   s.pos -= 1 # unget \S
-
-                   list_type = case list_label
-                               when /[a-z]/ then :LALPHA
-                               when /[A-Z]/ then :UALPHA
-                               when /\d/    then :NUMBER
-                               else
-                                 raise ParseError, "BUG token #{list_label}"
-                               end
-
-                   @tokens << [list_type, list_label, *token_pos(pos)]
-                   [:SPACE, width, *token_pos(pos)]
+                   s.pos -= s[2].bytesize # unget \S
+                   list_type =
+                     case list_label
+                     when /[a-z]/ then :LALPHA
+                     when /[A-Z]/ then :UALPHA
+                     when /\d/    then :NUMBER
+                     else
+                       raise ParseError, "BUG token #{list_label}"
+                     end
+                   [list_type, list_label, *token_pos(pos)]
+                 # [text] followed by spaces or end of line => :LABEL
                 when s.scan(/\[(.*?)\]( +|$)/) then
-                   @tokens << [:LABEL, s[1], *token_pos(pos)]
-                   [:SPACE, s.matched_size, *token_pos(pos)]
+                   [:LABEL, s[1], *token_pos(pos)]
+                 # text:: followed by spaces or end of line => :NOTE
                 when s.scan(/(.*?)::( +|$)/) then
-                   @tokens << [:NOTE, s[1], *token_pos(pos)]
-                   [:SPACE, s.matched_size, *token_pos(pos)]
+                   [:NOTE, s[1], *token_pos(pos)]
+                 # anything else: :TEXT
                 else s.scan(/.*/)
-                   [:TEXT, s.matched, *token_pos(pos)]
+                   [:TEXT, s.matched.sub(/\r$/, ''), *token_pos(pos)]
                 end
    end

@ -507,9 +453,17 @@ class RDoc::Markup::Parser
  end

  ##
-  # Returns the current token or +token+ to the token stream
+  # Calculates the column and line of the current token based on +offset+.

-  def unget token = @current_token
+  def token_pos offset
+    [offset - @line_pos, @line]
+  end
+
+  ##
+  # Returns the current token to the token stream
+
+  def unget
+    token = @current_token
    p :unget => token if @debug
    raise Error, 'too many #ungets' if token == @tokens.first
    @tokens.unshift token if token