ruby--ruby/lib/rdoc/parsers/parse_rb.rb

#!/usr/local/bin/ruby

# Parse a Ruby source file, building a set of objects
# representing the modules, classes, methods,
# requires, and includes we find (these classes
# are defined in code_objects.rb).

# This file contains stuff stolen outright from:
#
#   rtags.rb - 
#   ruby-lex.rb - ruby lexcal analizer
#   ruby-token.rb - ruby tokens 
#   	by Keiju ISHITSUKA (Nippon Rational Inc.)
#

require "tracer"
require "e2mmap"
require "irb/slex"

require "rdoc/code_objects"
require "rdoc/tokenstream"

require "rdoc/markup/simple_markup/preprocess"

require "rdoc/parsers/parserfactory"

$TOKEN_DEBUG = $DEBUG

# Definitions of all tokens involved in the lexical analysis

module RubyToken
  EXPR_BEG   = :EXPR_BEG
  EXPR_MID   = :EXPR_MID
  EXPR_END   = :EXPR_END
  EXPR_ARG   = :EXPR_ARG
  EXPR_FNAME = :EXPR_FNAME
  EXPR_DOT   = :EXPR_DOT
  EXPR_CLASS = :EXPR_CLASS
  
  class Token
    NO_TEXT = "??".freeze
    attr :text

    def initialize(line_no, char_no)
      @line_no = line_no
      @char_no = char_no
      @text    = NO_TEXT
    end

    # Because we're used in contexts that expect to return a token,
    # we set the text string and then return ourselves
    def set_text(text)
      @text = text
      self
    end

    attr_reader :line_no, :char_no, :text
  end

  class TkNode < Token
    attr :node
  end

  class TkId < Token
    def initialize(line_no, char_no, name)
      super(line_no, char_no)
      @name = name
    end
    attr :name
  end

  class TkKW < TkId
  end

  class TkVal < Token
    def initialize(line_no, char_no, value = nil)
      super(line_no, char_no)
      set_text(value)
    end
  end

  class TkOp < Token
    def name
      self.class.op_name
    end
  end

  class TkOPASGN < TkOp
    def initialize(line_no, char_no, op)
      super(line_no, char_no)
      op = TkReading2Token[op] unless op.kind_of?(Symbol)
      @op = op
    end
    attr :op
  end

  class TkUnknownChar < Token
    def initialize(line_no, char_no, id)
      super(line_no, char_no)
      @name = char_no.chr
    end
    attr :name
  end

  class TkError < Token
  end

  def set_token_position(line, char)
    @prev_line_no = line
    @prev_char_no = char
  end

  def Token(token, value = nil)
    tk = nil
    case token
    when String, Symbol
      source = token.kind_of?(String) ? TkReading2Token : TkSymbol2Token
      if (tk = source[token]).nil?
	IRB.fail TkReading2TokenNoKey, token
      end
      tk = Token(tk[0], value) 
    else 
      tk = if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
             token.new(@prev_line_no, @prev_char_no)
           else
             token.new(@prev_line_no, @prev_char_no, value)
           end
    end
    tk
  end

  TokenDefinitions = [
    [:TkCLASS,      TkKW,  "class",  EXPR_CLASS],
    [:TkMODULE,     TkKW,  "module", EXPR_BEG],
    [:TkDEF,	    TkKW,  "def",    EXPR_FNAME],
    [:TkUNDEF,      TkKW,  "undef",  EXPR_FNAME],
    [:TkBEGIN,      TkKW,  "begin",  EXPR_BEG],
    [:TkRESCUE,     TkKW,  "rescue", EXPR_MID],
    [:TkENSURE,     TkKW,  "ensure", EXPR_BEG],
    [:TkEND,	    TkKW,  "end",    EXPR_END],
    [:TkIF,         TkKW,  "if",     EXPR_BEG, :TkIF_MOD],
    [:TkUNLESS,     TkKW,  "unless", EXPR_BEG, :TkUNLESS_MOD],
    [:TkTHEN,	    TkKW,  "then",   EXPR_BEG],
    [:TkELSIF,      TkKW,  "elsif",  EXPR_BEG],
    [:TkELSE,	    TkKW,  "else",   EXPR_BEG],
    [:TkCASE,	    TkKW,  "case",   EXPR_BEG],
    [:TkWHEN,	    TkKW,  "when",   EXPR_BEG],
    [:TkWHILE,      TkKW,  "while",  EXPR_BEG, :TkWHILE_MOD],
    [:TkUNTIL,      TkKW,  "until",  EXPR_BEG, :TkUNTIL_MOD],
    [:TkFOR,	    TkKW,  "for",    EXPR_BEG],
    [:TkBREAK,      TkKW,  "break",  EXPR_END],
    [:TkNEXT,	    TkKW,  "next",   EXPR_END],
    [:TkREDO,	    TkKW,  "redo",   EXPR_END],
    [:TkRETRY,      TkKW,  "retry",  EXPR_END],
    [:TkIN,	    TkKW,  "in",     EXPR_BEG],
    [:TkDO,	    TkKW,  "do",     EXPR_BEG],
    [:TkRETURN,     TkKW,  "return", EXPR_MID],
    [:TkYIELD,      TkKW,  "yield",  EXPR_END],
    [:TkSUPER,      TkKW,  "super",  EXPR_END],
    [:TkSELF,	    TkKW,  "self",   EXPR_END],
    [:TkNIL, 	    TkKW,  "nil",    EXPR_END],
    [:TkTRUE,	    TkKW,  "true",   EXPR_END],
    [:TkFALSE,      TkKW,  "false",  EXPR_END],
    [:TkAND,	    TkKW,  "and",    EXPR_BEG],
    [:TkOR, 	    TkKW,  "or",     EXPR_BEG],
    [:TkNOT,	    TkKW,  "not",    EXPR_BEG],
    [:TkIF_MOD,     TkKW],
    [:TkUNLESS_MOD, TkKW],
    [:TkWHILE_MOD,  TkKW],
    [:TkUNTIL_MOD,  TkKW],
    [:TkALIAS,      TkKW,  "alias",    EXPR_FNAME],
    [:TkDEFINED,    TkKW,  "defined?", EXPR_END],
    [:TklBEGIN,     TkKW,  "BEGIN",    EXPR_END],
    [:TklEND,	    TkKW,  "END",      EXPR_END],
    [:Tk__LINE__,   TkKW,  "__LINE__", EXPR_END],
    [:Tk__FILE__,   TkKW,  "__FILE__", EXPR_END],

    [:TkIDENTIFIER, TkId],
    [:TkFID,	    TkId],
    [:TkGVAR,	    TkId],
    [:TkIVAR,	    TkId],
    [:TkCONSTANT,   TkId],

    [:TkINTEGER,    TkVal],
    [:TkFLOAT,      TkVal],
    [:TkSTRING,     TkVal],
    [:TkXSTRING,    TkVal],
    [:TkREGEXP,     TkVal],
    [:TkCOMMENT,    TkVal],

    [:TkDSTRING,    TkNode],
    [:TkDXSTRING,   TkNode],
    [:TkDREGEXP,    TkNode],
    [:TkNTH_REF,    TkId],
    [:TkBACK_REF,   TkId],

    [:TkUPLUS,      TkOp,   "+@"],
    [:TkUMINUS,     TkOp,   "-@"],
    [:TkPOW,	    TkOp,   "**"],
    [:TkCMP,	    TkOp,   "<=>"],
    [:TkEQ,	    TkOp,   "=="],
    [:TkEQQ,	    TkOp,   "==="],
    [:TkNEQ,	    TkOp,   "!="],
    [:TkGEQ,	    TkOp,   ">="],
    [:TkLEQ,	    TkOp,   "<="],
    [:TkANDOP,      TkOp,   "&&"],
    [:TkOROP,	    TkOp,   "||"],
    [:TkMATCH,      TkOp,   "=~"],
    [:TkNMATCH,     TkOp,   "!~"],
    [:TkDOT2,	    TkOp,   ".."],
    [:TkDOT3,	    TkOp,   "..."],
    [:TkAREF,	    TkOp,   "[]"],
    [:TkASET,	    TkOp,   "[]="],
    [:TkLSHFT,      TkOp,   "<<"],
    [:TkRSHFT,      TkOp,   ">>"],
    [:TkCOLON2,     TkOp],
    [:TkCOLON3,     TkOp],
#   [:OPASGN,	    TkOp],               # +=, -=  etc. #
    [:TkASSOC,      TkOp,   "=>"],
    [:TkQUESTION,   TkOp,   "?"],	 #?
    [:TkCOLON,      TkOp,   ":"],        #:
    
    [:TkfLPAREN],         # func( #
    [:TkfLBRACK],         # func[ #
    [:TkfLBRACE],         # func{ #
    [:TkSTAR],            # *arg
    [:TkAMPER],           # &arg #
    [:TkSYMBOL,     TkId],          # :SYMBOL
    [:TkSYMBEG,     TkId], 
    [:TkGT,	    TkOp,   ">"],
    [:TkLT,	    TkOp,   "<"],
    [:TkPLUS,	    TkOp,   "+"],
    [:TkMINUS,      TkOp,   "-"],
    [:TkMULT,	    TkOp,   "*"],
    [:TkDIV,	    TkOp,   "/"],
    [:TkMOD,	    TkOp,   "%"],
    [:TkBITOR,      TkOp,   "|"],
    [:TkBITXOR,     TkOp,   "^"],
    [:TkBITAND,     TkOp,   "&"],
    [:TkBITNOT,     TkOp,   "~"],
    [:TkNOTOP,      TkOp,   "!"],

    [:TkBACKQUOTE,  TkOp,   "`"],

    [:TkASSIGN,     Token,  "="],
    [:TkDOT,	    Token,  "."],
    [:TkLPAREN,     Token,  "("],  #(exp)
    [:TkLBRACK,     Token,  "["],  #[arry]
    [:TkLBRACE,     Token,  "{"],  #{hash}
    [:TkRPAREN,     Token,  ")"],
    [:TkRBRACK,     Token,  "]"],
    [:TkRBRACE,     Token,  "}"],
    [:TkCOMMA,      Token,  ","],
    [:TkSEMICOLON,  Token,  ";"],

    [:TkRD_COMMENT],
    [:TkSPACE],
    [:TkNL],
    [:TkEND_OF_SCRIPT],

    [:TkBACKSLASH,  TkUnknownChar,  "\\"],
    [:TkAT,	    TkUnknownChar,  "@"],
    [:TkDOLLAR,     TkUnknownChar,  "\$"], #"
  ]

  # {reading => token_class}
  # {reading => [token_class, *opt]}
  TkReading2Token = {}
  TkSymbol2Token = {}

  def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts)
    token_n = token_n.id2name unless token_n.kind_of?(String)
    if RubyToken.const_defined?(token_n)
      IRB.fail AlreadyDefinedToken, token_n
    end

    token_c =  Class.new super_token
    RubyToken.const_set token_n, token_c
#    token_c.inspect
 
    if reading
      if TkReading2Token[reading]
	IRB.fail TkReading2TokenDuplicateError, token_n, reading
      end
      if opts.empty?
	TkReading2Token[reading] = [token_c]
      else
	TkReading2Token[reading] = [token_c].concat(opts)
      end
    end
    TkSymbol2Token[token_n.intern] = token_c

    if token_c <= TkOp
      token_c.class_eval %{
        def self.op_name; "#{reading}"; end
      }
    end
  end

  for defs in TokenDefinitions
    def_token(*defs)
  end

  NEWLINE_TOKEN = TkNL.new(0,0)
  NEWLINE_TOKEN.set_text("\n")

end


# Lexical analyzer for Ruby source

class RubyLex

  ######################################################################
  #
  # Read an input stream character by character. We allow for unlimited
  # ungetting of characters just read.
  #
  # We simplify the implementation greatly by reading the entire input
  # into a buffer initially, and then simply traversing it using
  # pointers.
  #
  # We also have to allow for the <i>here document diversion</i>. This
  # little gem comes about when the lexer encounters a here
  # document. At this point we effectively need to split the input
  # stream into two parts: one to read the body of the here document,
  # the other to read the rest of the input line where the here
  # document was initially encountered. For example, we might have
  #
  #   do_something(<<-A, <<-B)
  #     stuff
  #     for
  #   A
  #     stuff
  #     for
  #   B
  #
  # When the lexer encounters the <<A, it reads until the end of the
  # line, and keeps it around for later. It then reads the body of the
  # here document.  Once complete, it needs to read the rest of the
  # original line, but then skip the here document body.
  #
  
  class BufferedReader
    
    attr_reader :line_num
    
    def initialize(content)
      if /\t/ =~ content
        tab_width = Options.instance.tab_width
        content = content.split(/\n/).map do |line|
          1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)}  && $~ #`
          line
        end .join("\n")
      end
      @content   = content
      @content << "\n" unless @content[-1,1] == "\n"
      @size      = @content.size
      @offset    = 0
      @hwm       = 0
      @line_num  = 1
      @read_back_offset = 0
      @last_newline = 0
      @newline_pending = false
    end
    
    def column
      @offset - @last_newline
    end
    
    def getc
      return nil if @offset >= @size
      ch = @content[@offset, 1]
      
      @offset += 1
      @hwm = @offset if @hwm < @offset
      
      if @newline_pending
        @line_num += 1
        @last_newline = @offset - 1
        @newline_pending = false
      end
      
      if ch == "\n"
        @newline_pending = true
      end
      ch
    end
    
    def getc_already_read
      getc
    end
    
    def ungetc(ch)
      raise "unget past beginning of file" if @offset <= 0
      @offset -= 1
      if @content[@offset] == ?\n
        @newline_pending = false
      end
    end
    
    def get_read
      res = @content[@read_back_offset...@offset]
      @read_back_offset = @offset
      res
    end
    
    def peek(at)
      pos = @offset + at
      if pos >= @size
        nil
      else
        @content[pos, 1]
      end
    end
    
    def peek_equal(str)
      @content[@offset, str.length] == str
    end
    
    def divert_read_from(reserve)
      @content[@offset, 0] = reserve
      @size      = @content.size
    end
  end

  # end of nested class BufferedReader

  extend Exception2MessageMapper
  def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
  def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
  def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
  def_exception(:TkReading2TokenDuplicateError, 
		"key duplicate(token_n='%s', key='%s')")
  def_exception(:SyntaxError, "%s")
  
  include RubyToken

  attr_reader :continue
  attr_reader :lex_state

  def RubyLex.debug?
    false
  end

  def initialize(content)
    lex_init

    @reader = BufferedReader.new(content)

    @exp_line_no = @line_no = 1
    @base_char_no = 0
    @indent = 0

    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_BEG
    @space_seen = false
    
    @continue = false
    @line = ""

    @skip_space = false
    @read_auto_clean_up = false
    @exception_on_syntax_error = true
  end

  attr :skip_space, true
  attr :read_auto_clean_up, true
  attr :exception_on_syntax_error, true

  attr :indent

  # io functions
  def line_no
    @reader.line_num
  end

  def char_no
    @reader.column
  end

  def get_read
    @reader.get_read
  end

  def getc
    @reader.getc
  end

  def getc_of_rests
    @reader.getc_already_read
  end

  def gets
    c = getc or return
    l = ""
    begin
      l.concat c unless c == "\r"
      break if c == "\n"
    end while c = getc
    l
  end


  def ungetc(c = nil)
    @reader.ungetc(c)
  end

  def peek_equal?(str)
    @reader.peek_equal(str)
  end

  def peek(i = 0)
    @reader.peek(i)
  end

  def lex
    until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
	     !@continue or
	     tk.nil?)
    end
    line = get_read

    if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
      nil
    else
      line
    end
  end

  def token
    set_token_position(line_no, char_no)
    begin
      begin
	tk = @OP.match(self)
	@space_seen = tk.kind_of?(TkSPACE)
      rescue SyntaxError
	abort if @exception_on_syntax_error
	tk = TkError.new(line_no, char_no)
      end
    end while @skip_space and tk.kind_of?(TkSPACE)
    if @read_auto_clean_up
      get_read
    end
#   throw :eof unless tk
    p tk if $DEBUG
    tk
  end
  
  ENINDENT_CLAUSE = [
    "case", "class", "def", "do", "for", "if",
    "module", "unless", "until", "while", "begin" #, "when"
  ]
  DEINDENT_CLAUSE = ["end" #, "when"
  ]

  PERCENT_LTYPE = {
    "q" => "\'",
    "Q" => "\"",
    "x" => "\`",
    "r" => "\/",
    "w" => "]"
  }
  
  PERCENT_PAREN = {
    "{" => "}",
    "[" => "]",
    "<" => ">",
    "(" => ")"
  }

  Ltype2Token = {
    "\'" => TkSTRING,
    "\"" => TkSTRING,
    "\`" => TkXSTRING,
    "\/" => TkREGEXP,
    "]" => TkDSTRING
  }
  Ltype2Token.default = TkSTRING

  DLtype2Token = {
    "\"" => TkDSTRING,
    "\`" => TkDXSTRING,
    "\/" => TkDREGEXP,
  }

  def lex_init()
    @OP = SLex.new
    @OP.def_rules("\0", "\004", "\032") do |chars, io|
      Token(TkEND_OF_SCRIPT).set_text(chars)
    end

    @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
      @space_seen = TRUE
      while (ch = getc) =~ /[ \t\f\r\13]/
        chars << ch
      end
      ungetc
      Token(TkSPACE).set_text(chars)
    end

    @OP.def_rule("#") do
      |op, io|
      identify_comment
    end

    @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
      |op, io|
      str = op
      @ltype = "="


      begin
        line = ""
        begin
          ch = getc
          line << ch
        end until ch == "\n"
        str << line
      end until line =~ /^=end/

      ungetc

      @ltype = nil

      if str =~ /\A=begin\s+rdoc/i
        str.sub!(/\A=begin.*\n/, '')
        str.sub!(/^=end.*/m, '')
        Token(TkCOMMENT).set_text(str)
      else
        Token(TkRD_COMMENT)#.set_text(str)
      end
    end

    @OP.def_rule("\n") do
      print "\\n\n" if RubyLex.debug?
      case @lex_state
      when EXPR_BEG, EXPR_FNAME, EXPR_DOT
	@continue = TRUE
      else
	@continue = FALSE
	@lex_state = EXPR_BEG
      end
      Token(TkNL).set_text("\n")
    end

    @OP.def_rules("*", "**",	
		  "!", "!=", "!~",
		  "=", "==", "===", 
		  "=~", "<=>",	
		  "<", "<=",
		  ">", ">=", ">>") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    @OP.def_rules("<<") do
      |op, io|
      tk = nil
      if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
	  (@lex_state != EXPR_ARG || @space_seen)
	c = peek(0)
	if /[-\w_\"\'\`]/ =~ c
	  tk = identify_here_document
	end
      end
      if !tk
        @lex_state = EXPR_BEG
        tk = Token(op).set_text(op)
      end
      tk
    end

    @OP.def_rules("'", '"') do
      |op, io|
      identify_string(op)
    end

    @OP.def_rules("`") do
      |op, io|
      if @lex_state == EXPR_FNAME
	Token(op).set_text(op)
      else
	identify_string(op)
      end
    end

    @OP.def_rules('?') do
      |op, io|
      if @lex_state == EXPR_END
	@lex_state = EXPR_BEG
	Token(TkQUESTION).set_text(op)
      else
	ch = getc
	if @lex_state == EXPR_ARG && ch !~ /\s/
	  ungetc
	  @lex_state = EXPR_BEG;
	  Token(TkQUESTION).set_text(op)
	else
          str = op
          str << ch
	  if (ch == '\\') #'
	    str << read_escape
	  end
	  @lex_state = EXPR_END
	  Token(TkINTEGER).set_text(str)
	end
      end
    end

    @OP.def_rules("&", "&&", "|", "||") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end
    
    @OP.def_rules("+=", "-=", "*=", "**=", 
		  "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
      |op, io|
      @lex_state = EXPR_BEG
      op =~ /^(.*)=$/
      Token(TkOPASGN, $1).set_text(op)
    end

    @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
      Token(TkUPLUS).set_text(op)
    end

    @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
      Token(TkUMINUS).set_text(op)
    end

    @OP.def_rules("+", "-") do
      |op, io|
      catch(:RET) do
	if @lex_state == EXPR_ARG
	  if @space_seen and peek(0) =~ /[0-9]/
	    throw :RET, identify_number(op)
	  else
	    @lex_state = EXPR_BEG
	  end
	elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
	  throw :RET, identify_number(op)
	else
	  @lex_state = EXPR_BEG
	end
	Token(op).set_text(op)
      end
    end

    @OP.def_rule(".") do
      @lex_state = EXPR_BEG
      if peek(0) =~ /[0-9]/
	ungetc
	identify_number("")
      else
	# for obj.if
	@lex_state = EXPR_DOT
	Token(TkDOT).set_text(".")
      end
    end

    @OP.def_rules("..", "...") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    lex_int2
  end
  
  def lex_int2
    @OP.def_rules("]", "}", ")") do
      |op, io|
      @lex_state = EXPR_END
      @indent -= 1
      Token(op).set_text(op)
    end

    @OP.def_rule(":") do
      if @lex_state == EXPR_END || peek(0) =~ /\s/
	@lex_state = EXPR_BEG
	tk = Token(TkCOLON)
      else
	@lex_state = EXPR_FNAME;
	tk = Token(TkSYMBEG)
      end
      tk.set_text(":")
    end

    @OP.def_rule("::") do
#      p @lex_state.id2name, @space_seen
      if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
	@lex_state = EXPR_BEG
	tk = Token(TkCOLON3)
      else
	@lex_state = EXPR_DOT
	tk = Token(TkCOLON2)
      end
      tk.set_text("::")
    end

    @OP.def_rule("/") do
      |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	identify_string(op)
      elsif peek(0) == '='
	getc
	@lex_state = EXPR_BEG
	Token(TkOPASGN, :/).set_text("/=") #")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
	identify_string(op)
      else 
	@lex_state = EXPR_BEG
        Token("/").set_text(op)
      end
    end

    @OP.def_rules("^") do
      @lex_state = EXPR_BEG
      Token("^").set_text("^")
    end

    #       @OP.def_rules("^=") do
    # 	@lex_state = EXPR_BEG
    # 	Token(TkOPASGN, :^)
    #       end
    
    @OP.def_rules(",", ";") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    @OP.def_rule("~") do
      @lex_state = EXPR_BEG
      Token("~").set_text("~")
    end

    @OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do
      @lex_state = EXPR_BEG
      Token("~").set_text("~@")
    end
    
    @OP.def_rule("(") do
      @indent += 1
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	@lex_state = EXPR_BEG
	tk = Token(TkfLPAREN)
      else
	@lex_state = EXPR_BEG
	tk = Token(TkLPAREN)
      end
      tk.set_text("(")
    end

    @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
      Token("[]").set_text("[]")
    end

    @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
      Token("[]=").set_text("[]=")
    end

    @OP.def_rule("[") do
      @indent += 1
      if @lex_state == EXPR_FNAME
	t = Token(TkfLBRACK)
      else
	if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	  t = Token(TkLBRACK)
	elsif @lex_state == EXPR_ARG && @space_seen
	  t = Token(TkLBRACK)
	else
	  t = Token(TkfLBRACK)
	end
	@lex_state = EXPR_BEG
      end
      t.set_text("[")
    end

    @OP.def_rule("{") do
      @indent += 1
      if @lex_state != EXPR_END && @lex_state != EXPR_ARG
	t = Token(TkLBRACE)
      else
	t = Token(TkfLBRACE)
      end
      @lex_state = EXPR_BEG
      t.set_text("{")
    end

    @OP.def_rule('\\') do   #'
      if getc == "\n" 
	@space_seen = true
	@continue = true
	Token(TkSPACE).set_text("\\\n")
      else 
	ungetc
	Token("\\").set_text("\\")  #"
      end 
    end 

    @OP.def_rule('%') do
      |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	identify_quotation('%')
      elsif peek(0) == '='
	getc
	Token(TkOPASGN, "%").set_text("%=")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
	identify_quotation('%')
      else
	@lex_state = EXPR_BEG
	Token("%").set_text("%")
      end
    end

    @OP.def_rule('$') do  #'
      identify_gvar
    end

    @OP.def_rule('@') do
      if peek(0) =~ /[@\w_]/
	ungetc
	identify_identifier
      else
	Token("@").set_text("@")
      end
    end

    #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do 
    # 	|op, io|
    # 	@indent += 1
    # 	@lex_state = EXPR_FNAME
    # #	@lex_state = EXPR_END
    # #	until @rests[0] == "\n" or @rests[0] == ";"
    # #	  rests.shift
    # #	end
    #       end

    @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
      throw :eof
    end

    @OP.def_rule("") do
      |op, io|
      printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
      if peek(0) =~ /[0-9]/
	t = identify_number("")
      elsif peek(0) =~ /[\w_]/
	t = identify_identifier
      end
      printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
      t
    end
    
    p @OP if RubyLex.debug?
  end
  
  def identify_gvar
    @lex_state = EXPR_END
    str = "$"

    tk = case ch = getc
         when /[~_*$?!@\/\\;,=:<>".]/   #"
           str << ch
           Token(TkGVAR, str)
           
         when "-"
           str << "-" << getc
           Token(TkGVAR, str)
           
         when "&", "`", "'", "+"
           str << ch
           Token(TkBACK_REF, str)
           
         when /[1-9]/
           str << ch
           while (ch = getc) =~ /[0-9]/
             str << ch
           end
           ungetc
           Token(TkNTH_REF)
         when /\w/
           ungetc
           ungetc
           return identify_identifier
         else 
           ungetc
           Token("$")     
         end
    tk.set_text(str)
  end
  
  def identify_identifier
    token = ""
    token.concat getc if peek(0) =~ /[$@]/
    token.concat getc if peek(0) == "@"

    while (ch = getc) =~ /\w|_/
      print ":", ch, ":" if RubyLex.debug?
      token.concat ch
    end
    ungetc
    
    if ch == "!" or ch == "?"
      token.concat getc
    end
    # fix token

    # $stderr.puts "identifier - #{token}, state = #@lex_state"

    case token
    when /^\$/
      return Token(TkGVAR, token).set_text(token)
    when /^\@/
      @lex_state = EXPR_END
      return Token(TkIVAR, token).set_text(token)
    end
    
    if @lex_state != EXPR_DOT
      print token, "\n" if RubyLex.debug?

      token_c, *trans = TkReading2Token[token]
      if token_c
	# reserved word?

	if (@lex_state != EXPR_BEG &&
	    @lex_state != EXPR_FNAME &&
	    trans[1])
	  # modifiers
	  token_c = TkSymbol2Token[trans[1]]
	  @lex_state = trans[0]
	else
	  if @lex_state != EXPR_FNAME
	    if ENINDENT_CLAUSE.include?(token)
	      @indent += 1
	    elsif DEINDENT_CLAUSE.include?(token)
	      @indent -= 1
	    end
	    @lex_state = trans[0]
	  else
	    @lex_state = EXPR_END
	  end
	end
	return Token(token_c, token).set_text(token)
      end
    end

    if @lex_state == EXPR_FNAME
      @lex_state = EXPR_END
      if peek(0) == '='
	token.concat getc
      end
    elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
      @lex_state = EXPR_ARG
    else
      @lex_state = EXPR_END
    end

    if token[0, 1] =~ /[A-Z]/
      return Token(TkCONSTANT, token).set_text(token)
    elsif token[token.size - 1, 1] =~ /[!?]/
      return Token(TkFID, token).set_text(token)
    else
      return Token(TkIDENTIFIER, token).set_text(token)
    end
  end

  def identify_here_document
    ch = getc
    if ch == "-"
      ch = getc
      indent = true
    end
    if /['"`]/ =~ ch            # '
      lt = ch
      quoted = ""
      while (c = getc) && c != lt
	quoted.concat c
      end
    else
      lt = '"'
      quoted = ch.dup
      while (c = getc) && c =~ /\w/
	quoted.concat c
      end
      ungetc
    end

    ltback, @ltype = @ltype, lt
    reserve = ""

    while ch = getc
      reserve << ch
      if ch == "\\"    #"
        ch = getc
	reserve << ch
      elsif ch == "\n"
	break
      end
    end

    str = ""
    while (l = gets)
      l.chomp!
      l.strip! if indent
      break if l == quoted
      str << l.chomp << "\n"
    end

    @reader.divert_read_from(reserve)

    @ltype = ltback
    @lex_state = EXPR_END
    Token(Ltype2Token[lt], str).set_text(str.dump)
  end
  
  def identify_quotation(initial_char)
    ch = getc
    if lt = PERCENT_LTYPE[ch]
      ch = getc
    elsif ch =~ /\W/
      lt = "\""
    else
      RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')"
    end
#     if ch !~ /\W/
#       ungetc
#       next
#     end
    #@ltype = lt
    @quoted = ch unless @quoted = PERCENT_PAREN[ch]
    identify_string(lt, @quoted, ch, initial_char)
  end

  def identify_number(start)
    str = start.dup

    if start == "+" or start == "-" or start == ""
      start = getc
      str << start
    end

    @lex_state = EXPR_END

    if start == "0"
      if peek(0) == "x"
        ch = getc
        str << ch
        match = /[0-9a-f_]/
      else
        match = /[0-7_]/
      end
      while ch = getc
        if ch !~ match
          ungetc
          break
        else
          str << ch
        end
      end
      return Token(TkINTEGER).set_text(str)
    end

    type = TkINTEGER
    allow_point = TRUE
    allow_e = TRUE
    while ch = getc
      case ch
      when /[0-9_]/
        str << ch

      when allow_point && "."
	type = TkFLOAT
	if peek(0) !~ /[0-9]/
	  ungetc
	  break
	end
        str << ch
	allow_point = false

      when allow_e && "e", allow_e && "E"
        str << ch
	type = TkFLOAT
	if peek(0) =~ /[+-]/
	  str << getc
	end
	allow_e = false
	allow_point = false
      else
	ungetc
	break
      end
    end
    Token(type).set_text(str)
  end
  
  def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
    @ltype = ltype
    @quoted = quoted
    subtype = nil

    str = ""
    str << initial_char if initial_char
    str << (opener||quoted)

    nest = 0
    begin
      while ch = getc 
	str << ch
	if @quoted == ch 
          if nest == 0
            break
          else
            nest -= 1
          end
        elsif opener == ch
          nest += 1
	elsif @ltype != "'" && @ltype != "]" and ch == "#"
          ch = getc
          if ch == "{"
            subtype = true
            str << ch << skip_inner_expression
          else
            ungetc(ch)
          end
	elsif ch == '\\' #'
	  str << read_escape
	end
      end
      if @ltype == "/"
	if peek(0) =~ /i|o|n|e|s/
	  str << getc
	end
      end
      if subtype
	Token(DLtype2Token[ltype], str)
      else
	Token(Ltype2Token[ltype], str)
      end.set_text(str)
    ensure
      @ltype = nil
      @quoted = nil
      @lex_state = EXPR_END
    end
  end

  def skip_inner_expression
    res = ""
    nest = 0
    while (ch = getc)
      res << ch
      if ch == '}'
        break if nest.zero?
        nest -= 1
      elsif ch == '{'
        nest += 1
      end
    end
    res
  end

  def identify_comment
    @ltype = "#"
    comment = "#"
    while ch = getc
      if ch == "\\"
        ch = getc
        if ch == "\n"
          ch = " "
        else
          comment << "\\" 
        end
      else
        if ch == "\n"
          @ltype = nil
          ungetc
          break
        end
      end
      comment << ch
    end
    return Token(TkCOMMENT).set_text(comment)
  end
  
  def read_escape
    res = ""
    case ch = getc
    when /[0-7]/
      ungetc ch
      3.times do
	case ch = getc
	when /[0-7]/
	when nil
	  break
	else
	  ungetc
	  break
	end
        res << ch
      end
      
    when "x"
      res << ch
      2.times do
	case ch = getc
	when /[0-9a-fA-F]/
	when nil
	  break
	else
	  ungetc
	  break
	end
        res << ch
      end

    when "M"
      res << ch
      if (ch = getc) != '-'
	ungetc
      else
        res << ch
	if (ch = getc) == "\\" #"
          res << ch
	  res << read_escape
        else
          res << ch
	end
      end

    when "C", "c", "^"
      res << ch
      if ch == "C" and (ch = getc) != "-"
	ungetc
      else
        res << ch
        if (ch = getc) == "\\" #"
          res << ch
          res << read_escape
        else
          res << ch
        end
      end
    else
      res << ch
    end
    res
  end
end


# Extract code elements from a source file, returning a TopLevel
# object containing the constituent file elements.
#
# This file is based on rtags

module RDoc

  GENERAL_MODIFIERS = [ 'nodoc' ].freeze

  CLASS_MODIFIERS = GENERAL_MODIFIERS

  ATTR_MODIFIERS  = GENERAL_MODIFIERS

  CONSTANT_MODIFIERS = GENERAL_MODIFIERS

  METHOD_MODIFIERS = GENERAL_MODIFIERS + 
    [ 'arg', 'args', 'yield', 'yields', 'notnew', 'not-new', 'not_new', 'doc' ]


  class RubyParser
    include RubyToken
    include TokenStream

    extend ParserFactory

    parse_files_matching(/\.rbw?$/)


    def initialize(top_level, file_name, content, options, stats)
      @options = options
      @stats   = stats
      @size = 0
      @token_listeners = nil
      @input_file_name = file_name
      @scanner = RubyLex.new(content)
      @scanner.exception_on_syntax_error = false
      @top_level = top_level
      @progress = $stderr unless options.quiet
    end

    def scan
      @tokens = []
      @unget_read = []
      @read = []
      catch(:eof) do
        begin
          parse_toplevel_statements(@top_level)
        rescue Exception => e
          $stderr.puts "\n\n"
          $stderr.puts "RDoc failure in #@input_file_name at or around " +
                       "line #{@scanner.line_no} column #{@scanner.char_no}"
          $stderr.puts 
          $stderr.puts "Before reporting this, could you check that the file"
          $stderr.puts "you're documenting compiles cleanly--RDoc is not a"
          $stderr.puts "full Ruby parser, and gets confused easily if fed"
          $stderr.puts "invalid programs."
          $stderr.puts
          $stderr.puts "The internal error was:\n\n"
          
          e.set_backtrace(e.backtrace[0,4])
          raise
        end
      end
      @top_level
    end

    private 

    def warn(msg)
      prefix = "\n" + @input_file_name + ":"
      if @scanner
        prefix << "#{@scanner.line_no}:#{@scanner.char_no}: "
      end
      $stderr.puts prefix + msg
    end

    def error(msg)
      warn msg
      exit(1)
    end

    def progress(char)
      unless @options.quiet
        @progress.print(char)
	@progress.flush
      end
    end

    def add_token_listener(obj)
      @token_listeners ||= []
      @token_listeners << obj
    end

    def remove_token_listener(obj)
      @token_listeners.delete(obj)
    end

    def get_tk
      tk = nil
      if @tokens.empty?
	tk = @scanner.token
	@read.push @scanner.get_read
	puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG
      else
	@read.push @unget_read.shift
	tk = @tokens.shift
	puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG
      end

      if tk.kind_of?(TkSYMBEG)
        set_token_position(tk.line_no, tk.char_no)
        tk1 = get_tk
        if tk1.kind_of?(TkId) || tk1.kind_of?(TkOp)
          tk = Token(TkSYMBOL).set_text(":" + tk1.name)
          # remove the identifier we just read (we're about to
          # replace it with a symbol)
          @token_listeners.each do |obj|
            obj.pop_token
          end if @token_listeners
        else
          warn("':' not followed by identified or operator")
          tk = tk1
        end
      end

      # inform any listeners of our shiny new token
      @token_listeners.each do |obj|
        obj.add_token(tk)
      end if @token_listeners

      tk
    end

    def peek_tk
      unget_tk(tk = get_tk)
      tk
    end

    def unget_tk(tk)
      @tokens.unshift tk
      @unget_read.unshift @read.pop

      # Remove this token from any listeners
      @token_listeners.each do |obj|
        obj.pop_token
      end if @token_listeners
    end

    def skip_tkspace(skip_nl = true)
      tokens = []
      while ((tk = get_tk).kind_of?(TkSPACE) ||
	     (skip_nl && tk.kind_of?(TkNL)))
	tokens.push tk
      end
      unget_tk(tk)
      tokens
    end

    def get_tkread
      read = @read.join("")
      @read = []
      read
    end

    def peek_read
      @read.join('')
    end

    NORMAL = "::"
    SINGLE = "<<"

    # Look for the first comment in a file that isn't
    # a shebang line.

    def collect_first_comment
      skip_tkspace
      res = ''
      first_line = true

      tk = get_tk
      while tk.kind_of?(TkCOMMENT)
        if first_line && tk.text[0,2] == "#!"
          skip_tkspace
          tk = get_tk
        else
          res << tk.text << "\n"
          tk = get_tk
          if tk.kind_of? TkNL
            skip_tkspace(false)
            tk = get_tk
          end
        end
        first_line = false
      end
      unget_tk(tk)
      res
    end

    def parse_toplevel_statements(container)
      comment = collect_first_comment
      look_for_directives_in(container, comment)
      container.comment = comment unless comment.empty?
      parse_statements(container, NORMAL, nil, comment)
    end
    
    def parse_statements(container, single=NORMAL, current_method=nil, comment='')
      nest = 1
      save_visibility = container.visibility
      
#      if container.kind_of?(TopLevel)
#      else
#        comment = ''
#      end

      non_comment_seen = true
      
      while tk = get_tk
        
        keep_comment = false
        
        non_comment_seen = true unless tk.kind_of?(TkCOMMENT)
        
#         $stderr.puts "===== #{tk.inspect}"
#         blank_line_seen = true
#         while tk.kind_of?(TkNL)
#           skip_tkspace(false)
#           if peek_tk.kind_of?(TkCOMMENT)
#             if blank_line_seen
#               comment = ''
#               blank_line_seen = false
#             end
#             tk = get_tk
#             comment << tk.text << "\n"
#           else
#             blank_line_seen = true
#           end
#           tk = get_tk
#         end

	case tk

        when TkNL
          skip_tkspace(true)   # Skip blanks and newlines
          tk = get_tk
          if tk.kind_of?(TkCOMMENT)
            if non_comment_seen
              comment = ''
              non_comment_seen = false
            end
            while tk.kind_of?(TkCOMMENT)
              comment << tk.text << "\n"
              tk = get_tk          # this is the newline 
              skip_tkspace(false)  # leading spaces
              tk = get_tk
            end
            unless comment.empty?
              look_for_directives_in(container, comment) 
              if container.done_documenting
                container.ongoing_visibility = save_visibility
                return
              end
            end
            keep_comment = true
          else
            non_comment_seen = true
          end
          unget_tk(tk)
          keep_comment = true


	when TkCLASS
	  if container.document_children
            parse_class(container, single, tk, comment)
	  else
	    nest += 1
          end

	when TkMODULE
	  if container.document_children
            parse_module(container, single, tk, comment)
	  else
	    nest += 1
          end

	when TkDEF
	  if container.document_self
	    parse_method(container, single, tk, comment)
	  else
	    nest += 1
          end

        when TkCONSTANT
          if container.document_self
            parse_constant(container, single, tk, comment)
          end

	when TkALIAS
 	  if container.document_self
	    parse_alias(container, single, tk, comment)
	  end

        when TkYIELD
          if current_method.nil?
            warn("Warning: yield outside of method") if container.document_self
          else
            parse_yield(container, single, tk, current_method)
          end

          # Until and While can have a 'do', which shouldn't increas
          # the nesting. We can't solve the general case, but we can
          # handle most occurrences by ignoring a do at the end of a line

        when  TkUNTIL, TkWHILE
          nest += 1
          puts "FOUND #{tk.class} in #{container.name}, nest = #{nest}, " +
            "line #{tk.line_no}" if $DEBUG
          skip_optional_do_after_expression

	when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN, TkFOR
	  nest += 1
          puts "Found #{tk.class} in #{container.name}, nest = #{nest}, " +
            "line #{tk.line_no}" if $DEBUG

	when TkIDENTIFIER
          if nest == 1 and current_method.nil?
            case tk.name
            when "private", "protected", "public",
                 "private_class_method", "public_class_method"
              parse_visibility(container, single, tk)
              keep_comment = true
            when "attr"
              parse_attr(container, single, tk, comment)
            when /^attr_(reader|writer|accessor)$/, @options.extra_accessors
              parse_attr_accessor(container, single, tk, comment)
            when "alias_method"
              if container.document_self
	        parse_alias(container, single, tk, comment)
	      end
            end
	  end
	  
	  case tk.name
	  when "require"
	    parse_require(container, comment)
	  when "include"
	    parse_include(container, comment)
	  end


	when TkEND
          nest -= 1
          puts "Found 'end' in #{container.name}, nest = #{nest}, line #{tk.line_no}" if $DEBUG
          puts "Method = #{current_method.name}" if $DEBUG and current_method
	  if nest == 0
            read_documentation_modifiers(container, CLASS_MODIFIERS)
            container.ongoing_visibility = save_visibility
            return
          end

	end

        comment = '' unless keep_comment
	begin
	  get_tkread
	  skip_tkspace(false)
	end while peek_tk == TkNL

      end
    end
    
    def parse_class(container, single, tk, comment, &block)
      progress("c")

      @stats.num_classes += 1

      container, name_t = get_class_or_module(container)

      case name_t
      when TkCONSTANT
	name = name_t.name
        superclass = "Object"
        
        if peek_tk.kind_of?(TkLT)
          get_tk
          skip_tkspace(true)
          superclass = get_class_specification
          superclass = "<unknown>" if superclass.empty?
        end

	if single == SINGLE
	  cls_type = SingleClass
	else
	  cls_type = NormalClass
	end
        
        cls = container.add_class(cls_type, name, superclass)
        read_documentation_modifiers(cls, CLASS_MODIFIERS)
        cls.record_location(@top_level)
	parse_statements(cls)
        cls.comment = comment

      when TkLSHFT
	case name = get_class_specification
	when "self", container.name
	  parse_statements(container, SINGLE, &block)
	else
          other = TopLevel.find_class_named(name)
          unless other
#            other = @top_level.add_class(NormalClass, name, nil)
#            other.record_location(@top_level)
#            other.comment = comment
            other = NormalClass.new("Dummy", nil)
          end
          read_documentation_modifiers(other, CLASS_MODIFIERS)
          parse_statements(other, SINGLE, &block)
	end

      else
	warn("Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}")
      end
    end

    def parse_module(container, single, tk, comment)
      progress("m")
      @stats.num_modules += 1
      container, name_t  = get_class_or_module(container)
#      skip_tkspace
      name = name_t.name
      mod = container.add_module(NormalModule, name)
      mod.record_location(@top_level)
      read_documentation_modifiers(mod, CLASS_MODIFIERS)
      parse_statements(mod)
      mod.comment = comment
    end

    # Look for the name of a class of module (optionally with a leading :: or
    # with :: separated named) and return the ultimate name and container

    def get_class_or_module(container)
      skip_tkspace
      name_t = get_tk

      # class ::A -> A is in the top level
      if name_t.kind_of?(TkCOLON2)
        name_t = get_tk
        container = @top_level
      end

      skip_tkspace(false)

      while peek_tk.kind_of?(TkCOLON2)
        prev_container = container
        container = container.find_module_named(name_t.name)
        if !container
#          warn("Couldn't find module #{name_t.name}")
          container = prev_container.add_module(NormalModule, name_t.name)
        end
        get_tk
        name_t = get_tk
      end
      return [container, name_t]
    end

    def parse_constant(container, single, tk, comment)
      name = tk.name
      skip_tkspace(false)
      eq_tk = get_tk

      unless eq_tk.kind_of?(TkASSIGN)
        unget_tk(eq_tk)
        return
      end


      nest = 0
      get_tkread

      tk = get_tk
      if tk.kind_of? TkGT
        unget_tk(tk)
        unget_tk(eq_tk)
        return
      end

      loop do
        puts("Param: #{tk}, #{@scanner.continue} " +
          "#{@scanner.lex_state} #{nest}")  if $DEBUG

        case tk
        when TkSEMICOLON
          break
        when TkLPAREN, TkfLPAREN
          nest += 1
        when TkRPAREN
          nest -= 1
        when TkCOMMENT
          if nest <= 0 && @scanner.lex_state == EXPR_END
            unget_tk(tk)
            break
          end
        when TkNL
          if (@scanner.lex_state == EXPR_END and nest <= 0) || !@scanner.continue
            unget_tk(tk)
            break
          end
        end
        tk = get_tk
      end

      res = get_tkread.tr("\n", " ").strip
      res = "" if res == ";"
      con = Constant.new(name, res, comment)
      read_documentation_modifiers(con, CONSTANT_MODIFIERS)
      if con.document_self
	container.add_constant(con)
      end
    end

    def parse_method(container, single, tk, comment)
      progress(".")
      @stats.num_methods += 1
      line_no = tk.line_no
      column  = tk.char_no
      
      start_collecting_tokens
      add_token(tk)
      add_token_listener(self)
      
      @scanner.instance_eval{@lex_state = EXPR_FNAME}
      skip_tkspace(false)
      name_t = get_tk
      back_tk = skip_tkspace
      meth = nil

      dot = get_tk
      if dot.kind_of?(TkDOT) or dot.kind_of?(TkCOLON2)
	@scanner.instance_eval{@lex_state = EXPR_FNAME}
	skip_tkspace
	name_t2 = get_tk
	case name_t
	when TkSELF
	  name = name_t2.name
	when TkCONSTANT
          name = name_t2.name
          prev_container = container
          container = container.find_module_named(name_t.name)
          if !container
            warn("Couldn't find #{name_t.name}. Assuming it's a module")
            container = prev_container.add_module(NormalModule, name_t.name)
          end
	else
	  # warn("Unexpected token '#{name_t2.inspect}'")
	  # break
          skip_method(container)
          return
	end
	meth =  AnyMethod.new(get_tkread, name)
        meth.singleton = true
      else
	unget_tk dot
	back_tk.reverse_each do
	  |tk|
	  unget_tk tk
	end
	name = name_t.name

        meth =  AnyMethod.new(get_tkread, name)
        meth.singleton = (single == SINGLE)
      end

      remove_token_listener(self)

      meth.start_collecting_tokens
      indent = TkSPACE.new(1,1)
      indent.set_text(" " * column)

      meth.add_tokens([TkCOMMENT.new(line_no,
                                     1,
                                     "# File #{@top_level.file_absolute_name}, line #{line_no}"),
                        NEWLINE_TOKEN,
                        indent])

      meth.add_tokens(@token_stream)

      add_token_listener(meth)

      @scanner.instance_eval{@continue = false}
      parse_method_parameters(meth)

      if meth.document_self
	container.add_method(meth)
      end

      # Having now read the method parameters and documentation modifiers, we
      # now know whether we have to rename #initialize to ::new

      if name == "initialize" && !meth.singleton
        if meth.dont_rename_initialize
          meth.visibility = :protected
        else
          meth.singleton = true
          meth.name = "new"
          meth.visibility = :public
        end
      end
      
      parse_statements(container, single, meth)
      
      remove_token_listener(meth)
      
      meth.comment = comment
    end
    
    def skip_method(container)
      meth =  AnyMethod.new("", "anon")
      parse_method_parameters(meth)
      parse_statements(container, false, meth)
    end
    
    # Capture the method's parameters. Along the way,
    # look for a comment containing 
    #
    #    # yields: ....
    #
    # and add this as the block_params for the method

    def parse_method_parameters(method)
      res = parse_method_or_yield_parameters(method)
      method.params = res unless method.params
      if method.block_params.nil?
          skip_tkspace(false)
	  read_documentation_modifiers(method, METHOD_MODIFIERS)
      end
    end

    def parse_method_or_yield_parameters(method=nil, modifiers=METHOD_MODIFIERS)
      skip_tkspace(false)
      tk = get_tk

      # Little hack going on here. In the statement
      #  f = 2*(1+yield)
      # We see the RPAREN as the next token, so we need
      # to exit early. This still won't catch all cases
      # (such as "a = yield + 1"
      end_token = case tk
                  when TkLPAREN, TkfLPAREN
                    TkRPAREN
                  when TkRPAREN
                    return ""
                  else
                    TkNL
                  end
      nest = 0

      loop do
        puts("Param: #{tk.inspect}, #{@scanner.continue} " +
          "#{@scanner.lex_state} #{nest}")  if $DEBUG
        case tk
        when TkSEMICOLON
          break
        when TkLBRACE
          nest += 1
        when TkRBRACE
          nest -= 1
          break if nest <= 0
        when TkLPAREN, TkfLPAREN
          nest += 1
        when end_token
          if end_token == TkRPAREN
            nest -= 1
            break if @scanner.lex_state == EXPR_END and nest <= 0
          else
            break unless @scanner.continue
          end
        when method && method.block_params.nil? && TkCOMMENT
	  unget_tk(tk)
	  read_documentation_modifiers(method, modifiers)
        end
        tk = get_tk
      end
      res = get_tkread.tr("\n", " ").strip
      res = "" if res == ";"
      res
    end

    # while, until, and for have an optional 
    def skip_optional_do_after_expression
      skip_tkspace(false)
      tk = get_tk
      case tk
      when TkLPAREN, TkfLPAREN
        end_token = TkRPAREN
      else
        end_token = TkNL
      end

      nest = 0
      @scanner.instance_eval{@continue = false}

      loop do
        puts("\nWhile: #{tk}, #{@scanner.continue} " +
          "#{@scanner.lex_state} #{nest}") if $DEBUG
        case tk
        when TkSEMICOLON
          break
        when TkLPAREN, TkfLPAREN
          nest += 1
        when TkDO
          break if nest.zero?
        when end_token
          if end_token == TkRPAREN
            nest -= 1
            break if @scanner.lex_state == EXPR_END and nest.zero?
          else
            break unless @scanner.continue
          end
        end
        tk = get_tk
      end
      skip_tkspace(false)
      if peek_tk.kind_of? TkDO
        get_tk
      end
    end
    
    # Return a superclass, which can be either a constant
    # of an expression

    def get_class_specification
      tk = get_tk
      return "self" if tk.kind_of?(TkSELF)
        
      res = ""
      while tk.kind_of?(TkCOLON2) ||
          tk.kind_of?(TkCOLON3)   ||
          tk.kind_of?(TkCONSTANT)   
        
        res += tk.text
        tk = get_tk
      end

      unget_tk(tk)
      skip_tkspace(false)

      get_tkread # empty out read buffer

      tk = get_tk

      case tk
      when TkNL, TkCOMMENT, TkSEMICOLON
        unget_tk(tk)
        return res
      end

      res += parse_call_parameters(tk)
      res
    end

    def parse_call_parameters(tk)

      end_token = case tk
                  when TkLPAREN, TkfLPAREN
                    TkRPAREN
                  when TkRPAREN
                    return ""
                  else
                    TkNL
                  end
      nest = 0

      loop do
        puts("Call param: #{tk}, #{@scanner.continue} " +
          "#{@scanner.lex_state} #{nest}") if $DEBUG
        case tk
        when TkSEMICOLON
          break
        when TkLPAREN, TkfLPAREN
          nest += 1
        when end_token
          if end_token == TkRPAREN
            nest -= 1
            break if @scanner.lex_state == EXPR_END and nest <= 0
          else
            break unless @scanner.continue
          end
        when TkCOMMENT
	  unget_tk(tk)
	  break
        end
        tk = get_tk
      end
      res = get_tkread.tr("\n", " ").strip
      res = "" if res == ";"
      res
    end


    # Parse a constant, which might be qualified by
    # one or more class or module names

    def get_constant
      res = ""
      skip_tkspace(false)
      tk = get_tk

      while tk.kind_of?(TkCOLON2) ||
          tk.kind_of?(TkCOLON3)   ||
          tk.kind_of?(TkCONSTANT)          
        
        res += tk.text
        tk = get_tk
      end

#      if res.empty?
#        warn("Unexpected token #{tk} in constant")
#      end 
      unget_tk(tk)
      res
    end

    # Get a constant that may be surrounded by parens
    
    def get_constant_with_optional_parens
      skip_tkspace(false)
      nest = 0
      while (tk = peek_tk).kind_of?(TkLPAREN)  || tk.kind_of?(TkfLPAREN)
        get_tk
        skip_tkspace(true)
        nest += 1
      end

      name = get_constant

      while nest > 0
        skip_tkspace(true)
        tk = get_tk
        nest -= 1 if tk.kind_of?(TkRPAREN)
      end
      name
    end

    # Directives are modifier comments that can appear after class, module,
    # or method names. For example
    #
    #   def fred    # :yields:  a, b
    #
    # or
    #
    #   class SM  # :nodoc:
    #
    # we return the directive name and any parameters as a two element array
    
    def read_directive(allowed)
      tk = get_tk
      puts "directive: #{tk.inspect}" if $DEBUG
      result = nil
      if tk.kind_of?(TkCOMMENT) 
        if tk.text =~ /\s*:?(\w+):\s*(.*)/
          directive = $1.downcase
          if allowed.include?(directive)
            result = [directive, $2]
          end
        end
      else
        unget_tk(tk)
      end
      result
    end

    
    def read_documentation_modifiers(context, allow)
      dir = read_directive(allow)

      case dir[0]

      when "notnew", "not_new", "not-new"
        context.dont_rename_initialize = true

      when "nodoc"
        context.document_self = false
	if dir[1].downcase == "all"
	  context.document_children = false
	end

      when "doc"
        context.document_self = true
        context.force_documentation = true

      when "yield", "yields"
        unless context.params.nil?
          context.params.sub!(/(,|)\s*&\w+/,'') # remove parameter &proc
        end
	context.block_params = dir[1]

      when "arg", "args"
        context.params = dir[1]
      end if dir
    end

    
    # Look for directives in a normal comment block:
    #
    #   #--       - don't display comment from this point forward
    #  
    #
    # This routine modifies it's parameter

    def look_for_directives_in(context, comment)

      preprocess = SM::PreProcess.new(@input_file_name,
                                      @options.rdoc_include)

      preprocess.handle(comment) do |directive, param|
        case directive
        when "stopdoc"
          context.stop_doc
          ""
        when "startdoc"
          context.start_doc
          context.force_documentation = true
          ""

        when "enddoc"
          context.done_documenting = true
          ""

        when "main"
          options = Options.instance
          options.main_page = param
	  ""

        when "title"
          options = Options.instance
          options.title = param
          ""

        else
          warn "Unrecognized directive '#{directive}'"
          break
        end
      end

      remove_private_comments(comment)
    end

    def remove_private_comments(comment)
      comment.gsub!(/^#--.*?^#\+\+/m, '')
      comment.sub!(/^#--.*/m, '')
    end


    def get_symbol_or_name
      tk = get_tk
      case tk
      when  TkSYMBOL
        tk.text.sub(/^:/, '')
      when TkId, TkOp
        tk.name
      when TkSTRING
        tk.text
      else
        raise "Name or symbol expected (got #{tk})"
      end
    end
    
    def parse_alias(context, single, tk, comment)
      skip_tkspace
      if (peek_tk.kind_of? TkLPAREN)
        get_tk
        skip_tkspace
      end
      new_name = get_symbol_or_name
      @scanner.instance_eval{@lex_state = EXPR_FNAME}
      skip_tkspace
      if (peek_tk.kind_of? TkCOMMA)
        get_tk
        skip_tkspace
      end
      old_name = get_symbol_or_name

      al = Alias.new(get_tkread, old_name, new_name, comment)
      read_documentation_modifiers(al, ATTR_MODIFIERS)
      if al.document_self
	context.add_alias(al)
      end
    end

    def parse_yield_parameters
      parse_method_or_yield_parameters
    end

  def parse_yield(context, single, tk, method)
    if method.block_params.nil?
      get_tkread
      @scanner.instance_eval{@continue = false}
      method.block_params = parse_yield_parameters
    end
  end

  def parse_require(context, comment)
    skip_tkspace_comment
    tk = get_tk
    if tk.kind_of? TkLPAREN
      skip_tkspace_comment
      tk = get_tk
    end

    name = nil
    case tk
    when TkSTRING
      name = tk.text
    when TkCONSTANT, TkIDENTIFIER, TkIVAR, TkGVAR
      name = tk.name
    when TkDSTRING
      warn "Skipping require of dynamic string: #{tk.text}"
    else
      error("Unknown argument type to require: #{tk}")
    end
    if name
      context.add_require(Require.new(name, comment))
    end
  end

  def parse_include(context, comment)
    skip_tkspace_comment
    name = get_constant_with_optional_parens
    unless name.empty?
      context.add_include(Include.new(name, comment))
    end
  end

    def get_bool
      skip_tkspace
      tk = get_tk
      case tk
      when TkTRUE
        true
      when TkFALSE, TkNIL
        false
      else
        unget_tk tk
        true
      end
    end

    def parse_attr(context, single, tk, comment)
      args = parse_symbol_arg(1)
      if args.size > 0
	name = args[0]
        rw = "R"
        skip_tkspace(false)
        tk = get_tk
        if tk.kind_of? TkCOMMA
          rw = "RW" if get_bool
        else
          unget_tk tk
        end
	att = Attr.new(get_tkread, name, rw, comment)
	read_documentation_modifiers(att, ATTR_MODIFIERS)
	if att.document_self
	  context.add_attribute(att)
	end
      else
	error("Missing argument to 'attr'")
      end    

    end

    def parse_visibility(container, single, tk)
      singleton = (single == SINGLE)
      vis = case tk.name
            when "private"   then :private
            when "protected" then :protected
            when "public"    then :public
            when "private_class_method"
              singleton = true
              :private
            when "public_class_method"
              singleton = true
              :public
            else raise "Invalid visibility: #{tk.name}"
            end
            
      skip_tkspace_comment(false)
      if peek_tk.kind_of? TkNL
#        error("Missing argument") if singleton        
        container.ongoing_visibility = vis
      else
        args = parse_symbol_arg
        container.set_visibility_for(args, vis, singleton)
      end
    end

    def parse_attr_accessor(context, single, tk, comment)
      args = parse_symbol_arg
      read = get_tkread
      rw = "?"

      # If nodoc is given, don't document any of them

      tmp = CodeObject.new
      read_documentation_modifiers(tmp, ATTR_MODIFIERS)
      return unless tmp.document_self

      case tk.name
      when "attr_reader"   then rw = "R"
      when "attr_writer"   then rw = "W"
      when "attr_accessor" then rw = "RW"
      else
        rw = @options.extra_accessor_flags[tk.name]
      end
      
      for name in args
	att = Attr.new(get_tkread, name, rw, comment)
        context.add_attribute(att)
      end    
    end

    def skip_tkspace_comment(skip_nl = true)
      loop do
        skip_tkspace(skip_nl)
        return unless peek_tk.kind_of? TkCOMMENT
        get_tk
      end
    end

    def parse_symbol_arg(no = nil)

      args = []
      skip_tkspace_comment
      case tk = get_tk
      when TkLPAREN
	loop do
	  skip_tkspace_comment
	  if tk1 = parse_symbol_in_arg
	    args.push tk1
	    break if no and args.size >= no
	  end
	  
	  skip_tkspace_comment
	  case tk2 = get_tk
	  when TkRPAREN
	    break
	  when TkCOMMA
	  else
	    warn("unexpected token: '#{tk.inspect}'")
	    break
	  end
	end
      else
	unget_tk tk
	if tk = parse_symbol_in_arg
	  args.push tk
	  return args if no and args.size >= no
	end

	loop do
#	  skip_tkspace_comment(false)
	  skip_tkspace(false)

	  tk1 = get_tk
	  unless tk1.kind_of?(TkCOMMA) 
	    unget_tk tk1
	    break
	  end
	  
	  skip_tkspace_comment
	  if tk = parse_symbol_in_arg
	    args.push tk
	    break if no and args.size >= no
	  end
	end
      end
      args
    end

    def parse_symbol_in_arg
      case tk = get_tk
      when TkSYMBOL
        tk.text.sub(/^:/, '')
      when TkSTRING
	eval @read[-1]
      else
	warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG
	nil
      end
    end
  end

end