mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
214a7f8d49
* This version switches the lexer from the IRB-based lexer to one built on Ripper. See details: https://github.com/ruby/rdoc/pull/512 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59845 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
587 lines
16 KiB
Ruby
587 lines
16 KiB
Ruby
require 'ripper'
|
|
|
|
class RDoc::RipperStateLex
|
|
# Lexer state flags. Every flag except EXPR_NONE is a distinct bit, so states
# can be combined with `|` and tested with `&`.
# NOTE(review): the names look like they mirror the Ruby parser's own
# lex-state flags -- confirm before relying on that correspondence.
EXPR_NONE    = 0
EXPR_BEG     = 1
EXPR_END     = 2
EXPR_ENDARG  = 4
EXPR_ENDFN   = 8
EXPR_ARG     = 16
EXPR_CMDARG  = 32
EXPR_MID     = 64
EXPR_FNAME   = 128
EXPR_DOT     = 256
EXPR_CLASS   = 512
EXPR_LABEL   = 1024
EXPR_LABELED = 2048
EXPR_FITEM   = 4096

# Alias of EXPR_BEG.
EXPR_VALUE = EXPR_BEG

# Composite masks: any "beginning", "argument" or "end" flavoured state.
EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS)
EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG)
EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN)
|
|
|
|
# Ripper::Filter subclass that maintains its own lexer state (+lex_state+, a
# combination of the EXPR_* flags) while Ripper scans the source, and reports
# every scanner event to the block given to #each.
#
# Each reported token is a Hash with the keys :line_no, :char_no, :kind (the
# scanner event name, e.g. :on_ident), :text and :state. :state is the value
# of +lex_state+ after the event has been processed, except for
# :on_heredoc_end, which is reported before the state is reset to EXPR_BEG.
class InnerStateLex < Ripper::Filter
  # Current lexer state; writable so that the code driving this lexer can
  # override it between tokens.
  attr_accessor :lex_state

  # Starts in EXPR_BEG with no pending name context; +code+ is passed to
  # Ripper::Filter unchanged.
  def initialize(code)
    @lex_state = EXPR_BEG
    @in_fname = false # set after `class`, `def` and a symbol start
    @continue = false # set while a name is still expected (see #on_variables)
    reset
    super(code)
  end

  # Clears the command-start bookkeeping. Called from #initialize and from
  # #on_default.
  def reset
    @command_start = false
    @cmd_state = @command_start
  end

  def on_nl(tok, data)
    newline_transition
    emit_token(__method__, tok)
  end

  def on_ignored_nl(tok, data)
    newline_transition
    emit_token(__method__, tok)
  end

  # '?', the logical operators and the compound assignments always lead to
  # EXPR_BEG. Every other operator leads to EXPR_ARG when it directly follows
  # a method-name or dot state, and to EXPR_BEG otherwise.
  def on_op(tok, data)
    @lex_state =
      case tok
      when '?', '&&', '||', '+=', '-=', '*=', '**=',
           '&=', '|=', '^=', '<<=', '>>=', '||=', '&&='
        EXPR_BEG
      else
        # TODO next token? (open question inherited from the '<<' handling)
        name_or_dot_state? ? EXPR_ARG : EXPR_BEG
      end
    emit_token(__method__, tok)
  end

  def on_kw(tok, data)
    case tok
    when 'class'
      @lex_state = EXPR_CLASS
      @in_fname = true
    when 'def'
      @lex_state = EXPR_FNAME
      @continue = true
      @in_fname = true
    when 'if', 'unless', 'while', 'until'
      # After an end-like or argument-like state the keyword is a postfix
      # modifier, which additionally allows a label to follow.
      if ((EXPR_END_ANY | EXPR_ARG_ANY) & @lex_state) != 0
        @lex_state = EXPR_BEG | EXPR_LABEL
      else
        @lex_state = EXPR_BEG
      end
    when 'begin'
      @lex_state = EXPR_BEG
    else
      @lex_state = EXPR_END
    end
    emit_token(__method__, tok)
  end

  def on_tstring_beg(tok, data)
    transition(__method__, tok, EXPR_BEG)
  end

  def on_tstring_end(tok, data)
    transition(__method__, tok, EXPR_END | EXPR_ENDARG)
  end

  def on_CHAR(tok, data)
    transition(__method__, tok, EXPR_END)
  end

  def on_period(tok, data)
    transition(__method__, tok, EXPR_DOT)
  end

  def on_int(tok, data)
    transition(__method__, tok, EXPR_END | EXPR_ENDARG)
  end

  def on_float(tok, data)
    transition(__method__, tok, EXPR_END | EXPR_ENDARG)
  end

  def on_rational(tok, data)
    transition(__method__, tok, EXPR_END | EXPR_ENDARG)
  end

  def on_imaginary(tok, data)
    transition(__method__, tok, EXPR_END | EXPR_ENDARG)
  end

  def on_symbeg(tok, data)
    @lex_state = EXPR_FNAME
    @continue = true
    @in_fname = true
    emit_token(__method__, tok)
  end

  # Shared handler for identifier-like tokens; +event+ is the name of the
  # scanner event that is reported to the callback.
  private def on_variables(event, tok, data)
    if @in_fname
      @lex_state = EXPR_ENDFN
      @in_fname = false
      @continue = false
    elsif @continue
      case @lex_state
      when EXPR_DOT
        @lex_state = EXPR_ARG
      else
        @lex_state = EXPR_ENDFN
        @continue = false
      end
    else
      @lex_state = EXPR_CMDARG
    end
    emit_token(event, tok)
  end

  def on_ident(tok, data)
    on_variables(__method__, tok, data)
  end

  # The EXPR_END assignment below (and in the three handlers that follow) is
  # observable only through the `case @lex_state` inside #on_variables: it
  # guarantees the EXPR_DOT arm is not taken.
  def on_ivar(tok, data)
    @lex_state = EXPR_END
    on_variables(__method__, tok, data)
  end

  def on_cvar(tok, data)
    @lex_state = EXPR_END
    on_variables(__method__, tok, data)
  end

  def on_gvar(tok, data)
    @lex_state = EXPR_END
    on_variables(__method__, tok, data)
  end

  def on_backref(tok, data)
    @lex_state = EXPR_END
    on_variables(__method__, tok, data)
  end

  def on_lparen(tok, data)
    transition(__method__, tok, EXPR_LABEL | EXPR_BEG)
  end

  def on_rparen(tok, data)
    transition(__method__, tok, EXPR_ENDFN)
  end

  def on_lbrace(tok, data)
    transition(__method__, tok, EXPR_LABEL | EXPR_BEG)
  end

  def on_rbrace(tok, data)
    transition(__method__, tok, EXPR_ENDARG)
  end

  def on_lbracket(tok, data)
    transition(__method__, tok, EXPR_LABEL | EXPR_BEG)
  end

  def on_rbracket(tok, data)
    transition(__method__, tok, EXPR_ENDARG)
  end

  def on_const(tok, data)
    @lex_state =
      case @lex_state
      when EXPR_FNAME then EXPR_ENDFN
      when EXPR_CLASS then EXPR_ARG
      else EXPR_CMDARG
      end
    emit_token(__method__, tok)
  end

  # Plain spaces never change the state.
  def on_sp(tok, data)
    emit_token(__method__, tok)
  end

  def on_comma(tok, data)
    @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0
    emit_token(__method__, tok)
  end

  def on_comment(tok, data)
    reset_to_beg_unless_label
    emit_token(__method__, tok)
  end

  def on_ignored_sp(tok, data)
    reset_to_beg_unless_label
    emit_token(__method__, tok)
  end

  # Reports the terminator with the state that was current while scanning it,
  # and only afterwards resets the state.
  def on_heredoc_end(tok, data)
    emit_token(__method__, tok)
    @lex_state = EXPR_BEG
  end

  # Fallback for every scanner event without a dedicated handler.
  def on_default(event, tok, data)
    reset
    emit_token(event, tok)
  end

  # Scans the whole source, calling +block+ once per token Hash.
  def each(&block)
    @callback = block
    parse
  end

  private

  # Hands one token Hash to the callback and returns the callback's result.
  def emit_token(kind, tok)
    @callback.call({ :line_no => lineno, :char_no => column, :kind => kind, :text => tok, :state => @lex_state })
  end

  # Sets the state unconditionally, then reports the token.
  def transition(kind, tok, state)
    @lex_state = state
    emit_token(kind, tok)
  end

  # True when the state is exactly EXPR_FNAME or exactly EXPR_DOT (no other
  # bits set).
  def name_or_dot_state?
    EXPR_FNAME == @lex_state || EXPR_DOT == @lex_state
  end

  # Back to EXPR_BEG, unless the EXPR_LABEL bit is currently set.
  def reset_to_beg_unless_label
    @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0
  end

  # State handling shared by both newline events: while a name is still
  # expected (method-name or dot state) the state is kept and flagged as
  # continuing; otherwise a new expression begins.
  def newline_transition
    if name_or_dot_state?
      @continue = true
    else
      @continue = false
      reset_to_beg_unless_label
    end
  end
end
|
|
|
|
# Returns the next token, with multi-token constructs (symbols, strings,
# regexps, =begin/=end blocks, %-literal word lists, signed numbers) squashed
# into a single token Hash by the matching helper.
#
# Tokens pushed back into @buf are served before new ones are pulled from the
# inner lexer. When both are exhausted, the StopIteration raised by
# Enumerator#next propagates to the caller.
def get_squashed_tk
  tk = @buf.empty? ? @inner_lex_enumerator.next : @buf.shift
  case tk[:kind]
  when :on_symbeg
    tk = get_symbol_tk(tk)
  when :on_tstring_beg
    tk = get_string_tk(tk)
  when :on_backtick
    if (EXPR_FNAME & tk[:state]) != 0
      # A backtick where a method name is expected is the name itself.
      @inner_lex.lex_state = EXPR_ARG
      tk[:kind] = :on_ident
      tk[:state] = @inner_lex.lex_state
    else
      tk = get_string_tk(tk)
    end
  when :on_regexp_beg
    tk = get_regexp_tk(tk)
  when :on_embdoc_beg
    tk = get_embdoc_tk(tk)
  when :on_heredoc_beg
    # The body only starts after the current line, so just remember it.
    @heredoc_queue << retrieve_heredoc_info(tk)
    @inner_lex.lex_state = EXPR_END
  when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end
    # A line has ended: collect the next pending heredoc body (if any) into
    # @buf so it is returned right after this token.
    get_heredoc_tk(*@heredoc_queue.shift) unless @heredoc_queue.empty?
  when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg
    tk = get_words_tk(tk)
  when :on_op
    if '&.' == tk[:text]
      # Safe navigation is reported like a plain method-call dot.
      tk[:kind] = :on_period
    else
      tk = get_op_tk(tk)
    end
  end
  tk
end
|
|
|
|
# Squashes a symbol start token (+tk+, kind :on_symbeg) and the token(s) that
# follow it into one :on_symbol token positioned at +tk+.
#
# Quoted symbols (:'..' and :"..") are collected with get_string_tk.
# Otherwise the next token supplies the name. If that token is not one of the
# kinds listed below, it is returned unchanged and no symbol token is built
# (the start token itself is not returned in that case).
private def get_symbol_tk(tk)
  symbol_tk = { :line_no => tk[:line_no], :char_no => tk[:char_no], :kind => :on_symbol }

  if ":'" == tk[:text] or ':"' == tk[:text]
    string_tk = get_string_tk(tk)
    symbol_tk[:text] = string_tk[:text]
    symbol_tk[:state] = string_tk[:state]
    return symbol_tk
  end

  name_tk = get_squashed_tk
  case name_tk[:kind]
  when :on_tstring_content
    symbol_tk[:text] = ":#{name_tk[:text]}"
    symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end
  when :on_ident, :on_tstring_end, :on_op, :on_ivar, :on_cvar, :on_gvar, :on_const, :on_kw
    symbol_tk[:text] = ":#{name_tk[:text]}"
    symbol_tk[:state] = name_tk[:state]
  else
    return name_tk
  end
  symbol_tk
end
|
|
|
|
# Squashes everything from the opening token +tk+ up to the closing token into
# a single token positioned at +tk+.
#
# The resulting kind is :on_tstring, :on_dstring once an interpolation start
# (:on_embexpr_beg) has been seen, or :on_symbol when the literal is closed by
# a label end (:on_label_end). :state is taken from the closing token and
# stays nil when the input ends first: Kernel#loop swallows the StopIteration
# raised by get_squashed_tk.
private def get_string_tk(tk)
  string = tk[:text].dup # copy: the opening token's own text is left untouched
  state = nil
  kind = :on_tstring
  loop do
    inner_str_tk = get_squashed_tk
    break if inner_str_tk.nil?
    string << inner_str_tk[:text]
    case inner_str_tk[:kind]
    when :on_tstring_end
      state = inner_str_tk[:state]
      break
    when :on_label_end
      state = inner_str_tk[:state]
      kind = :on_symbol
      break
    when :on_embexpr_beg
      kind = :on_dstring
    end
  end
  {
    :line_no => tk[:line_no],
    :char_no => tk[:char_no],
    :kind => kind,
    :text => string,
    :state => state
  }
end
|
|
|
|
# Squashes a regexp literal, from the opening token +tk+ up to and including
# the :on_regexp_end token, into one :on_regexp token positioned at +tk+.
#
# :state is taken from the closing token and stays nil when the input ends
# first: Kernel#loop swallows the StopIteration raised by get_squashed_tk.
private def get_regexp_tk(tk)
  string = tk[:text].dup # copy: the opening token's own text is left untouched
  state = nil
  loop do
    inner_str_tk = get_squashed_tk
    break if inner_str_tk.nil?
    string << inner_str_tk[:text]
    if :on_regexp_end == inner_str_tk[:kind]
      state = inner_str_tk[:state]
      break
    end
  end
  {
    :line_no => tk[:line_no],
    :char_no => tk[:char_no],
    :kind => :on_regexp,
    :text => string,
    :state => state
  }
end
|
|
|
|
# Squashes an embedded document, from the opening token +tk+ up to and
# including the :on_embdoc_end token, into one :on_embdoc token positioned at
# +tk+ and carrying the closing token's state.
#
# If the input ends before the closing token, the StopIteration raised by
# get_squashed_tk propagates to the caller.
private def get_embdoc_tk(tk)
  string = tk[:text].dup # copy: the opening token's own text is left untouched
  until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind]
    string << embdoc_tk[:text]
  end
  string << embdoc_tk[:text] # the closing line belongs to the token too
  {
    :line_no => tk[:line_no],
    :char_no => tk[:char_no],
    :kind => :on_embdoc,
    :text => string,
    :state => embdoc_tk[:state]
  }
end
|
|
|
|
# Reads the body of the heredoc named +heredoc_name+ straight from the inner
# lexer and queues two tokens at the front of @buf: one :on_heredoc token
# holding the whole body, followed by the terminator token.
#
# +indent+ is passed through to heredoc_end?. The body token is positioned at
# the first body token (or at the terminator when the body is empty) and
# carries the state of the last body token. If the input ends before the
# terminator, the StopIteration raised by Enumerator#next propagates.
private def get_heredoc_tk(heredoc_name, indent)
  string = ''.dup
  start_tk = nil
  prev_tk = nil
  until heredoc_end?(heredoc_name, indent, tk = @inner_lex_enumerator.next)
    start_tk ||= tk
    # A token that starts a line at a non-zero column gets its column worth
    # of spaces prepended before its text.
    at_line_start = prev_tk.nil? || "\n" == prev_tk[:text][-1]
    string << (' ' * tk[:char_no]) if at_line_start && 0 != tk[:char_no]
    string << tk[:text]
    prev_tk = tk
  end
  # Empty body: fall back to the terminator for position and state.
  start_tk ||= tk
  prev_tk ||= tk
  heredoc_tk = {
    :line_no => start_tk[:line_no],
    :char_no => start_tk[:char_no],
    :kind => :on_heredoc,
    :text => string,
    :state => prev_tk[:state]
  }
  # Body first, then the closing heredoc terminator.
  @buf.unshift(heredoc_tk, tk)
end
|
|
|
|
# Splits a heredoc start token such as <<~'EOS' into [name, indent].
#
# +name+ is the identifier with the `<<`, an optional `-`/`~` and an optional
# pair of matching quotes (' " `) removed. +indent+ is 0 when the heredoc was
# opened with `<<-` or `<<~` and nil otherwise, so it can be used as a flag.
private def retrieve_heredoc_info(tk)
  text = tk[:text]
  name = text.sub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
  indent = text =~ /\A<<[-~]/
  [name, indent]
end
|
|
|
|
# Returns true when +tk+ is the :on_heredoc_end token that terminates the
# heredoc called +name+, false otherwise.
#
# The line terminator is always ignored. When +indent+ is truthy (the heredoc
# was opened with `<<-` or `<<~`) any leading whitespace is ignored as well --
# tabs included, not only spaces.
private def heredoc_end?(name, indent, tk)
  return false unless :on_heredoc_end == tk[:kind]

  tk_name = tk[:text].chomp
  tk_name = tk_name.lstrip if indent
  name == tk_name
end
|
|
|
|
# Squashes a %-literal word list (the caller passes the :on_words_beg,
# :on_qwords_beg, :on_symbols_beg and :on_qsymbols_beg tokens) into a single
# :on_dstring token that keeps the position and state of the opening token.
#
# The closing delimiter is derived from the last non-blank character of the
# opening token. Collection stops at the first separator or string-end token
# whose stripped text equals that delimiter. If the input ends first, the
# literal is returned without a closing delimiter: Kernel#loop swallows the
# StopIteration raised by get_squashed_tk.
private def get_words_tk(tk)
  string = ''.dup
  start_token = tk[:text]
  start_quote = tk[:text].rstrip[-1]
  line_no = tk[:line_no]
  char_no = tk[:char_no]
  state = tk[:state]
  end_quote =
    case start_quote
    when ?( then ?)
    when ?[ then ?]
    when ?{ then ?}
    when ?< then ?>
    else start_quote
    end
  end_token = nil
  loop do
    tk = get_squashed_tk
    if tk.nil?
      end_token = end_quote
      break
    end
    closer = (:on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind])
    if closer and end_quote == tk[:text].strip
      end_token = tk[:text]
      break
    end
    string << tk[:text]
  end
  {
    :line_no => line_no,
    :char_no => char_no,
    :kind => :on_dstring,
    :text => "#{start_token}#{string}#{end_token}",
    :state => state
  }
end
|
|
|
|
# Post-processes an operator token +tk+ (modified in place and returned).
#
# * A redefinable operator seen in exactly the EXPR_ARG state is reported as
#   an identifier (:on_ident), and the inner lexer is put into EXPR_ARG.
# * Otherwise a lone `+` or `-` directly followed by a numeric literal is
#   merged with it into one numeric token. Any other following token is
#   pushed back into @buf.
#
# When the sign is the very last token of the input, the lookahead raises
# StopIteration. That is rescued here so the sign token itself is still
# returned; the next call to get_squashed_tk reports the end of input.
private def get_op_tk(tk)
  redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
  if redefinable_operators.include?(tk[:text]) and EXPR_ARG == tk[:state]
    @inner_lex.lex_state = EXPR_ARG
    tk[:kind] = :on_ident
    tk[:state] = @inner_lex.lex_state
  elsif tk[:text] =~ /^[-+]$/
    tk_ahead =
      begin
        get_squashed_tk
      rescue StopIteration
        nil # nothing follows the sign
      end
    return tk if tk_ahead.nil?

    case tk_ahead[:kind]
    when :on_int, :on_float, :on_rational, :on_imaginary
      tk[:text] += tk_ahead[:text]
      tk[:kind] = tk_ahead[:kind]
      tk[:state] = tk_ahead[:state]
    else
      @buf.unshift tk_ahead
    end
  end
  tk
end
|
|
|
|
# Prepares lexing of +code+.
#
# @buf holds tokens that were read ahead or synthesised and must be returned
# before new ones are pulled. @heredoc_queue holds the [name, indent] pairs of
# heredocs whose bodies have not been read yet. The inner lexer is wrapped in
# an Enumerator so tokens can be pulled one at a time with #next.
def initialize(code)
  @buf = []
  @heredoc_queue = []
  @inner_lex = InnerStateLex.new(code)
  @inner_lex_enumerator = Enumerator.new do |yielder|
    @inner_lex.each { |tk| yielder << tk }
  end
end
|
|
|
|
# Lexes +code+ and returns all squashed tokens as an Array of token Hashes.
#
# The end of input is signalled by get_squashed_tk raising StopIteration,
# which Kernel#loop rescues. A nil/false token also ends the collection.
def self.parse(code)
  lex = self.new(code)
  tokens = []
  loop do
    tk = lex.get_squashed_tk
    break unless tk
    tokens.push tk
  end
  tokens
end
|
|
|
|
# Returns the EXPR_END bit of +token+'s state: EXPR_END when the bit is set,
# 0 otherwise.
#
# NOTE(review): the result is an Integer, and in Ruby 0 is truthy, so used
# directly as a condition this is always true. Left unchanged because callers
# may rely on it -- confirm before turning it into a real boolean.
def self.end?(token)
  token[:state] & EXPR_END
end
|
|
end
|