1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00
ruby--ruby/lib/irb/ruby-lex.rb

802 lines
23 KiB
Ruby

# frozen_string_literal: false
#
# irb/ruby-lex.rb - ruby lexcal analyzer
# $Release Version: 0.9.6$
# $Revision$
# by Keiju ISHITSUKA(keiju@ruby-lang.org)
#
# --
#
#
#
require "ripper"
require "jruby" if RUBY_ENGINE == "jruby"
# :stopdoc:
class RubyLex
class TerminateLineInput < StandardError
def initialize
super("Terminate Line Input")
end
end
def initialize
@exp_line_no = @line_no = 1
@indent = 0
@continue = false
@line = ""
@prompt = nil
end
def self.compile_with_errors_suppressed(code)
line_no = 1
begin
result = yield code, line_no
rescue ArgumentError
code = ";\n#{code}"
line_no = 0
result = yield code, line_no
end
result
end
# io functions
def set_input(io, p = nil, &block)
@io = io
if @io.respond_to?(:check_termination)
@io.check_termination do |code|
if Reline::IOGate.in_pasting?
lex = RubyLex.new
rest = lex.check_termination_in_prev_line(code)
if rest
Reline.delete_text
rest.bytes.reverse_each do |c|
Reline.ungetc(c)
end
true
else
false
end
else
code.gsub!(/\s*\z/, '').concat("\n")
ltype, indent, continue, code_block_open = check_state(code)
if ltype or indent > 0 or continue or code_block_open
false
else
true
end
end
end
end
if @io.respond_to?(:dynamic_prompt)
@io.dynamic_prompt do |lines|
lines << '' if lines.empty?
result = []
tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join)
code = String.new
partial_tokens = []
unprocessed_tokens = []
line_num_offset = 0
tokens.each do |t|
partial_tokens << t
unprocessed_tokens << t
if t[2].include?("\n")
t_str = t[2]
t_str.each_line("\n") do |s|
code << s << "\n"
ltype, indent, continue, code_block_open = check_state(code, partial_tokens)
result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
line_num_offset += 1
end
unprocessed_tokens = []
else
code << t[2]
end
end
unless unprocessed_tokens.empty?
ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens)
result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
end
result
end
end
if p.respond_to?(:call)
@input = p
elsif block_given?
@input = block
else
@input = Proc.new{@io.gets}
end
end
def set_prompt(p = nil, &block)
p = block if block_given?
if p.respond_to?(:call)
@prompt = p
else
@prompt = Proc.new{print p}
end
end
ERROR_TOKENS = [
:on_parse_error,
:compile_error,
:on_assign_error,
:on_alias_error,
:on_class_name_error,
:on_param_error
]
def self.ripper_lex_without_warning(code)
verbose, $VERBOSE = $VERBOSE, nil
tokens = nil
compile_with_errors_suppressed(code) do |inner_code, line_no|
lexer = Ripper::Lexer.new(inner_code, '-', line_no)
if lexer.respond_to?(:scan) # Ruby 2.7+
tokens = []
pos_to_index = {}
lexer.scan.each do |t|
if pos_to_index.has_key?(t[0])
index = pos_to_index[t[0]]
found_tk = tokens[index]
if ERROR_TOKENS.include?(found_tk[1]) && !ERROR_TOKENS.include?(t[1])
tokens[index] = t
end
else
pos_to_index[t[0]] = tokens.size
tokens << t
end
end
else
tokens = lexer.parse
end
end
tokens
ensure
$VERBOSE = verbose
end
def find_prev_spaces(line_index)
return 0 if @tokens.size == 0
md = @tokens[0][2].match(/(\A +)/)
prev_spaces = md.nil? ? 0 : md[1].count(' ')
line_count = 0
@tokens.each_with_index do |t, i|
if t[2].include?("\n")
line_count += t[2].count("\n")
if line_count >= line_index
return prev_spaces
end
if (@tokens.size - 1) > i
md = @tokens[i + 1][2].match(/(\A +)/)
prev_spaces = md.nil? ? 0 : md[1].count(' ')
end
end
end
prev_spaces
end
def set_auto_indent(context)
if @io.respond_to?(:auto_indent) and context.auto_indent_mode
@io.auto_indent do |lines, line_index, byte_pointer, is_newline|
if is_newline
@tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"))
prev_spaces = find_prev_spaces(line_index)
depth_difference = check_newline_depth_difference
depth_difference = 0 if depth_difference < 0
prev_spaces + depth_difference * 2
else
code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
last_line = lines[line_index]&.byteslice(0, byte_pointer)
code += last_line if last_line
@tokens = self.class.ripper_lex_without_warning(code)
corresponding_token_depth = check_corresponding_token_depth
if corresponding_token_depth
corresponding_token_depth
else
nil
end
end
end
end
end
def check_state(code, tokens = nil)
tokens = self.class.ripper_lex_without_warning(code) unless tokens
ltype = process_literal_type(tokens)
indent = process_nesting_level(tokens)
continue = process_continue(tokens)
code_block_open = check_code_block(code, tokens)
[ltype, indent, continue, code_block_open]
end
def prompt
if @prompt
@prompt.call(@ltype, @indent, @continue, @line_no)
end
end
def initialize_input
@ltype = nil
@indent = 0
@continue = false
@line = ""
@exp_line_no = @line_no
@code_block_open = false
end
def each_top_level_statement
initialize_input
catch(:TERM_INPUT) do
loop do
begin
prompt
unless l = lex
throw :TERM_INPUT if @line == ''
else
@line_no += l.count("\n")
if l == "\n"
@exp_line_no += 1
next
end
@line.concat l
if @code_block_open or @ltype or @continue or @indent > 0
next
end
end
if @line != "\n"
@line.force_encoding(@io.encoding)
yield @line, @exp_line_no
end
raise TerminateLineInput if @io.eof?
@line = ''
@exp_line_no = @line_no
@indent = 0
rescue TerminateLineInput
initialize_input
prompt
end
end
end
end
def lex
line = @input.call
if @io.respond_to?(:check_termination)
return line # multiline
end
code = @line + (line.nil? ? '' : line)
code.gsub!(/\s*\z/, '').concat("\n")
@tokens = self.class.ripper_lex_without_warning(code)
@continue = process_continue
@code_block_open = check_code_block(code)
@indent = process_nesting_level
@ltype = process_literal_type
line
end
def process_continue(tokens = @tokens)
# last token is always newline
if tokens.size >= 2 and tokens[-2][1] == :on_regexp_end
# end of regexp literal
return false
elsif tokens.size >= 2 and tokens[-2][1] == :on_semicolon
return false
elsif tokens.size >= 2 and tokens[-2][1] == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2][2])
return false
elsif !tokens.empty? and tokens.last[2] == "\\\n"
return true
elsif tokens.size >= 1 and tokens[-1][1] == :on_heredoc_end # "EOH\n"
return false
elsif tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and tokens[-2][3].anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2][2] !~ /\A\.\.\.?\z/
# end of literal except for regexp
# endless range at end of line is not a continue
return true
end
false
end
def check_code_block(code, tokens = @tokens)
return true if tokens.empty?
if tokens.last[1] == :on_heredoc_beg
return true
end
begin # check if parser error are available
verbose, $VERBOSE = $VERBOSE, nil
case RUBY_ENGINE
when 'ruby'
self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
end
when 'jruby'
JRuby.compile_ir(code)
else
catch(:valid) do
eval("BEGIN { throw :valid, true }\n#{code}")
false
end
end
rescue EncodingError
# This is for a hash with invalid encoding symbol, {"\xAE": 1}
rescue SyntaxError => e
case e.message
when /unterminated (?:string|regexp) meets end of file/
# "unterminated regexp meets end of file"
#
# example:
# /
#
# "unterminated string meets end of file"
#
# example:
# '
return true
when /syntax error, unexpected end-of-input/
# "syntax error, unexpected end-of-input, expecting keyword_end"
#
# example:
# if ture
# hoge
# if false
# fuga
# end
return true
when /syntax error, unexpected keyword_end/
# "syntax error, unexpected keyword_end"
#
# example:
# if (
# end
#
# example:
# end
return false
when /syntax error, unexpected '\.'/
# "syntax error, unexpected '.'"
#
# example:
# .
return false
when /unexpected tREGEXP_BEG/
# "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
#
# example:
# method / f /
return false
end
ensure
$VERBOSE = verbose
end
if defined?(Ripper::EXPR_BEG)
last_lex_state = tokens.last[3]
if last_lex_state.allbits?(Ripper::EXPR_BEG)
return false
elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
return true
elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
return true
elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
return true
elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
return true
elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
return false
end
end
false
end
def process_nesting_level(tokens = @tokens)
indent = 0
in_oneliner_def = nil
tokens.each_with_index { |t, index|
# detecting one-liner method definition
if in_oneliner_def.nil?
if t[3].allbits?(Ripper::EXPR_ENDFN)
in_oneliner_def = :ENDFN
end
else
if t[3].allbits?(Ripper::EXPR_ENDFN)
# continuing
elsif t[3].allbits?(Ripper::EXPR_BEG)
if t[2] == '='
in_oneliner_def = :BODY
end
else
if in_oneliner_def == :BODY
# one-liner method definition
indent -= 1
end
in_oneliner_def = nil
end
end
case t[1]
when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
indent += 1
when :on_rbracket, :on_rbrace, :on_rparen
indent -= 1
when :on_kw
next if index > 0 and tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
case t[2]
when 'do'
syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
indent += 1 if syntax_of_do == :method_calling
when 'def', 'case', 'for', 'begin', 'class', 'module'
indent += 1
when 'if', 'unless', 'while', 'until'
# postfix if/unless/while/until must be Ripper::EXPR_LABEL
indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
when 'end'
indent -= 1
end
end
# percent literals are not indented
}
indent
end
def is_method_calling?(tokens, index)
tk = tokens[index]
if tk[3].anybits?(Ripper::EXPR_CMDARG) and tk[1] == :on_ident
# The target method call to pass the block with "do".
return true
elsif tk[3].anybits?(Ripper::EXPR_ARG) and tk[1] == :on_ident
non_sp_index = tokens[0..(index - 1)].rindex{ |t| t[1] != :on_sp }
if non_sp_index
prev_tk = tokens[non_sp_index]
if prev_tk[3].anybits?(Ripper::EXPR_DOT) and prev_tk[1] == :on_period
# The target method call with receiver to pass the block with "do".
return true
end
end
end
false
end
def take_corresponding_syntax_to_kw_do(tokens, index)
syntax_of_do = nil
# Finding a syntax correnponding to "do".
index.downto(0) do |i|
tk = tokens[i]
# In "continue", the token isn't the corresponding syntax to "do".
non_sp_index = tokens[0..(i - 1)].rindex{ |t| t[1] != :on_sp }
first_in_fomula = false
if non_sp_index.nil?
first_in_fomula = true
elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index][1])
first_in_fomula = true
end
if is_method_calling?(tokens, i)
syntax_of_do = :method_calling
break if first_in_fomula
elsif tk[1] == :on_kw && %w{while until for}.include?(tk[2])
# A loop syntax in front of "do" found.
#
# while cond do # also "until" or "for"
# end
#
# This "do" doesn't increment indent because the loop syntax already
# incremented.
syntax_of_do = :loop_syntax
break if first_in_fomula
end
end
syntax_of_do
end
def is_the_in_correspond_to_a_for(tokens, index)
syntax_of_in = nil
# Finding a syntax correnponding to "do".
index.downto(0) do |i|
tk = tokens[i]
# In "continue", the token isn't the corresponding syntax to "do".
non_sp_index = tokens[0..(i - 1)].rindex{ |t| t[1] != :on_sp }
first_in_fomula = false
if non_sp_index.nil?
first_in_fomula = true
elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index][1])
first_in_fomula = true
end
if tk[1] == :on_kw && tk[2] == 'for'
# A loop syntax in front of "do" found.
#
# while cond do # also "until" or "for"
# end
#
# This "do" doesn't increment indent because the loop syntax already
# incremented.
syntax_of_in = :for
end
break if first_in_fomula
end
syntax_of_in
end
def check_newline_depth_difference
depth_difference = 0
open_brace_on_line = 0
in_oneliner_def = nil
@tokens.each_with_index do |t, index|
# detecting one-liner method definition
if in_oneliner_def.nil?
if t[3].allbits?(Ripper::EXPR_ENDFN)
in_oneliner_def = :ENDFN
end
else
if t[3].allbits?(Ripper::EXPR_ENDFN)
# continuing
elsif t[3].allbits?(Ripper::EXPR_BEG)
if t[2] == '='
in_oneliner_def = :BODY
end
else
if in_oneliner_def == :BODY
# one-liner method definition
depth_difference -= 1
end
in_oneliner_def = nil
end
end
case t[1]
when :on_ignored_nl, :on_nl, :on_comment
if index != (@tokens.size - 1) and in_oneliner_def != :BODY
depth_difference = 0
open_brace_on_line = 0
end
next
when :on_sp
next
end
case t[1]
when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
depth_difference += 1
open_brace_on_line += 1
when :on_rbracket, :on_rbrace, :on_rparen
depth_difference -= 1 if open_brace_on_line > 0
when :on_kw
next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
case t[2]
when 'do'
syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
depth_difference += 1 if syntax_of_do == :method_calling
when 'def', 'case', 'for', 'begin', 'class', 'module'
depth_difference += 1
when 'if', 'unless', 'while', 'until', 'rescue'
# postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
unless t[3].allbits?(Ripper::EXPR_LABEL)
depth_difference += 1
end
when 'else', 'elsif', 'ensure', 'when'
depth_difference += 1
when 'in'
unless is_the_in_correspond_to_a_for(@tokens, index)
depth_difference += 1
end
when 'end'
depth_difference -= 1
end
end
end
depth_difference
end
def check_corresponding_token_depth
corresponding_token_depth = nil
is_first_spaces_of_line = true
is_first_printable_of_line = true
spaces_of_nest = []
spaces_at_line_head = 0
open_brace_on_line = 0
in_oneliner_def = nil
@tokens.each_with_index do |t, index|
# detecting one-liner method definition
if in_oneliner_def.nil?
if t[3].allbits?(Ripper::EXPR_ENDFN)
in_oneliner_def = :ENDFN
end
else
if t[3].allbits?(Ripper::EXPR_ENDFN)
# continuing
elsif t[3].allbits?(Ripper::EXPR_BEG)
if t[2] == '='
in_oneliner_def = :BODY
end
else
if in_oneliner_def == :BODY
# one-liner method definition
if is_first_printable_of_line
corresponding_token_depth = spaces_of_nest.pop
else
spaces_of_nest.pop
corresponding_token_depth = nil
end
end
in_oneliner_def = nil
end
end
case t[1]
when :on_ignored_nl, :on_nl, :on_comment
if in_oneliner_def != :BODY
corresponding_token_depth = nil
spaces_at_line_head = 0
is_first_spaces_of_line = true
is_first_printable_of_line = true
open_brace_on_line = 0
end
next
when :on_sp
spaces_at_line_head = t[2].count(' ') if is_first_spaces_of_line
is_first_spaces_of_line = false
next
end
case t[1]
when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
open_brace_on_line += 1
when :on_rbracket, :on_rbrace, :on_rparen
if is_first_printable_of_line
corresponding_token_depth = spaces_of_nest.pop
else
spaces_of_nest.pop
corresponding_token_depth = nil
end
open_brace_on_line -= 1
when :on_kw
next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
case t[2]
when 'do'
syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
if syntax_of_do == :method_calling
spaces_of_nest.push(spaces_at_line_head)
end
when 'def', 'case', 'for', 'begin', 'class', 'module'
spaces_of_nest.push(spaces_at_line_head)
when 'rescue'
unless t[3].allbits?(Ripper::EXPR_LABEL)
corresponding_token_depth = spaces_of_nest.last
end
when 'if', 'unless', 'while', 'until'
# postfix if/unless/while/until must be Ripper::EXPR_LABEL
unless t[3].allbits?(Ripper::EXPR_LABEL)
spaces_of_nest.push(spaces_at_line_head)
end
when 'else', 'elsif', 'ensure', 'when', 'in'
corresponding_token_depth = spaces_of_nest.last
when 'end'
if is_first_printable_of_line
corresponding_token_depth = spaces_of_nest.pop
else
spaces_of_nest.pop
corresponding_token_depth = nil
end
end
end
is_first_spaces_of_line = false
is_first_printable_of_line = false
end
corresponding_token_depth
end
def check_string_literal(tokens)
i = 0
start_token = []
end_type = []
while i < tokens.size
t = tokens[i]
case t[1]
when :on_tstring_beg
start_token << t
end_type << [:on_tstring_end, :on_label_end]
when :on_regexp_beg
start_token << t
end_type << :on_regexp_end
when :on_symbeg
acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw}
if (i + 1) < tokens.size and acceptable_single_tokens.all?{ |st| tokens[i + 1][1] != st }
start_token << t
end_type << :on_tstring_end
end
when :on_backtick
start_token << t
end_type << :on_tstring_end
when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
start_token << t
end_type << :on_tstring_end
when :on_heredoc_beg
start_token << t
end_type << :on_heredoc_end
when *end_type.last
start_token.pop
end_type.pop
end
i += 1
end
start_token.last.nil? ? '' : start_token.last
end
def process_literal_type(tokens = @tokens)
start_token = check_string_literal(tokens)
case start_token[1]
when :on_tstring_beg
case start_token[2]
when ?" then ?"
when /^%.$/ then ?"
when /^%Q.$/ then ?"
when ?' then ?'
when /^%q.$/ then ?'
end
when :on_regexp_beg then ?/
when :on_symbeg then ?:
when :on_backtick then ?`
when :on_qwords_beg then ?]
when :on_words_beg then ?]
when :on_qsymbols_beg then ?]
when :on_symbols_beg then ?]
when :on_heredoc_beg
start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
case $1
when ?" then ?"
when ?' then ?'
when ?` then ?`
else ?"
end
else
nil
end
end
def check_termination_in_prev_line(code)
tokens = self.class.ripper_lex_without_warning(code)
past_first_newline = false
index = tokens.rindex do |t|
# traverse first token before last line
if past_first_newline
if t.tok.include?("\n")
true
end
elsif t.tok.include?("\n")
past_first_newline = true
false
else
false
end
end
if index
first_token = nil
last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
last_line_tokens.each do |t|
unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
first_token = t
break
end
end
if first_token.nil?
return false
elsif first_token && first_token.state == Ripper::EXPR_DOT
return false
else
tokens_without_last_line = tokens[0..index]
ltype = process_literal_type(tokens_without_last_line)
indent = process_nesting_level(tokens_without_last_line)
continue = process_continue(tokens_without_last_line)
code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
if ltype or indent > 0 or continue or code_block_open
return false
else
return last_line_tokens.map(&:tok).join('')
end
end
end
false
end
end
# :startdoc: