mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
3e92b635fb
When you change this to true, you may need to add more tests. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53141 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
421 lines
9.1 KiB
Ruby
421 lines
9.1 KiB
Ruby
# coding: UTF-8
|
|
# frozen_string_literal: false
|
|
# :markup: markdown
|
|
|
|
##
|
|
#--
|
|
# This set of literals is for Ruby 1.9 regular expressions and gives full
|
|
# Unicode support.
|
|
#
|
|
# Unlike peg-markdown, this set of literals recognizes Unicode alphanumeric
|
|
# characters, newlines and spaces.
|
|
class RDoc::Markdown::Literals
|
|
# :stopdoc:
|
|
|
|
# Creates a parser for +str+.
#
# This is distinct from setup_parser so that a standalone parser
# can redefine #initialize and still have access to the proper
# parser setup code.
#
# +debug+ is handed to setup_parser unchanged.
def initialize(str, debug=false)
  setup_parser(str, debug)
end
|
|
|
|
|
|
|
|
# Prepares for parsing +str+.  If you define a custom initialize you must
# call this method before #parse.
#
# Resets the input position to 0, clears the memoization table, the last
# result and the recorded failure, then calls setup_foreign_grammar.
# +debug+ is accepted but not used by this method.
def setup_parser(str, debug=false)
  set_string str, 0

  # Outer key: rule (or [rule, args]); inner hash: position => memo entry.
  # The default block creates the inner hash on first access.
  @memoizations = Hash.new { |h,k| h[k] = {} }
  @result = nil

  # -1 means "no failure recorded yet"; set_failed_rule only records a
  # failure at a position greater than the stored offset.
  @failed_rule = nil
  @failing_rule_offset = -1

  setup_foreign_grammar
end
|
|
|
|
# The input string being parsed, as assigned by set_string.
attr_reader :string
|
|
# Input offset recorded by set_failed_rule; -1 after setup_parser until a
# failure has been recorded.
attr_reader :failing_rule_offset
|
|
# +result+ is the parser's current result value and +pos+ the current offset
# into the input string.  Both are writable; external_invoke assigns them on
# another parser object.
attr_accessor :result, :pos
|
|
|
|
# Returns the 1-based column of offset +target+ (default: the current
# position) within its line.  A newline character counts as the last column
# of the line it terminates.
def current_column(target=pos)
  # Only look for a newline strictly before +target+.  A negative start
  # index makes String#rindex count from the end of the string, so the
  # search is skipped when +target+ is 0.
  prev_newline = string.rindex("\n", target - 1) if target > 0

  # Distance from the preceding newline gives a 1-based column, matching
  # the first-line case below.
  return target - prev_newline if prev_newline

  target + 1
end
|
|
|
|
# Returns the 1-based number of the line containing offset +target+
# (default: the current position).
#
# An offset equal to the length of a non-empty string is reported on the
# last line.  Returns -1 when the string is empty or +target+ lies beyond
# its end.
def current_line(target=pos)
  cur_offset = 0
  cur_line = 0

  string.each_line do |line|
    cur_line += 1
    cur_offset += line.size

    # cur_offset is now the offset of the first character of the NEXT line,
    # so the comparison must be strict: an offset equal to it belongs to the
    # following line, not this one.
    return cur_line if cur_offset > target
  end

  # End-of-input position: keep reporting the last line.
  return cur_line if cur_line > 0 && cur_offset == target

  -1
end
|
|
|
|
# Returns the input string split into an Array of lines.  Each element keeps
# its line terminator, as yielded by String#each_line.
def lines
  string.each_line.to_a
end
|
|
|
|
|
|
|
|
# Returns the text between offset +start+ (inclusive) and the current
# position (exclusive).
def get_text(start)
  # An exclusive range is required here: with "start..@pos-1" a current
  # position of 0 turns the end into -1, which selects through the end of
  # the string instead of an empty span.
  @string[start...@pos]
end
|
|
|
|
# Sets the string and current parsing position for the parser.
#
# Also caches the string's size; a nil +string+ is recorded with size 0.
def set_string string, pos
  @string = string
  @string_size = string ? string.size : 0
  @pos = pos
end
|
|
|
|
# Returns a String describing the current position: the offset followed by
# up to +width+ (10) characters on either side of it.
def show_pos
  width = 10
  if @pos < width
    # Fewer than +width+ characters precede the position: show them all.
    "#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
  else
    # Otherwise show only the last +width+ characters, marked with "...".
    "#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
  end
end
|
|
|
|
# Returns a one-line description of the recorded failure: its line and
# column plus the failed rule.
#
# When the failed rule is a Symbol it is looked up in the class's Rules
# table and reported with its name and rendered text; any other value is
# interpolated as-is.
def failure_info
  l = current_line @failing_rule_offset
  c = current_column @failing_rule_offset

  if @failed_rule.kind_of? Symbol
    info = self.class::Rules[@failed_rule]
    "line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
  else
    "line #{l}, column #{c}: failed rule '#{@failed_rule}'"
  end
end
|
|
|
|
# Returns the line containing the recorded failure followed by a second line
# with a "^" placed under the failing column.
def failure_caret
  l = current_line @failing_rule_offset
  c = current_column @failing_rule_offset

  line = lines[l-1]

  # String#* raises ArgumentError for a negative count, so never indent by
  # less than zero.
  indent = [c - 1, 0].max
  "#{line}\n#{' ' * indent}^"
end
|
|
|
|
# Returns the single character at the recorded failure offset.
def failure_character
  # Read the character straight from the input, the same way show_error
  # does, instead of going through line and column arithmetic.
  string[@failing_rule_offset, 1]
end
|
|
|
|
# Returns a compact failure message of the form
# "@line:column failed rule 'name', got 'char'".
#
# A Symbol rule is looked up in the class's Rules table for its name; any
# other value is interpolated as-is.
def failure_oneline
  l = current_line @failing_rule_offset
  c = current_column @failing_rule_offset

  # Take the offending character directly from the input, as show_error
  # does, so it does not depend on the line/column computation.
  char = string[@failing_rule_offset, 1]

  if @failed_rule.kind_of? Symbol
    info = self.class::Rules[@failed_rule]
    "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
  else
    "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
  end
end
|
|
|
|
# Error raised by raise_error; its message is the failure_oneline text.
class ParseError < RuntimeError
end
|
|
|
|
# Raises ParseError with the failure_oneline description of the recorded
# failure.
def raise_error
  raise ParseError, failure_oneline
end
|
|
|
|
# Writes a multi-line report of the recorded failure to +io+ (STDOUT by
# default): the line and column, the rule that failed, the character found,
# the offending line prefixed with "=> ", and a "^" under the failing
# character.
def show_error(io=STDOUT)
  error_pos = @failing_rule_offset
  line_no = current_line(error_pos)
  col_no = current_column(error_pos)

  io.puts "On line #{line_no}, column #{col_no}:"

  if @failed_rule.kind_of? Symbol
    info = self.class::Rules[@failed_rule]
    io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
  else
    io.puts "Failed to match rule '#{@failed_rule}'"
  end

  io.puts "Got: #{string[error_pos,1].inspect}"
  line = lines[line_no-1]
  io.puts "=> #{line}"

  # Indent the caret by the width of the "=> " prefix plus the number of
  # characters between the start of the line and the failing offset.  The
  # line start is located directly from the offset; the search is skipped
  # for offsets <= 0 because a negative index makes String#rindex count
  # from the end of the string.
  line_start = 0
  if error_pos > 0 && (newline = string.rindex("\n", error_pos - 1))
    line_start = newline + 1
  end
  io.print(" " * (3 + error_pos - line_start))
  io.puts "^"
end
|
|
|
|
# Records +name+ as the failed rule, together with the current position,
# but only when the current position is strictly greater than the offset
# already recorded.  Failures at the same or an earlier position leave the
# recorded failure untouched.
def set_failed_rule(name)
  if @pos > @failing_rule_offset
    @failed_rule = name
    @failing_rule_offset = @pos
  end
end
|
|
|
|
# The rule recorded by set_failed_rule: a Symbol naming a rule method, or
# another label such as the "Class#rule" String passed by external_invoke.
attr_reader :failed_rule
|
|
|
|
# Tries to match the literal +str+ at the current position.
#
# On success advances the position past the match and returns +str+;
# otherwise returns nil and leaves the position unchanged.
def match_string(str)
  len = str.size
  if @string[@pos,len] == str
    @pos += len
    return str
  end

  return nil
end
|
|
|
|
# Matches the Regexp +reg+ against the rest of the input, starting at the
# current position.
#
# On success advances the position by the end offset of the match within
# that remainder and returns true; otherwise returns nil and leaves the
# position unchanged.
def scan(reg)
  if m = reg.match(@string[@pos..-1])
    width = m.end(0)
    @pos += width
    return true
  end

  return nil
end
|
|
|
|
# Defines #get_byte, which returns the value at the current position and
# advances the position by one, or returns nil at the end of the input.
# The implementation is chosen once, when the class body is evaluated.
if "".respond_to? :ord
  # Returns the Integer given by String#ord for the character at the
  # current position.
  def get_byte
    return nil if @pos >= @string_size

    s = @string[@pos].ord
    @pos += 1
    s
  end
else
  # Fallback for Strings without #ord: returns String#[] unconverted.
  # NOTE(review): presumably targets old Rubies where String#[] with an
  # Integer index already returned a number - confirm.
  def get_byte
    return nil if @pos >= @string_size

    s = @string[@pos]
    @pos += 1
    s
  end
end
|
|
|
|
# Parses the input starting from +rule+, or from the root rule when +rule+
# is nil, and returns what #apply returns.
#
# +rule+ is a rule name without the leading underscore; every "-" in it is
# replaced with "_hyphen_" to form the rule method name.
def parse(rule=nil)
  # We invoke the rules indirectly via apply
  # instead of by just calling them as methods because
  # if the rules use left recursion, apply needs to
  # manage that.

  if !rule
    apply(:_root)
  else
    rule_method = rule.gsub("-","_hyphen_")
    apply :"_#{rule_method}"
  end
end
|
|
|
|
# Memoized outcome of applying one rule at one input position.
#
# +ans+ is the rule's return value, +pos+ the position after the rule ran
# and +result+ the parser result at that point.  +set+ is false until
# #move! stores an outcome; +left_rec+ is set by the parser when the rule is
# re-entered at the same position before an outcome exists.
class MemoEntry
  def initialize(ans, pos)
    @ans = ans
    @pos = pos
    @result = nil
    @set = false
    @left_rec = false
  end

  attr_reader :ans, :pos, :result, :set
  attr_accessor :left_rec

  # Stores a new outcome, marks the entry as set and clears the
  # left-recursion flag.
  def move!(ans, pos, result)
    @ans = ans
    @pos = pos
    @result = result
    @set = true
    @left_rec = false
  end
end
|
|
|
|
# Runs this parser's +rule+ method (with +args+) on behalf of the parser
# +other+, using +other+'s string and position.
#
# When the rule succeeds, +other+'s position and result are updated from
# this parser; when it fails, "#{self.class}##{rule}" is reported to +other+
# through set_failed_rule.  This parser's own string and position are
# restored afterwards, even if the rule raises.  Returns the rule's value.
def external_invoke(other, rule, *args)
  old_pos = @pos
  old_string = @string

  set_string other.string, other.pos

  begin
    if val = __send__(rule, *args)
      other.pos = @pos
      other.result = @result
    else
      other.set_failed_rule "#{self.class}##{rule}"
    end
    val
  ensure
    set_string old_string, old_pos
  end
end
|
|
|
|
# Applies the rule method +rule+ with +args+ at the current position and
# returns its answer, memoizing the outcome per [rule, args] and position.
#
# A memoized outcome is replayed (position and result restored) without
# calling the rule again.  Re-entering a rule at a position where its memo
# entry has no outcome yet marks the entry as left-recursive and returns
# nil.
def apply_with_args(rule, *args)
  memo_key = [rule, args]
  if m = @memoizations[memo_key][@pos]
    @pos = m.pos
    if !m.set
      # The entry exists but holds no outcome: the rule is being re-entered
      # at the same position while still running.
      m.left_rec = true
      return nil
    end

    @result = m.result

    return m.ans
  else
    m = MemoEntry.new(nil, @pos)
    @memoizations[memo_key][@pos] = m
    start_pos = @pos

    ans = __send__ rule, *args

    # Read the flag before move!, which resets it.
    lr = m.left_rec

    m.move! ans, @pos, @result

    # Don't bother trying to grow the left recursion
    # if it's failing straight away (thus there is no seed)
    if ans and lr
      return grow_lr(rule, args, start_pos, m)
    else
      return ans
    end
  end
end
|
|
|
|
# Applies the argument-less rule method +rule+ at the current position and
# returns its answer, memoizing the outcome per rule and position.
#
# A memoized outcome is replayed (position and result restored) without
# calling the rule again.  Re-entering a rule at a position where its memo
# entry has no outcome yet marks the entry as left-recursive and returns
# nil.
def apply(rule)
  if m = @memoizations[rule][@pos]
    @pos = m.pos
    if !m.set
      # The entry exists but holds no outcome: the rule is being re-entered
      # at the same position while still running.
      m.left_rec = true
      return nil
    end

    @result = m.result

    return m.ans
  else
    m = MemoEntry.new(nil, @pos)
    @memoizations[rule][@pos] = m
    start_pos = @pos

    ans = __send__ rule

    # Read the flag before move!, which resets it.
    lr = m.left_rec

    m.move! ans, @pos, @result

    # Don't bother trying to grow the left recursion
    # if it's failing straight away (thus there is no seed)
    if ans and lr
      return grow_lr(rule, nil, start_pos, m)
    else
      return ans
    end
  end
end
|
|
|
|
# Repeatedly re-runs +rule+ from +start_pos+, seeding each run with the
# outcome stored in the memo entry +m+, for as long as each run ends at a
# later position than the stored one.
#
# +args+ is the rule's argument list, or nil for an argument-less rule.
# Returns nil as soon as a run fails; otherwise restores the position and
# result of the furthest outcome and returns its answer.
def grow_lr(rule, args, start_pos, m)
  while true
    @pos = start_pos
    @result = m.result

    if args
      ans = __send__ rule, *args
    else
      ans = __send__ rule
    end
    return nil unless ans

    # Stop once a run no longer gets past the stored position.
    break if @pos <= m.pos

    m.move! ans, @pos, @result
  end

  @result = m.result
  @pos = m.pos
  return m.ans
end
|
|
|
|
# Describes one grammar rule: its +name+ and its +rendered+ text, both used
# when building failure messages.
class RuleInfo
  def initialize(name, rendered)
    @name = name
    @rendered = rendered
  end

  attr_reader :name, :rendered
end
|
|
|
|
# Builds the RuleInfo for the rule called +name+ whose grammar text is
# +rendered+.
def self.rule_info(name, rendered)
  RuleInfo.new(name, rendered)
end
|
|
|
|
|
|
# :startdoc:
|
|
# :stopdoc:
|
|
# Hook called at the end of setup_parser; intentionally empty here.
def setup_foreign_grammar; end
|
|
|
|
# Alphanumeric = /\p{Word}/
#
# Consumes one character matching \p{Word} at the current position.
# Returns the value of #scan, recording :_Alphanumeric as the failed rule
# when there is no match.
def _Alphanumeric
  _tmp = scan(/\A(?-mix:\p{Word})/)
  set_failed_rule :_Alphanumeric unless _tmp
  return _tmp
end
|
|
|
|
# AlphanumericAscii = /[A-Za-z0-9]/
#
# Consumes one ASCII letter or digit at the current position.  Returns the
# value of #scan, recording :_AlphanumericAscii as the failed rule when
# there is no match.
def _AlphanumericAscii
  _tmp = scan(/\A(?-mix:[A-Za-z0-9])/)
  set_failed_rule :_AlphanumericAscii unless _tmp
  return _tmp
end
|
|
|
|
# BOM = "\uFEFF"
#
# Consumes a byte order mark (U+FEFF) at the current position.  Returns the
# value of #match_string, recording :_BOM as the failed rule when there is
# no match.
def _BOM
  # The \u escape is required: without the backslash the rule would look
  # for the five-character text "uFEFF" instead of the BOM character.
  _tmp = match_string("\uFEFF")
  set_failed_rule :_BOM unless _tmp
  return _tmp
end
|
|
|
|
# Newline = /\n|\r\n?|\p{Zl}|\p{Zp}/
#
# Consumes one line break at the current position: "\n", "\r" optionally
# followed by "\n", or a character matching \p{Zl} or \p{Zp}.  Returns the
# value of #scan, recording :_Newline as the failed rule when there is no
# match.
def _Newline
  _tmp = scan(/\A(?-mix:\n|\r\n?|\p{Zl}|\p{Zp})/)
  set_failed_rule :_Newline unless _tmp
  return _tmp
end
|
|
|
|
# NonAlphanumeric = /\p{^Word}/
#
# Consumes one character that does not match \p{Word} at the current
# position.  Returns the value of #scan, recording :_NonAlphanumeric as the
# failed rule when there is no match.
def _NonAlphanumeric
  _tmp = scan(/\A(?-mix:\p{^Word})/)
  set_failed_rule :_NonAlphanumeric unless _tmp
  return _tmp
end
|
|
|
|
# Spacechar = /\t|\p{Zs}/
#
# Consumes one tab or one character matching \p{Zs} at the current
# position.  Returns the value of #scan, recording :_Spacechar as the failed
# rule when there is no match.
def _Spacechar
  _tmp = scan(/\A(?-mix:\t|\p{Zs})/)
  set_failed_rule :_Spacechar unless _tmp
  return _tmp
end
|
|
|
|
# Maps each rule method name to the RuleInfo (grammar name and rendered rule
# text) used by the failure-reporting methods.
Rules = {}
Rules[:_Alphanumeric] = rule_info("Alphanumeric", "/\\p{Word}/")
Rules[:_AlphanumericAscii] = rule_info("AlphanumericAscii", "/[A-Za-z0-9]/")
# Rendered with the \u escape so failure messages show the real rule text
# rather than the text "uFEFF".
Rules[:_BOM] = rule_info("BOM", "\"\\uFEFF\"")
Rules[:_Newline] = rule_info("Newline", "/\\n|\\r\\n?|\\p{Zl}|\\p{Zp}/")
Rules[:_NonAlphanumeric] = rule_info("NonAlphanumeric", "/\\p{^Word}/")
Rules[:_Spacechar] = rule_info("Spacechar", "/\\t|\\p{Zs}/")
|
|
# :startdoc:
|
|
end
|