diff --git a/ext/racc/cparse/cparse.c b/ext/racc/cparse.c similarity index 99% rename from ext/racc/cparse/cparse.c rename to ext/racc/cparse.c index b4429ed5f3..cc0e8659be 100644 --- a/ext/racc/cparse/cparse.c +++ b/ext/racc/cparse.c @@ -11,7 +11,7 @@ */ -#include "ruby/ruby.h" +#include <ruby.h> #ifndef FALSE #define FALSE 0 @@ -24,7 +24,7 @@ Important Constants ----------------------------------------------------------------------- */ -#define RACC_VERSION "1.4.5" +#define RACC_VERSION "1.4.15" #define DEFAULT_TOKEN -1 #define ERROR_TOKEN 1 @@ -72,6 +72,10 @@ static ID id_d_e_pop; # define LONG2NUM(i) INT2NUM(i) #endif +#ifndef HAVE_RB_ARY_SUBSEQ +# define rb_ary_subseq(ary, beg, len) rb_ary_new4(len, RARRAY_PTR(ary) + beg) +#endif + static ID value_to_id _((VALUE v)); static inline long num_to_long _((VALUE n)); diff --git a/ext/racc/cparse/extconf.rb b/ext/racc/cparse/extconf.rb deleted file mode 100644 index dfddf57111..0000000000 --- a/ext/racc/cparse/extconf.rb +++ /dev/null @@ -1,6 +0,0 @@ -# frozen_string_literal: false -# $Id$ - -require 'mkmf' -have_func('rb_block_call', 'ruby/ruby.h') -create_makefile 'racc/cparse' diff --git a/ext/racc/cparse/depend b/ext/racc/depend similarity index 100% rename from ext/racc/cparse/depend rename to ext/racc/depend diff --git a/ext/racc/extconf.rb b/ext/racc/extconf.rb new file mode 100644 index 0000000000..d36b03b388 --- /dev/null +++ b/ext/racc/extconf.rb @@ -0,0 +1,7 @@ +# $Id: 1e30abedf4eea155815d1efa5500ec817b10a2ab $ + +require 'mkmf' + +have_func('rb_ary_subseq') + +create_makefile 'racc/cparse' diff --git a/lib/racc.rb b/lib/racc.rb new file mode 100644 index 0000000000..f6e4ac03a8 --- /dev/null +++ b/lib/racc.rb @@ -0,0 +1,6 @@ +require 'racc/compat' +require 'racc/debugflags' +require 'racc/grammar' +require 'racc/state' +require 'racc/exception' +require 'racc/info' diff --git a/lib/racc/compat.rb b/lib/racc/compat.rb new file mode 100644 index 0000000000..ccb033e2e0 --- /dev/null +++ b/lib/racc/compat.rb @@ -0,0 
+1,32 @@ +# +# $Id: 14fa1118eb3a23e85265e4f7afe2d5a297d69f9c $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +unless Object.method_defined?(:__send) + class Object + alias __send __send__ + end +end + +unless Object.method_defined?(:__send!) + class Object + alias __send! __send__ + end +end + +unless Array.method_defined?(:map!) + class Array + if Array.method_defined?(:collect!) + alias map! collect! + else + alias map! filter + end + end +end diff --git a/lib/racc/debugflags.rb b/lib/racc/debugflags.rb new file mode 100644 index 0000000000..1b5d2fe54c --- /dev/null +++ b/lib/racc/debugflags.rb @@ -0,0 +1,59 @@ +# +# $Id: 74ff4369ce53c7f45cfc2644ce907785104ebf6e $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of LGPL, see the file "COPYING". 
+# + +module Racc + + class DebugFlags + def DebugFlags.parse_option_string(s) + parse = rule = token = state = la = prec = conf = false + s.split(//).each do |ch| + case ch + when 'p' then parse = true + when 'r' then rule = true + when 't' then token = true + when 's' then state = true + when 'l' then la = true + when 'c' then prec = true + when 'o' then conf = true + else + raise "unknown debug flag char: #{ch.inspect}" + end + end + new(parse, rule, token, state, la, prec, conf) + end + + def initialize(parse = false, rule = false, token = false, state = false, + la = false, prec = false, conf = false) + @parse = parse + @rule = rule + @token = token + @state = state + @la = la + @prec = prec + @any = (parse || rule || token || state || la || prec) + @status_logging = conf + end + + attr_reader :parse + attr_reader :rule + attr_reader :token + attr_reader :state + attr_reader :la + attr_reader :prec + + def any? + @any + end + + attr_reader :status_logging + end + +end diff --git a/lib/racc/exception.rb b/lib/racc/exception.rb new file mode 100644 index 0000000000..bd46fcb323 --- /dev/null +++ b/lib/racc/exception.rb @@ -0,0 +1,13 @@ +# +# $Id: d5858363d1d4a4b5a2ca8d193b5153a49312188e $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +module Racc + class Error < StandardError; end + class CompileError < Error; end +end diff --git a/lib/racc/grammar.rb b/lib/racc/grammar.rb new file mode 100644 index 0000000000..e55785a3a0 --- /dev/null +++ b/lib/racc/grammar.rb @@ -0,0 +1,1113 @@ +# +# $Id: acc33b7e1fe05f28f2d271f1fb9f1c42e50705dc $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". 
+ +require 'racc/compat' +require 'racc/iset' +require 'racc/sourcetext' +require 'racc/logfilegenerator' +require 'racc/exception' +require 'forwardable' + +module Racc + + class Grammar + + def initialize(debug_flags = DebugFlags.new) + @symboltable = SymbolTable.new + @debug_symbol = debug_flags.token + @rules = [] # :: [Rule] + @start = nil + @n_expected_srconflicts = nil + @prec_table = [] + @prec_table_closed = false + @closed = false + @states = nil + end + + attr_reader :start + attr_reader :symboltable + attr_accessor :n_expected_srconflicts + + def [](x) + @rules[x] + end + + def each_rule(&block) + @rules.each(&block) + end + + alias each each_rule + + def each_index(&block) + @rules.each_index(&block) + end + + def each_with_index(&block) + @rules.each_with_index(&block) + end + + def size + @rules.size + end + + def to_s + "" + end + + extend Forwardable + + def_delegator "@symboltable", :each, :each_symbol + def_delegator "@symboltable", :each_terminal + def_delegator "@symboltable", :each_nonterminal + + def intern(value, dummy = false) + @symboltable.intern(value, dummy) + end + + def symbols + @symboltable.symbols + end + + def nonterminal_base + @symboltable.nt_base + end + + def useless_nonterminal_exist? + n_useless_nonterminals() != 0 + end + + def n_useless_nonterminals + @n_useless_nonterminals ||= + begin + n = 0 + @symboltable.each_nonterminal do |sym| + n += 1 if sym.useless? + end + n + end + end + + def useless_rule_exist? + n_useless_rules() != 0 + end + + def n_useless_rules + @n_useless_rules ||= + begin + n = 0 + each do |r| + n += 1 if r.useless? 
+ end + n + end + end + + def nfa + (@states ||= States.new(self)).nfa + end + + def dfa + (@states ||= States.new(self)).dfa + end + + alias states dfa + + def state_transition_table + states().state_transition_table + end + + def parser_class + states = states() # cache + if $DEBUG + srcfilename = caller(1).first.slice(/\A(.*?):/, 1) + begin + write_log srcfilename + ".output" + rescue SystemCallError + end + report = lambda {|s| $stderr.puts "racc: #{srcfilename}: #{s}" } + if states.should_report_srconflict? + report["#{states.n_srconflicts} shift/reduce conflicts"] + end + if states.rrconflict_exist? + report["#{states.n_rrconflicts} reduce/reduce conflicts"] + end + g = states.grammar + if g.useless_nonterminal_exist? + report["#{g.n_useless_nonterminals} useless nonterminals"] + end + if g.useless_rule_exist? + report["#{g.n_useless_rules} useless rules"] + end + end + states.state_transition_table.parser_class + end + + def write_log(path) + File.open(path, 'w') {|f| + LogFileGenerator.new(states()).output f + } + end + + # + # Grammar Definition Interface + # + + def add(rule) + raise ArgumentError, "rule added after the Grammar closed" if @closed + @rules.push rule + end + + def added?(sym) + @rules.detect {|r| r.target == sym } + end + + def start_symbol=(s) + raise CompileError, "start symbol set twice'" if @start + @start = s + end + + def declare_precedence(assoc, syms) + raise CompileError, "precedence table defined twice" if @prec_table_closed + @prec_table.push [assoc, syms] + end + + def end_precedence_declaration(reverse) + @prec_table_closed = true + return if @prec_table.empty? + table = reverse ? 
@prec_table.reverse : @prec_table + table.each_with_index do |(assoc, syms), idx| + syms.each do |sym| + sym.assoc = assoc + sym.precedence = idx + end + end + end + + # + # Dynamic Generation Interface + # + + def Grammar.define(&block) + env = DefinitionEnv.new + env.instance_eval(&block) + env.grammar + end + + class DefinitionEnv + def initialize + @grammar = Grammar.new + @seqs = Hash.new(0) + @delayed = [] + end + + def grammar + flush_delayed + @grammar.each do |rule| + if rule.specified_prec + rule.specified_prec = @grammar.intern(rule.specified_prec) + end + end + @grammar.init + @grammar + end + + def precedence_table(&block) + env = PrecedenceDefinitionEnv.new(@grammar) + env.instance_eval(&block) + @grammar.end_precedence_declaration env.reverse + end + + def method_missing(mid, *args, &block) + unless mid.to_s[-1,1] == '=' + super # raises NoMethodError + end + target = @grammar.intern(mid.to_s.chop.intern) + unless args.size == 1 + raise ArgumentError, "too many arguments for #{mid} (#{args.size} for 1)" + end + _add target, args.first + end + + def _add(target, x) + case x + when Sym + @delayed.each do |rule| + rule.replace x, target if rule.target == x + end + @grammar.symboltable.delete x + else + x.each_rule do |r| + r.target = target + @grammar.add r + end + end + flush_delayed + end + + def _delayed_add(rule) + @delayed.push rule + end + + def _added?(sym) + @grammar.added?(sym) or @delayed.detect {|r| r.target == sym } + end + + def flush_delayed + return if @delayed.empty? 
+ @delayed.each do |rule| + @grammar.add rule + end + @delayed.clear + end + + def seq(*list, &block) + Rule.new(nil, list.map {|x| _intern(x) }, UserAction.proc(block)) + end + + def null(&block) + seq(&block) + end + + def action(&block) + id = "@#{@seqs["action"] += 1}".intern + _delayed_add Rule.new(@grammar.intern(id), [], UserAction.proc(block)) + id + end + + alias _ action + + def option(sym, default = nil, &block) + _defmetasyntax("option", _intern(sym), block) {|target| + seq() { default } | seq(sym) + } + end + + def many(sym, &block) + _defmetasyntax("many", _intern(sym), block) {|target| + seq() { [] }\ + | seq(target, sym) {|list, x| list.push x; list } + } + end + + def many1(sym, &block) + _defmetasyntax("many1", _intern(sym), block) {|target| + seq(sym) {|x| [x] }\ + | seq(target, sym) {|list, x| list.push x; list } + } + end + + def separated_by(sep, sym, &block) + option(separated_by1(sep, sym), [], &block) + end + + def separated_by1(sep, sym, &block) + _defmetasyntax("separated_by1", _intern(sym), block) {|target| + seq(sym) {|x| [x] }\ + | seq(target, sep, sym) {|list, _, x| list.push x; list } + } + end + + def _intern(x) + case x + when Symbol, String + @grammar.intern(x) + when Racc::Sym + x + else + raise TypeError, "wrong type #{x.class} (expected Symbol/String/Racc::Sym)" + end + end + + private + + def _defmetasyntax(type, id, action, &block) + if action + idbase = "#{type}@#{id}-#{@seqs[type] += 1}" + target = _wrap(idbase, "#{idbase}-core", action) + _regist("#{idbase}-core", &block) + else + target = _regist("#{type}@#{id}", &block) + end + @grammar.intern(target) + end + + def _regist(target_name) + target = target_name.intern + unless _added?(@grammar.intern(target)) + yield(target).each_rule do |rule| + rule.target = @grammar.intern(target) + _delayed_add rule + end + end + target + end + + def _wrap(target_name, sym, block) + target = target_name.intern + _delayed_add Rule.new(@grammar.intern(target), + 
[@grammar.intern(sym.intern)], + UserAction.proc(block)) + target + end + end + + class PrecedenceDefinitionEnv + def initialize(g) + @grammar = g + @prechigh_seen = false + @preclow_seen = false + @reverse = false + end + + attr_reader :reverse + + def higher + if @prechigh_seen + raise CompileError, "prechigh used twice" + end + @prechigh_seen = true + end + + def lower + if @preclow_seen + raise CompileError, "preclow used twice" + end + if @prechigh_seen + @reverse = true + end + @preclow_seen = true + end + + def left(*syms) + @grammar.declare_precedence :Left, syms.map {|s| @grammar.intern(s) } + end + + def right(*syms) + @grammar.declare_precedence :Right, syms.map {|s| @grammar.intern(s) } + end + + def nonassoc(*syms) + @grammar.declare_precedence :Nonassoc, syms.map {|s| @grammar.intern(s)} + end + end + + # + # Computation + # + + def init + return if @closed + @closed = true + @start ||= @rules.map {|r| r.target }.detect {|sym| not sym.dummy? } + raise CompileError, 'no rule in input' if @rules.empty? + add_start_rule + @rules.freeze + fix_ident + compute_hash + compute_heads + determine_terminals + compute_nullable_0 + @symboltable.fix + compute_locate + @symboltable.each_nonterminal {|t| compute_expand t } + compute_nullable + compute_useless + end + + private + + def add_start_rule + r = Rule.new(@symboltable.dummy, + [@start, @symboltable.anchor, @symboltable.anchor], + UserAction.empty) + r.ident = 0 + r.hash = 0 + r.precedence = nil + @rules.unshift r + end + + # Rule#ident + # LocationPointer#ident + def fix_ident + @rules.each_with_index do |rule, idx| + rule.ident = idx + end + end + + # Rule#hash + def compute_hash + hash = 4 # size of dummy rule + @rules.each do |rule| + rule.hash = hash + hash += (rule.size + 1) + end + end + + # Sym#heads + def compute_heads + @rules.each do |rule| + rule.target.heads.push rule.ptrs[0] + end + end + + # Sym#terminal? + def determine_terminals + @symboltable.each do |s| + s.term = s.heads.empty? 
+ end + end + + # Sym#self_null? + def compute_nullable_0 + @symboltable.each do |s| + if s.terminal? + s.snull = false + else + s.snull = s.heads.any? {|loc| loc.reduce? } + end + end + end + + # Sym#locate + def compute_locate + @rules.each do |rule| + t = nil + rule.ptrs.each do |ptr| + unless ptr.reduce? + tok = ptr.dereference + tok.locate.push ptr + t = tok if tok.terminal? + end + end + rule.precedence = t + end + end + + # Sym#expand + def compute_expand(t) + puts "expand> #{t.to_s}" if @debug_symbol + t.expand = _compute_expand(t, ISet.new, []) + puts "expand< #{t.to_s}: #{t.expand.to_s}" if @debug_symbol + end + + def _compute_expand(t, set, lock) + if tmp = t.expand + set.update tmp + return set + end + tok = nil + set.update_a t.heads + t.heads.each do |ptr| + tok = ptr.dereference + if tok and tok.nonterminal? + unless lock[tok.ident] + lock[tok.ident] = true + _compute_expand tok, set, lock + end + end + end + set + end + + # Sym#nullable?, Rule#nullable? + def compute_nullable + @rules.each {|r| r.null = false } + @symboltable.each {|t| t.null = false } + r = @rules.dup + s = @symboltable.nonterminals + begin + rs = r.size + ss = s.size + check_rules_nullable r + check_symbols_nullable s + end until rs == r.size and ss == s.size + end + + def check_rules_nullable(rules) + rules.delete_if do |rule| + rule.null = true + rule.symbols.each do |t| + unless t.nullable? + rule.null = false + break + end + end + rule.nullable? + end + end + + def check_symbols_nullable(symbols) + symbols.delete_if do |sym| + sym.heads.each do |ptr| + if ptr.rule.nullable? + sym.null = true + break + end + end + sym.nullable? + end + end + + # Sym#useless?, Rule#useless? + # FIXME: what means "useless"? 
+ def compute_useless + @symboltable.each_terminal {|sym| sym.useless = false } + @symboltable.each_nonterminal {|sym| sym.useless = true } + @rules.each {|rule| rule.useless = true } + r = @rules.dup + s = @symboltable.nonterminals + begin + rs = r.size + ss = s.size + check_rules_useless r + check_symbols_useless s + end until r.size == rs and s.size == ss + end + + def check_rules_useless(rules) + rules.delete_if do |rule| + rule.useless = false + rule.symbols.each do |sym| + if sym.useless? + rule.useless = true + break + end + end + not rule.useless? + end + end + + def check_symbols_useless(s) + s.delete_if do |t| + t.heads.each do |ptr| + unless ptr.rule.useless? + t.useless = false + break + end + end + not t.useless? + end + end + + end # class Grammar + + + class Rule + + def initialize(target, syms, act) + @target = target + @symbols = syms + @action = act + @alternatives = [] + + @ident = nil + @hash = nil + @ptrs = nil + @precedence = nil + @specified_prec = nil + @null = nil + @useless = nil + end + + attr_accessor :target + attr_reader :symbols + attr_reader :action + + def |(x) + @alternatives.push x.rule + self + end + + def rule + self + end + + def each_rule(&block) + yield self + @alternatives.each(&block) + end + + attr_accessor :ident + + attr_reader :hash + attr_reader :ptrs + + def hash=(n) + @hash = n + ptrs = [] + @symbols.each_with_index do |sym, idx| + ptrs.push LocationPointer.new(self, idx, sym) + end + ptrs.push LocationPointer.new(self, @symbols.size, nil) + @ptrs = ptrs + end + + def precedence + @specified_prec || @precedence + end + + def precedence=(sym) + @precedence ||= sym + end + + def prec(sym, &block) + @specified_prec = sym + if block + unless @action.empty? 
+ raise CompileError, 'both of rule action block and prec block given' + end + @action = UserAction.proc(block) + end + self + end + + attr_accessor :specified_prec + + def nullable?() @null end + def null=(n) @null = n end + + def useless?() @useless end + def useless=(u) @useless = u end + + def inspect + "#" + end + + def ==(other) + other.kind_of?(Rule) and @ident == other.ident + end + + def [](idx) + @symbols[idx] + end + + def size + @symbols.size + end + + def empty? + @symbols.empty? + end + + def to_s + "#" + end + + def accept? + if tok = @symbols[-1] + tok.anchor? + else + false + end + end + + def each(&block) + @symbols.each(&block) + end + + def replace(src, dest) + @target = dest + @symbols = @symbols.map {|s| s == src ? dest : s } + end + + end # class Rule + + + class UserAction + + def UserAction.source_text(src) + new(src, nil) + end + + def UserAction.proc(pr = nil, &block) + if pr and block + raise ArgumentError, "both of argument and block given" + end + new(nil, pr || block) + end + + def UserAction.empty + new(nil, nil) + end + + private_class_method :new + + def initialize(src, proc) + @source = src + @proc = proc + end + + attr_reader :source + attr_reader :proc + + def source? + not @proc + end + + def proc? + not @source + end + + def empty? + not @proc and not @source + end + + def name + "{action type=#{@source || @proc || 'nil'}}" + end + + alias inspect name + + end + + + class OrMark + def initialize(lineno) + @lineno = lineno + end + + def name + '|' + end + + alias inspect name + + attr_reader :lineno + end + + + class Prec + def initialize(symbol, lineno) + @symbol = symbol + @lineno = lineno + end + + def name + "=#{@symbol}" + end + + alias inspect name + + attr_reader :symbol + attr_reader :lineno + end + + + # + # A set of rule and position in it's RHS. + # Note that the number of pointers is more than rule's RHS array, + # because pointer points right edge of the final symbol when reducing. 
+ # + class LocationPointer + + def initialize(rule, i, sym) + @rule = rule + @index = i + @symbol = sym + @ident = @rule.hash + i + @reduce = sym.nil? + end + + attr_reader :rule + attr_reader :index + attr_reader :symbol + + alias dereference symbol + + attr_reader :ident + alias hash ident + attr_reader :reduce + alias reduce? reduce + + def to_s + sprintf('(%d,%d %s)', + @rule.ident, @index, (reduce?() ? '#' : @symbol.to_s)) + end + + alias inspect to_s + + def eql?(ot) + @hash == ot.hash + end + + alias == eql? + + def head? + @index == 0 + end + + def next + @rule.ptrs[@index + 1] or ptr_bug! + end + + alias increment next + + def before(len) + @rule.ptrs[@index - len] or ptr_bug! + end + + private + + def ptr_bug! + raise "racc: fatal: pointer not exist: self: #{to_s}" + end + + end # class LocationPointer + + + class SymbolTable + + include Enumerable + + def initialize + @symbols = [] # :: [Racc::Sym] + @cache = {} # :: {(String|Symbol) => Racc::Sym} + @dummy = intern(:$start, true) + @anchor = intern(false, true) # Symbol ID = 0 + @error = intern(:error, false) # Symbol ID = 1 + end + + attr_reader :dummy + attr_reader :anchor + attr_reader :error + + def [](id) + @symbols[id] + end + + def intern(val, dummy = false) + @cache[val] ||= + begin + sym = Sym.new(val, dummy) + @symbols.push sym + sym + end + end + + attr_reader :symbols + alias to_a symbols + + def delete(sym) + @symbols.delete sym + @cache.delete sym.value + end + + attr_reader :nt_base + + def nt_max + @symbols.size + end + + def each(&block) + @symbols.each(&block) + end + + def terminals(&block) + @symbols[0, @nt_base] + end + + def each_terminal(&block) + @terms.each(&block) + end + + def nonterminals + @symbols[@nt_base, @symbols.size - @nt_base] + end + + def each_nonterminal(&block) + @nterms.each(&block) + end + + def fix + terms, nterms = @symbols.partition {|s| s.terminal? 
} + @symbols = terms + nterms + @terms = terms + @nterms = nterms + @nt_base = terms.size + fix_ident + check_terminals + end + + private + + def fix_ident + @symbols.each_with_index do |t, i| + t.ident = i + end + end + + def check_terminals + return unless @symbols.any? {|s| s.should_terminal? } + @anchor.should_terminal + @error.should_terminal + each_terminal do |t| + t.should_terminal if t.string_symbol? + end + each do |s| + s.should_terminal if s.assoc + end + terminals().reject {|t| t.should_terminal? }.each do |t| + raise CompileError, "terminal #{t} not declared as terminal" + end + nonterminals().select {|n| n.should_terminal? }.each do |n| + raise CompileError, "symbol #{n} declared as terminal but is not terminal" + end + end + + end # class SymbolTable + + + # Stands terminal and nonterminal symbols. + class Sym + + def initialize(value, dummyp) + @ident = nil + @value = value + @dummyp = dummyp + + @term = nil + @nterm = nil + @should_terminal = false + @precedence = nil + case value + when Symbol + @to_s = value.to_s + @serialized = value.inspect + @string = false + when String + @to_s = value.inspect + @serialized = value.dump + @string = true + when false + @to_s = '$end' + @serialized = 'false' + @string = false + when ErrorSymbolValue + @to_s = 'error' + @serialized = 'Object.new' + @string = false + else + raise ArgumentError, "unknown symbol value: #{value.class}" + end + + @heads = [] + @locate = [] + @snull = nil + @null = nil + @expand = nil + @useless = nil + end + + class << self + def once_writer(nm) + nm = nm.id2name + module_eval(<<-EOS) + def #{nm}=(v) + raise 'racc: fatal: @#{nm} != nil' unless @#{nm}.nil? + @#{nm} = v + end + EOS + end + end + + once_writer :ident + attr_reader :ident + + alias hash ident + + attr_reader :value + + def dummy? + @dummyp + end + + def terminal? + @term + end + + def nonterminal? + @nterm + end + + def term=(t) + raise 'racc: fatal: term= called twice' unless @term.nil? 
+ @term = t + @nterm = !t + end + + def should_terminal + @should_terminal = true + end + + def should_terminal? + @should_terminal + end + + def string_symbol? + @string + end + + def serialize + @serialized + end + + attr_writer :serialized + + attr_accessor :precedence + attr_accessor :assoc + + def to_s + @to_s.dup + end + + alias inspect to_s + + def |(x) + rule() | x.rule + end + + def rule + Rule.new(nil, [self], UserAction.empty) + end + + # + # cache + # + + attr_reader :heads + attr_reader :locate + + def self_null? + @snull + end + + once_writer :snull + + def nullable? + @null + end + + def null=(n) + @null = n + end + + attr_reader :expand + once_writer :expand + + def useless? + @useless + end + + def useless=(f) + @useless = f + end + + end # class Sym + +end # module Racc diff --git a/lib/racc/grammarfileparser.rb b/lib/racc/grammarfileparser.rb new file mode 100644 index 0000000000..7548a9ea37 --- /dev/null +++ b/lib/racc/grammarfileparser.rb @@ -0,0 +1,559 @@ +# +# $Id: 5e1871defa15d288d2252e6a76bb2c4cf2119ed3 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". 
+# + +require 'racc' +require 'racc/compat' +require 'racc/grammar' +require 'racc/parserfilegenerator' +require 'racc/sourcetext' +require 'stringio' + +module Racc + + grammar = Grammar.define { + g = self + + g.class = seq(:CLASS, :cname, many(:param), :RULE, :rules, option(:END)) + + g.cname = seq(:rubyconst) {|name| + @result.params.classname = name + }\ + | seq(:rubyconst, "<", :rubyconst) {|c, _, s| + @result.params.classname = c + @result.params.superclass = s + } + + g.rubyconst = separated_by1(:colon2, :SYMBOL) {|syms| + syms.map {|s| s.to_s }.join('::') + } + + g.colon2 = seq(':', ':') + + g.param = seq(:CONV, many1(:convdef), :END) {|*| + #@grammar.end_convert_block # FIXME + }\ + | seq(:PRECHIGH, many1(:precdef), :PRECLOW) {|*| + @grammar.end_precedence_declaration true + }\ + | seq(:PRECLOW, many1(:precdef), :PRECHIGH) {|*| + @grammar.end_precedence_declaration false + }\ + | seq(:START, :symbol) {|_, sym| + @grammar.start_symbol = sym + }\ + | seq(:TOKEN, :symbols) {|_, syms| + syms.each do |s| + s.should_terminal + end + }\ + | seq(:OPTION, :options) {|_, syms| + syms.each do |opt| + case opt + when 'result_var' + @result.params.result_var = true + when 'no_result_var' + @result.params.result_var = false + when 'omit_action_call' + @result.params.omit_action_call = true + when 'no_omit_action_call' + @result.params.omit_action_call = false + else + raise CompileError, "unknown option: #{opt}" + end + end + }\ + | seq(:EXPECT, :DIGIT) {|_, num| + if @grammar.n_expected_srconflicts + raise CompileError, "`expect' seen twice" + end + @grammar.n_expected_srconflicts = num + } + + g.convdef = seq(:symbol, :STRING) {|sym, code| + sym.serialized = code + } + + g.precdef = seq(:LEFT, :symbols) {|_, syms| + @grammar.declare_precedence :Left, syms + }\ + | seq(:RIGHT, :symbols) {|_, syms| + @grammar.declare_precedence :Right, syms + }\ + | seq(:NONASSOC, :symbols) {|_, syms| + @grammar.declare_precedence :Nonassoc, syms + } + + g.symbols = seq(:symbol) {|sym| 
+ [sym] + }\ + | seq(:symbols, :symbol) {|list, sym| + list.push sym + list + }\ + | seq(:symbols, "|") + + g.symbol = seq(:SYMBOL) {|sym| @grammar.intern(sym) }\ + | seq(:STRING) {|str| @grammar.intern(str) } + + g.options = many(:SYMBOL) {|syms| syms.map {|s| s.to_s } } + + g.rules = option(:rules_core) {|list| + add_rule_block list unless list.empty? + nil + } + + g.rules_core = seq(:symbol) {|sym| + [sym] + }\ + | seq(:rules_core, :rule_item) {|list, i| + list.push i + list + }\ + | seq(:rules_core, ';') {|list, *| + add_rule_block list unless list.empty? + list.clear + list + }\ + | seq(:rules_core, ':') {|list, *| + next_target = list.pop + add_rule_block list unless list.empty? + [next_target] + } + + g.rule_item = seq(:symbol)\ + | seq("|") {|*| + OrMark.new(@scanner.lineno) + }\ + | seq("=", :symbol) {|_, sym| + Prec.new(sym, @scanner.lineno) + }\ + | seq(:ACTION) {|src| + UserAction.source_text(src) + } + } + + GrammarFileParser = grammar.parser_class + + if grammar.states.srconflict_exist? + raise 'Racc boot script fatal: S/R conflict in build' + end + if grammar.states.rrconflict_exist? 
+ raise 'Racc boot script fatal: R/R conflict in build' + end + + class GrammarFileParser # reopen + + class Result + def initialize(grammar) + @grammar = grammar + @params = ParserFileGenerator::Params.new + end + + attr_reader :grammar + attr_reader :params + end + + def GrammarFileParser.parse_file(filename) + parse(File.read(filename), filename, 1) + end + + def GrammarFileParser.parse(src, filename = '-', lineno = 1) + new().parse(src, filename, lineno) + end + + def initialize(debug_flags = DebugFlags.new) + @yydebug = debug_flags.parse + end + + def parse(src, filename = '-', lineno = 1) + @filename = filename + @lineno = lineno + @scanner = GrammarFileScanner.new(src, @filename) + @scanner.debug = @yydebug + @grammar = Grammar.new + @result = Result.new(@grammar) + @embedded_action_seq = 0 + yyparse @scanner, :yylex + parse_user_code + @result.grammar.init + @result + end + + private + + def next_token + @scanner.scan + end + + def on_error(tok, val, _values) + if val.respond_to?(:id2name) + v = val.id2name + elsif val.kind_of?(String) + v = val + else + v = val.inspect + end + raise CompileError, "#{location()}: unexpected token '#{v}'" + end + + def location + "#{@filename}:#{@lineno - 1 + @scanner.lineno}" + end + + def add_rule_block(list) + sprec = nil + target = list.shift + case target + when OrMark, UserAction, Prec + raise CompileError, "#{target.lineno}: unexpected symbol #{target.name}" + end + curr = [] + list.each do |i| + case i + when OrMark + add_rule target, curr, sprec + curr = [] + sprec = nil + when Prec + raise CompileError, "'=' used twice in one rule" if sprec + sprec = i.symbol + else + curr.push i + end + end + add_rule target, curr, sprec + end + + def add_rule(target, list, sprec) + if list.last.kind_of?(UserAction) + act = list.pop + else + act = UserAction.empty + end + list.map! {|s| s.kind_of?(UserAction) ? 
embedded_action(s) : s } + rule = Rule.new(target, list, act) + rule.specified_prec = sprec + @grammar.add rule + end + + def embedded_action(act) + sym = @grammar.intern("@#{@embedded_action_seq += 1}".intern, true) + @grammar.add Rule.new(sym, [], act) + sym + end + + # + # User Code Block + # + + def parse_user_code + line = @scanner.lineno + _, *blocks = *@scanner.epilogue.split(/^----/) + blocks.each do |block| + header, *body = block.lines.to_a + label0, pathes = *header.sub(/\A-+/, '').split('=', 2) + label = canonical_label(label0) + (pathes ? pathes.strip.split(' ') : []).each do |path| + add_user_code label, SourceText.new(File.read(path), path, 1) + end + add_user_code label, SourceText.new(body.join(''), @filename, line + 1) + line += (1 + body.size) + end + end + + USER_CODE_LABELS = { + 'header' => :header, + 'prepare' => :header, # obsolete + 'inner' => :inner, + 'footer' => :footer, + 'driver' => :footer # obsolete + } + + def canonical_label(src) + label = src.to_s.strip.downcase.slice(/\w+/) + unless USER_CODE_LABELS.key?(label) + raise CompileError, "unknown user code type: #{label.inspect}" + end + label + end + + def add_user_code(label, src) + @result.params.send(USER_CODE_LABELS[label]).push src + end + + end + + + class GrammarFileScanner + + def initialize(str, filename = '-') + @lines = str.split(/\n|\r\n|\r/) + @filename = filename + @lineno = -1 + @line_head = true + @in_rule_blk = false + @in_conv_blk = false + @in_block = nil + @epilogue = '' + @debug = false + next_line + end + + attr_reader :epilogue + + def lineno + @lineno + 1 + end + + attr_accessor :debug + + def yylex(&block) + unless @debug + yylex0(&block) + else + yylex0 do |sym, tok| + $stderr.printf "%7d %-10s %s\n", lineno(), sym.inspect, tok.inspect + yield [sym, tok] + end + end + end + + private + + def yylex0 + begin + until @line.empty? 
+ @line.sub!(/\A\s+/, '') + if /\A\#/ =~ @line + break + elsif /\A\/\*/ =~ @line + skip_comment + elsif s = reads(/\A[a-zA-Z_]\w*/) + yield [atom_symbol(s), s.intern] + elsif s = reads(/\A\d+/) + yield [:DIGIT, s.to_i] + elsif ch = reads(/\A./) + case ch + when '"', "'" + yield [:STRING, eval(scan_quoted(ch))] + when '{' + lineno = lineno() + yield [:ACTION, SourceText.new(scan_action(), @filename, lineno)] + else + if ch == '|' + @line_head = false + end + yield [ch, ch] + end + else + end + end + end while next_line() + yield nil + end + + def next_line + @lineno += 1 + @line = @lines[@lineno] + if not @line or /\A----/ =~ @line + @epilogue = @lines.join("\n") + @lines.clear + @line = nil + if @in_block + @lineno -= 1 + scan_error! sprintf('unterminated %s', @in_block) + end + false + else + @line.sub!(/(?:\n|\r\n|\r)\z/, '') + @line_head = true + true + end + end + + ReservedWord = { + 'right' => :RIGHT, + 'left' => :LEFT, + 'nonassoc' => :NONASSOC, + 'preclow' => :PRECLOW, + 'prechigh' => :PRECHIGH, + 'token' => :TOKEN, + 'convert' => :CONV, + 'options' => :OPTION, + 'start' => :START, + 'expect' => :EXPECT, + 'class' => :CLASS, + 'rule' => :RULE, + 'end' => :END + } + + def atom_symbol(token) + if token == 'end' + symbol = :END + @in_conv_blk = false + @in_rule_blk = false + else + if @line_head and not @in_conv_blk and not @in_rule_blk + symbol = ReservedWord[token] || :SYMBOL + else + symbol = :SYMBOL + end + case symbol + when :RULE then @in_rule_blk = true + when :CONV then @in_conv_blk = true + end + end + @line_head = false + symbol + end + + def skip_comment + @in_block = 'comment' + until m = /\*\//.match(@line) + next_line + end + @line = m.post_match + @in_block = nil + end + + $raccs_print_type = false + + def scan_action + buf = '' + nest = 1 + pre = nil + @in_block = 'action' + begin + pre = nil + if s = reads(/\A\s+/) + # does not set 'pre' + buf << s + end + until @line.empty? 
+ if s = reads(/\A[^'"`{}%#\/\$]+/) + buf << (pre = s) + next + end + case ch = read(1) + when '{' + nest += 1 + buf << (pre = ch) + when '}' + nest -= 1 + if nest == 0 + @in_block = nil + return buf + end + buf << (pre = ch) + when '#' # comment + buf << ch << @line + break + when "'", '"', '`' + buf << (pre = scan_quoted(ch)) + when '%' + if literal_head? pre, @line + # % string, regexp, array + buf << ch + case ch = read(1) + when /[qQx]/n + buf << ch << (pre = scan_quoted(read(1), '%string')) + when /[wW]/n # %w / %W word arrays; ch is a single character, so a character class is required + buf << ch << (pre = scan_quoted(read(1), '%array')) + when /s/n + buf << ch << (pre = scan_quoted(read(1), '%symbol')) + when /r/n + buf << ch << (pre = scan_quoted(read(1), '%regexp')) + when /[a-zA-Z0-9= ]/n # does not include "_" + scan_error! "unknown type of % literal '%#{ch}'" + else + buf << (pre = scan_quoted(ch, '%string')) + end + else + # operator + buf << '||op->' if $raccs_print_type + buf << (pre = ch) + end + when '/' + if literal_head? pre, @line + # regexp + buf << (pre = scan_quoted(ch, 'regexp')) + else + # operator + buf << '||op->' if $raccs_print_type + buf << (pre = ch) + end + when '$' # gvar + buf << ch << (pre = read(1)) + else + raise 'racc: fatal: must not happen' + end + end + buf << "\n" + end while next_line() + raise 'racc: fatal: scan finished before parser finished' + end + + def literal_head?(pre, post) + (!pre || /[a-zA-Z_0-9]/n !~ pre[-1,1]) && + !post.empty? && /\A[\s\=]/n !~ post + end + + def read(len) + s = @line[0, len] + @line = @line[len .. 
-1] + s + end + + def reads(re) + m = re.match(@line) or return nil + @line = m.post_match + m[0] + end + + def scan_quoted(left, tag = 'string') + buf = left.dup + buf = "||#{tag}->" + buf if $raccs_print_type + re = get_quoted_re(left) + sv, @in_block = @in_block, tag + begin + if s = reads(re) + buf << s + break + else + buf << @line + end + end while next_line() + @in_block = sv + buf << "<-#{tag}||" if $raccs_print_type + buf + end + + LEFT_TO_RIGHT = { + '(' => ')', + '{' => '}', + '[' => ']', + '<' => '>' + } + + CACHE = {} + + def get_quoted_re(left) + term = Regexp.quote(LEFT_TO_RIGHT[left] || left) + CACHE[left] ||= /\A[^#{term}\\]*(?:\\.[^\\#{term}]*)*#{term}/ + end + + def scan_error!(msg) + raise CompileError, "#{lineno()}: #{msg}" + end + + end + +end # module Racc diff --git a/lib/racc/info.rb b/lib/racc/info.rb new file mode 100644 index 0000000000..0e61c3a393 --- /dev/null +++ b/lib/racc/info.rb @@ -0,0 +1,14 @@ +# +# $Id: 10d9595b388ab1ba061c08c038901ff632a0c3c3 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +module Racc + VERSION = '1.4.15' + Version = VERSION + Copyright = 'Copyright (c) 1999-2006 Minero Aoki' +end diff --git a/lib/racc/iset.rb b/lib/racc/iset.rb new file mode 100644 index 0000000000..a79e709f9c --- /dev/null +++ b/lib/racc/iset.rb @@ -0,0 +1,91 @@ +# +# $Id: de638608cfd72d3ed9819d87b65a89ee6a57b589 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +module Racc + + # An "indexed" set. All items must respond to :ident. 
+ class ISet + + def initialize(a = []) + @set = a + end + + attr_reader :set + + def add(i) + @set[i.ident] = i + end + + def [](key) + @set[key.ident] + end + + def []=(key, val) + @set[key.ident] = val + end + + alias include? [] + alias key? [] + + def update(other) + s = @set + o = other.set + o.each_index do |idx| + if t = o[idx] + s[idx] = t + end + end + end + + def update_a(a) + s = @set + a.each {|i| s[i.ident] = i } + end + + def delete(key) + i = @set[key.ident] + @set[key.ident] = nil + i + end + + def each(&block) + @set.compact.each(&block) + end + + def to_a + @set.compact + end + + def to_s + "[#{@set.compact.join(' ')}]" + end + + alias inspect to_s + + def size + @set.compact.size # counts occupied (non-nil) slots; Array#nitems was removed in Ruby 1.9 + end + + def empty? + @set.compact.empty? # true when every slot is nil or the array is empty + end + + def clear + @set.clear + end + + def dup + ISet.new(@set.dup) + end + + end # class ISet + +end # module Racc diff --git a/lib/racc/logfilegenerator.rb b/lib/racc/logfilegenerator.rb new file mode 100644 index 0000000000..b95b1afaa2 --- /dev/null +++ b/lib/racc/logfilegenerator.rb @@ -0,0 +1,211 @@ +# +# $Id: a7e9663605afdda065d305b250a9805e3bd3fa70 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". 
+# + +module Racc + + class LogFileGenerator + + def initialize(states, debug_flags = DebugFlags.new) + @states = states + @grammar = states.grammar + @debug_flags = debug_flags + end + + def output(out) + output_conflict out; out.puts + output_useless out; out.puts + output_rule out; out.puts + output_token out; out.puts + output_state out + end + + # + # Warnings + # + + def output_conflict(out) + @states.each do |state| + if state.srconf + out.printf "state %d contains %d shift/reduce conflicts\n", + state.stateid, state.srconf.size + end + if state.rrconf + out.printf "state %d contains %d reduce/reduce conflicts\n", + state.stateid, state.rrconf.size + end + end + end + + def output_useless(out) + @grammar.each do |rl| + if rl.useless? + out.printf "rule %d (%s) never reduced\n", + rl.ident, rl.target.to_s + end + end + @grammar.each_nonterminal do |t| + if t.useless? + out.printf "useless nonterminal %s\n", t.to_s + end + end + end + + # + # States + # + + def output_state(out) + out << "--------- State ---------\n" + + showall = @debug_flags.la || @debug_flags.state + @states.each do |state| + out << "\nstate #{state.ident}\n\n" + + (showall ? state.closure : state.core).each do |ptr| + pointer_out(out, ptr) if ptr.rule.ident != 0 or showall + end + out << "\n" + + action_out out, state + end + end + + def pointer_out(out, ptr) + buf = sprintf("%4d) %s :", ptr.rule.ident, ptr.rule.target.to_s) + ptr.rule.symbols.each_with_index do |tok, idx| + buf << ' _' if idx == ptr.index + buf << ' ' << tok.to_s + end + buf << ' _' if ptr.reduce? + out.puts buf + end + + def action_out(f, state) + sr = state.srconf && state.srconf.dup + rr = state.rrconf && state.rrconf.dup + acts = state.action + keys = acts.keys + keys.sort! 
{|a,b| a.ident <=> b.ident } + + [ Shift, Reduce, Error, Accept ].each do |klass| + keys.delete_if do |tok| + act = acts[tok] + if act.kind_of?(klass) + outact f, tok, act + if sr and c = sr.delete(tok) + outsrconf f, c + end + if rr and c = rr.delete(tok) + outrrconf f, c + end + + true + else + false + end + end + end + sr.each {|tok, c| outsrconf f, c } if sr + rr.each {|tok, c| outrrconf f, c } if rr + + act = state.defact + if not act.kind_of?(Error) or @debug_flags.any? + outact f, '$default', act + end + + f.puts + state.goto_table.each do |t, st| + if t.nonterminal? + f.printf " %-12s go to state %d\n", t.to_s, st.ident + end + end + end + + def outact(f, t, act) + case act + when Shift + f.printf " %-12s shift, and go to state %d\n", + t.to_s, act.goto_id + when Reduce + f.printf " %-12s reduce using rule %d (%s)\n", + t.to_s, act.ruleid, act.rule.target.to_s + when Accept + f.printf " %-12s accept\n", t.to_s + when Error + f.printf " %-12s error\n", t.to_s + else + raise "racc: fatal: wrong act for outact: act=#{act}(#{act.class})" + end + end + + def outsrconf(f, confs) + confs.each do |c| + r = c.reduce + f.printf " %-12s [reduce using rule %d (%s)]\n", + c.shift.to_s, r.ident, r.target.to_s + end + end + + def outrrconf(f, confs) + confs.each do |c| + r = c.low_prec + f.printf " %-12s [reduce using rule %d (%s)]\n", + c.token.to_s, r.ident, r.target.to_s + end + end + + # + # Rules + # + + def output_rule(out) + out.print "-------- Grammar --------\n\n" + @grammar.each do |rl| + if @debug_flags.any? 
or rl.ident != 0 + out.printf "rule %d %s: %s\n", + rl.ident, rl.target.to_s, rl.symbols.join(' ') + end + end + end + + # + # Tokens + # + + def output_token(out) + out.print "------- Symbols -------\n\n" + + out.print "**Nonterminals, with rules where they appear\n\n" + @grammar.each_nonterminal do |t| + tmp = <filename] [--output-file=filename] +# [-erubypath] [--embedded=rubypath] +# [-v] [--verbose] +# [-Ofilename] [--log-file=filename] +# [-g] [--debug] +# [-E] [--embedded] +# [-l] [--no-line-convert] +# [-c] [--line-convert-all] +# [-a] [--no-omit-actions] +# [-C] [--check-only] +# [-S] [--output-status] +# [--version] [--copyright] [--help] grammarfile +# +# [+filename+] +# Racc grammar file. Any extention is permitted. +# [-o+outfile+, --output-file=+outfile+] +# A filename for output. default is <+filename+>.tab.rb +# [-O+filename+, --log-file=+filename+] +# Place logging output in file +filename+. +# Default log file name is <+filename+>.output. +# [-e+rubypath+, --executable=+rubypath+] +# output executable file(mode 755). where +path+ is the Ruby interpreter. +# [-v, --verbose] +# verbose mode. create +filename+.output file, like yacc's y.output file. +# [-g, --debug] +# add debug code to parser class. To display debuggin information, +# use this '-g' option and set @yydebug true in parser class. +# [-E, --embedded] +# Output parser which doesn't need runtime files (racc/parser.rb). +# [-C, --check-only] +# Check syntax of racc grammer file and quit. +# [-S, --output-status] +# Print messages time to time while compiling. +# [-l, --no-line-convert] +# turns off line number converting. +# [-c, --line-convert-all] +# Convert line number of actions, inner, header and footer. +# [-a, --no-omit-actions] +# Call all actions, even if an action is empty. +# [--version] +# print Racc version and quit. +# [--copyright] +# Print copyright and quit. +# [--help] +# Print usage and quit. 
+# +# == Generating Parser Using Racc +# +# To compile a Racc grammar file, simply type: +# +# $ racc parse.y +# +# This creates the Ruby script file "parse.tab.rb". The -o option can change the output filename. +# +# == Writing A Racc Grammar File +# +# If you want your own parser, you have to write a grammar file. +# A grammar file contains the name of your parser class, grammar for the parser, +# user code, and anything else. +# When writing a grammar file, knowledge of yacc is helpful. +# If you have not used yacc before, Racc is not too difficult. +# +# Here's an example Racc grammar file. +# +# class Calcparser +# rule +# target: exp { print val[0] } +# +# exp: exp '+' exp +# | exp '*' exp +# | '(' exp ')' +# | NUMBER +# end +# +# Racc grammar files resemble yacc files. +# But (of course), this is Ruby code. +# yacc's $$ is the 'result', $0, $1... is +# an array called 'val', and $-1, $-2... is an array called '_values'. +# +# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for +# more information on grammar files. +# +# == Parser +# +# Then you must prepare the parse entry method. There are two types of +# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse. +# +# Racc::Parser#do_parse is simple. +# +# It's yyparse() of yacc, and Racc::Parser#next_token is yylex(). +# This method must return an array like [TOKENSYMBOL, ITS_VALUE]. +# EOF is [false, false]. +# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default. +# If you want to change this, see the grammar reference.) +# +# Racc::Parser#yyparse is a little complicated, but useful. +# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator. +# +# For example, yyparse(obj, :scan) +# calls +obj#scan+, and you can return tokens by yielding them from +obj#scan+. +# +# == Debugging +# +# When debugging, the "-v" and/or "-g" options are helpful. +# +# "-v" creates a verbose log file (.output). +# "-g" creates a "Verbose Parser". 
+# Verbose Parser prints the internal status when parsing. +# But it's _not_ automatic. +# You must use the -g option and set +@yydebug+ to +true+ in order to get output. +# The -g option only creates the verbose parser. +# +# === Racc reported syntax error. +# +# Aren't there too many "end"s? +# The grammar of racc files changed in v0.10. +# +# Racc does not use the '%' mark, while yacc uses a huge number of '%' marks. +# +# === Racc reported "XXXX conflicts". +# +# Try "racc -v xxxx.y". +# It makes racc produce its internal log file, xxxx.output. +# +# === Generated parsers do not work correctly +# +# Try "racc -g xxxx.y". +# This command lets racc generate a "debugging parser". +# Then set @yydebug=true in your parser. +# It produces a working log of your parser. +# +# == Re-distributing Racc runtime +# +# A parser, which is created by Racc, requires the Racc runtime module; +# racc/parser.rb. +# +# Ruby 1.8.x comes with the Racc runtime module, +# so you need NOT distribute Racc runtime files. +# +# If you want to include the Racc runtime module with your parser, +# this can be done by using the '-E' option: +# +# $ racc -E -omyparser.rb myparser.y +# +# This command creates myparser.rb which `includes' Racc runtime. +# All you must do is distribute your parser file (myparser.rb). +# +# Note: parser.rb is under the Ruby license, but your parser is not. +# Your own parser is completely yours. 
+module Racc + + unless defined?(Racc_No_Extentions) + Racc_No_Extentions = false # :nodoc: + end + + class Parser + + Racc_Runtime_Version = ::Racc::VERSION + Racc_Runtime_Revision = '$Id: 1c0ef52c0f41acc465725e9e44b5b9d74d392ba5 $' + + Racc_Runtime_Core_Version_R = ::Racc::VERSION + Racc_Runtime_Core_Revision_R = '$Id: 1c0ef52c0f41acc465725e9e44b5b9d74d392ba5 $'.split[1] + begin + if Object.const_defined?(:RUBY_ENGINE) and RUBY_ENGINE == 'jruby' + require 'racc/cparse-jruby.jar' + com.headius.racc.Cparse.new.load(JRuby.runtime, false) + else + require 'racc/cparse' + end + # Racc_Runtime_Core_Version_C = (defined in extention) + Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2] + unless new.respond_to?(:_racc_do_parse_c, true) + raise LoadError, 'old cparse.so' + end + if Racc_No_Extentions + raise LoadError, 'selecting ruby version of racc runtime core' + end + + Racc_Main_Parsing_Routine = :_racc_do_parse_c # :nodoc: + Racc_YY_Parse_Method = :_racc_yyparse_c # :nodoc: + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_C # :nodoc: + Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C # :nodoc: + Racc_Runtime_Type = 'c' # :nodoc: + rescue LoadError + warn $! if $DEBUG # falling back to the pure-Ruby core is a normal path; report the cause on stderr only under -d
+ warn $!.backtrace if $DEBUG + Racc_Main_Parsing_Routine = :_racc_do_parse_rb + Racc_YY_Parse_Method = :_racc_yyparse_rb + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R + Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_R + Racc_Runtime_Type = 'ruby' + end + + def Parser.racc_runtime_type # :nodoc: + Racc_Runtime_Type + end + + def _racc_setup + @yydebug = false unless self.class::Racc_debug_parser + @yydebug = false unless defined?(@yydebug) + if @yydebug + @racc_debug_out = $stderr unless defined?(@racc_debug_out) + @racc_debug_out ||= $stderr + end + arg = self.class::Racc_arg + arg[13] = true if arg.size < 14 + arg + end + + def _racc_init_sysvars + @racc_state = [0] + @racc_tstack = [] + @racc_vstack = [] + + @racc_t = nil + @racc_val = nil + + @racc_read_next = true + + @racc_user_yyerror = false + @racc_error_status = 0 + end + + # The entry point of the parser. This method is used with #next_token. + # If Racc wants to get token (and its value), calls next_token. + # + # Example: + # def parse + # @q = [[1,1], + # [2,2], + # [3,3], + # [false, '$']] + # do_parse + # end + # + # def next_token + # @q.shift + # end + def do_parse + __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) + end + + # The method to fetch next token. + # If you use #do_parse method, you must implement #next_token. + # + # The format of return value is [TOKEN_SYMBOL, VALUE]. + # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT + # for 'IDENT'. ";" (String) for ';'. + # + # The final symbol (End of file) must be false. 
+ def next_token + raise NotImplementedError, "#{self.class}\#next_token is not defined" + end + + def _racc_do_parse_rb(arg, in_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + tok = act = i = nil + + catch(:racc_end_parse) { + while true + if i = action_pointer[@racc_state[-1]] + if @racc_read_next + if @racc_t != 0 # not EOF + tok, @racc_val = next_token() + unless tok # EOF + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + racc_read_token(@racc_t, tok, @racc_val) if @yydebug + @racc_read_next = false + end + end + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + else + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + } + end + + # Another entry point for the parser. + # If you use this method, you must implement RECEIVER#METHOD_ID method. + # + # RECEIVER#METHOD_ID is a method to get next token. + # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. + def yyparse(recv, mid) + __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), false) + end + + def _racc_yyparse_rb(recv, mid, arg, c_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + + catch(:racc_end_parse) { + until i = action_pointer[@racc_state[-1]] + while act = _racc_evalact(action_default[@racc_state[-1]], arg) + ; + end + end + recv.__send__(mid) do |tok, val| + unless tok + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + @racc_val = val + @racc_read_next = false + + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + + while !(i = action_pointer[@racc_state[-1]]) || + ! 
@racc_read_next || + @racc_t == 0 # $ + unless i and i += @racc_t and + i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + end + } + end + + ### + ### common + ### + + def _racc_evalact(act, arg) + action_table, action_check, _, action_pointer, + _, _, _, _, + _, _, _, shift_n, + reduce_n, * = arg + nerr = 0 # tmp + + if act > 0 and act < shift_n + # + # shift + # + if @racc_error_status > 0 + @racc_error_status -= 1 unless @racc_t <= 1 # error token or EOF + end + @racc_vstack.push @racc_val + @racc_state.push act + @racc_read_next = true + if @yydebug + @racc_tstack.push @racc_t + racc_shift @racc_t, @racc_tstack, @racc_vstack + end + + elsif act < 0 and act > -reduce_n + # + # reduce + # + code = catch(:racc_jump) { + @racc_state.push _racc_do_reduce(arg, act) + false + } + if code + case code + when 1 # yyerror + @racc_user_yyerror = true # user_yyerror + return -reduce_n + when 2 # yyaccept + return shift_n + else + raise '[Racc Bug] unknown jump code' + end + end + + elsif act == shift_n + # + # accept + # + racc_accept if @yydebug + throw :racc_end_parse, @racc_vstack[0] + + elsif act == -reduce_n + # + # error + # + case @racc_error_status + when 0 + unless arg[21] # user_yyerror + nerr += 1 + on_error @racc_t, @racc_val, @racc_vstack + end + when 3 + if @racc_t == 0 # is $ + # We're at EOF, and another error occurred immediately after + # attempting auto-recovery + throw :racc_end_parse, nil + end + @racc_read_next = true + end + @racc_user_yyerror = false + @racc_error_status = 3 + while true + if i = action_pointer[@racc_state[-1]] + i += 1 # error token + if i >= 0 and + (act = action_table[i]) and + action_check[i] == @racc_state[-1] + break + end + end + throw :racc_end_parse, nil if @racc_state.size <= 1 + @racc_state.pop + @racc_vstack.pop + if @yydebug + @racc_tstack.pop + racc_e_pop @racc_state, @racc_tstack, @racc_vstack 
+ end + end + return act + + else + raise "[Racc Bug] unknown action #{act.inspect}" + end + + racc_next_state(@racc_state[-1], @racc_state) if @yydebug + + nil + end + + def _racc_do_reduce(arg, act) + _, _, _, _, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, _, _, + _, use_result, * = arg + + state = @racc_state + vstack = @racc_vstack + tstack = @racc_tstack + + i = act * -3 + len = reduce_table[i] + reduce_to = reduce_table[i+1] + method_id = reduce_table[i+2] + void_array = [] + + tmp_t = tstack[-len, len] if @yydebug + tmp_v = vstack[-len, len] + tstack[-len, len] = void_array if @yydebug + vstack[-len, len] = void_array + state[-len, len] = void_array + + # tstack must be updated AFTER method call + if use_result + vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0]) + else + vstack.push __send__(method_id, tmp_v, vstack) + end + tstack.push reduce_to + + racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug + + k1 = reduce_to - nt_base + if i = goto_pointer[k1] + i += state[-1] + if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1 + return curstate + end + end + goto_default[k1] + end + + # This method is called when a parse error is found. + # + # ERROR_TOKEN_ID is an internal ID of token which caused error. + # You can get string representation of this ID by calling + # #token_to_str. + # + # ERROR_VALUE is a value of error token. + # + # value_stack is a stack of symbol values. + # DO NOT MODIFY this object. + # + # This method raises ParseError by default. + # + # If this method returns, parsers enter "error recovering mode". + def on_error(t, val, vstack) + raise ParseError, sprintf("\nparse error on value %s (%s)", + val.inspect, token_to_str(t) || '?') + end + + # Enter error recovering mode. + # This method does not call #on_error. + def yyerror + throw :racc_jump, 1 + end + + # Exit parser. + # Return value is Symbol_Value_Stack[0]. 
+ def yyaccept + throw :racc_jump, 2 + end + + # Leave error recovering mode. + def yyerrok + @racc_error_status = 0 + end + + # For debugging output + def racc_read_token(t, tok, val) + @racc_debug_out.print 'read ' + @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') ' + @racc_debug_out.puts val.inspect + @racc_debug_out.puts + end + + def racc_shift(tok, tstack, vstack) + @racc_debug_out.puts "shift #{racc_token2str tok}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_reduce(toks, sim, tstack, vstack) + out = @racc_debug_out + out.print 'reduce ' + if toks.empty? + out.print ' ' + else + toks.each {|t| out.print ' ', racc_token2str(t) } + end + out.puts " --> #{racc_token2str(sim)}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_accept + @racc_debug_out.puts 'accept' + @racc_debug_out.puts + end + + def racc_e_pop(state, tstack, vstack) + @racc_debug_out.puts 'error recovering mode: pop token' + racc_print_states state + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_next_state(curstate, state) + @racc_debug_out.puts "goto #{curstate}" + racc_print_states state + @racc_debug_out.puts + end + + def racc_print_stacks(t, v) + out = @racc_debug_out + out.print ' [' + t.each_index do |i| + out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')' + end + out.puts ' ]' + end + + def racc_print_states(s) + out = @racc_debug_out + out.print ' [' + s.each {|st| out.print ' ', st } + out.puts ' ]' + end + + def racc_token2str(tok) + self.class::Racc_token_to_s_table[tok] or + raise "[Racc Bug] can't convert token #{tok} to string" + end + + # Convert internal ID of token symbol to the string. 
+ def token_to_str(t) + self.class::Racc_token_to_s_table[t] + end + + end + +end + +__end_of_file__ +end diff --git a/lib/racc/parser.rb b/lib/racc/parser.rb index 0cdb42e49d..41740ade39 100644 --- a/lib/racc/parser.rb +++ b/lib/racc/parser.rb @@ -1,7 +1,5 @@ # frozen_string_literal: false #-- -# $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $ -# # Copyright (c) 1999-2006 Minero Aoki # # This program is free software. @@ -12,6 +10,12 @@ # without restriction. #++ +require 'racc/info' + +unless defined?(NotImplementedError) + NotImplementedError = NotImplementError # :nodoc: +end + module Racc class ParseError < StandardError; end end @@ -49,12 +53,12 @@ end # [-v, --verbose] # verbose mode. create +filename+.output file, like yacc's y.output file. # [-g, --debug] -# add debug code to parser class. To display debugging information, +# add debug code to parser class. To display debuggin information, # use this '-g' option and set @yydebug true in parser class. # [-E, --embedded] # Output parser which doesn't need runtime files (racc/parser.rb). # [-C, --check-only] -# Check syntax of racc grammar file and quit. +# Check syntax of racc grammer file and quit. # [-S, --output-status] # Print messages time to time while compiling. # [-l, --no-line-convert] @@ -171,29 +175,34 @@ end # This command creates myparser.rb which `includes' Racc runtime. # Only you must do is to distribute your parser file (myparser.rb). # -# Note: parser.rb is LGPL, but your parser is not. +# Note: parser.rb is ruby license, but your parser is not. # Your own parser is completely yours. 
module Racc - unless defined?(Racc_No_Extensions) - Racc_No_Extensions = false # :nodoc: + unless defined?(Racc_No_Extentions) + Racc_No_Extentions = false # :nodoc: end class Parser - Racc_Runtime_Version = '1.4.6' - Racc_Runtime_Revision = %w$originalRevision: 1.8 $[1] + Racc_Runtime_Version = ::Racc::VERSION + Racc_Runtime_Revision = '$Id: 87af5c09d4467cae567837b4162ec2145417a90e $' - Racc_Runtime_Core_Version_R = '1.4.6' - Racc_Runtime_Core_Revision_R = %w$originalRevision: 1.8 $[1] + Racc_Runtime_Core_Version_R = ::Racc::VERSION + Racc_Runtime_Core_Revision_R = '$Id: 87af5c09d4467cae567837b4162ec2145417a90e $'.split[1] begin - require 'racc/cparse' - # Racc_Runtime_Core_Version_C = (defined in extension) + if Object.const_defined?(:RUBY_ENGINE) and RUBY_ENGINE == 'jruby' + require 'racc/cparse-jruby.jar' + com.headius.racc.Cparse.new.load(JRuby.runtime, false) + else + require 'racc/cparse' + end + # Racc_Runtime_Core_Version_C = (defined in extention) Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2] unless new.respond_to?(:_racc_do_parse_c, true) raise LoadError, 'old cparse.so' end - if Racc_No_Extensions + if Racc_No_Extentions raise LoadError, 'selecting ruby version of racc runtime core' end @@ -203,6 +212,8 @@ module Racc Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C # :nodoc: Racc_Runtime_Type = 'c' # :nodoc: rescue LoadError +puts $! +puts $!.backtrace Racc_Main_Parsing_Routine = :_racc_do_parse_rb Racc_YY_Parse_Method = :_racc_yyparse_rb Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R @@ -255,9 +266,11 @@ module Racc # def next_token # @q.shift # end + class_eval %{ def do_parse - __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) + #{Racc_Main_Parsing_Routine}(_racc_setup(), false) end + } # The method to fetch next token. # If you use #do_parse method, you must implement #next_token. 
@@ -274,8 +287,7 @@ module Racc def _racc_do_parse_rb(arg, in_debug) action_table, action_check, action_default, action_pointer, _, _, _, _, - _, _, token_table, _, - _, _, * = arg + _, _, token_table, * = arg _racc_init_sysvars tok = act = i = nil @@ -316,19 +328,18 @@ module Racc # # RECEIVER#METHOD_ID is a method to get next token. # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. + class_eval %{ def yyparse(recv, mid) - __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true) + #{Racc_YY_Parse_Method}(recv, mid, _racc_setup(), true) end + } def _racc_yyparse_rb(recv, mid, arg, c_debug) action_table, action_check, action_default, action_pointer, - _, _, _, _, - _, _, token_table, _, - _, _, * = arg + _, _, _, _, + _, _, token_table, * = arg _racc_init_sysvars - act = nil - i = nil catch(:racc_end_parse) { until i = action_pointer[@racc_state[-1]] @@ -355,9 +366,9 @@ module Racc ; end - while not(i = action_pointer[@racc_state[-1]]) or - not @racc_read_next or - @racc_t == 0 # $ + while !(i = action_pointer[@racc_state[-1]]) || + ! 
@racc_read_next || + @racc_t == 0 # $ unless i and i += @racc_t and i >= 0 and act = action_table[i] and @@ -378,16 +389,17 @@ module Racc def _racc_evalact(act, arg) action_table, action_check, _, action_pointer, - _, _, _, _, - _, _, _, shift_n, reduce_n, - _, _, * = arg + _, _, _, _, + _, _, _, shift_n, + reduce_n, * = arg + nerr = 0 # tmp if act > 0 and act < shift_n # # shift # if @racc_error_status > 0 - @racc_error_status -= 1 unless @racc_t == 1 # error token + @racc_error_status -= 1 unless @racc_t <= 1 # error token or EOF end @racc_vstack.push @racc_val @racc_state.push act @@ -431,10 +443,13 @@ module Racc case @racc_error_status when 0 unless arg[21] # user_yyerror + nerr += 1 on_error @racc_t, @racc_val, @racc_vstack end when 3 if @racc_t == 0 # is $ + # We're at EOF, and another error occurred immediately after + # attempting auto-recovery throw :racc_end_parse, nil end @racc_read_next = true @@ -470,10 +485,11 @@ module Racc end def _racc_do_reduce(arg, act) - _, _, _, _, - goto_table, goto_check, goto_default, goto_pointer, - nt_base, reduce_table, _, _, - _, use_result, * = arg + _, _, _, _, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, _, _, + _, use_result, * = arg + state = @racc_state vstack = @racc_vstack tstack = @racc_tstack @@ -569,7 +585,6 @@ module Racc toks.each {|t| out.print ' ', racc_token2str(t) } end out.puts " --> #{racc_token2str(sim)}" - racc_print_stacks tstack, vstack @racc_debug_out.puts end diff --git a/lib/racc/parserfilegenerator.rb b/lib/racc/parserfilegenerator.rb new file mode 100644 index 0000000000..f082144854 --- /dev/null +++ b/lib/racc/parserfilegenerator.rb @@ -0,0 +1,510 @@ +# +# $Id: 19fb5debfd07d70f6bc2ddc79ef43fbb3d27f15e $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". 
+ +require 'enumerator' +require 'racc/compat' +require 'racc/sourcetext' +require 'racc/parser-text' +require 'rbconfig' + +module Racc + + class ParserFileGenerator + + class Params + def self.bool_attr(name) + module_eval(<<-End) + def #{name}? + @#{name} + end + + def #{name}=(b) + @#{name} = b + end + End + end + + attr_accessor :filename + attr_accessor :classname + attr_accessor :superclass + bool_attr :omit_action_call + bool_attr :result_var + attr_accessor :header + attr_accessor :inner + attr_accessor :footer + + bool_attr :debug_parser + bool_attr :convert_line + bool_attr :convert_line_all + bool_attr :embed_runtime + bool_attr :make_executable + attr_accessor :interpreter + + def initialize + # Parameters derived from parser + self.filename = nil + self.classname = nil + self.superclass = 'Racc::Parser' + self.omit_action_call = true + self.result_var = true + self.header = [] + self.inner = [] + self.footer = [] + + # Parameters derived from command line options + self.debug_parser = false + self.convert_line = true + self.convert_line_all = false + self.embed_runtime = false + self.make_executable = false + self.interpreter = nil + end + end + + def initialize(states, params) + @states = states + @grammar = states.grammar + @params = params + end + + def generate_parser + string_io = StringIO.new + + init_line_conversion_system + @f = string_io + parser_file + + string_io.rewind + string_io.read + end + + def generate_parser_file(destpath) + init_line_conversion_system + File.open(destpath, 'w') {|f| + @f = f + parser_file + } + File.chmod 0755, destpath if @params.make_executable? + end + + private + + def parser_file + shebang @params.interpreter if @params.make_executable? + notice + line + if @params.embed_runtime? 
+ embed_library runtime_source() + else + require 'racc/parser.rb' + end + header + parser_class(@params.classname, @params.superclass) { + inner + state_transition_table + } + footer + end + + c = ::RbConfig::CONFIG + RUBY_PATH = "#{c['bindir']}/#{c['ruby_install_name']}#{c['EXEEXT']}" + + def shebang(path) + line '#!' + (path == 'ruby' ? RUBY_PATH : path) + end + + def notice + line %q[#] + line %q[# DO NOT MODIFY!!!!] + line %Q[# This file is automatically generated by Racc #{Racc::Version}] + line %Q[# from Racc grammer file "#{@params.filename}".] + line %q[#] + end + + def runtime_source + SourceText.new(::Racc::PARSER_TEXT, 'racc/parser.rb', 1) + end + + def embed_library(src) + line %[###### #{src.filename} begin] + line %[unless $".index '#{src.filename}'] + line %[$".push '#{src.filename}'] + put src, @params.convert_line? + line %[end] + line %[###### #{src.filename} end] + end + + def require(feature) + line "require '#{feature}'" + end + + def parser_class(classname, superclass) + mods = classname.split('::') + classid = mods.pop + mods.each do |mod| + indent; line "module #{mod}" + cref_push mod + end + indent; line "class #{classid} < #{superclass}" + cref_push classid + yield + cref_pop + indent; line "end \# class #{classid}" + mods.reverse_each do |mod| + indent; line "end \# module #{mod}" + cref_pop + end + end + + def header + @params.header.each do |src| + line + put src, @params.convert_line_all? + end + end + + def inner + @params.inner.each do |src| + line + put src, @params.convert_line? + end + end + + def footer + @params.footer.each do |src| + line + put src, @params.convert_line_all? 
+ end + end + + # Low Level Routines + + def put(src, convert_line = false) + if convert_line + replace_location(src) { + @f.puts src.text + } + else + @f.puts src.text + end + end + + def line(str = '') + @f.puts str + end + + def init_line_conversion_system + @cref = [] + @used_separator = {} + end + + def cref_push(name) + @cref.push name + end + + def cref_pop + @cref.pop + end + + def indent + @f.print ' ' * @cref.size + end + + def toplevel? + @cref.empty? + end + + def replace_location(src) + sep = make_separator(src) + @f.print 'self.class.' if toplevel? + @f.puts "module_eval(<<'#{sep}', '#{src.filename}', #{src.lineno})" + yield + @f.puts sep + end + + def make_separator(src) + sep = unique_separator(src.filename) + sep *= 2 while src.text.index(sep) + sep + end + + def unique_separator(id) + sep = "...end #{id}/module_eval..." + while @used_separator.key?(sep) + sep.concat sprintf('%02x', rand(255)) + end + @used_separator[sep] = true + sep + end + + # + # State Transition Table Serialization + # + + public + + def put_state_transition_table(f) + @f = f + state_transition_table + end + + private + + def state_transition_table + table = @states.state_transition_table + table.use_result_var = @params.result_var? + table.debug_parser = @params.debug_parser? 
+ + line "##### State transition tables begin ###" + line + integer_list 'racc_action_table', table.action_table + line + integer_list 'racc_action_check', table.action_check + line + integer_list 'racc_action_pointer', table.action_pointer + line + integer_list 'racc_action_default', table.action_default + line + integer_list 'racc_goto_table', table.goto_table + line + integer_list 'racc_goto_check', table.goto_check + line + integer_list 'racc_goto_pointer', table.goto_pointer + line + integer_list 'racc_goto_default', table.goto_default + line + i_i_sym_list 'racc_reduce_table', table.reduce_table + line + line "racc_reduce_n = #{table.reduce_n}" + line + line "racc_shift_n = #{table.shift_n}" + line + sym_int_hash 'racc_token_table', table.token_table + line + line "racc_nt_base = #{table.nt_base}" + line + line "racc_use_result_var = #{table.use_result_var}" + line + @f.print(unindent_auto(<<-End)) + Racc_arg = [ + racc_action_table, + racc_action_check, + racc_action_default, + racc_action_pointer, + racc_goto_table, + racc_goto_check, + racc_goto_default, + racc_goto_pointer, + racc_nt_base, + racc_reduce_table, + racc_token_table, + racc_shift_n, + racc_reduce_n, + racc_use_result_var ] + End + line + string_list 'Racc_token_to_s_table', table.token_to_s_table + line + line "Racc_debug_parser = #{table.debug_parser}" + line + line '##### State transition tables end #####' + actions + end + + def integer_list(name, table) + if table.size > 2000 + serialize_integer_list_compressed name, table + else + serialize_integer_list_std name, table + end + end + + def serialize_integer_list_compressed(name, table) + # TODO: this can be made a LOT more clean with a simple split/map + sep = "\n" + nsep = ",\n" + buf = '' + com = '' + ncom = ',' + co = com + @f.print 'clist = [' + table.each do |i| + buf << co << i.to_s; co = ncom + if buf.size > 66 + @f.print sep; sep = nsep + @f.print "'", buf, "'" + buf = '' + co = com + end + end + unless buf.empty? 
+ @f.print sep + @f.print "'", buf, "'" + end + line ' ]' + + @f.print(<<-End) + #{name} = arr = ::Array.new(#{table.size}, nil) + idx = 0 + clist.each do |str| + str.split(',', -1).each do |i| + arr[idx] = i.to_i unless i.empty? + idx += 1 + end + end + End + end + + def serialize_integer_list_std(name, table) + sep = '' + line "#{name} = [" + table.each_slice(10) do |ns| + @f.print sep; sep = ",\n" + @f.print ns.map {|n| sprintf('%6s', n ? n.to_s : 'nil') }.join(',') + end + line ' ]' + end + + def i_i_sym_list(name, table) + sep = '' + line "#{name} = [" + table.each_slice(3) do |len, target, mid| + @f.print sep; sep = ",\n" + @f.printf ' %d, %d, %s', len, target, mid.inspect + end + line " ]" + end + + def sym_int_hash(name, h) + sep = "\n" + @f.print "#{name} = {" + h.to_a.sort_by {|sym, i| i }.each do |sym, i| + @f.print sep; sep = ",\n" + @f.printf " %s => %d", sym.serialize, i + end + line " }" + end + + def string_list(name, list) + sep = " " + line "#{name} = [" + list.each do |s| + @f.print sep; sep = ",\n " + @f.print s.dump + end + line ' ]' + end + + def actions + @grammar.each do |rule| + unless rule.action.source? + raise "racc: fatal: cannot generate parser file when any action is a Proc" + end + end + + if @params.result_var? + decl = ', result' + retval = "\n result" + default_body = '' + else + decl = '' + retval = '' + default_body = 'val[0]' + end + @grammar.each do |rule| + line + if rule.action.empty? and @params.omit_action_call? + line "# reduce #{rule.ident} omitted" + else + src0 = rule.action.source || SourceText.new(default_body, __FILE__, 0) + if @params.convert_line? 
+ src = remove_blank_lines(src0) + delim = make_delimiter(src.text) + @f.printf unindent_auto(<<-End), + module_eval(<<'%s', '%s', %d) + def _reduce_%d(val, _values%s) + %s%s + end + %s + End + delim, src.filename, src.lineno - 1, + rule.ident, decl, + src.text, retval, + delim + else + src = remove_blank_lines(src0) + @f.printf unindent_auto(<<-End), + def _reduce_%d(val, _values%s) + %s%s + end + End + rule.ident, decl, + src.text, retval + end + end + end + line + @f.printf unindent_auto(<<-'End'), decl + def _reduce_none(val, _values%s) + val[0] + end + End + line + end + + def remove_blank_lines(src) + body = src.text.dup + line = src.lineno + while body.slice!(/\A[ \t\f]*(?:\n|\r\n|\r)/) + line += 1 + end + SourceText.new(body, src.filename, line) + end + + def make_delimiter(body) + delim = '.,.,' + while body.index(delim) + delim *= 2 + end + delim + end + + def unindent_auto(str) + lines = str.lines.to_a + n = minimum_indent(lines) + lines.map {|line| detab(line).sub(indent_re(n), '').rstrip + "\n" }.join('') + end + + def minimum_indent(lines) + lines.map {|line| n_indent(line) }.min + end + + def n_indent(line) + line.slice(/\A\s+/).size + end + + RE_CACHE = {} + + def indent_re(n) + RE_CACHE[n] ||= /\A {#{n}}/ + end + + def detab(str, ts = 8) + add = 0 + len = nil + str.gsub(/\t/) { + len = ts - ($`.size + add) % ts + add += len - 1 + ' ' * len + } + end + + end + +end diff --git a/lib/racc/pre-setup b/lib/racc/pre-setup new file mode 100644 index 0000000000..5027d865b7 --- /dev/null +++ b/lib/racc/pre-setup @@ -0,0 +1,13 @@ +def generate_parser_text_rb(target) + return if File.exist?(srcfile(target)) + $stderr.puts "generating #{target}..." 
+ File.open(target, 'w') {|f| + f.puts "module Racc" + f.puts " PARSER_TEXT = <<'__end_of_file__'" + f.puts File.read(srcfile('parser.rb')) + f.puts "__end_of_file__" + f.puts "end" + } +end + +generate_parser_text_rb 'parser-text.rb' diff --git a/lib/racc/sourcetext.rb b/lib/racc/sourcetext.rb new file mode 100644 index 0000000000..b33ba29291 --- /dev/null +++ b/lib/racc/sourcetext.rb @@ -0,0 +1,34 @@ +# +# $Id: 3b2d89d9ada2f5fcb043837dcc5c9631856d5b70 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of LGPL, see the file "COPYING". +# + +module Racc + + class SourceText + def initialize(text, filename, lineno) + @text = text + @filename = filename + @lineno = lineno + end + + attr_reader :text + attr_reader :filename + attr_reader :lineno + + def to_s + "#" + end + + def location + "#{@filename}:#{@lineno}" + end + end + +end diff --git a/lib/racc/state.rb b/lib/racc/state.rb new file mode 100644 index 0000000000..347a74329a --- /dev/null +++ b/lib/racc/state.rb @@ -0,0 +1,969 @@ +# +# $Id: a101d6acb72abc392f7757cda89bf6f0a683a43d $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +require 'racc/iset' +require 'racc/statetransitiontable' +require 'racc/exception' +require 'forwardable' + +module Racc + + # A table of LALR states. 
+ class States + + include Enumerable + + def initialize(grammar, debug_flags = DebugFlags.new) + @grammar = grammar + @symboltable = grammar.symboltable + @d_state = debug_flags.state + @d_la = debug_flags.la + @d_prec = debug_flags.prec + @states = [] + @statecache = {} + @actions = ActionTable.new(@grammar, self) + @nfa_computed = false + @dfa_computed = false + end + + attr_reader :grammar + attr_reader :actions + + def size + @states.size + end + + def inspect + '#' + end + + alias to_s inspect + + def [](i) + @states[i] + end + + def each_state(&block) + @states.each(&block) + end + + alias each each_state + + def each_index(&block) + @states.each_index(&block) + end + + extend Forwardable + + def_delegator "@actions", :shift_n + def_delegator "@actions", :reduce_n + def_delegator "@actions", :nt_base + + def should_report_srconflict? + srconflict_exist? and + (n_srconflicts() != @grammar.n_expected_srconflicts) + end + + def srconflict_exist? + n_srconflicts() != 0 + end + + def n_srconflicts + @n_srconflicts ||= inject(0) {|sum, st| sum + st.n_srconflicts } + end + + def rrconflict_exist? 
+ n_rrconflicts() != 0 + end + + def n_rrconflicts + @n_rrconflicts ||= inject(0) {|sum, st| sum + st.n_rrconflicts } + end + + def state_transition_table + @state_transition_table ||= StateTransitionTable.generate(self.dfa) + end + + # + # NFA (Non-deterministic Finite Automaton) Computation + # + + public + + def nfa + return self if @nfa_computed + compute_nfa + @nfa_computed = true + self + end + + private + + def compute_nfa + @grammar.init + # add state 0 + core_to_state [ @grammar[0].ptrs[0] ] + # generate LALR states + cur = 0 + @gotos = [] + while cur < @states.size + generate_states @states[cur] # state is added here + cur += 1 + end + @actions.init + end + + def generate_states(state) + puts "dstate: #{state}" if @d_state + + table = {} + state.closure.each do |ptr| + if sym = ptr.dereference + addsym table, sym, ptr.next + end + end + table.each do |sym, core| + puts "dstate: sym=#{sym} ncore=#{core}" if @d_state + + dest = core_to_state(core.to_a) + state.goto_table[sym] = dest + id = sym.nonterminal?() ? @gotos.size : nil + g = Goto.new(id, sym, state, dest) + @gotos.push g if sym.nonterminal? + state.gotos[sym] = g + puts "dstate: #{state.ident} --#{sym}--> #{dest.ident}" if @d_state + + # check infinite recursion + if state.ident == dest.ident and state.closure.size == 1 + raise CompileError, + sprintf("Infinite recursion: state %d, with rule %d", + state.ident, state.ptrs[0].rule.ident) + end + end + end + + def addsym(table, sym, ptr) + unless s = table[sym] + table[sym] = s = ISet.new + end + s.add ptr + end + + def core_to_state(core) + # + # convert CORE to a State object. + # If matching state does not exist, create it and add to the table. 
+ # + + k = fingerprint(core) + unless dest = @statecache[k] + # not registered yet + dest = State.new(@states.size, core) + @states.push dest + + @statecache[k] = dest + + puts "core_to_state: create state ID #{dest.ident}" if @d_state + else + if @d_state + puts "core_to_state: dest is cached ID #{dest.ident}" + puts "core_to_state: dest core #{dest.core.join(' ')}" + end + end + + dest + end + + def fingerprint(arr) + arr.map {|i| i.ident }.pack('L*') + end + + # + # DFA (Deterministic Finite Automaton) Generation + # + + public + + def dfa + return self if @dfa_computed + nfa + compute_dfa + @dfa_computed = true + self + end + + private + + def compute_dfa + la = lookahead() + @states.each do |state| + state.la = la + resolve state + end + set_accept + @states.each do |state| + pack state + end + check_useless + end + + def lookahead + # + # lookahead algorithm ver.3 -- from bison 1.26 + # + + gotos = @gotos + if @d_la + puts "\n--- goto ---" + gotos.each_with_index {|g, i| print i, ' '; p g } + end + + ### initialize_LA() + ### set_goto_map() + la_rules = [] + @states.each do |state| + state.check_la la_rules + end + + ### initialize_F() + f = create_tmap(gotos.size) + reads = [] + edge = [] + gotos.each do |goto| + goto.to_state.goto_table.each do |t, st| + if t.terminal? + f[goto.ident] |= (1 << t.ident) + elsif t.nullable? + edge.push goto.to_state.gotos[t].ident + end + end + if edge.empty? + reads.push nil + else + reads.push edge + edge = [] + end + end + digraph f, reads + if @d_la + puts "\n--- F1 (reads) ---" + print_tab gotos, reads, f + end + + ### build_relations() + ### compute_FOLLOWS + path = nil + edge = [] + lookback = Array.new(la_rules.size, nil) + includes = [] + gotos.each do |goto| + goto.symbol.heads.each do |ptr| + path = record_path(goto.from_state, ptr.rule) + lastgoto = path.last + st = lastgoto ? lastgoto.to_state : goto.from_state + if st.conflict? 
+ addrel lookback, st.rruleid(ptr.rule), goto + end + path.reverse_each do |g| + break if g.symbol.terminal? + edge.push g.ident + break unless g.symbol.nullable? + end + end + if edge.empty? + includes.push nil + else + includes.push edge + edge = [] + end + end + includes = transpose(includes) + digraph f, includes + if @d_la + puts "\n--- F2 (includes) ---" + print_tab gotos, includes, f + end + + ### compute_lookaheads + la = create_tmap(la_rules.size) + lookback.each_with_index do |arr, i| + if arr + arr.each do |g| + la[i] |= f[g.ident] + end + end + end + if @d_la + puts "\n--- LA (lookback) ---" + print_tab la_rules, lookback, la + end + + la + end + + def create_tmap(size) + Array.new(size, 0) # use Integer as bitmap + end + + def addrel(tbl, i, item) + if a = tbl[i] + a.push item + else + tbl[i] = [item] + end + end + + def record_path(begst, rule) + st = begst + path = [] + rule.symbols.each do |t| + goto = st.gotos[t] + path.push goto + st = goto.to_state + end + path + end + + def transpose(rel) + new = Array.new(rel.size, nil) + rel.each_with_index do |arr, idx| + if arr + arr.each do |i| + addrel new, i, idx + end + end + end + new + end + + def digraph(map, relation) + n = relation.size + index = Array.new(n, nil) + vertices = [] + @infinity = n + 2 + + index.each_index do |i| + if not index[i] and relation[i] + traverse i, index, vertices, map, relation + end + end + end + + def traverse(i, index, vertices, map, relation) + vertices.push i + index[i] = height = vertices.size + + if rp = relation[i] + rp.each do |proci| + unless index[proci] + traverse proci, index, vertices, map, relation + end + if index[i] > index[proci] + # circulative recursion !!! 
+ index[i] = index[proci] + end + map[i] |= map[proci] + end + end + + if index[i] == height + while true + proci = vertices.pop + index[proci] = @infinity + break if i == proci + + map[proci] |= map[i] + end + end + end + + # for debug + def print_atab(idx, tab) + tab.each_with_index do |i,ii| + printf '%-20s', idx[ii].inspect + p i + end + end + + def print_tab(idx, rel, tab) + tab.each_with_index do |bin,i| + print i, ' ', idx[i].inspect, ' << '; p rel[i] + print ' ' + each_t(@symboltable, bin) {|t| print ' ', t } + puts + end + end + + # for debug + def print_tab_i(idx, rel, tab, i) + bin = tab[i] + print i, ' ', idx[i].inspect, ' << '; p rel[i] + print ' ' + each_t(@symboltable, bin) {|t| print ' ', t } + end + + # for debug + def printb(i) + each_t(@symboltable, i) do |t| + print t, ' ' + end + puts + end + + def each_t(tbl, set) + 0.upto( set.size ) do |i| + (0..7).each do |ii| + if set[idx = i * 8 + ii] == 1 + yield tbl[idx] + end + end + end + end + + # + # resolve + # + + def resolve(state) + if state.conflict? + resolve_rr state, state.ritems + resolve_sr state, state.stokens + else + if state.rrules.empty? + # shift + state.stokens.each do |t| + state.action[t] = @actions.shift(state.goto_table[t]) + end + else + # reduce + state.defact = @actions.reduce(state.rrules[0]) + end + end + end + + def resolve_rr(state, r) + r.each do |item| + item.each_la(@symboltable) do |t| + act = state.action[t] + if act + unless act.kind_of?(Reduce) + raise "racc: fatal: #{act.class} in action table" + end + # Cannot resolve R/R conflict (on t). + # Reduce with upper rule as default. + state.rr_conflict act.rule, item.rule, t + else + # No conflict. 
+ state.action[t] = @actions.reduce(item.rule) + end + end + end + end + + def resolve_sr(state, s) + s.each do |stok| + goto = state.goto_table[stok] + act = state.action[stok] + + unless act + # no conflict + state.action[stok] = @actions.shift(goto) + else + unless act.kind_of?(Reduce) + puts 'DEBUG -------------------------------' + p stok + p act + state.action.each do |k,v| + print k.inspect, ' ', v.inspect, "\n" + end + raise "racc: fatal: #{act.class} in action table" + end + + # conflict on stok + + rtok = act.rule.precedence + case do_resolve_sr(stok, rtok) + when :Reduce + # action is already set + + when :Shift + # overwrite + act.decref + state.action[stok] = @actions.shift(goto) + + when :Error + act.decref + state.action[stok] = @actions.error + + when :CantResolve + # shift as default + act.decref + state.action[stok] = @actions.shift(goto) + state.sr_conflict stok, act.rule + end + end + end + end + + ASSOC = { + :Left => :Reduce, + :Right => :Shift, + :Nonassoc => :Error + } + + def do_resolve_sr(stok, rtok) + puts "resolve_sr: s/r conflict: rtok=#{rtok}, stok=#{stok}" if @d_prec + + unless rtok and rtok.precedence + puts "resolve_sr: no prec for #{rtok}(R)" if @d_prec + return :CantResolve + end + rprec = rtok.precedence + + unless stok and stok.precedence + puts "resolve_sr: no prec for #{stok}(S)" if @d_prec + return :CantResolve + end + sprec = stok.precedence + + ret = if rprec == sprec + ASSOC[rtok.assoc] or + raise "racc: fatal: #{rtok}.assoc is not Left/Right/Nonassoc" + else + (rprec > sprec) ? 
(:Reduce) : (:Shift) + end + + puts "resolve_sr: resolved as #{ret.id2name}" if @d_prec + ret + end + + # + # complete + # + + def set_accept + anch = @symboltable.anchor + init_state = @states[0].goto_table[@grammar.start] + targ_state = init_state.action[anch].goto_state + acc_state = targ_state.action[anch].goto_state + + acc_state.action.clear + acc_state.goto_table.clear + acc_state.defact = @actions.accept + end + + def pack(state) + ### find most frequently used reduce rule + act = state.action + arr = Array.new(@grammar.size, 0) + act.each do |t, a| + arr[a.ruleid] += 1 if a.kind_of?(Reduce) + end + i = arr.max + s = (i > 0) ? arr.index(i) : nil + + ### set & delete default action + if s + r = @actions.reduce(s) + if not state.defact or state.defact == r + act.delete_if {|t, a| a == r } + state.defact = r + end + else + state.defact ||= @actions.error + end + end + + def check_useless + used = [] + @actions.each_reduce do |act| + if not act or act.refn == 0 + act.rule.useless = true + else + t = act.rule.target + used[t.ident] = t + end + end + @symboltable.nt_base.upto(@symboltable.nt_max - 1) do |n| + unless used[n] + @symboltable[n].useless = true + end + end + end + + end # class StateTable + + + # A LALR state. + class State + + def initialize(ident, core) + @ident = ident + @core = core + @goto_table = {} + @gotos = {} + @stokens = nil + @ritems = nil + @action = {} + @defact = nil + @rrconf = nil + @srconf = nil + + @closure = make_closure(@core) + end + + attr_reader :ident + alias stateid ident + alias hash ident + + attr_reader :core + attr_reader :closure + + attr_reader :goto_table + attr_reader :gotos + + attr_reader :stokens + attr_reader :ritems + attr_reader :rrules + + attr_reader :action + attr_accessor :defact # default action + + attr_reader :rrconf + attr_reader :srconf + + def inspect + "" + end + + alias to_s inspect + + def ==(oth) + @ident == oth.ident + end + + alias eql? 
== + + def make_closure(core) + set = ISet.new + core.each do |ptr| + set.add ptr + if t = ptr.dereference and t.nonterminal? + set.update_a t.expand + end + end + set.to_a + end + + def check_la(la_rules) + @conflict = false + s = [] + r = [] + @closure.each do |ptr| + if t = ptr.dereference + if t.terminal? + s[t.ident] = t + if t.ident == 1 # $error + @conflict = true + end + end + else + r.push ptr.rule + end + end + unless r.empty? + if not s.empty? or r.size > 1 + @conflict = true + end + end + s.compact! + @stokens = s + @rrules = r + + if @conflict + @la_rules_i = la_rules.size + @la_rules = r.map {|i| i.ident } + la_rules.concat r + else + @la_rules_i = @la_rules = nil + end + end + + def conflict? + @conflict + end + + def rruleid(rule) + if i = @la_rules.index(rule.ident) + @la_rules_i + i + else + puts '/// rruleid' + p self + p rule + p @rrules + p @la_rules_i + raise 'racc: fatal: cannot get reduce rule id' + end + end + + def la=(la) + return unless @conflict + i = @la_rules_i + @ritems = r = [] + @rrules.each do |rule| + r.push Item.new(rule, la[i]) + i += 1 + end + end + + def rr_conflict(high, low, ctok) + c = RRconflict.new(@ident, high, low, ctok) + + @rrconf ||= {} + if a = @rrconf[ctok] + a.push c + else + @rrconf[ctok] = [c] + end + end + + def sr_conflict(shift, reduce) + c = SRconflict.new(@ident, shift, reduce) + + @srconf ||= {} + if a = @srconf[shift] + a.push c + else + @srconf[shift] = [c] + end + end + + def n_srconflicts + @srconf ? @srconf.size : 0 + end + + def n_rrconflicts + @rrconf ? @rrconf.size : 0 + end + + end # class State + + + # + # Represents a transition on the grammar. + # "Real goto" means a transition by nonterminal, + # but this class treats also terminal's. + # If one is a terminal transition, .ident returns nil. 
+ # + class Goto + def initialize(ident, sym, from, to) + @ident = ident + @symbol = sym + @from_state = from + @to_state = to + end + + attr_reader :ident + attr_reader :symbol + attr_reader :from_state + attr_reader :to_state + + def inspect + "(#{@from_state.ident}-#{@symbol}->#{@to_state.ident})" + end + end + + + # LALR item. A set of rule and its lookahead tokens. + class Item + def initialize(rule, la) + @rule = rule + @la = la + end + + attr_reader :rule + attr_reader :la + + def each_la(tbl) + la = @la + 0.upto(la.size - 1) do |i| + (0..7).each do |ii| + if la[idx = i * 8 + ii] == 1 + yield tbl[idx] + end + end + end + end + end + + + # The table of LALR actions. Actions are either of + # Shift, Reduce, Accept and Error. + class ActionTable + + def initialize(rt, st) + @grammar = rt + @statetable = st + + @reduce = [] + @shift = [] + @accept = nil + @error = nil + end + + def init + @grammar.each do |rule| + @reduce.push Reduce.new(rule) + end + @statetable.each do |state| + @shift.push Shift.new(state) + end + @accept = Accept.new + @error = Error.new + end + + def reduce_n + @reduce.size + end + + def reduce(i) + case i + when Rule then i = i.ident + when Integer then ; + else + raise "racc: fatal: wrong class #{i.class} for reduce" + end + + r = @reduce[i] or raise "racc: fatal: reduce action #{i.inspect} not exist" + r.incref + r + end + + def each_reduce(&block) + @reduce.each(&block) + end + + def shift_n + @shift.size + end + + def shift(i) + case i + when State then i = i.ident + when Integer then ; + else + raise "racc: fatal: wrong class #{i.class} for shift" + end + + @shift[i] or raise "racc: fatal: shift action #{i} does not exist" + end + + def each_shift(&block) + @shift.each(&block) + end + + attr_reader :accept + attr_reader :error + + end + + + class Shift + def initialize(goto) + @goto_state = goto + end + + attr_reader :goto_state + + def goto_id + @goto_state.ident + end + + def inspect + "" + end + end + + + class Reduce + def 
initialize(rule) + @rule = rule + @refn = 0 + end + + attr_reader :rule + attr_reader :refn + + def ruleid + @rule.ident + end + + def inspect + "" + end + + def incref + @refn += 1 + end + + def decref + @refn -= 1 + raise 'racc: fatal: act.refn < 0' if @refn < 0 + end + end + + class Accept + def inspect + "" + end + end + + class Error + def inspect + "" + end + end + + class SRconflict + def initialize(sid, shift, reduce) + @stateid = sid + @shift = shift + @reduce = reduce + end + + attr_reader :stateid + attr_reader :shift + attr_reader :reduce + + def to_s + sprintf('state %d: S/R conflict rule %d reduce and shift %s', + @stateid, @reduce.ruleid, @shift.to_s) + end + end + + class RRconflict + def initialize(sid, high, low, tok) + @stateid = sid + @high_prec = high + @low_prec = low + @token = tok + end + + attr_reader :stateid + attr_reader :high_prec + attr_reader :low_prec + attr_reader :token + + def to_s + sprintf('state %d: R/R conflict with rule %d and %d on %s', + @stateid, @high_prec.ident, @low_prec.ident, @token.to_s) + end + end + +end diff --git a/lib/racc/statetransitiontable.rb b/lib/racc/statetransitiontable.rb new file mode 100644 index 0000000000..23df4102ec --- /dev/null +++ b/lib/racc/statetransitiontable.rb @@ -0,0 +1,316 @@ +# +# $Id: 4c5f4311663b6d03050953d64d6a0e7905ff2216 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of LGPL, see the file "COPYING". 
+# + +require 'racc/parser' + +unless Object.method_defined?(:funcall) + class Object + alias funcall __send__ + end +end + +module Racc + + StateTransitionTable = Struct.new(:action_table, + :action_check, + :action_default, + :action_pointer, + :goto_table, + :goto_check, + :goto_default, + :goto_pointer, + :token_table, + :reduce_table, + :reduce_n, + :shift_n, + :nt_base, + :token_to_s_table, + :use_result_var, + :debug_parser) + class StateTransitionTable # reopen + def StateTransitionTable.generate(states) + StateTransitionTableGenerator.new(states).generate + end + + def initialize(states) + super() + @states = states + @grammar = states.grammar + self.use_result_var = true + self.debug_parser = true + end + + attr_reader :states + attr_reader :grammar + + def parser_class + ParserClassGenerator.new(@states).generate + end + + def token_value_table + h = {} + token_table().each do |sym, i| + h[sym.value] = i + end + h + end + end + + + class StateTransitionTableGenerator + + def initialize(states) + @states = states + @grammar = states.grammar + end + + def generate + t = StateTransitionTable.new(@states) + gen_action_tables t, @states + gen_goto_tables t, @grammar + t.token_table = token_table(@grammar) + t.reduce_table = reduce_table(@grammar) + t.reduce_n = @states.reduce_n + t.shift_n = @states.shift_n + t.nt_base = @grammar.nonterminal_base + t.token_to_s_table = @grammar.symbols.map {|sym| sym.to_s } + t + end + + def reduce_table(grammar) + t = [0, 0, :racc_error] + grammar.each_with_index do |rule, idx| + next if idx == 0 + t.push rule.size + t.push rule.target.ident + t.push(if rule.action.empty? # and @params.omit_action_call? 
+ then :_reduce_none + else "_reduce_#{idx}".intern + end) + end + t + end + + def token_table(grammar) + h = {} + grammar.symboltable.terminals.each do |t| + h[t] = t.ident + end + h + end + + def gen_action_tables(t, states) + t.action_table = yytable = [] + t.action_check = yycheck = [] + t.action_default = yydefact = [] + t.action_pointer = yypact = [] + e1 = [] + e2 = [] + states.each do |state| + yydefact.push act2actid(state.defact) + if state.action.empty? + yypact.push nil + next + end + vector = [] + state.action.each do |tok, act| + vector[tok.ident] = act2actid(act) + end + addent e1, vector, state.ident, yypact + end + set_table e1, e2, yytable, yycheck, yypact + end + + def gen_goto_tables(t, grammar) + t.goto_table = yytable2 = [] + t.goto_check = yycheck2 = [] + t.goto_pointer = yypgoto = [] + t.goto_default = yydefgoto = [] + e1 = [] + e2 = [] + grammar.each_nonterminal do |tok| + tmp = [] + + # decide default + freq = Array.new(@states.size, 0) + @states.each do |state| + st = state.goto_table[tok] + if st + st = st.ident + freq[st] += 1 + end + tmp[state.ident] = st + end + max = freq.max + if max > 1 + default = freq.index(max) + tmp.map! {|i| default == i ? nil : i } + else + default = nil + end + yydefgoto.push default + + # delete default value + tmp.pop until tmp.last or tmp.empty? + if tmp.compact.empty? 
+ # only default + yypgoto.push nil + next + end + + addent e1, tmp, (tok.ident - grammar.nonterminal_base), yypgoto + end + set_table e1, e2, yytable2, yycheck2, yypgoto + end + + def addent(all, arr, chkval, ptr) + max = arr.size + min = nil + arr.each_with_index do |item, idx| + if item + min ||= idx + end + end + ptr.push(-7777) # mark + arr = arr[min...max] + all.push [arr, chkval, mkmapexp(arr), min, ptr.size - 1] + end + + n = 2 ** 16 + begin + Regexp.compile("a{#{n}}") + RE_DUP_MAX = n + rescue RegexpError + n /= 2 + retry + end + + def mkmapexp(arr) + i = ii = 0 + as = arr.size + map = '' + maxdup = RE_DUP_MAX + curr = nil + while i < as + ii = i + 1 + if arr[i] + ii += 1 while ii < as and arr[ii] + curr = '-' + else + ii += 1 while ii < as and not arr[ii] + curr = '.' + end + + offset = ii - i + if offset == 1 + map << curr + else + while offset > maxdup + map << "#{curr}{#{maxdup}}" + offset -= maxdup + end + map << "#{curr}{#{offset}}" if offset > 1 + end + i = ii + end + Regexp.compile(map, 'n') + end + + def set_table(entries, dummy, tbl, chk, ptr) + upper = 0 + map = '-' * 10240 + + # sort long to short + entries.sort! 
{|a,b| b[0].size <=> a[0].size } + + entries.each do |arr, chkval, expr, min, ptri| + if upper + arr.size > map.size + map << '-' * (arr.size + 1024) + end + idx = map.index(expr) + ptr[ptri] = idx - min + arr.each_with_index do |item, i| + if item + i += idx + tbl[i] = item + chk[i] = chkval + map[i] = ?o + end + end + upper = idx + arr.size + end + end + + def act2actid(act) + case act + when Shift then act.goto_id + when Reduce then -act.ruleid + when Accept then @states.shift_n + when Error then @states.reduce_n * -1 + else + raise "racc: fatal: wrong act type #{act.class} in action table" + end + end + + end + + + class ParserClassGenerator + + def initialize(states) + @states = states + @grammar = states.grammar + end + + def generate + table = @states.state_transition_table + c = Class.new(::Racc::Parser) + c.const_set :Racc_arg, [table.action_table, + table.action_check, + table.action_default, + table.action_pointer, + table.goto_table, + table.goto_check, + table.goto_default, + table.goto_pointer, + table.nt_base, + table.reduce_table, + table.token_value_table, + table.shift_n, + table.reduce_n, + false] + c.const_set :Racc_token_to_s_table, table.token_to_s_table + c.const_set :Racc_debug_parser, true + define_actions c + c + end + + private + + def define_actions(c) + c.module_eval "def _reduce_none(vals, vstack) vals[0] end" + @grammar.each do |rule| + if rule.action.empty? 
+ c.funcall(:alias_method, "_reduce_#{rule.ident}", :_reduce_none) + else + c.funcall(:define_method, "_racc_action_#{rule.ident}", &rule.action.proc) + c.module_eval(<<-End, __FILE__, __LINE__ + 1) + def _reduce_#{rule.ident}(vals, vstack) + _racc_action_#{rule.ident}(*vals) + end + End + end + end + end + + end + +end # module Racc diff --git a/lib/racc/static.rb b/lib/racc/static.rb new file mode 100644 index 0000000000..bebbeb5aa6 --- /dev/null +++ b/lib/racc/static.rb @@ -0,0 +1,5 @@ +require 'racc' +require 'racc/parser' +require 'racc/grammarfileparser' +require 'racc/parserfilegenerator' +require 'racc/logfilegenerator' diff --git a/libexec/racc b/libexec/racc new file mode 100755 index 0000000000..5656b25e42 --- /dev/null +++ b/libexec/racc @@ -0,0 +1,306 @@ +#!/usr/bin/env ruby +# +# $Id$ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +require 'racc/static' +require 'optparse' + +def main + output = nil + debug_parser = false + make_logfile = false + logfilename = nil + make_executable = false + rubypath = nil + embed_runtime = false + debug_flags = Racc::DebugFlags.new + line_convert = true + line_convert_all = false + omit_action_call = true + superclass = nil + check_only = false + verbose = false + profiler = RaccProfiler.new(false) + + parser = OptionParser.new + parser.banner = "Usage: #{File.basename($0)} [options] " + parser.on('-o', '--output-file=PATH', + 'output file name [.tab.rb]') {|name| + output = name + } + parser.on('-t', '--debug', 'Outputs debugging parser.') {|fl| + debug_parser = fl + } + parser.on('-g', 'Equivalent to -t (obsolete).') {|fl| + $stderr.puts "racc -g is obsolete. Use racc -t instead." 
if $VERBOSE + debug_parser = fl + } + parser.on('-v', '--verbose', + 'Creates .output log file.') {|fl| + make_logfile = fl + } + parser.on('-O', '--log-file=PATH', + 'Log file name [.output]') {|path| + make_logfile = true + logfilename = path + } + parser.on('-e', '--executable [RUBYPATH]', 'Makes executable parser.') {|path| + # set make_executable, the flag main checks after option parsing + make_executable = true + rubypath = (path == 'ruby' ? nil : path) + } + parser.on('-E', '--embedded', "Embeds Racc runtime in output.") { + embed_runtime = true + } + parser.on('--line-convert-all', 'Converts line numbers of user codes.') { + line_convert_all = true + } + parser.on('-l', '--no-line-convert', 'Never convert line numbers.') { + line_convert = false + line_convert_all = false + } + parser.on('-a', '--no-omit-actions', 'Never omit actions.') { + omit_action_call = false + } + parser.on('--superclass=CLASSNAME', + 'Uses CLASSNAME instead of Racc::Parser.') {|name| + superclass = name + } + parser.on('--runtime=FEATURE', + "Uses FEATURE instead of 'racc/parser'") {|feat| + # read the block parameter; no name `feature` is defined here + runtime = feat + } + parser.on('-C', '--check-only', 'Checks syntax and quit immediately.') {|fl| + check_only = fl + } + parser.on('-S', '--output-status', 'Outputs internal status time to time.') { + verbose = true + } + parser.on('-P', 'Enables generator profile') { + profiler = RaccProfiler.new(true) + } + parser.on('-D flags', "Flags for Racc debugging (do not use).") {|flags| + debug_flags = Racc::DebugFlags.parse_option_string(flags) + } + #parser.on('--no-extensions', 'Run Racc without any Ruby extension.') { + # Racc.const_set :Racc_No_Extentions, true + #} + parser.on('--version', 'Prints version and quit.') { + puts "racc version #{Racc::Version}" + exit 0 + } + parser.on('--runtime-version', 'Prints runtime version and quit.') { + printf "racc runtime version %s (rev. %s); %s\n", + Racc::Parser::Racc_Runtime_Version, + Racc::Parser::Racc_Runtime_Revision, + if Racc::Parser.racc_runtime_type == 'ruby' + sprintf('ruby core version %s (rev.
%s)', + Racc::Parser::Racc_Runtime_Core_Version_R, + Racc::Parser::Racc_Runtime_Core_Revision_R) + else + sprintf('c core version %s (rev. %s)', + Racc::Parser::Racc_Runtime_Core_Version_C, + Racc::Parser::Racc_Runtime_Core_Revision_C) + end + exit 0 + } + parser.on('--copyright', 'Prints copyright and quit.') { + puts Racc::Copyright + exit 0 + } + parser.on('--help', 'Prints this message and quit.') { + puts parser.help + exit 1 + } + begin + parser.parse! + rescue OptionParser::ParseError => err + $stderr.puts err.message + $stderr.puts parser.help + exit 1 + end + if ARGV.empty? + $stderr.puts 'no input' + exit 1 + end + if ARGV.size > 1 + $stderr.puts 'too many input' + exit 1 + end + input = ARGV[0] + + begin + $stderr.puts 'Parsing grammar file...' if verbose + result = profiler.section('parse') { + parser = Racc::GrammarFileParser.new(debug_flags) + parser.parse(File.read(input), File.basename(input)) + } + if check_only + $stderr.puts 'syntax ok' + exit 0 + end + + $stderr.puts 'Generating LALR states...' if verbose + states = profiler.section('nfa') { + Racc::States.new(result.grammar).nfa + } + + $stderr.puts "Resolving #{states.size} states..." if verbose + profiler.section('dfa') { + states.dfa + } + + $stderr.puts 'Creating parser file...' if verbose + params = result.params.dup + # Overwrites parameters given by a grammar file with command line options. 
+ params.superclass = superclass if superclass + params.omit_action_call = true if omit_action_call + # From command line option + if make_executable + params.make_executable = true + params.interpreter = rubypath + end + params.debug_parser = debug_parser + params.convert_line = line_convert + params.convert_line_all = line_convert_all + params.embed_runtime = embed_runtime + profiler.section('generation') { + generator = Racc::ParserFileGenerator.new(states, params) + generator.generate_parser_file(output || make_filename(input, '.tab.rb')) + } + + if make_logfile + profiler.section('logging') { + $stderr.puts 'Creating log file...' if verbose + logfilename ||= make_filename(output || File.basename(input), '.output') + File.open(logfilename, 'w') {|f| + Racc::LogFileGenerator.new(states, debug_flags).output f + } + } + end + if debug_flags.status_logging + log_useless states.grammar + log_conflict states + else + report_useless states.grammar + report_conflict states + end + + profiler.report + rescue Racc::Error, Errno::ENOENT, Errno::EPERM => err + raise if $DEBUG or debug_flags.any? + lineno = err.message.slice(/\A\d+:/).to_s + $stderr.puts "#{File.basename $0}: #{input}:#{lineno} #{err.message.strip}" + exit 1 + end +end + +def make_filename(path, suffix) + path.sub(/(?:\..*?)?\z/, suffix) +end + +def report_conflict(states) + if states.should_report_srconflict? + $stderr.puts "#{states.n_srconflicts} shift/reduce conflicts" + end + if states.rrconflict_exist? + $stderr.puts "#{states.n_rrconflicts} reduce/reduce conflicts" + end +end + +def log_conflict(states) + logging('w') {|f| + f.puts "ex#{states.grammar.n_expected_srconflicts}" + if states.should_report_srconflict? + f.puts "sr#{states.n_srconflicts}" + end + if states.rrconflict_exist? + f.puts "rr#{states.n_rrconflicts}" + end + } +end + +def report_useless(grammar) + if grammar.useless_nonterminal_exist? 
+ $stderr.puts "#{grammar.n_useless_nonterminals} useless nonterminals" + end + if grammar.useless_rule_exist? + $stderr.puts "#{grammar.n_useless_rules} useless rules" + end + if grammar.start.useless? + $stderr.puts 'fatal: start symbol does not derive any sentence' + end +end + +def log_useless(grammar) + logging('a') {|f| + if grammar.useless_nonterminal_exist? + f.puts "un#{grammar.n_useless_nonterminals}" + end + if grammar.useless_rule_exist? + f.puts "ur#{grammar.n_useless_rules}" + end + } +end + +def logging(mode, &block) + File.open("log/#{File.basename(ARGV[0])}", mode, &block) +end + +class RaccProfiler + def initialize(really) + @really = really + @log = [] + unless ::Process.respond_to?(:times) + # Ruby 1.6 + @class = ::Time + else + @class = ::Process + end + end + + def section(name) + if @really + t1 = @class.times.utime + result = yield + t2 = @class.times.utime + @log.push [name, t2 - t1] + result + else + yield + end + end + + def report + return unless @really + f = $stderr + total = cumulative_time() + f.puts '--task-----------+--sec------+---%-' + @log.each do |name, time| + f.printf "%-19s %s %3d%%\n", name, pjust(time,4,4), (time/total*100).to_i + end + f.puts '-----------------+-----------+-----' + f.printf "%-20s%s\n", 'total', pjust(total,4,4) + end + + private + + def cumulative_time + t = @log.inject(0) {|sum, (name, time)| sum + time } + t == 0 ? 0.01 : t + end + + def pjust(num, i, j) + m = /(\d+)(\.\d+)?/.match(num.to_s) + str = m[1].rjust(i) + str.concat m[2].ljust(j+1)[0,j+1] if m[2] + str + end +end + +main diff --git a/libexec/racc2y b/libexec/racc2y new file mode 100755 index 0000000000..f88d73ed2c --- /dev/null +++ b/libexec/racc2y @@ -0,0 +1,195 @@ +#!/usr/local/bin/ruby +# +# $Id$ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1.
+# For details of the LGPL, see the file "COPYING". +# + +require 'racc/grammarfileparser' +require 'racc/info' +require 'optparse' + +def main + @with_action = true + with_header = false + with_inner = false + with_footer = false + output = nil + parser = OptionParser.new + parser.banner = "Usage: #{File.basename($0)} [-AHIF] [-oFILENAME] GRAMMARFILE" + parser.on('-o', '--output=FILENAME', 'output file name [.yacc]') {|name| + output = name + } + parser.on('-A', '--without-action', 'Does not include actions.') { + @with_action = false + } + parser.on('-H', '--with-header', 'Includes header part.') { + with_header = true + } + parser.on('-I', '--with-inner', 'Includes inner part.') { + with_inner = true + } + parser.on('-F', '--with-footer', 'Includes footer part.') { + with_footer = true + } + parser.on('--version', 'Prints version and quit.') { + puts "racc2y version #{Racc::Version}" + exit 0 + } + parser.on('--copyright', 'Prints copyright and quit.') { + puts Racc::Copyright + exit 0 + } + parser.on('--help', 'Prints this message and quit.') { + puts parser.help + exit 1 + } + begin + parser.parse! + rescue OptionParser::ParseError => err + $stderr.puts err.message + $stderr.puts parser.help + exit 1 + end + if ARGV.empty? 
+ $stderr.puts "no input file" + exit 1 + end + unless ARGV.size == 1 + $stderr.puts "too many inputs" + exit 1 + end + input = ARGV[0] + + begin + result = Racc::GrammarFileParser.parse_file(input) + result.grammar.init + File.open(output || "#{input}.yacc", 'w') {|f| + f.puts "/* generated from #{input} */" + if with_header + f.puts + f.puts '%{' + print_user_codes f, result.params.header + f.puts '%}' + end + f.puts + print_terminals f, result.grammar + f.puts + print_precedence_table f, precedence_table(result.grammar) + f.puts + f.puts '%%' + print_grammar f, result.grammar + f.puts '%%' + if with_inner + f.puts '/*---- inner ----*/' + print_user_codes f, result.params.inner + end + if with_footer + f.puts '/*---- footer ----*/' + print_user_codes f, result.params.footer + end + } + rescue SystemCallError => err + $stderr.puts err.message + exit 1 + end +end + +def print_terminals(f, grammar) + init_indent = '%token'.size + f.print '%token' + columns = init_indent + grammar.symboltable.each_terminal do |t| + next unless t.terminal? + next if t.dummy? + next if t == grammar.symboltable.anchor + next if t == grammar.symboltable.error + unless t.value.kind_of?(String) + if columns > 60 + f.puts + f.print ' ' * init_indent + columns = init_indent + end + columns += f.write(" #{yacc_symbol(t)}") + end + end + f.puts +end + +def precedence_table(grammar) + table = [] + grammar.symboltable.select {|sym| sym.precedence }.each do |sym| + (table[sym.prec] ||= [sym.assoc]).push sym + end + table.compact +end + +def print_precedence_table(f, table) + return if table.empty? + f.puts '/* precedance table */' + table.each do |syms| + assoc = syms.shift + f.printf '%%%-8s ', assoc.to_s.downcase + f.puts syms.map {|s| yacc_symbol(s) }.join(' ') + end + f.puts +end + +def print_grammar(f, grammar) + prev_target = nil + indent = 10 + embactions = [] + grammar.each do |rule| + if rule.target.dummy? + embactions.push rule.action unless rule.action.empty? 
+ next + end + if rule.target == prev_target + f.print ' ' * indent, '|' + else + prev_target = rule.target + f.printf "\n%-10s:", yacc_symbol(prev_target) + end + rule.symbols.each do |s| + if s.dummy? # target of dummy rule for embedded action + f.puts + print_action f, embactions.shift, indent + f.print ' ' * (indent + 1) + else + f.print ' ', yacc_symbol(s) + end + end + if rule.specified_prec + f.print ' %prec ', yacc_symbol(rule.specified_prec) + end + f.puts + unless rule.action.empty? + print_action f, rule.action, indent + end + end +end + +def print_action(f, action, indent) + return unless @with_action + f.print ' ' * (indent + 4), "{\n" + f.print ' ' * (indent + 6), action.source.text.strip, "\n" + f.print ' ' * (indent + 4) , "}\n" +end + +def print_user_codes(f, srcs) + return if srcs.empty? + srcs.each do |src| + f.puts src.text + end +end + +def yacc_symbol(s) + s.to_s.gsub('"', "'") +end + +main diff --git a/libexec/y2racc b/libexec/y2racc new file mode 100755 index 0000000000..38bd3669a2 --- /dev/null +++ b/libexec/y2racc @@ -0,0 +1,339 @@ +#!/usr/local/bin/ruby +# +# $Id$ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +require 'racc/info' +require 'strscan' +require 'forwardable' +require 'optparse' + +def main + @with_action = true + @with_header = false + @with_usercode = false + cname = 'MyParser' + input = nil + output = nil + parser = OptionParser.new + parser.banner = "Usage: #{File.basename($0)} [-Ahu] [-c ] [-o ] " + parser.on('-o', '--output=FILENAME', 'output file name [.racc]') {|name| + output = name + } + parser.on('-c', '--classname=NAME', "Name of the parser class.
[#{cname}]") {|name| + cname = name + } + parser.on('-A', '--without-action', 'Does not include actions.') { + @with_action = false + } + parser.on('-h', '--with-header', 'Includes header (%{...%}).') { + @with_header = true + } + parser.on('-u', '--with-user-code', 'Includes user code.') { + @with_usercode = true + } + parser.on('--version', 'Prints version and quit.') { + puts "y2racc version #{Racc::Version}" + exit 0 + } + parser.on('--copyright', 'Prints copyright and quit.') { + puts Racc::Copyright + exit 0 + } + parser.on('--help', 'Prints this message and quit.') { + puts parser.help + exit 1 + } + begin + parser.parse! + rescue OptionParser::ParseError => err + $stderr.puts err.message + $stderr.puts parser.help + exit 1 + end + if ARGV.empty? + $stderr.puts 'no input' + exit 1 + end + if ARGV.size > 1 + $stderr.puts 'too many input' + exit 1 + end + input = ARGV[0] + + begin + result = YaccFileParser.parse_file(input) + File.open(output || "#{input}.racc", 'w') {|f| + convert cname, result, f + } + rescue SystemCallError => err + $stderr.puts err.message + exit 1 + end +end + +def convert(classname, result, f) + init_indent = 'token'.size + f.puts %<# Converted from "#{result.filename}" by y2racc version #{Racc::Version}> + f.puts + f.puts "class #{classname}" + unless result.terminals.empty? + f.puts + f.print 'token' + columns = init_indent + result.terminals.each do |t| + if columns > 60 + f.puts + f.print ' ' * init_indent + columns = init_indent + end + columns += f.write(" #{t}") + end + f.puts + end + unless result.precedence_table.empty? + f.puts + f.puts 'preclow' + result.precedence_table.each do |assoc, toks| + f.printf " %-8s %s\n", assoc, toks.join(' ') unless toks.empty? + end + f.puts 'prechigh' + end + if result.start + f.puts + # emit the start symbol held by the parse result; @start is never assigned + f.puts "start #{result.start}" + end + + f.puts + f.puts 'rule' + texts = @with_action ?
result.grammar : result.grammar_without_actions + texts.each do |text| + f.print text + end + + if @with_header and result.header + f.puts + f.puts '---- header' + f.puts result.header + end + if @with_usercode and result.usercode + f.puts + f.puts '---- footer' + f.puts result.usercode + end +end + +class ParseError < StandardError; end + +class StringScanner_withlineno + def initialize(src) + @s = StringScanner.new(src) + @lineno = 1 + end + + extend Forwardable + def_delegator "@s", :eos? + def_delegator "@s", :rest + + attr_reader :lineno + + def scan(re) + advance_lineno(@s.scan(re)) + end + + def scan_until(re) + advance_lineno(@s.scan_until(re)) + end + + def skip(re) + str = advance_lineno(@s.scan(re)) + str ? str.size : nil + end + + def getch + advance_lineno(@s.getch) + end + + private + + def advance_lineno(str) + @lineno += str.count("\n") if str + str + end +end + +class YaccFileParser + + Result = Struct.new(:terminals, :precedence_table, :start, + :header, :grammar, :usercode, :filename) + class Result # reopen + def initialize + super + self.terminals = [] + self.precedence_table = [] + self.start = nil + self.grammar = [] + self.header = nil + self.usercode = nil + self.filename = nil + end + + def grammar_without_actions + grammar().map {|text| text[0,1] == '{' ? '{}' : text } + end + end + + def YaccFileParser.parse_file(filename) + new().parse(File.read(filename), filename) + end + + def parse(src, filename = '-') + @result = Result.new + @filename = filename + @result.filename = filename + s = StringScanner_withlineno.new(src) + parse_header s + parse_grammar s + @result + end + + private + + COMMENT = %r + CHAR = /'((?:[^'\\]+|\\.)*)'/ + STRING = /"((?:[^"\\]+|\\.)*)"/ + + def parse_header(s) + skip_until_percent s + until s.eos? 
+ case + when t = s.scan(/left/) + @result.precedence_table.push ['left', scan_symbols(s)] + when t = s.scan(/right/) + @result.precedence_table.push ['right', scan_symbols(s)] + when t = s.scan(/nonassoc/) + @result.precedence_table.push ['nonassoc', scan_symbols(s)] + when t = s.scan(/token/) + list = scan_symbols(s) + list.shift if /\A<(.*)>\z/ =~ list[0] + @result.terminals.concat list + when t = s.scan(/start/) + @result.start = scan_symbols(s)[0] + when s.skip(%r<(?: + type | union | expect | thong | binary | + semantic_parser | pure_parser | no_lines | + raw | token_table + )\b>x) + skip_until_percent s + when s.skip(/\{/) # header (%{...%}) + str = s.scan_until(/\%\}/) + str.chop! + str.chop! + @result.header = str + skip_until_percent s + when s.skip(/\%/) # grammar (%%...) + return + else + raise ParseError, "#{@filename}:#{s.lineno}: scan error" + end + end + end + + def skip_until_percent(s) + until s.eos? + s.skip /[^\%\/]+/ + next if s.skip(COMMENT) + return if s.getch == '%' + end + end + + def scan_symbols(s) + list = [] + until s.eos? + s.skip /\s+/ + if s.skip(COMMENT) + ; + elsif t = s.scan(CHAR) + list.push t + elsif t = s.scan(STRING) + list.push t + elsif s.skip(/\%/) + break + elsif t = s.scan(/\S+/) + list.push t + else + # the line counter is kept by the scanner; this class never sets @lineno + raise ParseError, "#{@filename}:#{s.lineno}: scan error" + end + end + list + end + + def parse_grammar(s) + buf = [] + until s.eos? + if t = s.scan(/[^%'"{\/]+/) + buf.push t + break if s.eos? + end + if s.skip(/\{/) + buf.push scan_action(s) + elsif t = s.scan(/'(?:[^'\\]+|\\.)*'/) then buf.push t + elsif t = s.scan(/"(?:[^"\\]+|\\.)*"/) then buf.push t + elsif t = s.scan(COMMENT) then buf.push t + elsif s.skip(/%prec\b/) then buf.push '=' + elsif s.skip(/%%/) + @result.usercode = s.rest + break + else + buf.push s.getch + end + end + @result.grammar = buf + end + + def scan_action(s) + buf = '{' + nest = 1 + until s.eos? + if t = s.scan(%r<[^/{}'"]+>) + buf << t + break if s.eos?
+ elsif t = s.scan(COMMENT) + buf << t + elsif t = s.scan(CHAR) + buf << t + elsif t = s.scan(STRING) + buf << t + else + c = s.getch + buf << c + case c + when '{' + nest += 1 + when '}' + nest -= 1 + return buf if nest == 0 + end + end + end + $stderr.puts "warning: unterminated action in #{@filename}" + buf + end + +end + +unless Object.method_defined?(:funcall) + class Object + alias funcall __send__ + end +end + + +main diff --git a/test/racc/assets/cadenza.y b/test/racc/assets/cadenza.y new file mode 100644 index 0000000000..1940ead225 --- /dev/null +++ b/test/racc/assets/cadenza.y @@ -0,0 +1,170 @@ +# This grammar is released under an MIT license +# Author: William Howard (http://github.com/whoward) +# Source: https://github.com/whoward/cadenza/blob/master/src/cadenza.y + +class Cadenza::RaccParser + +/* expect this many shift/reduce conflicts */ +expect 37 + +rule + target + : document + | /* none */ { result = nil } + ; + + parameter_list + : logical_expression { result = [val[0]] } + | parameter_list ',' logical_expression { result = val[0].push(val[2]) } + ; + + /* this has a shift/reduce conflict but since Racc will shift in this case it is the correct behavior */ + primary_expression + : IDENTIFIER { result = VariableNode.new(val[0].value) } + | IDENTIFIER parameter_list { result = VariableNode.new(val[0].value, val[1]) } + | INTEGER { result = ConstantNode.new(val[0].value) } + | REAL { result = ConstantNode.new(val[0].value) } + | STRING { result = ConstantNode.new(val[0].value) } + | '(' filtered_expression ')' { result = val[1] } + ; + + multiplicative_expression + : primary_expression + | multiplicative_expression '*' primary_expression { result = OperationNode.new(val[0], "*", val[2]) } + | multiplicative_expression '/' primary_expression { result = OperationNode.new(val[0], "/", val[2]) } + ; + + additive_expression + : multiplicative_expression + | additive_expression '+' multiplicative_expression { result = OperationNode.new(val[0], "+", 
val[2]) } + | additive_expression '-' multiplicative_expression { result = OperationNode.new(val[0], "-", val[2]) } + ; + + boolean_expression + : additive_expression + | boolean_expression OP_EQ additive_expression { result = OperationNode.new(val[0], "==", val[2]) } + | boolean_expression OP_NEQ additive_expression { result = OperationNode.new(val[0], "!=", val[2]) } + | boolean_expression OP_LEQ additive_expression { result = OperationNode.new(val[0], "<=", val[2]) } + | boolean_expression OP_GEQ additive_expression { result = OperationNode.new(val[0], ">=", val[2]) } + | boolean_expression '>' additive_expression { result = OperationNode.new(val[0], ">", val[2]) } + | boolean_expression '<' additive_expression { result = OperationNode.new(val[0], "<", val[2]) } + ; + + inverse_expression + : boolean_expression + | NOT boolean_expression { result = BooleanInverseNode.new(val[1]) } + ; + + logical_expression + : inverse_expression + | logical_expression AND inverse_expression { result = OperationNode.new(val[0], "and", val[2]) } + | logical_expression OR inverse_expression { result = OperationNode.new(val[0], "or", val[2]) } + ; + + filter + : IDENTIFIER { result = FilterNode.new(val[0].value) } + | IDENTIFIER ':' parameter_list { result = FilterNode.new(val[0].value, val[2]) } + ; + + filter_list + : filter { result = [val[0]] } + | filter_list '|' filter { result = val[0].push(val[2]) } + ; + + filtered_expression + : logical_expression + | logical_expression '|' filter_list { result = FilteredValueNode.new(val[0], val[2]) } + ; + + inject_statement + : VAR_OPEN filtered_expression VAR_CLOSE { result = val[1] } + ; + + if_tag + : STMT_OPEN IF logical_expression STMT_CLOSE { open_scope!; result = val[2] } + | STMT_OPEN UNLESS logical_expression STMT_CLOSE { open_scope!; result = BooleanInverseNode.new(val[2]) } + ; + + else_tag + : STMT_OPEN ELSE STMT_CLOSE { result = close_scope!; open_scope! 
} + ; + + end_if_tag + : STMT_OPEN ENDIF STMT_CLOSE { result = close_scope! } + | STMT_OPEN ENDUNLESS STMT_CLOSE { result = close_scope! } + ; + + if_block + : if_tag end_if_tag { result = IfNode.new(val[0], val[1]) } + | if_tag document end_if_tag { result = IfNode.new(val[0], val[2]) } + | if_tag else_tag document end_if_tag { result = IfNode.new(val[0], val[1], val[3]) } + | if_tag document else_tag end_if_tag { result = IfNode.new(val[0], val[2], val[3]) } + | if_tag document else_tag document end_if_tag { result = IfNode.new(val[0], val[2], val[4]) } + ; + + for_tag + : STMT_OPEN FOR IDENTIFIER IN filtered_expression STMT_CLOSE { open_scope!; result = [val[2].value, val[4]] } + ; + + end_for_tag + : STMT_OPEN ENDFOR STMT_CLOSE { result = close_scope! } + ; + + /* this has a shift/reduce conflict but since Racc will shift in this case it is the correct behavior */ + for_block + : for_tag end_for_tag { result = ForNode.new(VariableNode.new(val[0].first), val[0].last, val[1]) } + | for_tag document end_for_tag { result = ForNode.new(VariableNode.new(val[0].first), val[0].last, val[2]) } + ; + + block_tag + : STMT_OPEN BLOCK IDENTIFIER STMT_CLOSE { result = open_block_scope!(val[2].value) } + ; + + end_block_tag + : STMT_OPEN ENDBLOCK STMT_CLOSE { result = close_block_scope! } + ; + + /* this has a shift/reduce conflict but since Racc will shift in this case it is the correct behavior */ + block_block + : block_tag end_block_tag { result = BlockNode.new(val[0], val[1]) } + | block_tag document end_block_tag { result = BlockNode.new(val[0], val[2]) } + ; + + generic_block_tag + : STMT_OPEN IDENTIFIER STMT_CLOSE { open_scope!; result = [val[1].value, []] } + | STMT_OPEN IDENTIFIER parameter_list STMT_CLOSE { open_scope!; result = [val[1].value, val[2]] } + ; + + end_generic_block_tag + : STMT_OPEN END STMT_CLOSE { result = close_scope! 
} + ; + + generic_block + : generic_block_tag document end_generic_block_tag { result = GenericBlockNode.new(val[0].first, val[2], val[0].last) } + ; + + extends_statement + : STMT_OPEN EXTENDS STRING STMT_CLOSE { result = val[2].value } + | STMT_OPEN EXTENDS IDENTIFIER STMT_CLOSE { result = VariableNode.new(val[2].value) } + ; + + document_component + : TEXT_BLOCK { result = TextNode.new(val[0].value) } + | inject_statement + | if_block + | for_block + | generic_block + | block_block + ; + + document + : document_component { push val[0] } + | document document_component { push val[1] } + | extends_statement { document.extends = val[0] } + | document extends_statement { document.extends = val[1] } + ; + +---- header ---- +# racc_parser.rb : generated by racc + +---- inner ---- diff --git a/test/racc/assets/cast.y b/test/racc/assets/cast.y new file mode 100644 index 0000000000..d180c09e14 --- /dev/null +++ b/test/racc/assets/cast.y @@ -0,0 +1,926 @@ +# The MIT License +# +# Copyright (c) George Ogata +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +class C::Parser +# shift/reduce conflict on "if (c) if (c) ; else ; else ;" +expect 1 +rule + +# A.2.4 External definitions + +# Returns TranslationUnit +translation_unit + : external_declaration {result = TranslationUnit.new_at(val[0].pos, NodeChain[val[0]])} + | translation_unit external_declaration {result = val[0]; result.entities << val[1]} + +# Returns Declaration|FunctionDef +external_declaration + : function_definition {result = val[0]} + | declaration {result = val[0]} + +# Returns FunctionDef +function_definition + : declaration_specifiers declarator declaration_list compound_statement {result = make_function_def(val[0][0], val[0][1], val[1], val[2], val[3])} + | declaration_specifiers declarator compound_statement {result = make_function_def(val[0][0], val[0][1], val[1], nil , val[2])} + +# Returns [Declaration] +declaration_list + : declaration {result = [val[0]]} + | declaration_list declaration {result = val[0] << val[1]} + +# A.2.3 Statements + +# Returns Statement +statement + : labeled_statement {result = val[0]} + | compound_statement {result = val[0]} + | expression_statement {result = val[0]} + | selection_statement {result = val[0]} + | iteration_statement {result = val[0]} + | jump_statement {result = val[0]} + +# Returns Statement +labeled_statement + : identifier COLON statement {val[2].labels.unshift(PlainLabel.new_at(val[0].pos, val[0].val)); result = val[2]} + | CASE constant_expression COLON statement {val[3].labels.unshift(Case .new_at(val[0].pos, val[1] )); result = val[3]} + | DEFAULT COLON statement {val[2].labels.unshift(Default .new_at(val[0].pos )); result = val[2]} + # type names can also be used as labels + | typedef_name COLON statement 
{val[2].labels.unshift(PlainLabel.new_at(val[0].pos, val[0].name)); result = val[2]} + +# Returns Block +compound_statement + : LBRACE block_item_list RBRACE {result = Block.new_at(val[0].pos, val[1])} + | LBRACE RBRACE {result = Block.new_at(val[0].pos )} + +# Returns NodeChain[Declaration|Statement] +block_item_list + : block_item {result = NodeChain[val[0]]} + | block_item_list block_item {result = val[0] << val[1]} + +# Returns Declaration|Statement +block_item + : declaration {result = val[0]} + | statement {result = val[0]} + +# Returns ExpressionStatement +expression_statement + : expression SEMICOLON {result = ExpressionStatement.new_at(val[0].pos, val[0])} + | SEMICOLON {result = ExpressionStatement.new_at(val[0].pos )} + +# Returns Statement +selection_statement + : IF LPAREN expression RPAREN statement {result = If .new_at(val[0].pos, val[2], val[4] )} + | IF LPAREN expression RPAREN statement ELSE statement {result = If .new_at(val[0].pos, val[2], val[4], val[6])} + | SWITCH LPAREN expression RPAREN statement {result = Switch.new_at(val[0].pos, val[2], val[4] )} + +# Returns Statement +iteration_statement + : WHILE LPAREN expression RPAREN statement {result = While.new_at(val[0].pos, val[2], val[4] )} + | DO statement WHILE LPAREN expression RPAREN SEMICOLON {result = While.new_at(val[0].pos, val[4], val[1], :do => true )} + | FOR LPAREN expression SEMICOLON expression SEMICOLON expression RPAREN statement {result = For.new_at(val[0].pos, val[2], val[4], val[6], val[8])} + | FOR LPAREN expression SEMICOLON expression SEMICOLON RPAREN statement {result = For.new_at(val[0].pos, val[2], val[4], nil , val[7])} + | FOR LPAREN expression SEMICOLON SEMICOLON expression RPAREN statement {result = For.new_at(val[0].pos, val[2], nil , val[5], val[7])} + | FOR LPAREN expression SEMICOLON SEMICOLON RPAREN statement {result = For.new_at(val[0].pos, val[2], nil , nil , val[6])} + | FOR LPAREN SEMICOLON expression SEMICOLON expression RPAREN statement {result = 
For.new_at(val[0].pos, nil , val[3], val[5], val[7])} + | FOR LPAREN SEMICOLON expression SEMICOLON RPAREN statement {result = For.new_at(val[0].pos, nil , val[3], nil , val[6])} + | FOR LPAREN SEMICOLON SEMICOLON expression RPAREN statement {result = For.new_at(val[0].pos, nil , nil , val[4], val[6])} + | FOR LPAREN SEMICOLON SEMICOLON RPAREN statement {result = For.new_at(val[0].pos, nil , nil , nil , val[5])} + | FOR LPAREN declaration expression SEMICOLON expression RPAREN statement {result = For.new_at(val[0].pos, val[2], val[3], val[5], val[7])} + | FOR LPAREN declaration expression SEMICOLON RPAREN statement {result = For.new_at(val[0].pos, val[2], val[3], nil , val[6])} + | FOR LPAREN declaration SEMICOLON expression RPAREN statement {result = For.new_at(val[0].pos, val[2], nil , val[4], val[6])} + | FOR LPAREN declaration SEMICOLON RPAREN statement {result = For.new_at(val[0].pos, val[2], nil , nil , val[5])} + +# Returns Statement +jump_statement + : GOTO identifier SEMICOLON {result = Goto .new_at(val[0].pos, val[1].val)} + | CONTINUE SEMICOLON {result = Continue.new_at(val[0].pos )} + | BREAK SEMICOLON {result = Break .new_at(val[0].pos )} + | RETURN expression SEMICOLON {result = Return .new_at(val[0].pos, val[1] )} + | RETURN SEMICOLON {result = Return .new_at(val[0].pos )} + # type names can also be used as labels + | GOTO typedef_name SEMICOLON {result = Goto .new_at(val[0].pos, val[1].name)} + +# A.2.2 Declarations + +# Returns Declaration +declaration + : declaration_specifiers init_declarator_list SEMICOLON {result = make_declaration(val[0][0], val[0][1], val[1])} + | declaration_specifiers SEMICOLON {result = make_declaration(val[0][0], val[0][1], NodeArray[])} + +# Returns {Pos, [Symbol]} +declaration_specifiers + : storage_class_specifier declaration_specifiers {val[1][1] << val[0][1]; result = val[1]} + | storage_class_specifier {result = [val[0][0], [val[0][1]]]} + | type_specifier declaration_specifiers {val[1][1] << val[0][1]; result = 
val[1]} + | type_specifier {result = [val[0][0], [val[0][1]]]} + | type_qualifier declaration_specifiers {val[1][1] << val[0][1]; result = val[1]} + | type_qualifier {result = [val[0][0], [val[0][1]]]} + | function_specifier declaration_specifiers {val[1][1] << val[0][1]; result = val[1]} + | function_specifier {result = [val[0][0], [val[0][1]]]} + +# Returns NodeArray[Declarator] +init_declarator_list + : init_declarator {result = NodeArray[val[0]]} + | init_declarator_list COMMA init_declarator {result = val[0] << val[2]} + +# Returns Declarator +init_declarator + : declarator {result = val[0]} + | declarator EQ initializer {val[0].init = val[2]; result = val[0]} + +# Returns [Pos, Symbol] +storage_class_specifier + : TYPEDEF {result = [val[0].pos, :typedef ]} + | EXTERN {result = [val[0].pos, :extern ]} + | STATIC {result = [val[0].pos, :static ]} + | AUTO {result = [val[0].pos, :auto ]} + | REGISTER {result = [val[0].pos, :register]} + +# Returns [Pos, Type|Symbol] +type_specifier + : VOID {result = [val[0].pos, :void ]} + | CHAR {result = [val[0].pos, :char ]} + | SHORT {result = [val[0].pos, :short ]} + | INT {result = [val[0].pos, :int ]} + | LONG {result = [val[0].pos, :long ]} + | FLOAT {result = [val[0].pos, :float ]} + | DOUBLE {result = [val[0].pos, :double ]} + | SIGNED {result = [val[0].pos, :signed ]} + | UNSIGNED {result = [val[0].pos, :unsigned ]} + | BOOL {result = [val[0].pos, :_Bool ]} + | COMPLEX {result = [val[0].pos, :_Complex ]} + | IMAGINARY {result = [val[0].pos, :_Imaginary]} + | struct_or_union_specifier {result = [val[0].pos, val[0] ]} + | enum_specifier {result = [val[0].pos, val[0] ]} + | typedef_name {result = [val[0].pos, val[0] ]} + +# Returns Struct|Union +struct_or_union_specifier + : struct_or_union identifier LBRACE struct_declaration_list RBRACE {result = val[0][1].new_at(val[0][0], val[1].val, val[3])} + | struct_or_union LBRACE struct_declaration_list RBRACE {result = val[0][1].new_at(val[0][0], nil , val[2])} + | 
struct_or_union identifier {result = val[0][1].new_at(val[0][0], val[1].val, nil )} + # type names can also be used as struct identifiers + | struct_or_union typedef_name LBRACE struct_declaration_list RBRACE {result = val[0][1].new_at(val[0][0], val[1].name, val[3])} + | struct_or_union typedef_name {result = val[0][1].new_at(val[0][0], val[1].name, nil )} + +# Returns [Pos, Class] +struct_or_union + : STRUCT {result = [val[0].pos, Struct]} + | UNION {result = [val[0].pos, Union ]} + +# Returns NodeArray[Declaration] +struct_declaration_list + : struct_declaration {result = NodeArray[val[0]]} + | struct_declaration_list struct_declaration {val[0] << val[1]; result = val[0]} + +# Returns Declaration +struct_declaration + : specifier_qualifier_list struct_declarator_list SEMICOLON {result = make_declaration(val[0][0], val[0][1], val[1])} + +# Returns {Pos, [Symbol]} +specifier_qualifier_list + : type_specifier specifier_qualifier_list {val[1][1] << val[0][1]; result = val[1]} + | type_specifier {result = [val[0][0], [val[0][1]]]} + | type_qualifier specifier_qualifier_list {val[1][1] << val[0][1]; result = val[1]} + | type_qualifier {result = [val[0][0], [val[0][1]]]} + +# Returns NodeArray[Declarator] +struct_declarator_list + : struct_declarator {result = NodeArray[val[0]]} + | struct_declarator_list COMMA struct_declarator {result = val[0] << val[2]} + +# Returns Declarator +struct_declarator + : declarator {result = val[0]} + | declarator COLON constant_expression {result = val[0]; val[0].num_bits = val[2]} + | COLON constant_expression {result = Declarator.new_at(val[0].pos, :num_bits => val[1])} + +# Returns Enum +enum_specifier + : ENUM identifier LBRACE enumerator_list RBRACE {result = Enum.new_at(val[0].pos, val[1].val, val[3])} + | ENUM LBRACE enumerator_list RBRACE {result = Enum.new_at(val[0].pos, nil , val[2])} + | ENUM identifier LBRACE enumerator_list COMMA RBRACE {result = Enum.new_at(val[0].pos, val[1].val, val[3])} + | ENUM LBRACE enumerator_list 
COMMA RBRACE {result = Enum.new_at(val[0].pos, nil , val[2])} + | ENUM identifier {result = Enum.new_at(val[0].pos, val[1].val, nil )} + # type names can also be used as enum names + | ENUM typedef_name LBRACE enumerator_list RBRACE {result = Enum.new_at(val[0].pos, val[1].name, val[3])} + | ENUM typedef_name LBRACE enumerator_list COMMA RBRACE {result = Enum.new_at(val[0].pos, val[1].name, val[3])} + | ENUM typedef_name {result = Enum.new_at(val[0].pos, val[1].name, nil )} + +# Returns NodeArray[Enumerator] +enumerator_list + : enumerator {result = NodeArray[val[0]]} + | enumerator_list COMMA enumerator {result = val[0] << val[2]} + +# Returns Enumerator +enumerator + : enumeration_constant {result = Enumerator.new_at(val[0].pos, val[0].val, nil )} + | enumeration_constant EQ constant_expression {result = Enumerator.new_at(val[0].pos, val[0].val, val[2])} + +# Returns [Pos, Symbol] +type_qualifier + : CONST {result = [val[0].pos, :const ]} + | RESTRICT {result = [val[0].pos, :restrict]} + | VOLATILE {result = [val[0].pos, :volatile]} + +# Returns [Pos, Symbol] +function_specifier + : INLINE {result = [val[0].pos, :inline]} + +# Returns Declarator +declarator + : pointer direct_declarator {result = add_decl_type(val[1], val[0])} + | direct_declarator {result = val[0]} + +# Returns Declarator +direct_declarator + : identifier {result = Declarator.new_at(val[0].pos, nil, val[0].val)} + | LPAREN declarator RPAREN {result = val[1]} + | direct_declarator LBRACKET type_qualifier_list assignment_expression RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LBRACKET type_qualifier_list RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LBRACKET assignment_expression RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos, nil, val[2]))} + | direct_declarator LBRACKET RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} + | direct_declarator LBRACKET STATIC 
type_qualifier_list assignment_expression RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LBRACKET STATIC assignment_expression RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LBRACKET type_qualifier_list MUL RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LBRACKET MUL RBRACKET {result = add_decl_type(val[0], Array.new_at(val[0].pos ))} # TODO + | direct_declarator LPAREN parameter_type_list RPAREN {result = add_decl_type(val[0], Function.new_at(val[0].pos, nil, param_list(*val[2]), :var_args => val[2][1]))} + | direct_declarator LPAREN identifier_list RPAREN {result = add_decl_type(val[0], Function.new_at(val[0].pos, nil, val[2]))} + | direct_declarator LPAREN RPAREN {result = add_decl_type(val[0], Function.new_at(val[0].pos ))} + +# Returns Pointer +pointer + : MUL type_qualifier_list {result = add_type_quals(Pointer.new_at(val[0].pos), val[1][1]) } + | MUL {result = Pointer.new_at(val[0].pos) } + | MUL type_qualifier_list pointer {p = add_type_quals(Pointer.new_at(val[0].pos), val[1][1]); val[2].direct_type = p; result = val[2]} + | MUL pointer {p = Pointer.new_at(val[0].pos) ; val[1].direct_type = p; result = val[1]} + +# Returns {Pos, [Symbol]} +type_qualifier_list + : type_qualifier {result = [val[0][0], [val[0][1]]]} + | type_qualifier_list type_qualifier {val[0][1] << val[1][1]; result = val[0]} + +# Returns [NodeArray[Parameter], var_args?] 
+parameter_type_list + : parameter_list {result = [val[0], false]} + | parameter_list COMMA ELLIPSIS {result = [val[0], true ]} + +# Returns NodeArray[Parameter] +parameter_list + : parameter_declaration {result = NodeArray[val[0]]} + | parameter_list COMMA parameter_declaration {result = val[0] << val[2]} + +# Returns Parameter +parameter_declaration + : declaration_specifiers declarator {ind_type = val[1].indirect_type and ind_type.detach + result = make_parameter(val[0][0], val[0][1], ind_type, val[1].name)} + | declaration_specifiers abstract_declarator {result = make_parameter(val[0][0], val[0][1], val[1] , nil )} + | declaration_specifiers {result = make_parameter(val[0][0], val[0][1], nil , nil )} + +# Returns NodeArray[Parameter] +identifier_list + : identifier {result = NodeArray[Parameter.new_at(val[0].pos, nil, val[0].val)]} + | identifier_list COMMA identifier {result = val[0] << Parameter.new_at(val[2].pos, nil, val[2].val)} + +# Returns Type +type_name + : specifier_qualifier_list abstract_declarator {val[1].direct_type = make_direct_type(val[0][0], val[0][1]); result = val[1]} + | specifier_qualifier_list {result = make_direct_type(val[0][0], val[0][1]) } + +# Returns Type +abstract_declarator + : pointer {result = val[0]} + | pointer direct_abstract_declarator {val[1].direct_type = val[0]; result = val[1]} + | direct_abstract_declarator {result = val[0]} + +# Returns Type +direct_abstract_declarator + : LPAREN abstract_declarator RPAREN {result = val[1]} + | direct_abstract_declarator LBRACKET assignment_expression RBRACKET {val[0].direct_type = Array.new_at(val[0].pos, nil, val[2]); result = val[0]} + | direct_abstract_declarator LBRACKET RBRACKET {val[0].direct_type = Array.new_at(val[0].pos, nil, nil ); result = val[0]} + | LBRACKET assignment_expression RBRACKET {result = Array.new_at(val[0].pos, nil, val[1])} + | LBRACKET RBRACKET {result = Array.new_at(val[0].pos )} + | direct_abstract_declarator LBRACKET MUL RBRACKET {val[0].direct_type = 
Array.new_at(val[0].pos); result = val[0]} # TODO + | LBRACKET MUL RBRACKET {result = Array.new_at(val[0].pos)} # TODO + | direct_abstract_declarator LPAREN parameter_type_list RPAREN {val[0].direct_type = Function.new_at(val[0].pos, nil, param_list(*val[2]), val[2][1]); result = val[0]} + | direct_abstract_declarator LPAREN RPAREN {val[0].direct_type = Function.new_at(val[0].pos ); result = val[0]} + | LPAREN parameter_type_list RPAREN {result = Function.new_at(val[0].pos, nil, param_list(*val[1]), val[1][1])} + | LPAREN RPAREN {result = Function.new_at(val[0].pos )} + +# Returns CustomType +typedef_name + #: identifier -- insufficient since we must distinguish between type + # names and var names (otherwise we have a conflict) + : TYPENAME {result = CustomType.new_at(val[0].pos, val[0].val)} + +# Returns Expression +initializer + : assignment_expression {result = val[0]} + | LBRACE initializer_list RBRACE {result = CompoundLiteral.new_at(val[0].pos, nil, val[1])} + | LBRACE initializer_list COMMA RBRACE {result = CompoundLiteral.new_at(val[0].pos, nil, val[1])} + +# Returns NodeArray[MemberInit] +initializer_list + : designation initializer {result = NodeArray[MemberInit.new_at(val[0][0] , val[0][1], val[1])]} + | initializer {result = NodeArray[MemberInit.new_at(val[0].pos, nil , val[0])]} + | initializer_list COMMA designation initializer {result = val[0] << MemberInit.new_at(val[2][0] , val[2][1], val[3])} + | initializer_list COMMA initializer {result = val[0] << MemberInit.new_at(val[2].pos, nil , val[2])} + +# Returns {Pos, NodeArray[Expression|Token]} +designation + : designator_list EQ {result = val[0]} + +# Returns {Pos, NodeArray[Expression|Token]} +designator_list + : designator {result = val[0]; val[0][1] = NodeArray[val[0][1]]} + | designator_list designator {result = val[0]; val[0][1] << val[1][1]} + +# Returns {Pos, Expression|Member} +designator + : LBRACKET constant_expression RBRACKET {result = [val[1].pos, val[1] ]} + | DOT identifier {result = 
[val[1].pos, Member.new_at(val[1].pos, val[1].val)]} + +# A.2.1 Expressions + +# Returns Expression +primary_expression + : identifier {result = Variable.new_at(val[0].pos, val[0].val)} + | constant {result = val[0]} + | string_literal {result = val[0]} + # GCC EXTENSION: allow a compound statement in parentheses as an expression + | LPAREN expression RPAREN {result = val[1]} + | LPAREN compound_statement RPAREN {block_expressions_enabled? or parse_error val[0].pos, "compound statement found where expression expected" + result = BlockExpression.new(val[1]); result.pos = val[0].pos} + +# Returns Expression +postfix_expression + : primary_expression {result = val[0]} + | postfix_expression LBRACKET expression RBRACKET {result = Index .new_at(val[0].pos, val[0], val[2])} + | postfix_expression LPAREN argument_expression_list RPAREN {result = Call .new_at(val[0].pos, val[0], val[2] )} + | postfix_expression LPAREN RPAREN {result = Call .new_at(val[0].pos, val[0], NodeArray[])} + | postfix_expression DOT identifier {result = Dot .new_at(val[0].pos, val[0], Member.new(val[2].val))} + | postfix_expression ARROW identifier {result = Arrow .new_at(val[0].pos, val[0], Member.new(val[2].val))} + | postfix_expression INC {result = PostInc .new_at(val[0].pos, val[0] )} + | postfix_expression DEC {result = PostDec .new_at(val[0].pos, val[0] )} + | LPAREN type_name RPAREN LBRACE initializer_list RBRACE {result = CompoundLiteral.new_at(val[0].pos, val[1], val[4])} + | LPAREN type_name RPAREN LBRACE initializer_list COMMA RBRACE {result = CompoundLiteral.new_at(val[0].pos, val[1], val[4])} + +# Returns [Expression|Type] +argument_expression_list + : argument_expression {result = NodeArray[val[0]]} + | argument_expression_list COMMA argument_expression {result = val[0] << val[2]} + +# Returns Expression|Type -- EXTENSION: allow type names here too, to support some standard library macros (e.g., va_arg [7.15.1.1]) +argument_expression + : assignment_expression {result = val[0]} + | 
type_name {result = val[0]} + +# Returns Expression +unary_expression + : postfix_expression {result = val[0]} + | INC unary_expression {result = PreInc.new_at(val[0].pos, val[1])} + | DEC unary_expression {result = PreDec.new_at(val[0].pos, val[1])} + | unary_operator cast_expression {result = val[0][0].new_at(val[0][1], val[1])} + | SIZEOF unary_expression {result = Sizeof.new_at(val[0].pos, val[1])} + | SIZEOF LPAREN type_name RPAREN {result = Sizeof.new_at(val[0].pos, val[2])} + +# Returns [Class, Pos] +unary_operator + : AND {result = [Address , val[0].pos]} + | MUL {result = [Dereference, val[0].pos]} + | ADD {result = [Positive , val[0].pos]} + | SUB {result = [Negative , val[0].pos]} + | NOT {result = [BitNot , val[0].pos]} + | BANG {result = [Not , val[0].pos]} + +# Returns Expression +cast_expression + : unary_expression {result = val[0]} + | LPAREN type_name RPAREN cast_expression {result = Cast.new_at(val[0].pos, val[1], val[3])} + +# Returns Expression +multiplicative_expression + : cast_expression {result = val[0]} + | multiplicative_expression MUL cast_expression {result = Multiply.new_at(val[0].pos, val[0], val[2])} + | multiplicative_expression DIV cast_expression {result = Divide .new_at(val[0].pos, val[0], val[2])} + | multiplicative_expression MOD cast_expression {result = Mod .new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +additive_expression + : multiplicative_expression {result = val[0]} + | additive_expression ADD multiplicative_expression {result = Add .new_at(val[0].pos, val[0], val[2])} + | additive_expression SUB multiplicative_expression {result = Subtract.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +shift_expression + : additive_expression {result = val[0]} + | shift_expression LSHIFT additive_expression {result = ShiftLeft .new_at(val[0].pos, val[0], val[2])} + | shift_expression RSHIFT additive_expression {result = ShiftRight.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression 
+relational_expression + : shift_expression {result = val[0]} + | relational_expression LT shift_expression {result = Less.new_at(val[0].pos, val[0], val[2])} + | relational_expression GT shift_expression {result = More.new_at(val[0].pos, val[0], val[2])} + | relational_expression LEQ shift_expression {result = LessOrEqual.new_at(val[0].pos, val[0], val[2])} + | relational_expression GEQ shift_expression {result = MoreOrEqual.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +equality_expression + : relational_expression {result = val[0]} + | equality_expression EQEQ relational_expression {result = Equal .new_at(val[0].pos, val[0], val[2])} + | equality_expression NEQ relational_expression {result = NotEqual.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +and_expression + : equality_expression {result = val[0]} + | and_expression AND equality_expression {result = BitAnd.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +exclusive_or_expression + : and_expression {result = val[0]} + | exclusive_or_expression XOR and_expression {result = BitXor.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +inclusive_or_expression + : exclusive_or_expression {result = val[0]} + | inclusive_or_expression OR exclusive_or_expression {result = BitOr.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +logical_and_expression + : inclusive_or_expression {result = val[0]} + | logical_and_expression ANDAND inclusive_or_expression {result = And.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +logical_or_expression + : logical_and_expression {result = val[0]} + | logical_or_expression OROR logical_and_expression {result = Or.new_at(val[0].pos, val[0], val[2])} + +# Returns Expression +conditional_expression + : logical_or_expression {result = val[0]} + | logical_or_expression QUESTION expression COLON conditional_expression {result = Conditional.new_at(val[0].pos, val[0], val[2], val[4])} + +# Returns Expression 
+assignment_expression + : conditional_expression {result = val[0]} + | unary_expression assignment_operator assignment_expression {result = val[1].new_at(val[0].pos, val[0], val[2])} + +# Returns Class +assignment_operator + : EQ {result = Assign} + | MULEQ {result = MultiplyAssign} + | DIVEQ {result = DivideAssign} + | MODEQ {result = ModAssign} + | ADDEQ {result = AddAssign} + | SUBEQ {result = SubtractAssign} + | LSHIFTEQ {result = ShiftLeftAssign} + | RSHIFTEQ {result = ShiftRightAssign} + | ANDEQ {result = BitAndAssign} + | XOREQ {result = BitXorAssign} + | OREQ {result = BitOrAssign} + +# Returns Expression +expression + : assignment_expression {result = val[0]} + | expression COMMA assignment_expression { + if val[0].is_a? Comma + if val[2].is_a? Comma + val[0].exprs.push(*val[2].exprs) + else + val[0].exprs << val[2] + end + result = val[0] + else + if val[2].is_a? Comma + val[2].exprs.unshift(val[0]) + val[2].pos = val[0].pos + result = val[2] + else + result = Comma.new_at(val[0].pos, NodeArray[val[0], val[2]]) + end + end + } + +# Returns Expression +constant_expression + : conditional_expression {result = val[0]} + +# A.1.1 -- Lexical elements +# +# token +# : keyword (raw string) +# | identifier expanded below +# | constant expanded below +# | string_literal expanded below +# | punctuator (raw string) +# +# preprocessing-token (skip) + +# Returns Token +identifier + : ID {result = val[0]} + +# Returns Literal +constant + : ICON {result = val[0].val; result.pos = val[0].pos} + | FCON {result = val[0].val; result.pos = val[0].pos} + #| enumeration_constant -- these are parsed as identifiers at all + # places the `constant' nonterminal appears + | CCON {result = val[0].val; result.pos = val[0].pos} + +# Returns Token +enumeration_constant + : ID {result = val[0]} + +# Returns StringLiteral +# Also handles string literal concatenation (6.4.5.4) +string_literal + : string_literal SCON {val[0].val << val[1].val.val; result = val[0]} + | SCON { result = 
val[0].val; result.pos = val[0].pos } + +---- inner + # A.1.9 -- Preprocessing numbers -- skip + # A.1.8 -- Header names -- skip + + # A.1.7 -- Punctuators -- we don't bother with {##,#,%:,%:%:} since + # we don't do preprocessing + @@punctuators = %r'\+\+|-[->]|&&|\|\||\.\.\.|(?:<<|>>|[<>=!*/%+\-&^|])=?|[\[\](){}.~?:;,]' + @@digraphs = %r'<[:%]|[:%]>' + + # A.1.6 -- String Literals -- simple for us because we don't decode + # the string (and indeed accept some illegal strings) + @@string_literal = %r'L?"(?:[^\\]|\\.)*?"'m + + # A.1.5 -- Constants + @@decimal_floating_constant = %r'(?:(?:\d*\.\d+|\d+\.)(?:e[-+]?\d+)?|\d+e[-+]?\d+)[fl]?'i + @@hexadecimal_floating_constant = %r'0x(?:(?:[0-9a-f]*\.[0-9a-f]+|[0-9a-f]+\.)|[0-9a-f]+)p[-+]?\d+[fl]?'i + + @@integer_constant = %r'(?:[1-9][0-9]*|0x[0-9a-f]+|0[0-7]*)(?:ul?l?|ll?u?)?'i + @@floating_constant = %r'#{@@decimal_floating_constant}|#{@@hexadecimal_floating_constant}' + @@enumeration_constant = %r'[a-zA-Z_\\][a-zA-Z_\\0-9]*' + @@character_constant = %r"L?'(?:[^\\]|\\.)+?'" + # (note that as with string-literals, we accept some illegal + # character-constants) + + # A.1.4 -- Universal character names -- skip + + # A.1.3 -- Identifiers -- skip, since an identifier is lexically + # identical to an enumeration constant + + # A.1.2 Keywords + keywords = %w'auto break case char const continue default do +double else enum extern float for goto if inline int long register +restrict return short signed sizeof static struct switch typedef union + unsigned void volatile while _Bool _Complex _Imaginary' + @@keywords = %r"#{keywords.join('|')}" + + def initialize + @type_names = ::Set.new + + @warning_proc = lambda{} + @pos = C::Node::Pos.new(nil, 1, 0) + end + def initialize_copy(x) + @pos = x.pos.dup + @type_names = x.type_names.dup + end + attr_accessor :pos, :type_names + + def parse(str) + if str.respond_to? 
:read + str = str.read + end + @str = str + begin + prepare_lexer(str) + return do_parse + rescue ParseError => e + e.set_backtrace(caller) + raise + end + end + + # + # Error handler, as used by racc. + # + def on_error(error_token_id, error_value, value_stack) + if error_value == '$' + parse_error @pos, "unexpected EOF" + else + parse_error(error_value.pos, + "parse error on #{token_to_str(error_token_id)} (#{error_value.val})") + end + end + + def self.feature(name) + attr_writer "#{name}_enabled" + class_eval <<-EOS + def enable_#{name} + @#{name}_enabled = true + end + def #{name}_enabled? + @#{name}_enabled + end + EOS + end + private_class_method :feature + + # + # Allow blocks in parentheses as expressions, as per the gcc + # extension. [http://rubyurl.com/iB7] + # + feature :block_expressions + + private # --------------------------------------------------------- + + class Token + attr_accessor :pos, :val + def initialize(pos, val) + @pos = pos + @val = val + end + end + def eat(str) + lines = str.split(/\r\n|[\r\n]/, -1) + if lines.length == 1 + @pos.col_num += lines[0].length + else + @pos.line_num += lines.length - 1 + @pos.col_num = lines[-1].length + end + end + + # + # Make a Declaration from the given specs and declarators. + # + def make_declaration(pos, specs, declarators) + specs.all?{|x| x.is_a?(Symbol) || x.is_a?(Type)} or raise specs.map{|x| x.class}.inspect + decl = Declaration.new_at(pos, nil, declarators) + + # set storage class + storage_classes = specs.find_all do |x| + [:typedef, :extern, :static, :auto, :register].include? 
x + end + # 6.7.1p2: at most, one storage-class specifier may be given in + # the declaration specifiers in a declaration + storage_classes.length <= 1 or + begin + if declarators.length == 0 + for_name = '' + else + for_name = "for `#{declarators[0].name}'" + end + parse_error pos, "multiple or duplicate storage classes given #{for_name}'" + end + decl.storage = storage_classes[0] + + # set type (specifiers, qualifiers) + decl.type = make_direct_type(pos, specs) + + # set function specifiers + decl.inline = specs.include?(:inline) + + # look for new type names + if decl.typedef? + decl.declarators.each do |d| + if d.name + @type_names << d.name + end + end + end + + return decl + end + + def make_function_def(pos, specs, func_declarator, decl_list, defn) + add_decl_type(func_declarator, make_direct_type(pos, specs)) + + # get types from decl_list if necessary + function = func_declarator.indirect_type + function.is_a? Function or + parse_error pos, "non function type for function `#{func_declarator.name}'" + params = function.params + if decl_list + params.all?{|p| p.type.nil?} or + parse_error pos, "both prototype and declaration list given for `#{func_declarator.name}'" + decl_list.each do |declaration| + declaration.declarators.each do |declarator| + param = params.find{|p| p.name == declarator.name} or + parse_error pos, "no parameter named #{declarator.name}" + if declarator.indirect_type + param.type = declarator.indirect_type + param.type.direct_type = declaration.type.dup + else + param.type = declaration.type.dup + end + end + end + params.all?{|p| p.type} or + begin + s = params.find_all{|p| p.type.nil?}.map{|p| "`#{p.name}'"}.join(' and ') + parse_error pos, "types missing for parameters #{s}" + end + end + + fd = FunctionDef.new_at(pos, + function.detach, + func_declarator.name, + defn, + :no_prototype => !decl_list.nil?) 
+ + # set storage class + # 6.9.1p4: only extern or static allowed + specs.each do |s| + [:typedef, :auto, :register].include?(s) and + "`#{s}' illegal for function" + end + storage_classes = specs.find_all do |s| + s == :extern || s == :static + end + # 6.7.1p2: at most, one storage-class specifier may be given in + # the declaration specifiers in a declaration + storage_classes.length <= 1 or + "multiple or duplicate storage classes given for `#{func_declarator.name}'" + fd.storage = storage_classes[0] if storage_classes[0] + + # set function specifiers + # 6.7.4p5 'inline' can be repeated + fd.inline = specs.include?(:inline) + + return fd + end + + # + # Make a direct type from the list of type specifiers and type + # qualifiers. + # + def make_direct_type(pos, specs) + specs_order = [:signed, :unsigned, :short, :long, :double, :void, + :char, :int, :float, :_Bool, :_Complex, :_Imaginary] + + type_specs = specs.find_all do |x| + specs_order.include?(x) || !x.is_a?(Symbol) + end + type_specs.sort! 
do |a, b| + (specs_order.index(a)||100) <=> (specs_order.index(b)||100) + end + + # set type specifiers + # 6.7.2p2: the specifier list should be one of these + type = + case type_specs + when [:void] + Void.new + when [:char] + Char.new + when [:signed, :char] + Char.new :signed => true + when [:unsigned, :char] + Char.new :signed => false + when [:short], [:signed, :short], [:short, :int], + [:signed, :short, :int] + Int.new :longness => -1 + when [:unsigned, :short], [:unsigned, :short, :int] + Int.new :unsigned => true, :longness => -1 + when [:int], [:signed], [:signed, :int] + Int.new + when [:unsigned], [:unsigned, :int] + Int.new :unsigned => true + when [:long], [:signed, :long], [:long, :int], + [:signed, :long, :int] + Int.new :longness => 1 + when [:unsigned, :long], [:unsigned, :long, :int] + Int.new :longness => 1, :unsigned => true + when [:long, :long], [:signed, :long, :long], + [:long, :long, :int], [:signed, :long, :long, :int] + Int.new :longness => 2 + when [:unsigned, :long, :long], [:unsigned, :long, :long, :int] + Int.new :longness => 2, :unsigned => true + when [:float] + Float.new + when [:double] + Float.new :longness => 1 + when [:long, :double] + Float.new :longness => 2 + when [:_Bool] + Bool.new + when [:float, :_Complex] + Complex.new + when [:double, :_Complex] + Complex.new :longness => 1 + when [:long, :double, :_Complex] + Complex.new :longness => 2 + when [:float, :_Imaginary] + Imaginary.new + when [:double, :_Imaginary] + Imaginary.new :longness => 1 + when [:long, :double, :_Imaginary] + Imaginary.new :longness => 2 + else + if type_specs.length == 1 && + [CustomType, Struct, Union, Enum].any?{|c| type_specs[0].is_a? 
c} + type_specs[0] + else + if type_specs == [] + parse_error pos, "no type specifiers given" + else + parse_error pos, "invalid type specifier combination: #{type_specs.join(' ')}" + end + end + end + type.pos ||= pos + + # set type qualifiers + # 6.7.3p4: type qualifiers can be repeated + type.const = specs.any?{|x| x.equal? :const } + type.restrict = specs.any?{|x| x.equal? :restrict} + type.volatile = specs.any?{|x| x.equal? :volatile} + + return type + end + + def make_parameter(pos, specs, indirect_type, name) + type = indirect_type + if type + type.direct_type = make_direct_type(pos, specs) + else + type = make_direct_type(pos, specs) + end + [:typedef, :extern, :static, :auto, :inline].each do |sym| + specs.include? sym and + parse_error pos, "parameter `#{declarator.name}' declared `#{sym}'" + end + return Parameter.new_at(pos, type, name, + :register => specs.include?(:register)) + end + + def add_type_quals(type, quals) + type.const = quals.include?(:const ) + type.restrict = quals.include?(:restrict) + type.volatile = quals.include?(:volatile) + return type + end + + # + # Add te given type as the "most direct" type to the given + # declarator. Return the declarator. + # + def add_decl_type(declarator, type) + if declarator.indirect_type + declarator.indirect_type.direct_type = type + else + declarator.indirect_type = type + end + return declarator + end + + def param_list(params, var_args) + if params.length == 1 && + params[0].type.is_a?(Void) && + params[0].name.nil? + return NodeArray[] + elsif params.empty? 
+ return nil + else + return params + end + end + + def parse_error(pos, str) + raise ParseError, "#{pos}: #{str}" + end + +---- header + +require 'set' + +# Error classes +module C + class ParseError < StandardError; end +end + +# Local variables: +# mode: ruby +# end: diff --git a/test/racc/assets/chk.y b/test/racc/assets/chk.y new file mode 100644 index 0000000000..7e0ee20f1e --- /dev/null +++ b/test/racc/assets/chk.y @@ -0,0 +1,126 @@ +# +# racc tester +# + +class Calcp + + prechigh + left '*' '/' + left '+' '-' + preclow + + convert + NUMBER 'Number' + end + +rule + + target : exp | /* none */ { result = 0 } ; + + exp : exp '+' exp { result += val[2]; @plus = 'plus' } + | exp '-' exp { result -= val[2]; @str = "string test" } + | exp '*' exp { result *= val[2] } + | exp '/' exp { result /= val[2] } + | '(' { $emb = true } exp ')' + { + raise 'must not happen' unless $emb + result = val[2] + } + | '-' NUMBER { result = -val[1] } + | NUMBER + ; + +end + +----header + +class Number; end + +----inner + + def parse( src ) + $emb = false + @plus = nil + @str = nil + @src = src + result = do_parse + if @plus + raise 'string parse failed' unless @plus == 'plus' + end + if @str + raise 'string parse failed' unless @str == 'string test' + end + result + end + + def next_token + @src.shift + end + + def initialize + @yydebug = true + end + +----footer + +$parser = Calcp.new +$test_number = 1 + +def chk( src, ans ) + result = $parser.parse(src) + raise "test #{$test_number} fail" unless result == ans + $test_number += 1 +end + +chk( + [ [Number, 9], + [false, false], + [false, false] ], 9 +) + +chk( + [ [Number, 5], + ['*', nil], + [Number, 1], + ['-', nil], + [Number, 1], + ['*', nil], + [Number, 8], + [false, false], + [false, false] ], -3 +) + +chk( + [ [Number, 5], + ['+', nil], + [Number, 2], + ['-', nil], + [Number, 5], + ['+', nil], + [Number, 2], + ['-', nil], + [Number, 5], + [false, false], + [false, false] ], -1 +) + +chk( + [ ['-', nil], + [Number, 4], + 
[false, false], + [false, false] ], -4 +) + +chk( + [ [Number, 7], + ['*', nil], + ['(', nil], + [Number, 4], + ['+', nil], + [Number, 3], + [')', nil], + ['-', nil], + [Number, 9], + [false, false], + [false, false] ], 40 +) diff --git a/test/racc/assets/conf.y b/test/racc/assets/conf.y new file mode 100644 index 0000000000..de9de71d28 --- /dev/null +++ b/test/racc/assets/conf.y @@ -0,0 +1,16 @@ + +class A +rule + +a: A c C expr; + +b: A B; # useless + +c: A; +c: A; + +expr: expr '+' expr +expr: expr '-' expr +expr: NUMBER + +end diff --git a/test/racc/assets/csspool.y b/test/racc/assets/csspool.y new file mode 100644 index 0000000000..3d6af25d85 --- /dev/null +++ b/test/racc/assets/csspool.y @@ -0,0 +1,729 @@ +class CSSPool::CSS::Parser + +token CHARSET_SYM IMPORT_SYM STRING SEMI IDENT S COMMA LBRACE RBRACE STAR HASH +token LSQUARE RSQUARE EQUAL INCLUDES DASHMATCH LPAREN RPAREN FUNCTION GREATER PLUS +token SLASH NUMBER MINUS LENGTH PERCENTAGE ANGLE TIME FREQ URI +token IMPORTANT_SYM MEDIA_SYM NOT ONLY AND NTH_PSEUDO_CLASS +token DOCUMENT_QUERY_SYM FUNCTION_NO_QUOTE +token TILDE +token PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH +token NOT_PSEUDO_CLASS +token KEYFRAMES_SYM +token MATCHES_PSEUDO_CLASS +token NAMESPACE_SYM +token MOZ_PSEUDO_ELEMENT +token RESOLUTION +token COLON +token SUPPORTS_SYM +token OR +token VARIABLE_NAME +token CALC_SYM +token FONTFACE_SYM +token UNICODE_RANGE +token RATIO + +rule + document + : { @handler.start_document } + stylesheet + { @handler.end_document } + ; + stylesheet + : charset stylesheet + | import stylesheet + | namespace stylesheet + | charset + | import + | namespace + | body + | + ; + charset + : CHARSET_SYM STRING SEMI { @handler.charset interpret_string(val[1]), {} } + ; + import + : IMPORT_SYM import_location medium SEMI { + @handler.import_style val[2], val[1] + } + | IMPORT_SYM import_location SEMI { + @handler.import_style [], val[1] + } + ; + import_location + : import_location S + | STRING { result = Terms::String.new 
interpret_string val.first } + | URI { result = Terms::URI.new interpret_uri val.first } + ; + namespace + : NAMESPACE_SYM ident import_location SEMI { + @handler.namespace val[1], val[2] + } + | NAMESPACE_SYM import_location SEMI { + @handler.namespace nil, val[1] + } + ; + medium + : medium COMMA IDENT { + result = val[0] << MediaType.new(val[2]) + } + | IDENT { + result = [MediaType.new(val[0])] + } + ; + media_query_list + : media_query { result = MediaQueryList.new([ val[0] ]) } + | media_query_list COMMA media_query { result = val[0] << val[2] } + | { result = MediaQueryList.new } + ; + media_query + : optional_only_or_not media_type optional_and_exprs { result = MediaQuery.new(val[0], val[1], val[2]) } + | media_expr optional_and_exprs { result = MediaQuery.new(nil, val[0], val[1]) } + ; + optional_only_or_not + : ONLY { result = :only } + | NOT { result = :not } + | { result = nil } + ; + media_type + : IDENT { result = MediaType.new(val[0]) } + ; + media_expr + : LPAREN optional_space IDENT optional_space RPAREN { result = MediaType.new(val[2]) } + | LPAREN optional_space IDENT optional_space COLON optional_space expr RPAREN { result = MediaFeature.new(val[2], val[6][0]) } + ; + optional_space + : S { result = val[0] } + | { result = nil } + ; + optional_and_exprs + : optional_and_exprs AND media_expr { result = val[0] << val[2] } + | { result = [] } + ; + resolution + : RESOLUTION { + unit = val.first.gsub(/[\s\d.]/, '') + number = numeric(val.first) + result = Terms::Resolution.new(number, unit) + } + ; + body + : ruleset body + | conditional_rule body + | keyframes_rule body + | fontface_rule body + | ruleset + | conditional_rule + | keyframes_rule + | fontface_rule + ; + conditional_rule + : media + | document_query + | supports + ; + body_in_media + : body + | empty_ruleset + ; + media + : start_media body_in_media RBRACE { @handler.end_media val.first } + ; + start_media + : MEDIA_SYM media_query_list LBRACE { + result = val[1] + @handler.start_media 
result + } + ; + document_query + : start_document_query body RBRACE { @handler.end_document_query(before_pos(val), after_pos(val)) } + | start_document_query RBRACE { @handler.end_document_query(before_pos(val), after_pos(val)) } + ; + start_document_query + : start_document_query_pos url_match_fns LBRACE { + @handler.start_document_query(val[1], after_pos(val)) + } + ; + start_document_query_pos + : DOCUMENT_QUERY_SYM { + @handler.node_start_pos = before_pos(val) + } + ; + url_match_fns + : url_match_fn COMMA url_match_fns { + result = [val[0], val[2]].flatten + } + | url_match_fn { + result = val + } + ; + url_match_fn + : function_no_quote + | function + | uri + ; + supports + : start_supports body RBRACE { @handler.end_supports } + | start_supports RBRACE { @handler.end_supports } + ; + start_supports + : SUPPORTS_SYM supports_condition_root LBRACE { + @handler.start_supports val[1] + } + ; + supports_condition_root + : supports_negation { result = val.join('') } + | supports_conjunction_or_disjunction { result = val.join('') } + | supports_condition_in_parens { result = val.join('') } + ; + supports_condition + : supports_negation { result = val.join('') } + | supports_conjunction_or_disjunction { result = val.join('') } + | supports_condition_in_parens { result = val.join('') } + ; + supports_condition_in_parens + : LPAREN supports_condition RPAREN { result = val.join('') } + | supports_declaration_condition { result = val.join('') } + ; + supports_negation + : NOT supports_condition_in_parens { result = val.join('') } + ; + supports_conjunction_or_disjunction + : supports_conjunction + | supports_disjunction + ; + supports_conjunction + : supports_condition_in_parens AND supports_condition_in_parens { result = val.join('') } + | supports_conjunction_or_disjunction AND supports_condition_in_parens { result = val.join('') } + ; + supports_disjunction + : supports_condition_in_parens OR supports_condition_in_parens { result = val.join('') } + | 
supports_conjunction_or_disjunction OR supports_condition_in_parens { result = val.join('') } + ; + supports_declaration_condition + : LPAREN declaration_internal RPAREN { result = val.join('') } + | LPAREN S declaration_internal RPAREN { result = val.join('') } + ; + keyframes_rule + : start_keyframes_rule keyframes_blocks RBRACE + | start_keyframes_rule RBRACE + ; + start_keyframes_rule + : KEYFRAMES_SYM IDENT LBRACE { + @handler.start_keyframes_rule val[1] + } + ; + keyframes_blocks + : keyframes_block keyframes_blocks + | keyframes_block + ; + keyframes_block + : start_keyframes_block declarations RBRACE { @handler.end_keyframes_block } + | start_keyframes_block RBRACE { @handler.end_keyframes_block } + ; + start_keyframes_block + : keyframes_selectors LBRACE { + @handler.start_keyframes_block val[0] + } + ; + keyframes_selectors + | keyframes_selector COMMA keyframes_selectors { + result = val[0] + ', ' + val[2] + } + | keyframes_selector + ; + keyframes_selector + : IDENT + | PERCENTAGE { result = val[0].strip } + ; + fontface_rule + : start_fontface_rule declarations RBRACE { @handler.end_fontface_rule } + | start_fontface_rule RBRACE { @handler.end_fontface_rule } + ; + start_fontface_rule + : FONTFACE_SYM LBRACE { + @handler.start_fontface_rule + } + ; + ruleset + : start_selector declarations RBRACE { + @handler.end_selector val.first + } + | start_selector RBRACE { + @handler.end_selector val.first + } + ; + empty_ruleset + : optional_space { + start = @handler.start_selector([]) + @handler.end_selector(start) + } + ; + start_selector + : S start_selector { result = val.last } + | selectors LBRACE { + @handler.start_selector val.first + } + ; + selectors + : selector COMMA selectors + { + sel = Selector.new(val.first, {}) + result = [sel].concat(val[2]) + } + | selector + { + result = [Selector.new(val.first, {})] + } + ; + selector + : simple_selector combinator selector + { + val.flatten! 
+ val[2].combinator = val.delete_at 1 + result = val + } + | simple_selector + ; + combinator + : S { result = :s } + | GREATER { result = :> } + | PLUS { result = :+ } + | TILDE { result = :~ } + ; + simple_selector + : element_name hcap { + selector = val.first + selector.additional_selectors = val.last + result = [selector] + } + | element_name { result = val } + | hcap + { + ss = Selectors::Simple.new nil, nil + ss.additional_selectors = val.flatten + result = [ss] + } + ; + simple_selectors + : simple_selector COMMA simple_selectors { result = [val[0], val[2]].flatten } + | simple_selector + ; + ident_with_namespace + : IDENT { result = [interpret_identifier(val[0]), nil] } + | IDENT '|' IDENT { result = [interpret_identifier(val[2]), interpret_identifier(val[0])] } + | '|' IDENT { result = [interpret_identifier(val[1]), nil] } + | STAR '|' IDENT { result = [interpret_identifier(val[2]), '*'] } + ; + element_name + : ident_with_namespace { result = Selectors::Type.new val.first[0], nil, val.first[1] } + | STAR { result = Selectors::Universal.new val.first } + | '|' STAR { result = Selectors::Universal.new val[1] } + | STAR '|' STAR { result = Selectors::Universal.new val[2], nil, val[0] } + | IDENT '|' STAR { result = Selectors::Universal.new val[2], nil, interpret_identifier(val[0]) } + ; + hcap + : hash { result = val } + | class { result = val } + | attrib { result = val } + | pseudo { result = val } + | hash hcap { result = val.flatten } + | class hcap { result = val.flatten } + | attrib hcap { result = val.flatten } + | pseudo hcap { result = val.flatten } + ; + hash + : HASH { + result = Selectors::Id.new interpret_identifier val.first.sub(/^#/, '') + } + class + : '.' 
IDENT { + result = Selectors::Class.new interpret_identifier val.last + } + ; + attrib + : LSQUARE ident_with_namespace EQUAL IDENT RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_identifier(val[3]), + Selectors::Attribute::EQUALS, + val[1][1] + ) + } + | LSQUARE ident_with_namespace EQUAL STRING RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_string(val[3]), + Selectors::Attribute::EQUALS, + val[1][1] + ) + } + | LSQUARE ident_with_namespace INCLUDES STRING RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_string(val[3]), + Selectors::Attribute::INCLUDES, + val[1][1] + ) + } + | LSQUARE ident_with_namespace INCLUDES IDENT RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_identifier(val[3]), + Selectors::Attribute::INCLUDES, + val[1][1] + ) + } + | LSQUARE ident_with_namespace DASHMATCH IDENT RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_identifier(val[3]), + Selectors::Attribute::DASHMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace DASHMATCH STRING RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_string(val[3]), + Selectors::Attribute::DASHMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace PREFIXMATCH IDENT RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_identifier(val[3]), + Selectors::Attribute::PREFIXMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace PREFIXMATCH STRING RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_string(val[3]), + Selectors::Attribute::PREFIXMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace SUFFIXMATCH IDENT RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_identifier(val[3]), + Selectors::Attribute::SUFFIXMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace SUFFIXMATCH STRING RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_string(val[3]), + 
Selectors::Attribute::SUFFIXMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace SUBSTRINGMATCH IDENT RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_identifier(val[3]), + Selectors::Attribute::SUBSTRINGMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace SUBSTRINGMATCH STRING RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + interpret_string(val[3]), + Selectors::Attribute::SUBSTRINGMATCH, + val[1][1] + ) + } + | LSQUARE ident_with_namespace RSQUARE { + result = Selectors::Attribute.new( + val[1][0], + nil, + Selectors::Attribute::SET, + val[1][1] + ) + } + ; + pseudo + : COLON IDENT { + result = Selectors::pseudo interpret_identifier(val[1]) + } + | COLON COLON IDENT { + result = Selectors::PseudoElement.new( + interpret_identifier(val[2]) + ) + } + | COLON FUNCTION RPAREN { + result = Selectors::PseudoClass.new( + interpret_identifier(val[1].sub(/\($/, '')), + '' + ) + } + | COLON FUNCTION IDENT RPAREN { + result = Selectors::PseudoClass.new( + interpret_identifier(val[1].sub(/\($/, '')), + interpret_identifier(val[2]) + ) + } + | COLON NOT_PSEUDO_CLASS simple_selector RPAREN { + result = Selectors::PseudoClass.new( + 'not', + val[2].first.to_s + ) + } + | COLON NTH_PSEUDO_CLASS { + result = Selectors::PseudoClass.new( + interpret_identifier(val[1].sub(/\(.*/, '')), + interpret_identifier(val[1].sub(/.*\(/, '').sub(/\).*/, '')) + ) + } + | COLON MATCHES_PSEUDO_CLASS simple_selectors RPAREN { + result = Selectors::PseudoClass.new( + val[1].split('(').first.strip, + val[2].join(', ') + ) + } + | COLON MOZ_PSEUDO_ELEMENT optional_space any_number_of_idents optional_space RPAREN { + result = Selectors::PseudoElement.new( + interpret_identifier(val[1].sub(/\($/, '')) + ) + } + | COLON COLON MOZ_PSEUDO_ELEMENT optional_space any_number_of_idents optional_space RPAREN { + result = Selectors::PseudoElement.new( + interpret_identifier(val[2].sub(/\($/, '')) + ) + } + ; + any_number_of_idents + : + | multiple_idents + ; + 
multiple_idents + : IDENT + | IDENT COMMA multiple_idents + ; + # declarations can be separated by one *or more* semicolons. semi-colons at the start or end of a ruleset are also allowed + one_or_more_semis + : SEMI + | SEMI one_or_more_semis + ; + declarations + : declaration one_or_more_semis declarations + | one_or_more_semis declarations + | declaration one_or_more_semis + | declaration + | one_or_more_semis + ; + declaration + : declaration_internal { @handler.property val.first } + ; + declaration_internal + : property COLON expr prio + { result = Declaration.new(val.first, val[2], val[3]) } + | property COLON S expr prio + { result = Declaration.new(val.first, val[3], val[4]) } + | property S COLON expr prio + { result = Declaration.new(val.first, val[3], val[4]) } + | property S COLON S expr prio + { result = Declaration.new(val.first, val[4], val[5]) } + ; + prio + : IMPORTANT_SYM { result = true } + | { result = false } + ; + property + : IDENT { result = interpret_identifier val[0] } + | STAR IDENT { result = interpret_identifier val.join } + | VARIABLE_NAME { result = interpret_identifier val[0] } + ; + operator + : COMMA + | SLASH + | EQUAL + ; + expr + : term operator expr { + result = [val.first, val.last].flatten + val.last.first.operator = val[1] + } + | term expr { result = val.flatten } + | term { result = val } + ; + term + : ident + | ratio + | numeric + | string + | uri + | hexcolor + | calc + | function + | resolution + | VARIABLE_NAME + | uranges + ; + function + : function S { result = val.first } + | FUNCTION expr RPAREN { + name = interpret_identifier val.first.sub(/\($/, '') + if name == 'rgb' + result = Terms::Rgb.new(*val[1]) + else + result = Terms::Function.new name, val[1] + end + } + | FUNCTION RPAREN { + name = interpret_identifier val.first.sub(/\($/, '') + result = Terms::Function.new name + } + ; + function_no_quote + : function_no_quote S { result = val.first } + | FUNCTION_NO_QUOTE { + parts = val.first.split('(') + name = 
interpret_identifier parts.first + result = Terms::Function.new(name, [Terms::String.new(interpret_string_no_quote(parts.last))]) + } + ; + uranges + : UNICODE_RANGE COMMA uranges + | UNICODE_RANGE + ; + calc + : CALC_SYM calc_sum RPAREN optional_space { + result = Terms::Math.new(val.first.split('(').first, val[1]) + } + ; + # plus and minus are supposed to have whitespace around them, per http://dev.w3.org/csswg/css-values/#calc-syntax, but the numbers are eating trailing whitespace, so we inject it back in + calc_sum + : calc_product + | calc_product PLUS calc_sum { val.insert(1, ' '); result = val.join('') } + | calc_product MINUS calc_sum { val.insert(1, ' '); result = val.join('') } + ; + calc_product + : calc_value + | calc_value optional_space STAR calc_value { result = val.join('') } + | calc_value optional_space SLASH calc_value { result = val.join('') } + ; + calc_value + : numeric { result = val.join('') } + | function { result = val.join('') } # for var() variable references + | LPAREN calc_sum RPAREN { result = val.join('') } + ; + hexcolor + : hexcolor S { result = val.first } + | HASH { result = Terms::Hash.new val.first.sub(/^#/, '') } + ; + uri + : uri S { result = val.first } + | URI { result = Terms::URI.new interpret_uri val.first } + ; + string + : string S { result = val.first } + | STRING { result = Terms::String.new interpret_string val.first } + ; + numeric + : unary_operator numeric { + result = val[1] + val[1].unary_operator = val.first + } + | NUMBER { + result = Terms::Number.new numeric val.first + } + | PERCENTAGE { + result = Terms::Number.new numeric(val.first), nil, '%' + } + | LENGTH { + unit = val.first.gsub(/[\s\d.]/, '') + result = Terms::Number.new numeric(val.first), nil, unit + } + | ANGLE { + unit = val.first.gsub(/[\s\d.]/, '') + result = Terms::Number.new numeric(val.first), nil, unit + } + | TIME { + unit = val.first.gsub(/[\s\d.]/, '') + result = Terms::Number.new numeric(val.first), nil, unit + } + | FREQ { + unit = 
val.first.gsub(/[\s\d.]/, '') + result = Terms::Number.new numeric(val.first), nil, unit + } + ; + ratio + : RATIO { + result = Terms::Ratio.new(val[0], val[1]) + } + ; + unary_operator + : MINUS { result = :minus } + | PLUS { result = :plus } + ; + ident + : ident S { result = val.first } + | IDENT { result = Terms::Ident.new interpret_identifier val.first } + ; + +---- inner + +def numeric thing + thing = thing.gsub(/[^\d.]/, '') + Integer(thing) rescue Float(thing) +end + +def interpret_identifier s + interpret_escapes s +end + +def interpret_uri s + interpret_escapes s.match(/^url\((.*)\)$/mui)[1].strip.match(/^(['"]?)((?:\\.|.)*)\1$/mu)[2] +end + +def interpret_string_no_quote s + interpret_escapes s.match(/^(.*)\)$/mu)[1].strip.match(/^(['"]?)((?:\\.|.)*)\1$/mu)[2] +end + +def interpret_string s + interpret_escapes s.match(/^(['"])((?:\\.|.)*)\1$/mu)[2] +end + +def interpret_escapes s + token_exp = /\\(?:([0-9a-fA-F]{1,6}(?:\r\n|\s)?)|(.))/mu + return s.gsub(token_exp) do |escape_sequence| + if !$1.nil? + code = $1.chomp.to_i 16 + code = 0xFFFD if code > 0x10FFFF + next [code].pack('U') + end + next '' if $2 == "\n" + next $2 + end +end + +# override racc's on_error so we can have context in our error messages +def on_error(t, val, vstack) + errcontext = (@ss.pre_match[-10..-1] || @ss.pre_match) + + @ss.matched + @ss.post_match[0..9] + line_number = @ss.pre_match.lines.count + raise ParseError, sprintf("parse error on value %s (%s) " + + "on line %s around \"%s\"", + val.inspect, token_to_str(t) || '?', + line_number, errcontext) +end + +def before_pos(val) + # don't include leading whitespace + return current_pos - val.last.length + val.last[/\A\s*/].size +end + +def after_pos(val) + # don't include trailing whitespace + return current_pos - val.last[/\s*\z/].size +end + +# charpos will work with multibyte strings but is not available until ruby 2 +def current_pos + @ss.respond_to?('charpos') ? 
@ss.charpos : @ss.pos +end diff --git a/test/racc/assets/digraph.y b/test/racc/assets/digraph.y new file mode 100644 index 0000000000..17a034ee54 --- /dev/null +++ b/test/racc/assets/digraph.y @@ -0,0 +1,29 @@ +# ? detect digraph bug + +class P + token A B C D +rule + target : a b c d + a : A + | + b : B + | + c : C + | + d : D + | +end + +---- inner + + def parse + do_parse + end + + def next_token + [false, '$'] + end + +---- footer + +P.new.parse diff --git a/test/racc/assets/echk.y b/test/racc/assets/echk.y new file mode 100644 index 0000000000..0fda2685aa --- /dev/null +++ b/test/racc/assets/echk.y @@ -0,0 +1,118 @@ +# +# racc tester +# + +class Calcp + + prechigh + left '*' '/' + left '+' '-' + preclow + + convert + NUMBER 'Number' + end + +rule + + target : exp | /* none */ { result = 0 } ; + + exp : exp '+' exp { result += val[2]; a = 'plus' } + | exp '-' exp { result -= val[2]; "string test" } + | exp '*' exp { result *= val[2] } + | exp '/' exp { result /= val[2] } + | '(' { $emb = true } exp ')' + { + raise 'must not happen' unless $emb + result = val[2] + } + | '-' NUMBER { result = -val[1] } + | NUMBER + ; + +end + +----header + +class Number ; end + +----inner + + def parse( src ) + @src = src + do_parse + end + + def next_token + @src.shift + end + + def initialize + @yydebug = true + end + +----footer + +$parser = Calcp.new +$tidx = 1 + +def chk( src, ans ) + ret = $parser.parse( src ) + unless ret == ans then + bug! 
"test #{$tidx} fail" + end + $tidx += 1 +end + +chk( + [ [Number, 9], + [false, false], + [false, false] ], 9 +) + +chk( + [ [Number, 5], + ['*', nil], + [Number, 1], + ['-', nil], + [Number, 1], + ['*', nil], + [Number, 8], + [false, false], + [false, false] ], -3 +) + +chk( + [ [Number, 5], + ['+', nil], + [Number, 2], + ['-', nil], + [Number, 5], + ['+', nil], + [Number, 2], + ['-', nil], + [Number, 5], + [false, false], + [false, false] ], -1 +) + +chk( + [ ['-', nil], + [Number, 4], + [false, false], + [false, false] ], -4 +) + +chk( + [ [Number, 7], + ['*', nil], + ['(', nil], + [Number, 4], + ['+', nil], + [Number, 3], + [')', nil], + ['-', nil], + [Number, 9], + [false, false], + [false, false] ], 40 +) diff --git a/test/racc/assets/edtf.y b/test/racc/assets/edtf.y new file mode 100644 index 0000000000..4f5f6bb4fd --- /dev/null +++ b/test/racc/assets/edtf.y @@ -0,0 +1,583 @@ +# -*- racc -*- + +# Copyright 2011 Sylvester Keil. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# The views and conclusions contained in the software and documentation are +# those of the authors and should not be interpreted as representing official +# policies, either expressed or implied, of the copyright holder. + +class EDTF::Parser + +token T Z E X U UNKNOWN OPEN LONGYEAR UNMATCHED DOTS UA PUA + +expect 0 + +rule + + edtf : level_0_expression + | level_1_expression + | level_2_expression + ; + + # ---- Level 0 / ISO 8601 Rules ---- + + # NB: level 0 intervals are covered by the level 1 interval rules + level_0_expression : date + | date_time + ; + + date : positive_date + | negative_date + ; + + positive_date : + year { result = Date.new(val[0]).year_precision! } + | year_month { result = Date.new(*val.flatten).month_precision! } + | year_month_day { result = Date.new(*val.flatten).day_precision! 
} + ; + + negative_date : '-' positive_date { result = -val[1] } + + + date_time : date T time { + result = DateTime.new(val[0].year, val[0].month, val[0].day, *val[2]) + result.skip_timezone = (val[2].length == 3) + } + + time : base_time + | base_time zone_offset { result = val.flatten } + + base_time : hour ':' minute ':' second { result = val.values_at(0, 2, 4) } + | midnight + + midnight : '2' '4' ':' '0' '0' ':' '0' '0' { result = [24, 0, 0] } + + zone_offset : Z { result = 0 } + | '-' zone_offset_hour { result = -1 * val[1] } + | '+' positive_zone_offset { result = val[1] } + ; + + positive_zone_offset : zone_offset_hour + | '0' '0' ':' '0' '0' { result = 0 } + ; + + + zone_offset_hour : d01_13 ':' minute { result = Rational(val[0] * 60 + val[2], 1440) } + | '1' '4' ':' '0' '0' { result = Rational(840, 1440) } + | '0' '0' ':' d01_59 { result = Rational(val[3], 1440) } + ; + + year : digit digit digit digit { + result = val.zip([1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } + } + + month : d01_12 + day : d01_31 + + year_month : year '-' month { result = [val[0], val[2]] } + + # We raise an exception if there are too many days for the month, but + # do not consider leap years, as the EDTF BNF did not either. + # NB: an exception will be raised regardless, because the Ruby Date + # implementation calculates leap years. 
+ year_month_day : year_month '-' day { + result = val[0] << val[2] + if result[2] > 31 || (result[2] > 30 && [2,4,6,9,11].include?(result[1])) || (result[2] > 29 && result[1] == 2) + raise ArgumentError, "invalid date (invalid days #{result[2]} for month #{result[1]})" + end + } + + hour : d00_23 + minute : d00_59 + second : d00_59 + + # Completely covered by level_1_interval + # level_0_interval : date '/' date { result = Interval.new(val[0], val[1]) } + + + # ---- Level 1 Extension Rules ---- + + # NB: Uncertain/approximate Dates are covered by the Level 2 rules + level_1_expression : unspecified | level_1_interval | long_year_simple | season + + # uncertain_or_approximate_date : date UA { result = uoa(val[0], val[1]) } + + unspecified : unspecified_year + { + result = Date.new(val[0][0]).year_precision! + result.unspecified.year[2,2] = val[0][1] + } + | unspecified_month + | unspecified_day + | unspecified_day_and_month + ; + + unspecified_year : + digit digit digit U + { + result = [val[0,3].zip([1000,100,10]).reduce(0) { |s,(a,b)| s += a * b }, [false,true]] + } + | digit digit U U + { + result = [val[0,2].zip([1000,100]).reduce(0) { |s,(a,b)| s += a * b }, [true, true]] + } + + unspecified_month : year '-' U U { + result = Date.new(val[0]).unspecified!(:month) + result.precision = :month + } + + unspecified_day : year_month '-' U U { + result = Date.new(*val[0]).unspecified!(:day) + } + + unspecified_day_and_month : year '-' U U '-' U U { + result = Date.new(val[0]).unspecified!([:day,:month]) + } + + + level_1_interval : level_1_start '/' level_1_end { + result = Interval.new(val[0], val[2]) + } + + level_1_start : date | partial_uncertain_or_approximate | unspecified | partial_unspecified | UNKNOWN + + level_1_end : level_1_start | OPEN + + + long_year_simple : + LONGYEAR long_year + { + result = Date.new(val[1]) + result.precision = :year + } + | LONGYEAR '-' long_year + { + result = Date.new(-1 * val[2]) + result.precision = :year + } + ; + + long_year : 
+ positive_digit digit digit digit digit { + result = val.zip([10000,1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } + } + | long_year digit { result = 10 * val[0] + val[1] } + ; + + + season : year '-' season_number ua { + result = Season.new(val[0], val[2]) + val[3].each { |ua| result.send(ua) } + } + + season_number : '2' '1' { result = 21 } + | '2' '2' { result = 22 } + | '2' '3' { result = 23 } + | '2' '4' { result = 24 } + ; + + + # ---- Level 2 Extension Rules ---- + + # NB: Level 2 Intervals are covered by the Level 1 Interval rules. + level_2_expression : season_qualified + | partial_uncertain_or_approximate + | partial_unspecified + | choice_list + | inclusive_list + | masked_precision + | date_and_calendar + | long_year_scientific + ; + + + season_qualified : season '^' { result = val[0]; result.qualifier = val[1] } + + + long_year_scientific : + long_year_simple E integer + { + result = Date.new(val[0].year * 10 ** val[2]).year_precision! + } + | LONGYEAR int1_4 E integer + { + result = Date.new(val[1] * 10 ** val[3]).year_precision! + } + | LONGYEAR '-' int1_4 E integer + { + result = Date.new(-1 * val[2] * 10 ** val[4]).year_precision! + } + ; + + + date_and_calendar : date '^' { result = val[0]; result.calendar = val[1] } + + + masked_precision : + digit digit digit X + { + d = val[0,3].zip([1000,100,10]).reduce(0) { |s,(a,b)| s += a * b } + result = EDTF::Decade.new(d) + } + | digit digit X X + { + d = val[0,2].zip([1000,100]).reduce(0) { |s,(a,b)| s += a * b } + result = EDTF::Century.new(d) + } + ; + + + choice_list : '[' list ']' { result = val[1].choice! } + + inclusive_list : '{' list '}' { result = val[1] } + + list : earlier { result = EDTF::Set.new(val[0]).earlier! } + | earlier ',' list_elements ',' later { result = EDTF::Set.new([val[0]] + val[2] + [val[4]]).earlier!.later! } + | earlier ',' list_elements { result = EDTF::Set.new([val[0]] + val[2]).earlier! 
} + | earlier ',' later { result = EDTF::Set.new([val[0]] + [val[2]]).earlier!.later! } + | list_elements ',' later { result = EDTF::Set.new(val[0] + [val[2]]).later! } + | list_elements { result = EDTF::Set.new(*val[0]) } + | later { result = EDTF::Set.new(val[0]).later! } + ; + + list_elements : list_element { result = [val[0]].flatten } + | list_elements ',' list_element { result = val[0] + [val[2]].flatten } + ; + + list_element : atomic + | consecutives + ; + + atomic : date + | partial_uncertain_or_approximate + | unspecified + ; + + earlier : DOTS date { result = val[1] } + + later : year_month_day DOTS { result = Date.new(*val[0]).year_precision! } + | year_month DOTS { result = Date.new(*val[0]).month_precision! } + | year DOTS { result = Date.new(val[0]).year_precision! } + ; + + consecutives : year_month_day DOTS year_month_day { result = (Date.new(val[0]).day_precision! .. Date.new(val[2]).day_precision!) } + | year_month DOTS year_month { result = (Date.new(val[0]).month_precision! .. Date.new(val[2]).month_precision!) } + | year DOTS year { result = (Date.new(val[0]).year_precision! .. Date.new(val[2]).year_precision!) 
} + ; + + partial_unspecified : + unspecified_year '-' month '-' day + { + result = Date.new(val[0][0], val[2], val[4]) + result.unspecified.year[2,2] = val[0][1] + } + | unspecified_year '-' U U '-' day + { + result = Date.new(val[0][0], 1, val[5]) + result.unspecified.year[2,2] = val[0][1] + result.unspecified!(:month) + } + | unspecified_year '-' U U '-' U U + { + result = Date.new(val[0][0], 1, 1) + result.unspecified.year[2,2] = val[0][1] + result.unspecified!([:month, :day]) + } + | unspecified_year '-' month '-' U U + { + result = Date.new(val[0][0], val[2], 1) + result.unspecified.year[2,2] = val[0][1] + result.unspecified!(:day) + } + | year '-' U U '-' day + { + result = Date.new(val[0], 1, val[5]) + result.unspecified!(:month) + } + ; + + + partial_uncertain_or_approximate : pua_base + | '(' pua_base ')' UA { result = uoa(val[1], val[3]) } + + pua_base : + pua_year { result = val[0].year_precision! } + | pua_year_month { result = val[0][0].month_precision! } + | pua_year_month_day { result = val[0].day_precision! } + + pua_year : year UA { result = uoa(Date.new(val[0]), val[1], :year) } + + pua_year_month : + pua_year '-' month ua { + result = [uoa(val[0].change(:month => val[2]), val[3], [:month, :year])] + } + | year '-' month UA { + result = [uoa(Date.new(val[0], val[2]), val[3], [:year, :month])] + } + | year '-(' month ')' UA { + result = [uoa(Date.new(val[0], val[2]), val[4], [:month]), true] + } + | pua_year '-(' month ')' UA { + result = [uoa(val[0].change(:month => val[2]), val[4], [:month]), true] + } + ; + + pua_year_month_day : + pua_year_month '-' day ua { + result = uoa(val[0][0].change(:day => val[2]), val[3], val[0][1] ? 
[:day] : nil) + } + | pua_year_month '-(' day ')' UA { + result = uoa(val[0][0].change(:day => val[2]), val[4], [:day]) + } + | year '-(' month ')' UA day ua { + result = uoa(uoa(Date.new(val[0], val[2], val[5]), val[4], :month), val[6], :day) + } + | year_month '-' day UA { + result = uoa(Date.new(val[0][0], val[0][1], val[2]), val[3]) + } + | year_month '-(' day ')' UA { + result = uoa(Date.new(val[0][0], val[0][1], val[2]), val[4], [:day]) + } + | year '-(' month '-' day ')' UA { + result = uoa(Date.new(val[0], val[2], val[4]), val[6], [:month, :day]) + } + | year '-(' month '-(' day ')' UA ')' UA { + result = Date.new(val[0], val[2], val[4]) + result = uoa(result, val[6], [:day]) + result = uoa(result, val[8], [:month, :day]) + } + | pua_year '-(' month '-' day ')' UA { + result = val[0].change(:month => val[2], :day => val[4]) + result = uoa(result, val[6], [:month, :day]) + } + | pua_year '-(' month '-(' day ')' UA ')' UA { + result = val[0].change(:month => val[2], :day => val[4]) + result = uoa(result, val[6], [:day]) + result = uoa(result, val[8], [:month, :day]) + } + # | '(' pua_year '-(' month ')' UA ')' UA '-' day ua { + # result = val[1].change(:month => val[3], :day => val[9]) + # result = uoa(result, val[5], [:month]) + # result = [uoa(result, val[7], [:year]), true] + # } + ; + + ua : { result = [] } | UA + + # ---- Auxiliary Rules ---- + + digit : '0' { result = 0 } + | positive_digit + ; + + positive_digit : '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + + d01_12 : '0' positive_digit { result = val[1] } + | '1' '0' { result = 10 } + | '1' '1' { result = 11 } + | '1' '2' { result = 12 } + ; + + d01_13 : d01_12 + | '1' '3' { result = 13 } + ; + + d01_23 : '0' positive_digit { result = val[1] } + | '1' digit { result = 10 + val[1] } + | '2' '0' { result = 20 } + | '2' '1' { result = 21 } + | '2' '2' { result = 22 } + | '2' '3' { result = 23 } + ; + + d00_23 : '0' '0' + | d01_23 + ; + + d01_29 : d01_23 + | '2' '4' { result = 24 } + | '2' '5' { 
result = 25 } + | '2' '6' { result = 26 } + | '2' '7' { result = 27 } + | '2' '8' { result = 28 } + | '2' '9' { result = 29 } + ; + + d01_30 : d01_29 + | '3' '0' { result = 30 } + ; + + d01_31 : d01_30 + | '3' '1' { result = 31 } + ; + + d01_59 : d01_29 + | '3' digit { result = 30 + val[1] } + | '4' digit { result = 40 + val[1] } + | '5' digit { result = 50 + val[1] } + ; + + d00_59 : '0' '0' + | d01_59 + ; + + int1_4 : positive_digit { result = val[0] } + | positive_digit digit { result = 10 * val[0] + val[1] } + | positive_digit digit digit + { + result = val.zip([100,10,1]).reduce(0) { |s,(a,b)| s += a * b } + } + | positive_digit digit digit digit + { + result = val.zip([1000,100,10,1]).reduce(0) { |s,(a,b)| s += a * b } + } + ; + + integer : positive_digit { result = val[0] } + | integer digit { result = 10 * val[0] + val[1] } + ; + + + +---- header +require 'strscan' + +---- inner + + @defaults = { + :level => 2, + :debug => false + }.freeze + + class << self; attr_reader :defaults; end + + attr_reader :options + + def initialize(options = {}) + @options = Parser.defaults.merge(options) + end + + def debug? + !!(options[:debug] || ENV['DEBUG']) + end + + def parse(input) + parse!(input) + rescue => e + warn e.message if debug? + nil + end + + def parse!(input) + @yydebug = debug? + @src = StringScanner.new(input) + do_parse + end + + def on_error(tid, value, stack) + raise ArgumentError, + "failed to parse date: unexpected '#{value}' at #{stack.inspect}" + end + + def apply_uncertainty(date, uncertainty, scope = nil) + uncertainty.each do |u| + scope.nil? ? date.send(u) : date.send(u, scope) + end + date + end + + alias uoa apply_uncertainty + + def next_token + case + when @src.eos? 
+ nil + # when @src.scan(/\s+/) + # ignore whitespace + when @src.scan(/\(/) + ['(', @src.matched] + # when @src.scan(/\)\?~-/) + # [:PUA, [:uncertain!, :approximate!]] + # when @src.scan(/\)\?-/) + # [:PUA, [:uncertain!]] + # when @src.scan(/\)~-/) + # [:PUA, [:approximate!]] + when @src.scan(/\)/) + [')', @src.matched] + when @src.scan(/\[/) + ['[', @src.matched] + when @src.scan(/\]/) + [']', @src.matched] + when @src.scan(/\{/) + ['{', @src.matched] + when @src.scan(/\}/) + ['}', @src.matched] + when @src.scan(/T/) + [:T, @src.matched] + when @src.scan(/Z/) + [:Z, @src.matched] + when @src.scan(/\?~/) + [:UA, [:uncertain!, :approximate!]] + when @src.scan(/\?/) + [:UA, [:uncertain!]] + when @src.scan(/~/) + [:UA, [:approximate!]] + when @src.scan(/open/i) + [:OPEN, :open] + when @src.scan(/unkn?own/i) # matches 'unkown' typo too + [:UNKNOWN, :unknown] + when @src.scan(/u/) + [:U, @src.matched] + when @src.scan(/x/i) + [:X, @src.matched] + when @src.scan(/y/) + [:LONGYEAR, @src.matched] + when @src.scan(/e/) + [:E, @src.matched] + when @src.scan(/\+/) + ['+', @src.matched] + when @src.scan(/-\(/) + ['-(', @src.matched] + when @src.scan(/-/) + ['-', @src.matched] + when @src.scan(/:/) + [':', @src.matched] + when @src.scan(/\//) + ['/', @src.matched] + when @src.scan(/\s*\.\.\s*/) + [:DOTS, '..'] + when @src.scan(/\s*,\s*/) + [',', ','] + when @src.scan(/\^\w+/) + ['^', @src.matched[1..-1]] + when @src.scan(/\d/) + [@src.matched, @src.matched.to_i] + else @src.scan(/./) + [:UNMATCHED, @src.rest] + end + end + + +# -*- racc -*- diff --git a/test/racc/assets/err.y b/test/racc/assets/err.y new file mode 100644 index 0000000000..ae280957cc --- /dev/null +++ b/test/racc/assets/err.y @@ -0,0 +1,60 @@ + +class ErrTestp + +rule + +target: lines + ; + +lines: line + | lines line + ; + +line: A B C D E + | error E + ; + +end + +---- inner + +def initialize + @yydebug = false + @q = [ + [:A, 'a'], + # [:B, 'b'], + [:C, 'c'], + [:D, 'd'], + [:E, 'e'], + + [:A, 'a'], + [:B, 
'b'], + [:C, 'c'], + [:D, 'd'], + [:E, 'e'], + + [:A, 'a'], + [:B, 'b'], + # [:C, 'c'], + [:D, 'd'], + [:E, 'e'], + [false, nil] + ] +end + +def next_token + @q.shift +end + +def on_error( t, val, values ) + $stderr.puts "error on token '#{val}'(#{t})" +end + +def parse + do_parse +end + +---- footer + +p = ErrTestp.new +p.parse diff --git a/test/racc/assets/error_recovery.y b/test/racc/assets/error_recovery.y new file mode 100644 index 0000000000..1fd21ac7d0 --- /dev/null +++ b/test/racc/assets/error_recovery.y @@ -0,0 +1,35 @@ +# Regression test case for the bug discussed here: +# https://github.com/whitequark/parser/issues/93 +# In short, a Racc-generated parser could go into an infinite loop when +# attempting error recovery at EOF + +class InfiniteLoop + +rule + + stmts: stmt + | error stmt + + stmt: '%' stmt + +end + +---- inner + + def parse + @errors = [] + do_parse + end + + def next_token + nil + end + + def on_error(error_token, error_value, value_stack) + # oh my, an error + @errors << [error_token, error_value] + end + +---- footer + +InfiniteLoop.new.parse \ No newline at end of file diff --git a/test/racc/assets/expect.y b/test/racc/assets/expect.y new file mode 100644 index 0000000000..24c27443e2 --- /dev/null +++ b/test/racc/assets/expect.y @@ -0,0 +1,7 @@ +class E + expect 1 +rule + list: inlist inlist + inlist: + | A +end diff --git a/test/racc/assets/firstline.y b/test/racc/assets/firstline.y new file mode 100644 index 0000000000..ab0692e543 --- /dev/null +++ b/test/racc/assets/firstline.y @@ -0,0 +1,4 @@ +class T +rule + a: A B C +end diff --git a/test/racc/assets/huia.y b/test/racc/assets/huia.y new file mode 100644 index 0000000000..de9d45150c --- /dev/null +++ b/test/racc/assets/huia.y @@ -0,0 +1,318 @@ +# Copyright (c) 2014 James Harton +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software 
without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +class Huia::Parser + + token + IDENTIFIER EQUAL PLUS MINUS ASTERISK FWD_SLASH COLON FLOAT INTEGER STRING + EXPO INDENT OUTDENT OPAREN CPAREN DOT SIGNATURE NL EOF PIPE COMMA NIL TRUE + FALSE EQUALITY CALL SELF CONSTANT CHAR DOUBLE_TICK_STRING + DOUBLE_TICK_STRING_END INTERPOLATE_START INTERPOLATE_END BOX LSQUARE + RSQUARE FACES LFACE RFACE BANG TILDE RETURN NOT_EQUALITY OR AND GT LT + GTE LTE AT + + prechigh + left EXPO + left BANG TILDE + left ASTERISK FWD_SLASH PERCENT + left PLUS MINUS + + right EQUAL + preclow + + rule + statements: statement + | statements statement { return scope } + + statement: expr eol { return scope.append val[0] } + | expr { return scope.append val[0] } + | eol { return scope } + + eol: NL | EOF + nlq: NL | + + expr: literal + | grouped_expr + | binary_op + | unary_op + | method_call + | constant + | variable + | array + | hash + | return + + return: return_expr + | return_nil + return_expr: RETURN expr { return n(:Return, val[1]) } + return_nil: RETURN { return n(:Return, n(:Nil)) } + + array: empty_array + | array_list + + empty_array: BOX { return n 
:Array } + + array_list: LSQUARE array_items RSQUARE { return val[1] } + array_items: expr { return n :Array, [val[0]] } + | array_items COMMA expr { val[0].append(val[2]); return val[0] } + + hash: empty_hash + | hash_list + empty_hash: FACES { return n :Hash } + hash_list: LFACE hash_items RFACE { return val[1] } + hash_items: hash_item { return n :Hash, val[0] } + | hash_items COMMA hash_item { val[0].append(val[2]); return val[0] } + hash_item: expr COLON expr { return n :HashItem, val[0], val[2] } + + constant: CONSTANT { return constant val[0] } + + indented: indented_w_stmts + | indented_w_expr + | indented_wo_stmts + indented_w_stmts: indent statements outdent { return val[0] } + indented_w_expr: indent expr outdent { return val[0].append(val[1]) } + indented_wo_stmts: indent outdent { return val[0] } + outdent: OUTDENT { return pop_scope } + + + indent_w_args: indent_pipe indent_args PIPE nlq INDENT { return val[0] } + indent_pipe: PIPE { return push_scope } + indent_wo_args: INDENT { return push_scope } + indent: indent_w_args + | indent_wo_args + + indent_args: indent_arg + | indent_args COMMA indent_arg + indent_arg: arg_var { return scope.add_argument val[0] } + | arg_var EQUAL expr { return n :Assignment, val[0], val[2] } + arg_var: IDENTIFIER { return n :Variable, val[0] } + + method_call: method_call_on_object + | method_call_on_self + | method_call_on_closure + method_call_on_object: expr DOT call_signature { return n :MethodCall, val[0], val[2] } + | expr DOT IDENTIFIER { return n :MethodCall, val[0], n(:CallSignature, val[2]) } + method_call_on_self: call_signature { return n :MethodCall, scope_instance, val[0] } + + method_call_on_closure: AT call_signature { return n :MethodCall, this_closure, val[1] } + | AT IDENTIFIER { return n :MethodCall, this_closure, n(:CallSignature, val[1]) } + + call_signature: call_arguments + | call_simple_name + call_simple_name: CALL { return n :CallSignature, val[0] } + call_argument: SIGNATURE call_passed_arg { 
return n :CallSignature, val[0], [val[1]] } + call_passed_arg: call_passed_simple + | call_passed_indented + call_passed_simple: expr + | expr NL + call_passed_indented: indented + | indented NL + call_arguments: call_argument { return val[0] } + | call_arguments call_argument { return val[0].concat_signature val[1] } + + grouped_expr: OPAREN expr CPAREN { return n :Expression, val[1] } + + variable: IDENTIFIER { return allocate_local val[0] } + + binary_op: assignment + | addition + | subtraction + | multiplication + | division + | exponentiation + | modulo + | equality + | not_equality + | logical_or + | logical_and + | greater_than + | less_than + | greater_or_eq + | less_or_eq + + assignment: IDENTIFIER EQUAL expr { return allocate_local_assignment val[0], val[2] } + addition: expr PLUS expr { return binary val[0], val[2], 'plus:' } + subtraction: expr MINUS expr { return binary val[0], val[2], 'minus:' } + multiplication: expr ASTERISK expr { return binary val[0], val[2], 'multiplyBy:' } + division: expr FWD_SLASH expr { return binary val[0], val[2], 'divideBy:' } + exponentiation: expr EXPO expr { return binary val[0], val[2], 'toThePowerOf:' } + modulo: expr PERCENT expr { return binary val[0], val[2], 'moduloOf:' } + equality: expr EQUALITY expr { return binary val[0], val[2], 'isEqualTo:' } + not_equality: expr NOT_EQUALITY expr { return binary val[0], val[2], 'isNotEqualTo:' } + logical_or: expr OR expr { return binary val[0], val[2], 'logicalOr:' } + logical_and: expr AND expr { return binary val[0], val[2], 'logicalAnd:' } + greater_than: expr GT expr { return binary val[0], val[2], 'isGreaterThan:' } + less_than: expr LT expr { return binary val[0], val[2], 'isLessThan:' } + greater_or_eq: expr GTE expr { return binary val[0], val[2], 'isGreaterOrEqualTo:' } + less_or_eq: expr LTE expr { return binary val[0], val[2], 'isLessOrEqualTo:' } + + unary_op: unary_not + | unary_plus + | unary_minus + | unary_complement + + unary_not: BANG expr { return unary 
val[1], 'unaryNot' } + unary_plus: PLUS expr { return unary val[1], 'unaryPlus' } + unary_minus: MINUS expr { return unary val[1], 'unaryMinus' } + unary_complement: TILDE expr { return unary val[1], 'unaryComplement' } + + literal: integer + | float + | string + | nil + | true + | false + | self + + float: FLOAT { return n :Float, val[0] } + integer: INTEGER { return n :Integer, val[0] } + nil: NIL { return n :Nil } + true: TRUE { return n :True } + false: FALSE { return n :False } + self: SELF { return n :Self } + + string: STRING { return n :String, val[0] } + | interpolated_string + | empty_string + + interpolated_string: DOUBLE_TICK_STRING interpolated_string_contents DOUBLE_TICK_STRING_END { return val[1] } + interpolation: INTERPOLATE_START expr INTERPOLATE_END { return val[1] } + interpolated_string_contents: interpolated_string_chunk { return n :InterpolatedString, val[0] } + | interpolated_string_contents interpolated_string_chunk { val[0].append(val[1]); return val[0] } + interpolated_string_chunk: chars { return val[0] } + | interpolation { return to_string(val[0]) } + empty_string: DOUBLE_TICK_STRING DOUBLE_TICK_STRING_END { return n :String, '' } + + chars: CHAR { return n :String, val[0] } + | chars CHAR { val[0].append(val[1]); return val[0] } +end + +---- inner + +attr_accessor :lexer, :scopes, :state + +def initialize lexer + @lexer = lexer + @state = [] + @scopes = [] + push_scope +end + +def ast + @ast ||= do_parse + @scopes.first +end + +def on_error t, val, vstack + line = lexer.line + col = lexer.column + message = "Unexpected #{token_to_str t} at #{lexer.filename} line #{line}:#{col}:\n\n" + + start = line - 5 > 0 ? 
line - 5 : 0 + i_size = line.to_s.size + (start..(start + 5)).each do |i| + message << sprintf("\t%#{i_size}d: %s\n", i, lexer.get_line(i)) + message << "\t#{' ' * i_size} #{'-' * (col - 1)}^\n" if i == line + end + + raise SyntaxError, message +end + +def next_token + nt = lexer.next_computed_token + # just use a state stack for now, we'll have to do something + # more sophisticated soon. + if nt && nt.first == :state + if nt.last + state.push << nt.last + else + state.pop + end + next_token + else + nt + end +end + +def push_scope + new_scope = Huia::AST::Scope.new scope + new_scope.file = lexer.filename + new_scope.line = lexer.line + new_scope.column = lexer.column + scopes.push new_scope + new_scope +end + +def pop_scope + scopes.pop +end + +def scope + scopes.last +end + +def binary left, right, method + node(:MethodCall, left, node(:CallSignature, method, [right])) +end + +def unary left, method + node(:MethodCall, left, node(:CallSignature, method)) +end + +def node type, *args + Huia::AST.const_get(type).new(*args).tap do |n| + n.file = lexer.filename + n.line = lexer.line + n.column = lexer.column + end +end +alias n node + +def allocate_local name + node(:Variable, name).tap do |n| + scope.allocate_local n + end +end + +def allocate_local_assignment name, value + node(:Assignment, name, value).tap do |n| + scope.allocate_local n + end +end + +def this_closure + allocate_local('@') +end + +def scope_instance + node(:ScopeInstance, scope) +end + +def constant name + return scope_instance if name == 'self' + node(:Constant, name) +end + +def to_string expr + node(:MethodCall, expr, node(:CallSignature, 'toString')) +end diff --git a/test/racc/assets/ichk.y b/test/racc/assets/ichk.y new file mode 100644 index 0000000000..1d359df83e --- /dev/null +++ b/test/racc/assets/ichk.y @@ -0,0 +1,102 @@ +class Calculator + + prechigh + left '*' '/' + left '+' '-' + preclow + + convert + NUMBER 'Number' + end + +rule + + target : exp + | /* none */ { result = 0 } + + 
exp : exp '+' exp { result += val[2]; a = 'plus' } + | exp '-' exp { result -= val[2]; a = "string test" } + | exp '*' exp { result *= val[2] } + | exp '/' exp { result /= val[2] } + | '(' { $emb = true } exp ')' + { + raise 'must not happen' unless $emb + result = val[2] + } + | '-' NUMBER { result = -val[1] } + | NUMBER + +----header + +class Number +end + +----inner + + def initialize + @racc_debug_out = $stdout + @yydebug = false + end + + def validate(expected, src) + result = parse(src) + unless result == expected + raise "test #{@test_number} fail" + end + @test_number += 1 + end + + def parse(src) + @src = src + @test_number = 1 + yyparse self, :scan + end + + def scan(&block) + @src.each(&block) + end + +----footer + +calc = Calculator.new + +calc.validate(9, [[Number, 9], nil]) + +calc.validate(-3, + [[Number, 5], + ['*', '*'], + [Number, 1], + ['-', '*'], + [Number, 1], + ['*', '*'], + [Number, 8], + nil]) + +calc.validate(-1, + [[Number, 5], + ['+', '+'], + [Number, 2], + ['-', '-'], + [Number, 5], + ['+', '+'], + [Number, 2], + ['-', '-'], + [Number, 5], + nil]) + +calc.validate(-4, + [['-', 'UMINUS'], + [Number, 4], + nil]) + +calc.validate(40, + [[Number, 7], + ['*', '*'], + ['(', '('], + [Number, 4], + ['+', '+'], + [Number, 3], + [')', ')'], + ['-', '-'], + [Number, 9], + nil]) diff --git a/test/racc/assets/intp.y b/test/racc/assets/intp.y new file mode 100644 index 0000000000..24e547da61 --- /dev/null +++ b/test/racc/assets/intp.y @@ -0,0 +1,546 @@ +# +# intp +# + +class Intp::Parser + +prechigh + nonassoc UMINUS + left '*' '/' + left '+' '-' + nonassoc EQ +preclow + +rule + + program : stmt_list + { + result = RootNode.new( val[0] ) + } + + stmt_list : + { + result = [] + } + | stmt_list stmt EOL + { + result.push val[1] + } + | stmt_list EOL + + stmt : expr + | assign + | IDENT realprim + { + result = FuncallNode.new( @fname, val[0][0], + val[0][1], [val[1]] ) + } + | if_stmt + | while_stmt + | defun + + if_stmt : IF stmt THEN EOL stmt_list 
else_stmt END + { + result = IfNode.new( @fname, val[0][0], + val[1], val[4], val[5] ) + } + + else_stmt : ELSE EOL stmt_list + { + result = val[2] + } + | + { + result = nil + } + + while_stmt: WHILE stmt DO EOL stmt_list END + { + result = WhileNode.new(@fname, val[0][0], + val[1], val[4]) + } + + defun : DEF IDENT param EOL stmt_list END + { + result = DefNode.new(@fname, val[0][0], val[1][1], + Function.new(@fname, val[0][0], val[2], val[4])) + } + + param : '(' name_list ')' + { + result = val[1] + } + | '(' ')' + { + result = [] + } + | + { + result = [] + } + + name_list : IDENT + { + result = [ val[0][1] ] + } + | name_list ',' IDENT + { + result.push val[2][1] + } + + assign : IDENT '=' expr + { + result = AssignNode.new(@fname, val[0][0], val[0][1], val[2]) + } + + expr : expr '+' expr + { + result = FuncallNode.new(@fname, val[0].lineno, '+', [val[0], val[2]]) + } + | expr '-' expr + { + result = FuncallNode.new(@fname, val[0].lineno, '-', [val[0], val[2]]) + } + | expr '*' expr + { + result = FuncallNode.new(@fname, val[0].lineno, '*', [val[0], val[2]]) + } + | expr '/' expr + { + result = FuncallNode.new(@fname, val[0].lineno, + '/', [val[0], val[2]]) + } + | expr EQ expr + { + result = FuncallNode.new(@fname, val[0].lineno, '==', [val[0], val[2]]) + } + | primary + + primary : realprim + | '(' expr ')' + { + result = val[1] + } + | '-' expr =UMINUS + { + result = FuncallNode.new(@fname, val[0][0], '-@', [val[1]]) + } + + realprim : IDENT + { + result = VarRefNode.new(@fname, val[0][0], + val[0][1]) + } + | NUMBER + { + result = LiteralNode.new(@fname, *val[0]) + } + | STRING + { + result = StringNode.new(@fname, *val[0]) + } + | TRUE + { + result = LiteralNode.new(@fname, *val[0]) + } + | FALSE + { + result = LiteralNode.new(@fname, *val[0]) + } + | NIL + { + result = LiteralNode.new(@fname, *val[0]) + } + | funcall + + funcall : IDENT '(' args ')' + { + result = FuncallNode.new(@fname, val[0][0], val[0][1], val[2]) + } + | IDENT '(' ')' + { + result 
= FuncallNode.new(@fname, val[0][0], val[0][1], []) + } + + args : expr + { + result = val + } + | args ',' expr + { + result.push val[2] + } + +end + +---- header +# +# intp/parser.rb +# + +---- inner + + def initialize + @scope = {} + end + + RESERVED = { + 'if' => :IF, + 'else' => :ELSE, + 'while' => :WHILE, + 'then' => :THEN, + 'do' => :DO, + 'def' => :DEF, + 'true' => :TRUE, + 'false' => :FALSE, + 'nil' => :NIL, + 'end' => :END + } + + RESERVED_V = { + 'true' => true, + 'false' => false, + 'nil' => nil + } + + def parse(f, fname) + @q = [] + @fname = fname + lineno = 1 + f.each do |line| + line.strip! + until line.empty? + case line + when /\A\s+/, /\A\#.*/ + ; + when /\A[a-zA-Z_]\w*/ + word = $& + @q.push [(RESERVED[word] || :IDENT), + [lineno, RESERVED_V.key?(word) ? RESERVED_V[word] : word.intern]] + when /\A\d+/ + @q.push [:NUMBER, [lineno, $&.to_i]] + when /\A"(?:[^"\\]+|\\.)*"/, /\A'(?:[^'\\]+|\\.)*'/ + @q.push [:STRING, [lineno, eval($&)]] + when /\A==/ + @q.push [:EQ, [lineno, '==']] + when /\A./ + @q.push [$&, [lineno, $&]] + else + raise RuntimeError, 'must not happen' + end + line = $' + end + @q.push [:EOL, [lineno, nil]] + lineno += 1 + end + @q.push [false, '$'] + do_parse + end + + def next_token + @q.shift + end + + def on_error(t, v, values) + if v + line = v[0] + v = v[1] + else + line = 'last' + end + raise Racc::ParseError, "#{@fname}:#{line}: syntax error on #{v.inspect}" + end + +---- footer +# intp/node.rb + +module Intp + + class IntpError < StandardError; end + class IntpArgumentError < IntpError; end + + class Core + + def initialize + @ftab = {} + @obj = Object.new + @stack = [] + @stack.push Frame.new '(toplevel)' + end + + def frame + @stack[-1] + end + + def define_function(fname, node) + raise IntpError, "function #{fname} defined twice" if @ftab.key?(fname) + @ftab[fname] = node + end + + def call_function_or(fname, args) + call_intp_function_or(fname, args) { + call_ruby_toplevel_or(fname, args) { + yield + } + } + end + + def 
call_intp_function_or(fname, args) + if func = @ftab[fname] + frame = Frame.new(fname) + @stack.push frame + func.call self, frame, args + @stack.pop + else + yield + end + end + + def call_ruby_toplevel_or(fname, args) + if @obj.respond_to? fname, true + @obj.send fname, *args + else + yield + end + end + + end + + class Frame + + def initialize(fname) + @fname = fname + @lvars = {} + end + + attr :fname + + def lvar?(name) + @lvars.key? name + end + + def [](key) + @lvars[key] + end + + def []=(key, val) + @lvars[key] = val + end + + end + + + class Node + + def initialize(fname, lineno) + @filename = fname + @lineno = lineno + end + + attr_reader :filename + attr_reader :lineno + + def exec_list(intp, nodes) + v = nil + nodes.each {|i| v = i.evaluate(intp) } + v + end + + def intp_error!(msg) + raise IntpError, "in #{filename}:#{lineno}: #{msg}" + end + + def inspect + "#{self.class.name}/#{lineno}" + end + + end + + + class RootNode < Node + + def initialize(tree) + super nil, nil + @tree = tree + end + + def evaluate + exec_list Core.new, @tree + end + + end + + + class DefNode < Node + + def initialize(file, lineno, fname, func) + super file, lineno + @funcname = fname + @funcobj = func + end + + def evaluate(intp) + intp.define_function @funcname, @funcobj + end + + end + + class FuncallNode < Node + + def initialize(file, lineno, func, args) + super file, lineno + @funcname = func + @args = args + end + + def evaluate(intp) + args = @args.map {|i| i.evaluate intp } + begin + intp.call_intp_function_or(@funcname, args) { + if args.empty? or not args[0].respond_to?(@funcname) + intp.call_ruby_toplevel_or(@funcname, args) { + intp_error! "undefined function #{@funcname.id2name}" + } + else + recv = args.shift + recv.send @funcname, *args + end + } + rescue IntpArgumentError, ArgumentError + intp_error! 
$!.message + end + end + + end + + class Function < Node + + def initialize(file, lineno, params, body) + super file, lineno + @params = params + @body = body + end + + def call(intp, frame, args) + unless args.size == @params.size + raise IntpArgumentError, + "wrong # of arg for #{frame.fname}() (#{args.size} for #{@params.size})" + end + args.each_with_index do |v,i| + frame[@params[i]] = v + end + exec_list intp, @body + end + + end + + + class IfNode < Node + + def initialize(fname, lineno, cond, tstmt, fstmt) + super fname, lineno + @condition = cond + @tstmt = tstmt + @fstmt = fstmt + end + + def evaluate(intp) + if @condition.evaluate(intp) + exec_list intp, @tstmt + else + exec_list intp, @fstmt if @fstmt + end + end + + end + + class WhileNode < Node + + def initialize(fname, lineno, cond, body) + super fname, lineno + @condition = cond + @body = body + end + + def evaluate(intp) + while @condition.evaluate(intp) + exec_list intp, @body + end + end + + end + + + class AssignNode < Node + + def initialize(fname, lineno, vname, val) + super fname, lineno + @vname = vname + @val = val + end + + def evaluate(intp) + intp.frame[@vname] = @val.evaluate(intp) + end + + end + + class VarRefNode < Node + + def initialize(fname, lineno, vname) + super fname, lineno + @vname = vname + end + + def evaluate(intp) + if intp.frame.lvar?(@vname) + intp.frame[@vname] + else + intp.call_function_or(@vname, []) { + intp_error! 
"unknown method or local variable #{@vname.id2name}" + } + end + end + + end + + class StringNode < Node + + def initialize(fname, lineno, str) + super fname, lineno + @val = str + end + + def evaluate(intp) + @val.dup + end + + end + + class LiteralNode < Node + + def initialize(fname, lineno, val) + super fname, lineno + @val = val + end + + def evaluate(intp) + @val + end + + end + +end # module Intp + +begin + tree = nil + fname = 'src.intp' + File.open(fname) {|f| + tree = Intp::Parser.new.parse(f, fname) + } + tree.evaluate +rescue Racc::ParseError, Intp::IntpError, Errno::ENOENT + raise #### + $stderr.puts "#{File.basename $0}: #{$!}" + exit 1 +end diff --git a/test/racc/assets/journey.y b/test/racc/assets/journey.y new file mode 100644 index 0000000000..c2640f3339 --- /dev/null +++ b/test/racc/assets/journey.y @@ -0,0 +1,47 @@ +class Journey::Parser + +token SLASH LITERAL SYMBOL LPAREN RPAREN DOT STAR OR + +rule + expressions + : expressions expression { result = Cat.new(val.first, val.last) } + | expression { result = val.first } + | or + ; + expression + : terminal + | group + | star + ; + group + : LPAREN expressions RPAREN { result = Group.new(val[1]) } + ; + or + : expressions OR expression { result = Or.new([val.first, val.last]) } + ; + star + : STAR { result = Star.new(Symbol.new(val.last)) } + ; + terminal + : symbol + | literal + | slash + | dot + ; + slash + : SLASH { result = Slash.new('/') } + ; + symbol + : SYMBOL { result = Symbol.new(val.first) } + ; + literal + : LITERAL { result = Literal.new(val.first) } + dot + : DOT { result = Dot.new(val.first) } + ; + +end + +---- header + +require 'journey/parser_extras' diff --git a/test/racc/assets/liquor.y b/test/racc/assets/liquor.y new file mode 100644 index 0000000000..8045a072a4 --- /dev/null +++ b/test/racc/assets/liquor.y @@ -0,0 +1,313 @@ +# Copyright (c) 2012-2013 Peter Zotov +# 2012 Yaroslav Markin +# 2012 Nate Gadgibalaev +# +# MIT License +# +# Permission is hereby granted, free of 
charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+class Liquor::Parser
+  token comma dot endtag ident integer keyword lblock lblock2 lbracket
+    linterp lparen op_div op_eq op_gt op_geq op_lt op_leq op_minus
+    op_mod op_mul op_neq op_not op_plus pipe plaintext rblock
+    rbracket rinterp rparen string tag_ident
+
+  prechigh  # operator precedence, listed from tightest-binding to loosest
+    left dot
+    nonassoc op_uminus op_not
+    left op_mul op_div op_mod
+    left op_plus op_minus
+    left op_eq op_neq op_lt op_leq op_gt op_geq
+    left op_and
+    left op_or
+  preclow
+
+  expect 15  # number of shift/reduce conflicts racc is told to expect for this grammar
+
+  start block
+
+rule
+  block: /* empty */  # a template body: flat Ruby array of plaintext, interp and tag nodes
+      { result = [] }
+    | plaintext block
+      { result = [ val[0], *val[1] ] }
+    | interp block
+      { result = [ val[0], *val[1] ] }
+    | tag block
+      { result = [ val[0], *val[1] ] }
+
+  interp:  # linterp ... rinterp, yielding [ :interp, location, expr-or-filter-chain ]
+      linterp expr rinterp
+      { result = [ :interp, retag(val), val[1] ] }
+    | linterp filter_chain rinterp
+      { result = [ :interp, retag(val), val[1] ] }
+
+  primary_expr:  # parenthesised expr keeps its node type but is re-tagged to span the parens
+      ident
+    | lparen expr rparen
+      { result = [ val[1][0], retag(val), *val[1][2..-1] ] }
+
+  expr:  # every action builds [ :type, location, *operands ]
+      integer
+    | string
+    | tuple
+    | ident function_args
+      { result = [ :call,     retag(val), val[0], val[1] ] }
+    | expr lbracket expr rbracket
+      { result = [ :index,    retag(val), val[0], val[2] ] }
+    | expr dot ident function_args
+      { result = [ :external, retag(val), val[0], val[2], val[3] ] }
+    | expr dot ident
+      { result = [ :external, retag(val), val[0], val[2], nil ] }
+    | op_minus expr =op_uminus
+      { result = [ :uminus,   retag(val), val[1] ] }
+    | op_not expr
+      { result = [ :not,      retag(val), val[1] ] }
+    | expr op_mul expr
+      { result = [ :mul,      retag(val), val[0], val[2] ] }
+    | expr op_div expr
+      { result = [ :div,      retag(val), val[0], val[2] ] }
+    | expr op_mod expr
+      { result = [ :mod,      retag(val), val[0], val[2] ] }
+    | expr op_plus expr
+      { result = [ :plus,     retag(val), val[0], val[2] ] }
+    | expr op_minus expr
+      { result = [ :minus,    retag(val), val[0], val[2] ] }
+    | expr op_eq expr
+      { result = [ :eq,       retag(val), val[0], val[2] ] }
+    | expr op_neq expr
+      { result = [ :neq,      retag(val), val[0], val[2] ] }
+    | expr op_lt expr
+      { result = [ :lt,       retag(val), val[0], val[2] ] }
+    | expr op_leq expr
+      { result = [ :leq,      retag(val), val[0], val[2] ] }
+    | expr op_gt expr
+      { result = [ :gt,       retag(val), val[0], val[2] ] }
+    | expr op_geq expr
+      { result = [ :geq,      retag(val), val[0], val[2] ] }
+    | expr op_and expr
+      { result = [ :and,      retag(val), val[0], val[2] ] }
+    | expr op_or expr
+      { result = [ :or,       retag(val), val[0], val[2] ] }
+    | primary_expr
+
+  tuple:  # [ :tuple, location, elements ]
+      lbracket tuple_content rbracket
+      { result = [ :tuple, retag(val), val[1].compact ] }
+
+  tuple_content:  # comma-separated exprs collected into a Ruby array; may be empty
+      expr comma tuple_content
+      { result = [ val[0], *val[2] ] }
+    | expr
+      { result = [ val[0] ] }
+    | /* empty */
+      { result = [ ] }
+
+  function_args:  # [ :args, location, positional-expr-or-nil, keyword-hash ]
+      lparen function_args_inside rparen
+      { result = [ :args, retag(val), *val[1] ] }
+
+  function_args_inside:  # at most one positional expr, followed by keyword arguments
+      expr function_keywords
+      { result = [ val[0], val[1][2] ] }
+    | function_keywords
+      { result = [ nil,    val[0][2] ] }
+
+  function_keywords:  # [ :keywords, location, { name => value node } ]; duplicates go to @errors
+      keyword expr function_keywords
+      { name = val[0][2].to_sym
+        tail = val[2][2]
+        loc  = retag([ val[0], val[1] ])
+
+        if tail.include? name
+          @errors << SyntaxError.new("duplicate keyword argument `#{val[0][2]}'",
+              tail[name][1])
+        end
+
+        hash = {
+          name => [ val[1][0], loc, *val[1][2..-1] ]
+        }.merge(tail)
+
+        result = [ :keywords, retag([ loc, val[2] ]), hash ]
+      }
+    | /* empty */
+      { result = [ :keywords, nil, {} ] }
+
+  filter_chain:  # each stage's input is stored in node[3][2], the positional slot of the next call's :args
+      expr pipe filter_chain_cont
+      { result = [ val[0], *val[2] ].
+          reduce { |tree, node| node[3][2] = tree; node }
+      }
+
+  filter_chain_cont:  # pipe-separated filter calls collected into a Ruby array
+      filter_call pipe filter_chain_cont
+      { result = [ val[0], *val[2] ] }
+    | filter_call
+      { result = [ val[0] ] }
+
+  filter_call:  # a :call node whose positional argument is nil here (set by filter_chain)
+      ident function_keywords
+      { ident_loc = val[0][1]
+        empty_args_loc = { line:  ident_loc[:line],
+                           start: ident_loc[:end] + 1,
+                           end:   ident_loc[:end] + 1, }
+        result = [ :call, val[0][1], val[0],
+                   [ :args, val[1][1] || empty_args_loc, nil, val[1][2] ] ]
+      }
+
+  tag:  # [ :tag, location, name, first-expr-or-nil, *reduce_tag_args(continuation) ]
+      lblock ident expr tag_first_cont
+      { result = [ :tag, retag(val), val[1], val[2], *reduce_tag_args(val[3][2]) ] }
+    | lblock ident tag_first_cont
+      { result = [ :tag, retag(val), val[1], nil,    *reduce_tag_args(val[2][2]) ] }
+
+  # Racc cannot do lookahead across rules. I had to add states
+  # explicitly to avoid S/R conflicts. You are not expected to
+  # understand this.
+
+  tag_first_cont:  # [ :cont, location, flat keyword/value list ]
+      rblock
+      { result = [ :cont,  retag(val), [] ] }
+    | keyword tag_first_cont2
+      { result = [ :cont,  retag(val), [ val[0], *val[1][2] ] ] }
+
+  tag_first_cont2:  # value after a keyword: either a nested block or an expr
+      rblock block lblock2 tag_next_cont
+      { result = [ :cont2, val[0][1],  [ [:block, val[0][1], val[1] ], *val[3] ] ] }
+    | expr tag_first_cont
+      { result = [ :cont2, retag(val), [ val[0], *val[1][2] ] ] }
+
+  tag_next_cont:  # continuation after a nested block: endtag or another keyword
+      endtag rblock
+      { result = [] }
+    | keyword tag_next_cont2
+      { result = [ val[0], *val[1] ] }
+
+  tag_next_cont2:
+      rblock block lblock2 tag_next_cont
+      { result = [ [:block, val[0][1], val[1] ], *val[3] ] }
+    | expr keyword tag_next_cont3
+      { result = [ val[0], val[1], *val[2] ] }
+
+  tag_next_cont3:
+      rblock block lblock2 tag_next_cont
+      { result = [ [:block, val[0][1], val[1] ], *val[3] ] }
+    | expr tag_next_cont
+      { result = [ val[0], *val[1] ] }
+
+---- inner
+  attr_reader :errors, :ast  # errors collected by the last parse / tree returned by do_parse
+
+  def initialize(tags={})  # +tags+ is stored and later handed to Lexer.lex unchanged
+    super()
+
+    @errors = []
+    @ast    = nil
+    @tags   = tags
+  end
+
+  def success?  # true when no errors have been recorded
+    @errors.empty?
+  end
+
+  def parse(string, name='(code)')  # lex + parse +string+; returns success?, tree is left in @ast
+    @errors.clear
+    @name   = name
+    @ast    = nil
+
+    begin
+      @stream = Lexer.lex(string, @name, @tags)
+      @ast = do_parse
+    rescue Liquor::SyntaxError => e  # a raised syntax error is recorded instead of propagating
+      @errors << e
+    end
+
+    success?
+  end
+
+  def next_token  # called by do_parse: [ type, whole token ] pairs, nil once @stream is empty
+    tok = @stream.shift
+    [ tok[0], tok ] if tok
+  end
+
+  TOKEN_NAME_MAP = {  # token type => text shown in "unexpected token" messages
+    :comma    => ',',
+    :dot      => '.',
+    :lblock   => '{%',
+    :rblock   => '%}',
+    :linterp  => '{{',
+    :rinterp  => '}}',
+    :lbracket => '[',
+    :rbracket => ']',
+    :lparen   => '(',
+    :rparen   => ')',
+    :pipe     => '|',
+    :op_not   => '!',
+    :op_mul   => '*',
+    :op_div   => '/',
+    :op_mod   => '%',
+    :op_plus  => '+',
+    :op_minus => '-',
+    :op_eq    => '==',
+    :op_neq   => '!=',
+    :op_lt    => '<',
+    :op_leq   => '<=',
+    :op_gt    => '>',
+    :op_geq   => '>=',
+    :keyword  => 'keyword argument name',
+    :kwarg    => 'keyword argument',
+    :ident    => 'identifier',
+  }
+
+  def on_error(error_token_id, error_token, value_stack)  # always raises Liquor::SyntaxError
+    if token_to_str(error_token_id) == "$end"
+      raise Liquor::SyntaxError.new("unexpected end of program", {
+        file: @name
+      })
+    else
+      type, (loc, value) = error_token
+      type = TOKEN_NAME_MAP[type] || type  # unmapped types are reported by their raw name
+
+      raise Liquor::SyntaxError.new("unexpected token `#{type}'", loc)
+    end
+  end
+
+  def retag(nodes)  # merge the locations (node[1]) of +nodes+ into one spanning location
+    loc = nodes.map { |node| node[1] }.compact  # nodes whose location is nil are ignored
+    first, *, last = loc
+    return first if last.nil?  # zero or one location: nothing to merge (nil or that location)
+
+    {
+      file:  first[:file],
+      line:  first[:line],
+      start: first[:start],
+      end:    last[:end],
+    }
+  end
+
+  def reduce_tag_args(list)  # pair up a flat [ k, v, k, v, ... ] list into :blockarg / :kwarg nodes
+    list.each_slice(2).reduce([]) { |args, (k, v)|
+      if v[0] == :block
+        args << [ :blockarg, retag([ k, v ]), k, v[2] || [] ]
+      else
+        args << [ :kwarg,    retag([ k, v ]), k, v          ]
+      end
+    }
+  end
\ No newline at end of file
diff --git a/test/racc/assets/machete.y b/test/racc/assets/machete.y
new file mode 100644
index 0000000000..ea92d47a69
--- /dev/null
+++ b/test/racc/assets/machete.y
@@ -0,0 +1,423 @@
+# Copyright (c) 2011 SUSE
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+class Machete::Parser
+
+token NIL
+token TRUE
+token FALSE
+token INTEGER
+token SYMBOL
+token STRING
+token REGEXP
+token ANY
+token EVEN
+token ODD
+token METHOD_NAME
+token CLASS_NAME
+
+start expression
+
+rule
+
+expression : primary  # one or more primaries separated by "|"; alternatives end up in one ChoiceMatcher
+           | expression "|" primary {
+               result = if val[0].is_a?(ChoiceMatcher)
+                 ChoiceMatcher.new(val[0].alternatives << val[2])
+               else
+                 ChoiceMatcher.new([val[0], val[2]])
+               end
+             }
+
+primary : node
+        | array
+        | literal
+        | any
+
+node : CLASS_NAME {
+         result = NodeMatcher.new(val[0].to_sym)
+       }
+     | CLASS_NAME "<" attrs ">" {
+         result = NodeMatcher.new(val[0].to_sym, val[2])
+       }
+
+attrs : attr  # comma-separated attrs merged into a single Hash
+      | attrs "," attr { result = val[0].merge(val[2]) }
+
+attr : method_name "=" expression { result = { val[0].to_sym => val[2] } }
+     | method_name "^=" SYMBOL {
+         result = {
+           val[0].to_sym => SymbolRegexpMatcher.new(
+             Regexp.new("^" + Regexp.escape(symbol_value(val[2]).to_s))
+           )
+         }
+       }
+     | method_name "$=" SYMBOL {
+         result = {
+           val[0].to_sym => SymbolRegexpMatcher.new(
+             Regexp.new(Regexp.escape(symbol_value(val[2]).to_s) + "$")
+           )
+         }
+       }
+     | method_name "*=" SYMBOL {
+         result = {
+           val[0].to_sym => SymbolRegexpMatcher.new(
+             Regexp.new(Regexp.escape(symbol_value(val[2]).to_s))
+           )
+         }
+       }
+     | method_name "^=" STRING {
+         result = {
+           val[0].to_sym => StringRegexpMatcher.new(
+             Regexp.new("^" + Regexp.escape(string_value(val[2])))
+           )
+         }
+       }
+     | method_name "$=" STRING {
+         result = {
+           val[0].to_sym => StringRegexpMatcher.new(
+             Regexp.new(Regexp.escape(string_value(val[2])) + "$")
+           )
+         }
+       }
+     | method_name "*=" STRING {
+         result = {
+           val[0].to_sym => StringRegexpMatcher.new(
+             Regexp.new(Regexp.escape(string_value(val[2])))
+           )
+         }
+       }
+     | method_name "*=" REGEXP {
+         result = {
+           val[0].to_sym => IndifferentRegexpMatcher.new(
+             Regexp.new(regexp_value(val[2]))
+           )
+         }
+       }
+
+# Hack to overcome the fact that some tokens will lex as simple tokens, not
+# METHOD_NAME tokens, and that "reserved words" will lex as separate kinds of
+# tokens.
+method_name : METHOD_NAME
+            | NIL
+            | TRUE
+            | FALSE
+            | ANY
+            | EVEN
+            | ODD
+            | "*"
+            | "+"
+            | "<"
+            | ">"
+            | "^"
+            | "|"
+
+array : "[" items_opt "]" { result = ArrayMatcher.new(val[1]) }
+
+items_opt : /* empty */ { result = [] }
+          | items
+
+items : item           { result = [val[0]] }
+      | items "," item { result = val[0] << val[2] }
+
+item : expression
+     | expression quantifier { result = Quantifier.new(val[0], *val[1]) }
+
+quantifier : "*" { result = [0, nil, 1] }  # each action yields the three extra arguments splatted into Quantifier.new
+           | "+" { result = [1, nil, 1] }
+           | "?" { result = [0, 1, 1] }
+           | "{" INTEGER "}" {
+               result = [integer_value(val[1]), integer_value(val[1]), 1]
+             }
+           | "{" INTEGER "," "}" {
+               result = [integer_value(val[1]), nil, 1]
+             }
+           | "{" "," INTEGER "}" {
+               result = [0, integer_value(val[2]), 1]
+             }
+           | "{" INTEGER "," INTEGER "}" {
+               result = [integer_value(val[1]), integer_value(val[3]), 1]
+             }
+           | "{" EVEN "}" { result = [0, nil, 2] }
+           | "{" ODD "}"  { result = [1, nil, 2] }
+
+literal : NIL     { result = LiteralMatcher.new(nil) }
+        | TRUE    { result = LiteralMatcher.new(true) }
+        | FALSE   { result = LiteralMatcher.new(false) }
+        | INTEGER { result = LiteralMatcher.new(integer_value(val[0])) }
+        | SYMBOL  { result = LiteralMatcher.new(symbol_value(val[0])) }
+        | STRING  { result = LiteralMatcher.new(string_value(val[0])) }
+        | REGEXP  { result = LiteralMatcher.new(regexp_value(val[0])) }
+
+any : ANY { result = AnyMatcher.new }
+
+---- inner
+
+include Matchers
+
+class SyntaxError < StandardError; end
+
+def parse(input)  # Parses the pattern string +input+; returns whatever do_parse returns.
+  @input = input
+  @pos = 0  # offset into @input of the next character to lex
+
+  do_parse
+end
+
+private
+
+def integer_value(value)  # Converts the text of an INTEGER token to an Integer, honouring radix prefixes.
+  if value =~ /\A0[bB]/
+    value[2..-1].to_i(2)
+  elsif value =~ /\A0[oO]/
+    value[2..-1].to_i(8)
+  elsif value =~ /\A0[dD]/
+    value[2..-1].to_i(10)
+  elsif value =~ /\A0[xX]/
+    value[2..-1].to_i(16)
+  elsif value =~ /\A0/
+    value.to_i(8)
+  else
+    value.to_i  # NOTE(review): a signed prefixed literal such as "-0x10" also lands here - confirm that is intended
+  end
+end
+
+def symbol_value(value)  # Drops the first character (the ":" of a SYMBOL token) and interns the rest.
+  value[1..-1].to_sym
+end
+
+def string_value(value)  # Strips the surrounding quotes from a STRING token and decodes its escapes.
+  quote = value[0..0]
+  if quote == "'"
+    value[1..-2].gsub("\\\\", "\\").gsub("\\'", "'")
+  elsif quote == '"'
+    value[1..-2].
+      gsub("\\\\", "\\").
+      gsub('\\"', '"').
+      gsub("\\n", "\n").
+      gsub("\\t", "\t").
+      gsub("\\r", "\r").
+      gsub("\\f", "\f").
+      gsub("\\v", "\v").
+      gsub("\\a", "\a").
+      gsub("\\e", "\e").
+      gsub("\\b", "\b").
+      gsub("\\s", "\s").
+      gsub(/\\([0-7]{1,3})/) { $1.to_i(8).chr }.
+      gsub(/\\x([0-9a-fA-F]{1,2})/) { $1.to_i(16).chr }
+  else
+    raise "Unknown quote: #{quote.inspect}."
+  end
+end
+
+REGEXP_OPTIONS = {  # option letter => Regexp flag
+  'i' => Regexp::IGNORECASE,
+  'm' => Regexp::MULTILINE,
+  'x' => Regexp::EXTENDED
+}
+
+def regexp_value(value)  # Builds a Regexp from the text of a REGEXP token ("/pattern/opts").
+  /\A\/(.*)\/([imx]*)\z/ =~ value
+  pattern, options = $1, $2
+
+  Regexp.new(pattern, options.chars.map { |ch| REGEXP_OPTIONS[ch] }.inject(:|))
+end
+
+# "^" needs to be here because if it were among operators recognized by
+# METHOD_NAME, "^=" would be recognized as two tokens.
+SIMPLE_TOKENS = [
+  "|",
+  "<",
+  ">",
+  ",",
+  "=",
+  "^=",
+  "^",
+  "$=",
+  "[",
+  "]",
+  "*=",
+  "*",
+  "+",
+  "?",
+  "{",
+  "}"
+]
+
+COMPLEX_TOKENS = [  # [type, regexp] pairs tried in order; \A (not ^) so a match cannot start on a later line
+  [:NIL,   /\Anil/],
+  [:TRUE,  /\Atrue/],
+  [:FALSE, /\Afalse/],
+  # INTEGER needs to be before METHOD_NAME, otherwise e.g. "+1" would be
+  # recognized as two tokens.
+  [
+    :INTEGER,
+    /\A
+      [+-]?                               # sign
+      (
+        0[bB][01]+(_[01]+)*               # binary (prefixed)
+        |
+        0[oO][0-7]+(_[0-7]+)*             # octal (prefixed)
+        |
+        0[dD]\d+(_\d+)*                   # decimal (prefixed)
+        |
+        0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)* # hexadecimal (prefixed)
+        |
+        0[0-7]*(_[0-7]+)*                 # octal (unprefixed)
+        |
+        [1-9]\d*(_\d+)*                   # decimal (unprefixed)
+      )
+    /x
+  ],
+  [
+    :SYMBOL,
+    /\A
+      :
+      (
+        # class name
+        [A-Z][a-zA-Z0-9_]*
+        |
+        # regular method name
+        [a-z_][a-zA-Z0-9_]*[?!=]?
+        |
+        # instance variable name
+        @[a-zA-Z_][a-zA-Z0-9_]*
+        |
+        # class variable name
+        @@[a-zA-Z_][a-zA-Z0-9_]*
+        |
+        # operator (sorted by length, then alphabetically)
+        (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&*+\-\/<>^`|~])
+      )
+    /x
+  ],
+  [
+    :STRING,
+    /\A
+      (
+        '                 # single-quoted string
+          (
+            \\[\\']           # escape
+            |
+            [^']              # regular character
+          )*
+        '
+        |
+        "                 # double-quoted string
+          (
+            \\                # escape
+            (
+              [\\"ntrfvaebs]    # one-character escape
+              |
+              [0-7]{1,3}        # octal number escape
+              |
+              x[0-9a-fA-F]{1,2} # hexadecimal number escape
+            )
+            |
+            [^"]              # regular character
+          )*
+        "
+      )
+    /x
+  ],
+  [
+    :REGEXP,
+    /\A
+      \/
+        (
+          \\                                          # escape
+          (
+            [\\\/ntrfvaebs\(\)\[\]\{\}\-\.\?\*\+\|\^\$] # one-character escape
+            |
+            [0-7]{2,3}                                  # octal number escape
+            |
+            x[0-9a-fA-F]{1,2}                           # hexadecimal number escape
+          )
+          |
+          [^\/]                                       # regular character
+        )*
+      \/
+      [imx]*
+    /x
+  ],
+  # ANY, EVEN and ODD need to be before METHOD_NAME, otherwise they would be
+  # recognized as method names.
+  [:ANY,  /\Aany/],
+  [:EVEN, /\Aeven/],
+  [:ODD,  /\Aodd/],
+  # We exclude "*", "+", "<", ">", "^" and "|" from method names since they are
+  # lexed as simple tokens. This is because they have also other meanings in
+  # Machete patterns beside Ruby method names.
+  [
+    :METHOD_NAME,
+    /\A
+      (
+        # regular name
+        [a-z_][a-zA-Z0-9_]*[?!=]?
+        |
+        # operator (sorted by length, then alphabetically)
+        (<=>|===|\[\]=|\*\*|\+@|-@|<<|<=|==|=~|>=|>>|\[\]|[%&\-\/`~])
+      )
+    /x
+  ],
+  [:CLASS_NAME, /\A[A-Z][a-zA-Z0-9_]*/]
+]
+
+def next_token  # Returns the next [type, text] pair, or false at end of input; raises SyntaxError otherwise.
+  skip_whitespace
+
+  return false if remaining_input.empty?
+
+  # Complex tokens need to be before simple tokens, otherwise e.g. "<<" would be
+  # recognized as two tokens.
+
+  COMPLEX_TOKENS.each do |type, regexp|
+    if remaining_input =~ regexp  # regexps are \A-anchored, so $& always starts at @pos
+      @pos += $&.length
+      return [type, $&]
+    end
+  end
+
+  SIMPLE_TOKENS.each do |token|
+    if remaining_input[0...token.length] == token
+      @pos += token.length
+      return [token, token]
+    end
+  end
+
+  raise SyntaxError, "Unexpected character: #{remaining_input[0..0].inspect}."
+end
+
+def skip_whitespace  # Advances @pos past any run of spaces, tabs, CRs and LFs at the current position.
+  if remaining_input =~ /\A[ \t\r\n]+/
+    @pos += $&.length
+  end
+end
+
+def remaining_input  # The part of @input that has not been lexed yet.
+  @input[@pos..-1]
+end
+
+def on_error(error_token_id, error_value, value_stack)  # Reports the offending token value via SyntaxError.
+  raise SyntaxError, "Unexpected token: #{error_value.inspect}."
+end
diff --git a/test/racc/assets/macruby.y b/test/racc/assets/macruby.y
new file mode 100644
index 0000000000..5ede008308
--- /dev/null
+++ b/test/racc/assets/macruby.y
@@ -0,0 +1,2197 @@
+# Copyright (c) 2013 Peter Zotov
+#
+# Parts of the source are derived from ruby_parser:
+# Copyright (c) Ryan Davis, seattle.rb
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +class Parser::MacRuby + +token kCLASS kMODULE kDEF kUNDEF kBEGIN kRESCUE kENSURE kEND kIF kUNLESS + kTHEN kELSIF kELSE kCASE kWHEN kWHILE kUNTIL kFOR kBREAK kNEXT + kREDO kRETRY kIN kDO kDO_COND kDO_BLOCK kDO_LAMBDA kRETURN kYIELD kSUPER + kSELF kNIL kTRUE kFALSE kAND kOR kNOT kIF_MOD kUNLESS_MOD kWHILE_MOD + kUNTIL_MOD kRESCUE_MOD kALIAS kDEFINED klBEGIN klEND k__LINE__ + k__FILE__ k__ENCODING__ tIDENTIFIER tFID tGVAR tIVAR tCONSTANT + tLABEL tCVAR tNTH_REF tBACK_REF tSTRING_CONTENT tINTEGER tFLOAT + tREGEXP_END tUPLUS tUMINUS tUMINUS_NUM tPOW tCMP tEQ tEQQ tNEQ + tGEQ tLEQ tANDOP tOROP tMATCH tNMATCH tDOT tDOT2 tDOT3 tAREF + tASET tLSHFT tRSHFT tCOLON2 tCOLON3 tOP_ASGN tASSOC tLPAREN + tLPAREN2 tRPAREN tLPAREN_ARG tLBRACK tLBRACK2 tRBRACK tLBRACE + tLBRACE_ARG tSTAR tSTAR2 tAMPER tAMPER2 tTILDE tPERCENT tDIVIDE + tPLUS tMINUS tLT tGT tPIPE tBANG tCARET tLCURLY tRCURLY + tBACK_REF2 tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tREGEXP_OPT + tWORDS_BEG tQWORDS_BEG tSTRING_DBEG tSTRING_DVAR tSTRING_END + tSTRING tSYMBOL tNL tEH tCOLON tCOMMA tSPACE tSEMI tLAMBDA tLAMBEG + tCHARACTER + +prechigh + right tBANG tTILDE tUPLUS + right tPOW + right tUMINUS_NUM tUMINUS + left tSTAR2 tDIVIDE tPERCENT + left tPLUS tMINUS + left tLSHFT tRSHFT + left tAMPER2 + left tPIPE tCARET + left tGT tGEQ tLT tLEQ + nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH + left tANDOP + left tOROP + nonassoc tDOT2 tDOT3 + right tEH tCOLON + left kRESCUE_MOD + right tEQL tOP_ASGN + nonassoc kDEFINED + right kNOT + left kOR kAND + nonassoc kIF_MOD kUNLESS_MOD kWHILE_MOD kUNTIL_MOD + nonassoc tLBRACE_ARG + nonassoc tLOWEST +preclow + +rule + + program: top_compstmt + + top_compstmt: top_stmts opt_terms + { + result = 
@builder.compstmt(val[0]) + } + + top_stmts: # nothing + { + result = [] + } + | top_stmt + { + result = [ val[0] ] + } + | top_stmts terms top_stmt + { + result = val[0] << val[2] + } + | error top_stmt + { + result = [ val[1] ] + } + + top_stmt: stmt + | klBEGIN tLCURLY top_compstmt tRCURLY + { + result = @builder.preexe(val[0], val[1], val[2], val[3]) + } + + bodystmt: compstmt opt_rescue opt_else opt_ensure + { + rescue_bodies = val[1] + else_t, else_ = val[2] + ensure_t, ensure_ = val[3] + + if rescue_bodies.empty? && !else_.nil? + diagnostic :warning, :useless_else, nil, else_t + end + + result = @builder.begin_body(val[0], + rescue_bodies, + else_t, else_, + ensure_t, ensure_) + } + + compstmt: stmts opt_terms + { + result = @builder.compstmt(val[0]) + } + + stmts: # nothing + { + result = [] + } + | stmt + { + result = [ val[0] ] + } + | stmts terms stmt + { + result = val[0] << val[2] + } + | error stmt + { + result = [ val[1] ] + } + + stmt: kALIAS fitem + { + @lexer.state = :expr_fname + } + fitem + { + result = @builder.alias(val[0], val[1], val[3]) + } + | kALIAS tGVAR tGVAR + { + result = @builder.alias(val[0], + @builder.gvar(val[1]), + @builder.gvar(val[2])) + } + | kALIAS tGVAR tBACK_REF + { + result = @builder.alias(val[0], + @builder.gvar(val[1]), + @builder.back_ref(val[2])) + } + | kALIAS tGVAR tNTH_REF + { + diagnostic :error, :nth_ref_alias, nil, val[2] + } + | kUNDEF undef_list + { + result = @builder.undef_method(val[0], val[1]) + } + | stmt kIF_MOD expr_value + { + result = @builder.condition_mod(val[0], nil, + val[1], val[2]) + } + | stmt kUNLESS_MOD expr_value + { + result = @builder.condition_mod(nil, val[0], + val[1], val[2]) + } + | stmt kWHILE_MOD expr_value + { + result = @builder.loop_mod(:while, val[0], val[1], val[2]) + } + | stmt kUNTIL_MOD expr_value + { + result = @builder.loop_mod(:until, val[0], val[1], val[2]) + } + | stmt kRESCUE_MOD stmt + { + rescue_body = @builder.rescue_body(val[1], + nil, nil, nil, + nil, val[2]) + + 
result = @builder.begin_body(val[0], [ rescue_body ]) + } + | klEND tLCURLY compstmt tRCURLY + { + result = @builder.postexe(val[0], val[1], val[2], val[3]) + } + | lhs tEQL command_call + { + result = @builder.assign(val[0], val[1], val[2]) + } + | mlhs tEQL command_call + { + result = @builder.multi_assign(val[0], val[1], val[2]) + } + | var_lhs tOP_ASGN command_call + { + result = @builder.op_assign(val[0], val[1], val[2]) + } + | primary_value tLBRACK2 opt_call_args rbracket tOP_ASGN command_call + { + result = @builder.op_assign( + @builder.index( + val[0], val[1], val[2], val[3]), + val[4], val[5]) + } + | primary_value tDOT tIDENTIFIER tOP_ASGN command_call + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | primary_value tDOT tCONSTANT tOP_ASGN command_call + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | backref tOP_ASGN command_call + { + @builder.op_assign(val[0], val[1], val[2]) + } + | lhs tEQL mrhs + { + result = @builder.assign(val[0], val[1], + @builder.array(nil, val[2], nil)) + } + | mlhs tEQL arg_value + { + result = @builder.multi_assign(val[0], val[1], val[2]) + } + | mlhs tEQL mrhs + { + result = @builder.multi_assign(val[0], val[1], + @builder.array(nil, val[2], nil)) + } + | expr + + expr: command_call + | expr kAND expr + { + result = @builder.logical_op(:and, val[0], val[1], val[2]) + } + | expr kOR expr + { + result = @builder.logical_op(:or, val[0], val[1], val[2]) + } + | kNOT opt_nl expr + { + result = @builder.not_op(val[0], nil, val[2], nil) + } + | tBANG command_call + { + result = 
@builder.not_op(val[0], nil, val[1], nil) + } + | arg + + expr_value: expr + + command_call: command + | block_command + + block_command: block_call + | block_call tDOT operation2 command_args + { + result = @builder.call_method(val[0], val[1], val[2], + *val[3]) + } + | block_call tCOLON2 operation2 command_args + { + result = @builder.call_method(val[0], val[1], val[2], + *val[3]) + } + + cmd_brace_block: tLBRACE_ARG + { + @static_env.extend_dynamic + } + opt_block_param compstmt tRCURLY + { + result = [ val[0], val[2], val[3], val[4] ] + + @static_env.unextend + } + + command: operation command_args =tLOWEST + { + result = @builder.call_method(nil, nil, val[0], + *val[1]) + } + | operation command_args cmd_brace_block + { + method_call = @builder.call_method(nil, nil, val[0], + *val[1]) + + begin_t, args, body, end_t = val[2] + result = @builder.block(method_call, + begin_t, args, body, end_t) + } + | primary_value tDOT operation2 command_args =tLOWEST + { + result = @builder.call_method(val[0], val[1], val[2], + *val[3]) + } + | primary_value tDOT operation2 command_args cmd_brace_block + { + method_call = @builder.call_method(val[0], val[1], val[2], + *val[3]) + + begin_t, args, body, end_t = val[4] + result = @builder.block(method_call, + begin_t, args, body, end_t) + } + | primary_value tCOLON2 operation2 command_args =tLOWEST + { + result = @builder.call_method(val[0], val[1], val[2], + *val[3]) + } + | primary_value tCOLON2 operation2 command_args cmd_brace_block + { + method_call = @builder.call_method(val[0], val[1], val[2], + *val[3]) + + begin_t, args, body, end_t = val[4] + result = @builder.block(method_call, + begin_t, args, body, end_t) + } + | kSUPER command_args + { + result = @builder.keyword_cmd(:super, val[0], + *val[1]) + } + | kYIELD command_args + { + result = @builder.keyword_cmd(:yield, val[0], + *val[1]) + } + | kRETURN call_args + { + result = @builder.keyword_cmd(:return, val[0], + nil, val[1], nil) + } + | kBREAK call_args + { + 
result = @builder.keyword_cmd(:break, val[0], + nil, val[1], nil) + } + | kNEXT call_args + { + result = @builder.keyword_cmd(:next, val[0], + nil, val[1], nil) + } + + mlhs: mlhs_basic + { + result = @builder.multi_lhs(nil, val[0], nil) + } + | tLPAREN mlhs_inner rparen + { + result = @builder.begin(val[0], val[1], val[2]) + } + + mlhs_inner: mlhs_basic + { + result = @builder.multi_lhs(nil, val[0], nil) + } + | tLPAREN mlhs_inner rparen + { + result = @builder.multi_lhs(val[0], val[1], val[2]) + } + + mlhs_basic: mlhs_head + | mlhs_head mlhs_item + { + result = val[0]. + push(val[1]) + } + | mlhs_head tSTAR mlhs_node + { + result = val[0]. + push(@builder.splat(val[1], val[2])) + } + | mlhs_head tSTAR mlhs_node tCOMMA mlhs_post + { + result = val[0]. + push(@builder.splat(val[1], val[2])). + concat(val[4]) + } + | mlhs_head tSTAR + { + result = val[0]. + push(@builder.splat(val[1])) + } + | mlhs_head tSTAR tCOMMA mlhs_post + { + result = val[0]. + push(@builder.splat(val[1])). + concat(val[3]) + } + | tSTAR mlhs_node + { + result = [ @builder.splat(val[0], val[1]) ] + } + | tSTAR mlhs_node tCOMMA mlhs_post + { + result = [ @builder.splat(val[0], val[1]), + *val[3] ] + } + | tSTAR + { + result = [ @builder.splat(val[0]) ] + } + | tSTAR tCOMMA mlhs_post + { + result = [ @builder.splat(val[0]), + *val[2] ] + } + + mlhs_item: mlhs_node + | tLPAREN mlhs_inner rparen + { + result = @builder.begin(val[0], val[1], val[2]) + } + + mlhs_head: mlhs_item tCOMMA + { + result = [ val[0] ] + } + | mlhs_head mlhs_item tCOMMA + { + result = val[0] << val[1] + } + + mlhs_post: mlhs_item + { + result = [ val[0] ] + } + | mlhs_post tCOMMA mlhs_item + { + result = val[0] << val[2] + } + + mlhs_node: variable + { + result = @builder.assignable(val[0]) + } + | primary_value tLBRACK2 opt_call_args rbracket + { + result = @builder.index_asgn(val[0], val[1], val[2], val[3]) + } + | primary_value tDOT tIDENTIFIER + { + result = @builder.attr_asgn(val[0], val[1], val[2]) + } + | 
primary_value tCOLON2 tIDENTIFIER + { + result = @builder.attr_asgn(val[0], val[1], val[2]) + } + | primary_value tDOT tCONSTANT + { + result = @builder.attr_asgn(val[0], val[1], val[2]) + } + | primary_value tCOLON2 tCONSTANT + { + result = @builder.assignable( + @builder.const_fetch(val[0], val[1], val[2])) + } + | tCOLON3 tCONSTANT + { + result = @builder.assignable( + @builder.const_global(val[0], val[1])) + } + | backref + { + result = @builder.assignable(val[0]) + } + + lhs: variable + { + result = @builder.assignable(val[0]) + } + | primary_value tLBRACK2 opt_call_args rbracket + { + result = @builder.index_asgn(val[0], val[1], val[2], val[3]) + } + | primary_value tDOT tIDENTIFIER + { + result = @builder.attr_asgn(val[0], val[1], val[2]) + } + | primary_value tCOLON2 tIDENTIFIER + { + result = @builder.attr_asgn(val[0], val[1], val[2]) + } + | primary_value tDOT tCONSTANT + { + result = @builder.attr_asgn(val[0], val[1], val[2]) + } + | primary_value tCOLON2 tCONSTANT + { + result = @builder.assignable( + @builder.const_fetch(val[0], val[1], val[2])) + } + | tCOLON3 tCONSTANT + { + result = @builder.assignable( + @builder.const_global(val[0], val[1])) + } + | backref + { + result = @builder.assignable(val[0]) + } + + cname: tIDENTIFIER + { + diagnostic :error, :module_name_const, nil, val[0] + } + | tCONSTANT + + cpath: tCOLON3 cname + { + result = @builder.const_global(val[0], val[1]) + } + | cname + { + result = @builder.const(val[0]) + } + | primary_value tCOLON2 cname + { + result = @builder.const_fetch(val[0], val[1], val[2]) + } + + fname: tIDENTIFIER | tCONSTANT | tFID + | op + | reswords + + fsym: fname + { + result = @builder.symbol(val[0]) + } + | symbol + + fitem: fsym + | dsym + + undef_list: fitem + { + result = [ val[0] ] + } + | undef_list tCOMMA + { + @lexer.state = :expr_fname + } + fitem + { + result = val[0] << val[3] + } + + op: tPIPE | tCARET | tAMPER2 | tCMP | tEQ | tEQQ + | tMATCH | tNMATCH | tGT | tGEQ | tLT | tLEQ + | tNEQ | tLSHFT 
| tRSHFT | tPLUS | tMINUS | tSTAR2 + | tSTAR | tDIVIDE | tPERCENT | tPOW | tBANG | tTILDE + | tUPLUS | tUMINUS | tAREF | tASET | tBACK_REF2 + + reswords: k__LINE__ | k__FILE__ | k__ENCODING__ | klBEGIN | klEND + | kALIAS | kAND | kBEGIN | kBREAK | kCASE + | kCLASS | kDEF | kDEFINED | kDO | kELSE + | kELSIF | kEND | kENSURE | kFALSE | kFOR + | kIN | kMODULE | kNEXT | kNIL | kNOT + | kOR | kREDO | kRESCUE | kRETRY | kRETURN + | kSELF | kSUPER | kTHEN | kTRUE | kUNDEF + | kWHEN | kYIELD | kIF | kUNLESS | kWHILE + | kUNTIL + + arg: lhs tEQL arg + { + result = @builder.assign(val[0], val[1], val[2]) + } + | lhs tEQL arg kRESCUE_MOD arg + { + rescue_body = @builder.rescue_body(val[3], + nil, nil, nil, + nil, val[4]) + + rescue_ = @builder.begin_body(val[2], [ rescue_body ]) + + result = @builder.assign(val[0], val[1], rescue_) + } + | var_lhs tOP_ASGN arg + { + result = @builder.op_assign(val[0], val[1], val[2]) + } + | var_lhs tOP_ASGN arg kRESCUE_MOD arg + { + rescue_body = @builder.rescue_body(val[3], + nil, nil, nil, + nil, val[4]) + + rescue_ = @builder.begin_body(val[2], [ rescue_body ]) + + result = @builder.op_assign(val[0], val[1], rescue_) + } + | primary_value tLBRACK2 opt_call_args rbracket tOP_ASGN arg + { + result = @builder.op_assign( + @builder.index( + val[0], val[1], val[2], val[3]), + val[4], val[5]) + } + | primary_value tDOT tIDENTIFIER tOP_ASGN arg + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | primary_value tDOT tCONSTANT tOP_ASGN arg + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg + { + result = @builder.op_assign( + @builder.call_method( + val[0], val[1], val[2]), + val[3], val[4]) + } + | primary_value tCOLON2 tCONSTANT tOP_ASGN arg + { + diagnostic :error, :dynamic_const, nil, val[2], [ val[3] ] + } + | tCOLON3 tCONSTANT tOP_ASGN arg + { + diagnostic :error, 
:dynamic_const, nil, val[1], [ val[2] ] + } + | backref tOP_ASGN arg + { + result = @builder.op_assign(val[0], val[1], val[2]) + } + | arg tDOT2 arg + { + result = @builder.range_inclusive(val[0], val[1], val[2]) + } + | arg tDOT3 arg + { + result = @builder.range_exclusive(val[0], val[1], val[2]) + } + | arg tPLUS arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tMINUS arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tSTAR2 arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tDIVIDE arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tPERCENT arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tPOW arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | tUMINUS_NUM tINTEGER tPOW arg + { + result = @builder.unary_op(val[0], + @builder.binary_op( + @builder.integer(val[1]), + val[2], val[3])) + } + | tUMINUS_NUM tFLOAT tPOW arg + { + result = @builder.unary_op(val[0], + @builder.binary_op( + @builder.float(val[1]), + val[2], val[3])) + } + | tUPLUS arg + { + result = @builder.unary_op(val[0], val[1]) + } + | tUMINUS arg + { + result = @builder.unary_op(val[0], val[1]) + } + | arg tPIPE arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tCARET arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tAMPER2 arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tCMP arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tGT arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tGEQ arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tLT arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tLEQ arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tEQ arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tEQQ arg + { + result = @builder.binary_op(val[0], val[1], 
val[2]) + } + | arg tNEQ arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tMATCH arg + { + result = @builder.match_op(val[0], val[1], val[2]) + } + | arg tNMATCH arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | tBANG arg + { + result = @builder.not_op(val[0], nil, val[1], nil) + } + | tTILDE arg + { + result = @builder.unary_op(val[0], val[1]) + } + | arg tLSHFT arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tRSHFT arg + { + result = @builder.binary_op(val[0], val[1], val[2]) + } + | arg tANDOP arg + { + result = @builder.logical_op(:and, val[0], val[1], val[2]) + } + | arg tOROP arg + { + result = @builder.logical_op(:or, val[0], val[1], val[2]) + } + | kDEFINED opt_nl arg + { + result = @builder.keyword_cmd(:defined?, val[0], nil, [ val[2] ], nil) + } + + | arg tEH arg opt_nl tCOLON arg + { + result = @builder.ternary(val[0], val[1], + val[2], val[4], val[5]) + } + | primary + + arg_value: arg + + aref_args: none + | args trailer + | args tCOMMA assocs trailer + { + result = val[0] << @builder.associate(nil, val[2], nil) + } + | assocs trailer + { + result = [ @builder.associate(nil, val[0], nil) ] + } + + paren_args: tLPAREN2 opt_call_args rparen + { + result = val + } + + opt_paren_args: # nothing + { + result = [ nil, [], nil ] + } + | paren_args + + opt_call_args: # nothing + { + result = [] + } + | call_args + + call_args: command + { + result = [ val[0] ] + } + | args opt_block_arg + { + result = val[0].concat(val[1]) + } + | assocs opt_block_arg + { + result = [ @builder.associate(nil, val[0], nil) ] + result.concat(val[1]) + } + | args tCOMMA assocs opt_block_arg + { + assocs = @builder.associate(nil, val[2], nil) + result = val[0] << assocs + result.concat(val[3]) + } + | args tCOMMA assocs tCOMMA args opt_block_arg + { + val[2][-1] = @builder.objc_varargs(val[2][-1], val[4]) + assocs = @builder.associate(nil, val[2], nil) + result = val[0] << assocs + result.concat(val[5]) + } + | 
block_arg + { + result = [ val[0] ] + } + + call_args2: arg_value tCOMMA args opt_block_arg + { + result = [ val[0], *val[2].concat(val[3]) ] + } + | arg_value tCOMMA block_arg + { + result = [ val[0], val[2] ] + } + | assocs opt_block_arg + { + result = [ @builder.associate(nil, val[0], nil), + *val[1] ] + } + | arg_value tCOMMA assocs opt_block_arg + { + result = [ val[0], + @builder.associate(nil, val[2], nil), + *val[3] ] + } + | arg_value tCOMMA args tCOMMA assocs opt_block_arg + { + result = [ val[0], + *val[2]. + push(@builder.associate(nil, val[4], nil)). + concat(val[5]) ] + } + | block_arg + { + result = [ val[0] ] + } + + command_args: { + result = @lexer.cmdarg.dup + @lexer.cmdarg.push(true) + } + open_args + { + @lexer.cmdarg = val[0] + + result = val[1] + } + + open_args: call_args + { + result = [ nil, val[0], nil ] + } + | tLPAREN_ARG + { + @lexer.state = :expr_endarg + } + rparen + { + result = [ val[0], [], val[2] ] + } + | tLPAREN_ARG call_args2 + { + @lexer.state = :expr_endarg + } + rparen + { + result = [ val[0], val[1], val[3] ] + } + + block_arg: tAMPER arg_value + { + result = @builder.block_pass(val[0], val[1]) + } + + opt_block_arg: tCOMMA block_arg + { + result = [ val[1] ] + } + | tCOMMA + { + result = [] + } + | # nothing + { + result = [] + } + + args: arg_value + { + result = [ val[0] ] + } + | tSTAR arg_value + { + result = [ @builder.splat(val[0], val[1]) ] + } + | args tCOMMA arg_value + { + result = val[0] << val[2] + } + | args tCOMMA tSTAR arg_value + { + result = val[0] << @builder.splat(val[2], val[3]) + } + + mrhs: args tCOMMA arg_value + { + result = val[0] << val[2] + } + | args tCOMMA tSTAR arg_value + { + result = val[0] << @builder.splat(val[2], val[3]) + } + | tSTAR arg_value + { + result = [ @builder.splat(val[0], val[1]) ] + } + + primary: literal + | strings + | xstring + | regexp + | words + | qwords + | var_ref + | backref + | tFID + { + result = @builder.call_method(nil, nil, val[0]) + } + | kBEGIN bodystmt kEND 
+ { + result = @builder.begin_keyword(val[0], val[1], val[2]) + } + | tLPAREN_ARG expr + { + @lexer.state = :expr_endarg + } + rparen + { + result = @builder.begin(val[0], val[1], val[3]) + } + | tLPAREN compstmt tRPAREN + { + result = @builder.begin(val[0], val[1], val[2]) + } + | primary_value tCOLON2 tCONSTANT + { + result = @builder.const_fetch(val[0], val[1], val[2]) + } + | tCOLON3 tCONSTANT + { + result = @builder.const_global(val[0], val[1]) + } + | tLBRACK aref_args tRBRACK + { + result = @builder.array(val[0], val[1], val[2]) + } + | tLBRACE assoc_list tRCURLY + { + result = @builder.associate(val[0], val[1], val[2]) + } + | kRETURN + { + result = @builder.keyword_cmd(:return, val[0]) + } + | kYIELD tLPAREN2 call_args rparen + { + result = @builder.keyword_cmd(:yield, val[0], val[1], val[2], val[3]) + } + | kYIELD tLPAREN2 rparen + { + result = @builder.keyword_cmd(:yield, val[0], val[1], [], val[2]) + } + | kYIELD + { + result = @builder.keyword_cmd(:yield, val[0]) + } + | kDEFINED opt_nl tLPAREN2 expr rparen + { + result = @builder.keyword_cmd(:defined?, val[0], + val[2], [ val[3] ], val[4]) + } + | kNOT tLPAREN2 expr rparen + { + result = @builder.not_op(val[0], val[1], val[2], val[3]) + } + | kNOT tLPAREN2 rparen + { + result = @builder.not_op(val[0], val[1], nil, val[2]) + } + | operation brace_block + { + method_call = @builder.call_method(nil, nil, val[0]) + + begin_t, args, body, end_t = val[1] + result = @builder.block(method_call, + begin_t, args, body, end_t) + } + | method_call + | method_call brace_block + { + begin_t, args, body, end_t = val[1] + result = @builder.block(val[0], + begin_t, args, body, end_t) + } + | tLAMBDA lambda + { + lambda_call = @builder.call_lambda(val[0]) + + args, (begin_t, body, end_t) = val[1] + result = @builder.block(lambda_call, + begin_t, args, body, end_t) + } + | kIF expr_value then compstmt if_tail kEND + { + else_t, else_ = val[4] + result = @builder.condition(val[0], val[1], val[2], + val[3], else_t, + 
else_, val[5]) + } + | kUNLESS expr_value then compstmt opt_else kEND + { + else_t, else_ = val[4] + result = @builder.condition(val[0], val[1], val[2], + else_, else_t, + val[3], val[5]) + } + | kWHILE + { + @lexer.cond.push(true) + } + expr_value do + { + @lexer.cond.pop + } + compstmt kEND + { + result = @builder.loop(:while, val[0], val[2], val[3], + val[5], val[6]) + } + | kUNTIL + { + @lexer.cond.push(true) + } + expr_value do + { + @lexer.cond.pop + } + compstmt kEND + { + result = @builder.loop(:until, val[0], val[2], val[3], + val[5], val[6]) + } + | kCASE expr_value opt_terms case_body kEND + { + *when_bodies, (else_t, else_body) = *val[3] + + result = @builder.case(val[0], val[1], + when_bodies, else_t, else_body, + val[4]) + } + | kCASE opt_terms case_body kEND + { + *when_bodies, (else_t, else_body) = *val[2] + + result = @builder.case(val[0], nil, + when_bodies, else_t, else_body, + val[3]) + } + | kFOR for_var kIN + { + @lexer.cond.push(true) + } + expr_value do + { + @lexer.cond.pop + } + compstmt kEND + { + result = @builder.for(val[0], val[1], + val[2], val[4], + val[5], val[7], val[8]) + } + | kCLASS cpath superclass + { + @static_env.extend_static + @lexer.push_cmdarg + } + bodystmt kEND + { + if in_def? + diagnostic :error, :class_in_def, nil, val[0] + end + + lt_t, superclass = val[2] + result = @builder.def_class(val[0], val[1], + lt_t, superclass, + val[4], val[5]) + + @lexer.pop_cmdarg + @static_env.unextend + } + | kCLASS tLSHFT expr term + { + result = @def_level + @def_level = 0 + + @static_env.extend_static + @lexer.push_cmdarg + } + bodystmt kEND + { + result = @builder.def_sclass(val[0], val[1], val[2], + val[5], val[6]) + + @lexer.pop_cmdarg + @static_env.unextend + + @def_level = val[4] + } + | kMODULE cpath + { + @static_env.extend_static + @lexer.push_cmdarg + } + bodystmt kEND + { + if in_def? 
+ diagnostic :error, :module_in_def, nil, val[0] + end + + result = @builder.def_module(val[0], val[1], + val[3], val[4]) + + @lexer.pop_cmdarg + @static_env.unextend + } + | kDEF fname + { + @def_level += 1 + @static_env.extend_static + @lexer.push_cmdarg + } + f_arglist bodystmt kEND + { + result = @builder.def_method(val[0], val[1], + val[3], val[4], val[5]) + + @lexer.pop_cmdarg + @static_env.unextend + @def_level -= 1 + } + | kDEF singleton dot_or_colon + { + @lexer.state = :expr_fname + } + fname + { + @def_level += 1 + @static_env.extend_static + @lexer.push_cmdarg + } + f_arglist bodystmt kEND + { + result = @builder.def_singleton(val[0], val[1], val[2], + val[4], val[6], val[7], val[8]) + + @lexer.pop_cmdarg + @static_env.unextend + @def_level -= 1 + } + | kBREAK + { + result = @builder.keyword_cmd(:break, val[0]) + } + | kNEXT + { + result = @builder.keyword_cmd(:next, val[0]) + } + | kREDO + { + result = @builder.keyword_cmd(:redo, val[0]) + } + | kRETRY + { + result = @builder.keyword_cmd(:retry, val[0]) + } + + primary_value: primary + + then: term + | kTHEN + | term kTHEN + { + result = val[1] + } + + do: term + | kDO_COND + + if_tail: opt_else + | kELSIF expr_value then compstmt if_tail + { + else_t, else_ = val[4] + result = [ val[0], + @builder.condition(val[0], val[1], val[2], + val[3], else_t, + else_, nil), + ] + } + + opt_else: none + | kELSE compstmt + { + result = val + } + + for_var: lhs + | mlhs + + f_marg: f_norm_arg + | tLPAREN f_margs rparen + { + result = @builder.multi_lhs(val[0], val[1], val[2]) + } + + f_marg_list: f_marg + { + result = [ val[0] ] + } + | f_marg_list tCOMMA f_marg + { + result = val[0] << val[2] + } + + f_margs: f_marg_list + | f_marg_list tCOMMA tSTAR f_norm_arg + { + result = val[0]. + push(@builder.objc_restarg(val[2], val[3])) + } + | f_marg_list tCOMMA tSTAR f_norm_arg tCOMMA f_marg_list + { + result = val[0]. + push(@builder.objc_restarg(val[2], val[3])). 
+ concat(val[5]) + } + | f_marg_list tCOMMA tSTAR + { + result = val[0]. + push(@builder.objc_restarg(val[2])) + } + | f_marg_list tCOMMA tSTAR tCOMMA f_marg_list + { + result = val[0]. + push(@builder.objc_restarg(val[2])). + concat(val[4]) + } + | tSTAR f_norm_arg + { + result = [ @builder.objc_restarg(val[0], val[1]) ] + } + | tSTAR f_norm_arg tCOMMA f_marg_list + { + result = [ @builder.objc_restarg(val[0], val[1]), + *val[3] ] + } + | tSTAR + { + result = [ @builder.objc_restarg(val[0]) ] + } + | tSTAR tCOMMA f_marg_list + { + result = [ @builder.objc_restarg(val[0]), + *val[2] ] + } + + block_param: f_arg tCOMMA f_block_optarg tCOMMA f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_arg tCOMMA f_block_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[6]). + concat(val[7]) + } + | f_arg tCOMMA f_block_optarg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_arg tCOMMA f_block_optarg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_arg tCOMMA f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_arg tCOMMA + | f_arg tCOMMA f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_arg opt_f_block_arg + { + result = val[0].concat(val[1]) + } + | f_block_optarg tCOMMA f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_block_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_block_optarg opt_f_block_arg + { + result = val[0]. + concat(val[1]) + } + | f_block_optarg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). 
+ concat(val[3]) + } + | f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[1]) + } + | f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_block_arg + { + result = [ val[0] ] + } + + opt_block_param: # nothing + { + result = @builder.args(nil, [], nil) + } + | block_param_def + { + @lexer.state = :expr_value + } + + block_param_def: tPIPE opt_bv_decl tPIPE + { + result = @builder.args(val[0], val[1], val[2]) + } + | tOROP + { + result = @builder.args(val[0], [], val[0]) + } + | tPIPE block_param opt_bv_decl tPIPE + { + result = @builder.args(val[0], val[1].concat(val[2]), val[3]) + } + + opt_bv_decl: # nothing + { + result = [] + } + | tSEMI bv_decls + { + result = val[1] + } + + bv_decls: bvar + { + result = [ val[0] ] + } + | bv_decls tCOMMA bvar + { + result = val[0] << val[2] + } + + bvar: tIDENTIFIER + { + result = @builder.shadowarg(val[0]) + } + | f_bad_arg + + lambda: { + @static_env.extend_dynamic + } + f_larglist lambda_body + { + result = [ val[1], val[2] ] + + @static_env.unextend + } + + f_larglist: tLPAREN2 f_args opt_bv_decl rparen + { + result = @builder.args(val[0], val[1].concat(val[2]), val[3]) + } + | f_args + { + result = @builder.args(nil, val[0], nil) + } + + lambda_body: tLAMBEG compstmt tRCURLY + { + result = [ val[0], val[1], val[2] ] + } + | kDO_LAMBDA compstmt kEND + { + result = [ val[0], val[1], val[2] ] + } + + do_block: kDO_BLOCK + { + @static_env.extend_dynamic + } + opt_block_param compstmt kEND + { + result = [ val[0], val[2], val[3], val[4] ] + + @static_env.unextend + } + + block_call: command do_block + { + begin_t, block_args, body, end_t = val[1] + result = @builder.block(val[0], + begin_t, block_args, body, end_t) + } + | block_call tDOT operation2 opt_paren_args + { + lparen_t, args, rparen_t = val[3] + result = @builder.call_method(val[0], val[1], val[2], + lparen_t, args, rparen_t) + } + | block_call tCOLON2 operation2 opt_paren_args + { + lparen_t, 
args, rparen_t = val[3] + result = @builder.call_method(val[0], val[1], val[2], + lparen_t, args, rparen_t) + } + + method_call: operation paren_args + { + lparen_t, args, rparen_t = val[1] + result = @builder.call_method(nil, nil, val[0], + lparen_t, args, rparen_t) + } + | primary_value tDOT operation2 opt_paren_args + { + lparen_t, args, rparen_t = val[3] + result = @builder.call_method(val[0], val[1], val[2], + lparen_t, args, rparen_t) + } + | primary_value tCOLON2 operation2 paren_args + { + lparen_t, args, rparen_t = val[3] + result = @builder.call_method(val[0], val[1], val[2], + lparen_t, args, rparen_t) + } + | primary_value tCOLON2 operation3 + { + result = @builder.call_method(val[0], val[1], val[2]) + } + | primary_value tDOT paren_args + { + lparen_t, args, rparen_t = val[2] + result = @builder.call_method(val[0], val[1], nil, + lparen_t, args, rparen_t) + } + | primary_value tCOLON2 paren_args + { + lparen_t, args, rparen_t = val[2] + result = @builder.call_method(val[0], val[1], nil, + lparen_t, args, rparen_t) + } + | kSUPER paren_args + { + lparen_t, args, rparen_t = val[1] + result = @builder.keyword_cmd(:super, val[0], + lparen_t, args, rparen_t) + } + | kSUPER + { + result = @builder.keyword_cmd(:zsuper, val[0]) + } + | primary_value tLBRACK2 opt_call_args rbracket + { + result = @builder.index(val[0], val[1], val[2], val[3]) + } + + brace_block: tLCURLY + { + @static_env.extend_dynamic + } + opt_block_param compstmt tRCURLY + { + result = [ val[0], val[2], val[3], val[4] ] + + @static_env.unextend + } + | kDO + { + @static_env.extend_dynamic + } + opt_block_param compstmt kEND + { + result = [ val[0], val[2], val[3], val[4] ] + + @static_env.unextend + } + + case_body: kWHEN args then compstmt cases + { + result = [ @builder.when(val[0], val[1], val[2], val[3]), + *val[4] ] + } + + cases: opt_else + { + result = [ val[0] ] + } + | case_body + + opt_rescue: kRESCUE exc_list exc_var then compstmt opt_rescue + { + assoc_t, exc_var = val[2] + + if 
val[1] + exc_list = @builder.array(nil, val[1], nil) + end + + result = [ @builder.rescue_body(val[0], + exc_list, assoc_t, exc_var, + val[3], val[4]), + *val[5] ] + } + | + { + result = [] + } + + exc_list: arg_value + { + result = [ val[0] ] + } + | mrhs + | none + + exc_var: tASSOC lhs + { + result = [ val[0], val[1] ] + } + | none + + opt_ensure: kENSURE compstmt + { + result = [ val[0], val[1] ] + } + | none + + literal: numeric + | symbol + | dsym + + strings: string + { + result = @builder.string_compose(nil, val[0], nil) + } + + string: string1 + { + result = [ val[0] ] + } + | string string1 + { + result = val[0] << val[1] + } + + string1: tSTRING_BEG string_contents tSTRING_END + { + result = @builder.string_compose(val[0], val[1], val[2]) + } + | tSTRING + { + result = @builder.string(val[0]) + } + | tCHARACTER + { + result = @builder.character(val[0]) + } + + xstring: tXSTRING_BEG xstring_contents tSTRING_END + { + result = @builder.xstring_compose(val[0], val[1], val[2]) + } + + regexp: tREGEXP_BEG regexp_contents tSTRING_END tREGEXP_OPT + { + opts = @builder.regexp_options(val[3]) + result = @builder.regexp_compose(val[0], val[1], val[2], opts) + } + + words: tWORDS_BEG word_list tSTRING_END + { + result = @builder.words_compose(val[0], val[1], val[2]) + } + + word_list: # nothing + { + result = [] + } + | word_list word tSPACE + { + result = val[0] << @builder.word(val[1]) + } + + word: string_content + { + result = [ val[0] ] + } + | word string_content + { + result = val[0] << val[1] + } + + qwords: tQWORDS_BEG qword_list tSTRING_END + { + result = @builder.words_compose(val[0], val[1], val[2]) + } + + qword_list: # nothing + { + result = [] + } + | qword_list tSTRING_CONTENT tSPACE + { + result = val[0] << @builder.string_internal(val[1]) + } + + string_contents: # nothing + { + result = [] + } + | string_contents string_content + { + result = val[0] << val[1] + } + +xstring_contents: # nothing + { + result = [] + } + | xstring_contents 
string_content + { + result = val[0] << val[1] + } + +regexp_contents: # nothing + { + result = [] + } + | regexp_contents string_content + { + result = val[0] << val[1] + } + + string_content: tSTRING_CONTENT + { + result = @builder.string_internal(val[0]) + } + | tSTRING_DVAR string_dvar + { + result = val[1] + } + | tSTRING_DBEG + { + @lexer.cond.push(false) + @lexer.cmdarg.push(false) + } + compstmt tRCURLY + { + @lexer.cond.lexpop + @lexer.cmdarg.lexpop + + result = @builder.begin(val[0], val[2], val[3]) + } + + string_dvar: tGVAR + { + result = @builder.gvar(val[0]) + } + | tIVAR + { + result = @builder.ivar(val[0]) + } + | tCVAR + { + result = @builder.cvar(val[0]) + } + | backref + + + symbol: tSYMBOL + { + result = @builder.symbol(val[0]) + } + + dsym: tSYMBEG xstring_contents tSTRING_END + { + result = @builder.symbol_compose(val[0], val[1], val[2]) + } + + numeric: tINTEGER + { + result = @builder.integer(val[0]) + } + | tFLOAT + { + result = @builder.float(val[0]) + } + | tUMINUS_NUM tINTEGER =tLOWEST + { + result = @builder.negate(val[0], + @builder.integer(val[1])) + } + | tUMINUS_NUM tFLOAT =tLOWEST + { + result = @builder.negate(val[0], + @builder.float(val[1])) + } + + variable: tIDENTIFIER + { + result = @builder.ident(val[0]) + } + | tIVAR + { + result = @builder.ivar(val[0]) + } + | tGVAR + { + result = @builder.gvar(val[0]) + } + | tCONSTANT + { + result = @builder.const(val[0]) + } + | tCVAR + { + result = @builder.cvar(val[0]) + } + | kNIL + { + result = @builder.nil(val[0]) + } + | kSELF + { + result = @builder.self(val[0]) + } + | kTRUE + { + result = @builder.true(val[0]) + } + | kFALSE + { + result = @builder.false(val[0]) + } + | k__FILE__ + { + result = @builder.__FILE__(val[0]) + } + | k__LINE__ + { + result = @builder.__LINE__(val[0]) + } + | k__ENCODING__ + { + result = @builder.__ENCODING__(val[0]) + } + + var_ref: variable + { + result = @builder.accessible(val[0]) + } + + var_lhs: variable + { + result = 
@builder.assignable(val[0]) + } + + backref: tNTH_REF + { + result = @builder.nth_ref(val[0]) + } + | tBACK_REF + { + result = @builder.back_ref(val[0]) + } + + superclass: term + { + result = nil + } + | tLT expr_value term + { + result = [ val[0], val[1] ] + } + | error term + { + yyerrok + result = nil + } + + f_arglist: tLPAREN2 f_args rparen + { + result = @builder.args(val[0], val[1], val[2]) + + @lexer.state = :expr_value + } + | f_args term + { + result = @builder.args(nil, val[0], nil) + } + + f_args: f_arg tCOMMA f_optarg tCOMMA f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_arg tCOMMA f_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[6]). + concat(val[7]) + } + | f_arg tCOMMA f_optarg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_arg tCOMMA f_optarg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_arg tCOMMA f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_arg tCOMMA f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_arg opt_f_block_arg + { + result = val[0]. + concat(val[1]) + } + | f_optarg tCOMMA f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_optarg tCOMMA f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[4]). + concat(val[5]) + } + | f_optarg opt_f_block_arg + { + result = val[0]. + concat(val[1]) + } + | f_optarg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). + concat(val[3]) + } + | f_rest_arg opt_f_block_arg + { + result = val[0]. + concat(val[1]) + } + | f_rest_arg tCOMMA f_arg opt_f_block_arg + { + result = val[0]. + concat(val[2]). 
+ concat(val[3]) + } + | f_block_arg + { + result = [ val[0] ] + } + | # nothing + { + result = [] + } + + f_bad_arg: tCONSTANT + { + diagnostic :error, :argument_const, nil, val[0] + } + | tIVAR + { + diagnostic :error, :argument_ivar, nil, val[0] + } + | tGVAR + { + diagnostic :error, :argument_gvar, nil, val[0] + } + | tCVAR + { + diagnostic :error, :argument_cvar, nil, val[0] + } + + f_norm_arg: f_bad_arg + | tIDENTIFIER + { + @static_env.declare val[0][0] + + result = @builder.arg(val[0]) + } + | tIDENTIFIER tASSOC tIDENTIFIER + { + @static_env.declare val[2][0] + + result = @builder.objc_kwarg(val[0], val[1], val[2]) + } + | tLABEL tIDENTIFIER + { + @static_env.declare val[1][0] + + result = @builder.objc_kwarg(val[0], nil, val[1]) + } + + f_arg_item: f_norm_arg + | tLPAREN f_margs rparen + { + result = @builder.multi_lhs(val[0], val[1], val[2]) + } + + f_arg: f_arg_item + { + result = [ val[0] ] + } + | f_arg tCOMMA f_arg_item + { + result = val[0] << val[2] + } + + f_opt: tIDENTIFIER tEQL arg_value + { + @static_env.declare val[0][0] + + result = @builder.optarg(val[0], val[1], val[2]) + } + + f_block_opt: tIDENTIFIER tEQL primary_value + { + @static_env.declare val[0][0] + + result = @builder.optarg(val[0], val[1], val[2]) + } + + f_block_optarg: f_block_opt + { + result = [ val[0] ] + } + | f_block_optarg tCOMMA f_block_opt + { + result = val[0] << val[2] + } + + f_optarg: f_opt + { + result = [ val[0] ] + } + | f_optarg tCOMMA f_opt + { + result = val[0] << val[2] + } + + restarg_mark: tSTAR2 | tSTAR + + f_rest_arg: restarg_mark tIDENTIFIER + { + @static_env.declare val[1][0] + + result = [ @builder.restarg(val[0], val[1]) ] + } + | restarg_mark + { + result = [ @builder.restarg(val[0]) ] + } + + blkarg_mark: tAMPER2 | tAMPER + + f_block_arg: blkarg_mark tIDENTIFIER + { + @static_env.declare val[1][0] + + result = @builder.blockarg(val[0], val[1]) + } + + opt_f_block_arg: tCOMMA f_block_arg + { + result = [ val[1] ] + } + | # nothing + { + result = [] + 
} + + singleton: var_ref + | tLPAREN2 expr rparen + { + result = val[1] + } + + assoc_list: # nothing + { + result = [] + } + | assocs trailer + + assocs: assoc + { + result = [ val[0] ] + } + | assocs tCOMMA assoc + { + result = val[0] << val[2] + } + + assoc: arg_value tASSOC arg_value + { + result = @builder.pair(val[0], val[1], val[2]) + } + | tLABEL arg_value + { + result = @builder.pair_keyword(val[0], val[1]) + } + + operation: tIDENTIFIER | tCONSTANT | tFID + operation2: tIDENTIFIER | tCONSTANT | tFID | op + operation3: tIDENTIFIER | tFID | op + dot_or_colon: tDOT | tCOLON2 + opt_terms: | terms + opt_nl: | tNL + rparen: opt_nl tRPAREN + { + result = val[1] + } + rbracket: opt_nl tRBRACK + { + result = val[1] + } + trailer: | tNL | tCOMMA + + term: tSEMI + { + yyerrok + } + | tNL + + terms: term + | terms tSEMI + + none: # nothing + { + result = nil + } +end + +---- header + +require 'parser' + +Parser.check_for_encoding_support + +---- inner + + def version + 19 # closest released match: v1_9_0_2 + end + + def default_encoding + Encoding::BINARY + end diff --git a/test/racc/assets/mailp.y b/test/racc/assets/mailp.y new file mode 100644 index 0000000000..da332a33ba --- /dev/null +++ b/test/racc/assets/mailp.y @@ -0,0 +1,437 @@ +# +# mailp for test +# + +class Testp + +rule + + content : DateH datetime { @field.date = val[1] } + | RecvH received + | RetpathH returnpath + | MaddrH addrs { @field.addrs.replace val[1] } + | SaddrH addr { @field.addr = val[1] } + | MmboxH mboxes { @field.addrs.replace val[1] } + | SmboxH mbox { @field.addr = val[1] } + | MsgidH msgid { @field.msgid = val[1] } + | KeyH keys { @field.keys.replace val[1] } + | EncH enc + | VersionH version + | CTypeH ctype + | CEncodingH cencode + | CDispositionH cdisp + | Mbox mbox + { + mb = val[1] + @field.phrase = mb.phrase + @field.setroute mb.route + @field.local = mb.local + @field.domain = mb.domain + } + | Spec spec + { + mb = val[1] + @field.local = mb.local + @field.domain = mb.domain + } 
+ ; + + datetime : day DIGIT ATOM DIGIT hour zone + # 0 1 2 3 4 5 + # day month year + { + t = Time.gm( val[3].to_i, val[2], val[1].to_i, 0, 0, 0 ) + result = (t + val[4] - val[5]).localtime + } + ; + + day : /* none */ + | ATOM ',' + ; + + hour : DIGIT ':' DIGIT + { + result = (result.to_i * 60 * 60) + (val[2].to_i * 60) + } + | DIGIT ':' DIGIT ':' DIGIT + { + result = (result.to_i * 60 * 60) + + (val[2].to_i * 60) + + val[4].to_i + } + ; + + zone : ATOM + { + result = ::TMail.zonestr2i( val[0] ) * 60 + } + ; + + received : from by via with id for recvdatetime + ; + + from : /* none */ + | FROM domain + { + @field.from = Address.join( val[1] ) + } + | FROM domain '@' domain + { + @field.from = Address.join( val[3] ) + } + | FROM domain DOMLIT + { + @field.from = Address.join( val[1] ) + } + ; + + by : /* none */ + | BY domain + { + @field.by = Address.join( val[1] ) + } + ; + + via : /* none */ + | VIA ATOM + { + @field.via = val[1] + } + ; + + with : /* none */ + | WITH ATOM + { + @field.with.push val[1] + } + ; + + id : /* none */ + | ID msgid + { + @field.msgid = val[1] + } + | ID ATOM + { + @field.msgid = val[1] + } + ; + + for : /* none */ + | FOR addr + { + @field.for_ = val[1].address + } + ; + + recvdatetime + : /* none */ + | ';' datetime + { + @field.date = val[1] + } + ; + + returnpath: '<' '>' + | routeaddr + { + @field.route.replace result.route + @field.addr = result.addr + } + ; + + addrs : addr { result = val } + | addrs ',' addr { result.push val[2] } + ; + + addr : mbox + | group + ; + + mboxes : mbox + { + result = val + } + | mboxes ',' mbox + { + result.push val[2] + } + ; + + mbox : spec + | routeaddr + | phrase routeaddr + { + val[1].phrase = HFdecoder.decode( result ) + result = val[1] + } + ; + + group : phrase ':' mboxes ';' + { + result = AddressGroup.new( result, val[2] ) + } + # | phrase ':' ';' { result = AddressGroup.new( result ) } + ; + + routeaddr : '<' route spec '>' + { + result = val[2] + result.route = val[1] + } + | '<' spec 
'>' + { + result = val[1] + } + ; + + route : at_domains ':' + ; + + at_domains: '@' domain { result = [ val[1] ] } + | at_domains ',' '@' domain { result.push val[3] } + ; + + spec : local '@' domain { result = Address.new( val[0], val[2] ) } + | local { result = Address.new( result, nil ) } + ; + + local : word { result = val } + | local '.' word { result.push val[2] } + ; + + domain : domword { result = val } + | domain '.' domword { result.push val[2] } + ; + + domword : atom + | DOMLIT + | DIGIT + ; + + msgid : '<' spec '>' + { + val[1] = val[1].addr + result = val.join('') + } + ; + + phrase : word + | phrase word { result << ' ' << val[1] } + ; + + word : atom + | QUOTED + | DIGIT + ; + + keys : phrase + | keys ',' phrase + ; + + enc : word + { + @field.encrypter = val[0] + } + | word word + { + @field.encrypter = val[0] + @field.keyword = val[1] + } + ; + + version : DIGIT '.' DIGIT + { + @field.major = val[0].to_i + @field.minor = val[2].to_i + } + ; + + ctype : TOKEN '/' TOKEN params + { + @field.main = val[0] + @field.sub = val[2] + } + | TOKEN params + { + @field.main = val[0] + @field.sub = '' + } + ; + + params : /* none */ + | params ';' TOKEN '=' value + { + @field.params[ val[2].downcase ] = val[4] + } + ; + + value : TOKEN + | QUOTED + ; + + cencode : TOKEN + { + @field.encoding = val[0] + } + ; + + cdisp : TOKEN disp_params + { + @field.disposition = val[0] + } + ; + + disp_params + : /* none */ + | disp_params ';' disp_param + ; + + disp_param: /* none */ + | TOKEN '=' value + { + @field.params[ val[0].downcase ] = val[2] + } + ; + + atom : ATOM + | FROM + | BY + | VIA + | WITH + | ID + | FOR + ; + +end + + +---- header +# +# mailp for test +# + +require 'tmail/mails' + + +module TMail + +---- inner + + MAILP_DEBUG = false + + def initialize + self.debug = MAILP_DEBUG + end + + def debug=( flag ) + @yydebug = flag && Racc_debug_parser + @scanner_debug = flag + end + + def debug + @yydebug + end + + + def Mailp.parse( str, obj, ident ) + 
new.parse( str, obj, ident ) + end + + + NATIVE_ROUTINE = { + 'TMail::MsgidH' => :msgid_parse, + 'TMail::RefH' => :refs_parse + } + + def parse( str, obj, ident ) + return if /\A\s*\z/ === str + + @field = obj + + if mid = NATIVE_ROUTINE[ obj.type.name ] then + send mid, str + else + unless ident then + ident = obj.type.name.split('::')[-1].to_s + cmt = [] + obj.comments.replace cmt + else + cmt = nil + end + + @scanner = MailScanner.new( str, ident, cmt ) + @scanner.debug = @scanner_debug + @first = [ ident.intern, ident ] + @pass_array = [nil, nil] + + do_parse + end + end + + + private + + + def next_token + if @first then + ret = @first + @first = nil + ret + else + @scanner.scan @pass_array + end + end + + def on_error( tok, val, vstack ) + raise ParseError, + "\nparse error in '#{@field.name}' header, on token #{val.inspect}" + end + + + + def refs_parse( str ) + arr = [] + + while mdata = ::TMail::MSGID.match( str ) do + str = mdata.post_match + + pre = mdata.pre_match + pre.strip! + proc_phrase pre, arr unless pre.empty? + arr.push mdata.to_s + end + str.strip! + proc_phrase str, arr if not pre or pre.empty? + + @field.refs.replace arr + end + + def proc_phrase( str, arr ) + while mdata = /"([^\\]*(?:\\.[^"\\]*)*)"/.match( str ) do + str = mdata.post_match + + pre = mdata.pre_match + pre.strip! + arr.push pre unless pre.empty? + arr.push mdata[1] + end + str.strip! + arr.push unless str.empty? + end + + + def msgid_parse( str ) + if mdata = ::TMail::MSGID.match( str ) then + @field.msgid = mdata.to_s + else + raise ParseError, "wrong Message-ID format: #{str}" + end + end + +---- footer + +end # module TMail + +mp = TMail::Testp.new +mp.parse diff --git a/test/racc/assets/mediacloth.y b/test/racc/assets/mediacloth.y new file mode 100644 index 0000000000..94cc411ea7 --- /dev/null +++ b/test/racc/assets/mediacloth.y @@ -0,0 +1,599 @@ +# Copyright (c) 2006 Pluron Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# The parser for the MediaWiki language. 
+# +# Usage together with a lexer: +# inputFile = File.new("data/input1", "r") +# input = inputFile.read +# parser = MediaWikiParser.new +# parser.lexer = MediaWikiLexer.new +# parser.parse(input) + +class MediaWikiParser + +token TEXT BOLD_START BOLD_END ITALIC_START ITALIC_END LINK_START LINK_END LINKSEP + INTLINK_START INTLINK_END INTLINKSEP RESOURCESEP CHAR_ENT + PRE_START PRE_END PREINDENT_START PREINDENT_END + SECTION_START SECTION_END HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL + PARA_START PARA_END UL_START UL_END OL_START OL_END LI_START LI_END + DL_START DL_END DT_START DT_END DD_START DD_END TAG_START TAG_END ATTR_NAME ATTR_VALUE + TABLE_START TABLE_END ROW_START ROW_END HEAD_START HEAD_END CELL_START CELL_END + KEYWORD TEMPLATE_START TEMPLATE_END CATEGORY PASTE_START PASTE_END + + +rule + +wiki: + repeated_contents + { + @nodes.push WikiAST.new(0, @wiki_ast_length) + #@nodes.last.children.insert(0, val[0]) + #puts val[0] + @nodes.last.children += val[0] + } + ; + +contents: + text + { + result = val[0] + } + | bulleted_list + { + result = val[0] + } + | numbered_list + { + result = val[0] + } + | dictionary_list + { + list = ListAST.new(@ast_index, @ast_length) + list.list_type = :Dictionary + list.children = val[0] + result = list + } + | preformatted + { + result = val[0] + } + | section + { + result = val[0] + } + | tag + { + result = val[0] + } + | template + { + result = val[0] + } + | KEYWORD + { + k = KeywordAST.new(@ast_index, @ast_length) + k.text = val[0] + result = k + } + | PARA_START para_contents PARA_END + { + p = ParagraphAST.new(@ast_index, @ast_length) + p.children = val[1] + result = p + } + | LINK_START link_contents LINK_END + { + l = LinkAST.new(@ast_index, @ast_length) + l.link_type = val[0] + l.url = val[1][0] + l.children += val[1][1..-1] if val[1].length > 1 + result = l + } + | PASTE_START para_contents PASTE_END + { + p = PasteAST.new(@ast_index, @ast_length) + p.children = val[1] + result = p + } + | INTLINK_START 
TEXT RESOURCESEP TEXT reslink_repeated_contents INTLINK_END + { + l = ResourceLinkAST.new(@ast_index, @ast_length) + l.prefix = val[1] + l.locator = val[3] + l.children = val[4] unless val[4].nil? or val[4].empty? + result = l + } + | INTLINK_START TEXT intlink_repeated_contents INTLINK_END + { + l = InternalLinkAST.new(@ast_index, @ast_length) + l.locator = val[1] + l.children = val[2] unless val[2].nil? or val[2].empty? + result = l + } + | INTLINK_START CATEGORY TEXT cat_sort_contents INTLINK_END + { + l = CategoryAST.new(@ast_index, @ast_length) + l.locator = val[2] + l.sort_as = val[3] + result = l + } + | INTLINK_START RESOURCESEP CATEGORY TEXT intlink_repeated_contents INTLINK_END + { + l = CategoryLinkAST.new(@ast_index, @ast_length) + l.locator = val[3] + l.children = val[4] unless val[4].nil? or val[4].empty? + result = l + } + | table + ; + +para_contents: + { + result = nil + } + | repeated_contents + { + result = val[0] + } + ; + +tag: + TAG_START tag_attributes TAG_END + { + if val[0] != val[2] + raise Racc::ParseError.new("XHTML end tag #{val[2]} does not match start tag #{val[0]}") + end + elem = ElementAST.new(@ast_index, @ast_length) + elem.name = val[0] + elem.attributes = val[1] + result = elem + } + | TAG_START tag_attributes repeated_contents TAG_END + { + if val[0] != val[3] + raise Racc::ParseError.new("XHTML end tag #{val[3]} does not match start tag #{val[0]}") + end + elem = ElementAST.new(@ast_index, @ast_length) + elem.name = val[0] + elem.attributes = val[1] + elem.children += val[2] + result = elem + } + ; + +tag_attributes: + { + result = nil + } + | ATTR_NAME tag_attributes + { + # val[1] is the nested tag_attributes map; this rule has no val[2] + attr_map = val[1] ? val[1] : {} + attr_map[val[0]] = true + result = attr_map + } + | ATTR_NAME ATTR_VALUE tag_attributes + { + attr_map = val[2] ?
val[2] : {} + attr_map[val[0]] = val[1] + result = attr_map + } + ; + + +link_contents: + TEXT + { + result = val + } + | TEXT LINKSEP link_repeated_contents + { + result = [val[0]] + result += val[2] + } + ; + + +link_repeated_contents: + repeated_contents + { + result = val[0] + } + | repeated_contents LINKSEP link_repeated_contents + { + result = val[0] + result += val[2] if val[2] + } + ; + + +intlink_repeated_contents: + { + result = nil + } + | INTLINKSEP repeated_contents + { + result = val[1] + } + ; + +cat_sort_contents: + { + result = nil + } + | INTLINKSEP TEXT + { + result = val[1] + } + ; + +reslink_repeated_contents: + { + result = nil + } + | INTLINKSEP reslink_repeated_contents + { + result = val[1] + } + | INTLINKSEP repeated_contents reslink_repeated_contents + { + i = InternalLinkItemAST.new(@ast_index, @ast_length) + i.children = val[1] + result = [i] + result += val[2] if val[2] + } + ; + +repeated_contents: contents + { + result = [] + result << val[0] + } + | repeated_contents contents + { + result = [] + result += val[0] + result << val[1] + } + ; + +text: element + { + p = TextAST.new(@ast_index, @ast_length) + p.formatting = val[0][0] + p.contents = val[0][1] + result = p + } + | formatted_element + { + result = val[0] + } + ; + +table: + TABLE_START table_contents TABLE_END + { + table = TableAST.new(@ast_index, @ast_length) + table.children = val[1] unless val[1].nil? or val[1].empty? + result = table + } + | TABLE_START TEXT table_contents TABLE_END + { + table = TableAST.new(@ast_index, @ast_length) + table.options = val[1] + table.children = val[2] unless val[2].nil? or val[2].empty? + result = table + } + +table_contents: + { + result = nil + } + | ROW_START row_contents ROW_END table_contents + { + row = TableRowAST.new(@ast_index, @ast_length) + row.children = val[1] unless val[1].nil? or val[1].empty? + result = [row] + result += val[3] unless val[3].nil? or val[3].empty? 
+ } + | ROW_START TEXT row_contents ROW_END table_contents + { + row = TableRowAST.new(@ast_index, @ast_length) + row.children = val[2] unless val[2].nil? or val[2].empty? + row.options = val[1] + result = [row] + result += val[4] unless val[4].nil? or val[4].empty? + } + +row_contents: + { + result = nil + } + | HEAD_START HEAD_END row_contents + { + cell = TableCellAST.new(@ast_index, @ast_length) + cell.type = :head + result = [cell] + result += val[2] unless val[2].nil? or val[2].empty? + } + | HEAD_START repeated_contents HEAD_END row_contents + { + cell = TableCellAST.new(@ast_index, @ast_length) + cell.children = val[1] unless val[1].nil? or val[1].empty? + cell.type = :head + result = [cell] + result += val[3] unless val[3].nil? or val[3].empty? + } + | CELL_START CELL_END row_contents + { + cell = TableCellAST.new(@ast_index, @ast_length) + cell.type = :body + result = [cell] + result += val[2] unless val[2].nil? or val[2].empty? + } + | CELL_START repeated_contents CELL_END row_contents + { + if val[2] == 'attributes' + result = [] + else + cell = TableCellAST.new(@ast_index, @ast_length) + cell.children = val[1] unless val[1].nil? or val[1].empty? + cell.type = :body + result = [cell] + end + result += val[3] unless val[3].nil? or val[3].empty? 
+ if val[2] == 'attributes' and val[3] and val[3].first.class == TableCellAST + val[3].first.attributes = val[1] + end + result + } + + +element: + TEXT + { return [:None, val[0]] } + | HLINE + { return [:HLine, val[0]] } + | CHAR_ENT + { return [:CharacterEntity, val[0]] } + | SIGNATURE_DATE + { return [:SignatureDate, val[0]] } + | SIGNATURE_NAME + { return [:SignatureName, val[0]] } + | SIGNATURE_FULL + { return [:SignatureFull, val[0]] } + ; + +formatted_element: + BOLD_START BOLD_END + { + result = FormattedAST.new(@ast_index, @ast_length) + result.formatting = :Bold + result + } + | ITALIC_START ITALIC_END + { + result = FormattedAST.new(@ast_index, @ast_length) + result.formatting = :Italic + result + } + | BOLD_START repeated_contents BOLD_END + { + p = FormattedAST.new(@ast_index, @ast_length) + p.formatting = :Bold + p.children += val[1] + result = p + } + | ITALIC_START repeated_contents ITALIC_END + { + p = FormattedAST.new(@ast_index, @ast_length) + p.formatting = :Italic + p.children += val[1] + result = p + } + ; + +bulleted_list: UL_START list_item list_contents UL_END + { + list = ListAST.new(@ast_index, @ast_length) + list.list_type = :Bulleted + list.children << val[1] + list.children += val[2] + result = list + } + ; + +numbered_list: OL_START list_item list_contents OL_END + { + list = ListAST.new(@ast_index, @ast_length) + list.list_type = :Numbered + list.children << val[1] + list.children += val[2] + result = list + } + ; + +list_contents: + { result = [] } + list_item list_contents + { + result << val[1] + result += val[2] + } + | + { result = [] } + ; + +list_item: + LI_START LI_END + { + result = ListItemAST.new(@ast_index, @ast_length) + } + | LI_START repeated_contents LI_END + { + li = ListItemAST.new(@ast_index, @ast_length) + li.children += val[1] + result = li + } + ; + +dictionary_list: + DL_START dictionary_term dictionary_contents DL_END + { + result = [val[1]] + result += val[2] + } + | DL_START dictionary_contents DL_END + { + 
result = val[1] + } + ; + +dictionary_term: + DT_START DT_END + { + result = ListTermAST.new(@ast_index, @ast_length) + } + | DT_START repeated_contents DT_END + { + term = ListTermAST.new(@ast_index, @ast_length) + term.children += val[1] + result = term + } + +dictionary_contents: + dictionary_definition dictionary_contents + { + result = [val[0]] + result += val[1] if val[1] + } + | + { + result = [] + } + +dictionary_definition: + DD_START DD_END + { + result = ListDefinitionAST.new(@ast_index, @ast_length) + } + | DD_START repeated_contents DD_END + { + term = ListDefinitionAST.new(@ast_index, @ast_length) + term.children += val[1] + result = term + } + +preformatted: PRE_START repeated_contents PRE_END + { + p = PreformattedAST.new(@ast_index, @ast_length) + p.children += val[1] + result = p + } + | PREINDENT_START repeated_contents PREINDENT_END + { + p = PreformattedAST.new(@ast_index, @ast_length) + p.indented = true + p.children += val[1] + result = p + } + ; + +section: SECTION_START repeated_contents SECTION_END + { + # The section level is the length of the SECTION_START token value + s = SectionAST.new(@ast_index, @ast_length) + s.children = val[1] + s.level = val[0].length + result = s + } + ; + +template: TEMPLATE_START TEXT template_parameters TEMPLATE_END + { + t = TemplateAST.new(@ast_index, @ast_length) + t.template_name = val[1] + t.children = val[2] unless val[2].nil? or val[2].empty?
+ result = t + } + ; + +template_parameters: + { + result = nil + } + | INTLINKSEP TEXT template_parameters + { + p = TemplateParameterAST.new(@ast_index, @ast_length) + p.parameter_value = val[1] + result = [p] + result += val[2] if val[2] + } + | INTLINKSEP template template_parameters + { + p = TemplateParameterAST.new(@ast_index, @ast_length) + p.children << val[1] + result = [p] + result += val[2] if val[2] + } + ; + +end + +---- header ---- +require 'mediacloth/mediawikiast' + +---- inner ---- + +attr_accessor :lexer + +def initialize + @nodes = [] + @context = [] + @wiki_ast_length = 0 + super +end + +#Tokenizes input string and parses it. +def parse(input) + @yydebug=true + lexer.tokenize(input) + do_parse + return @nodes.last +end + +#Asks the lexer to return the next token. +def next_token + token = @lexer.lex + if token[0].to_s.upcase.include? "_START" + @context << token[2..3] + elsif token[0].to_s.upcase.include? "_END" + @ast_index = @context.last[0] + @ast_length = token[2] + token[3] - @context.last[0] + @context.pop + else + @ast_index = token[2] + @ast_length = token[3] + end + + @wiki_ast_length += token[3] + + return token[0..1] +end diff --git a/test/racc/assets/mof.y b/test/racc/assets/mof.y new file mode 100644 index 0000000000..1adc5ade14 --- /dev/null +++ b/test/racc/assets/mof.y @@ -0,0 +1,649 @@ +# Distributed under the Ruby license +# See http://www.ruby-lang.org/en/LICENSE.txt for the full license text +# Copyright (c) 2010 Klaus Kämpf + +/* + * According to appendix A of + * http://www.dmtf.org/standards/cim/cim_spec_v22 + */ + +class MOF::Parser + prechigh +/* nonassoc UMINUS */ + left '*' '/' + left '+' '-' + preclow + + token PRAGMA INCLUDE IDENTIFIER CLASS ASSOCIATION INDICATION + AMENDED ENABLEOVERRIDE DISABLEOVERRIDE RESTRICTED TOSUBCLASS TOINSTANCE + TRANSLATABLE QUALIFIER SCOPE SCHEMA PROPERTY REFERENCE + METHOD PARAMETER FLAVOR INSTANCE + AS REF ANY OF + DT_VOID + DT_UINT8 DT_SINT8 DT_UINT16 DT_SINT16 DT_UINT32 DT_SINT32 + 
DT_UINT64 DT_SINT64 DT_REAL32 DT_REAL64 DT_CHAR16 DT_STR + DT_BOOLEAN DT_DATETIME + positiveDecimalValue + stringValue + realValue + charValue + booleanValue + nullValue + binaryValue + octalValue + decimalValue + hexValue + +rule + + /* Returns a Hash of filename and MofResult */ + mofSpecification + : /* empty */ + { result = Hash.new } + | mofProduction + { result = { @name => @result } } + | mofSpecification mofProduction + { result = val[0] + result[@name] = @result + } + ; + + mofProduction + : compilerDirective + | classDeclaration + { #puts "Class '#{val[0].name}'" + @result.classes << val[0] + } + | qualifierDeclaration + { @result.qualifiers << val[0] + @qualifiers[val[0].name.downcase] = val[0] + } + | instanceDeclaration + { @result.instances << val[0] } + ; + +/*** + * compilerDirective + * + */ + + compilerDirective + : "#" PRAGMA INCLUDE pragmaParameters_opt + { raise MOF::Helper::Error.new(@name,@lineno,@line,"Missing filename after '#pragma include'") unless val[3] + open val[3], :pragma + } + | "#" PRAGMA pragmaName pragmaParameters_opt + | "#" INCLUDE pragmaParameters_opt + { raise StyleError.new(@name,@lineno,@line,"Use '#pragma include' instead of '#include'") unless @style == :wmi + raise MOF::Helper::Error.new(@name,@lineno,@line,"Missing filename after '#include'") unless val[2] + open val[2], :pragma + } + ; + + pragmaName + : IDENTIFIER + ; + + pragmaParameters_opt + : /* empty */ + { raise StyleError.new(@name,@lineno,@line,"#pragma parameter missing") unless @style == :wmi } + | "(" pragmaParameterValues ")" + { result = val[1] } + ; + + pragmaParameterValues + : pragmaParameterValue + | pragmaParameterValues "," pragmaParameterValue + ; + + pragmaParameterValue + : string + | integerValue + { raise StyleError.new(@name,@lineno,@line,"#pragma parameter missing") unless @style == :wmi } + | IDENTIFIER + ; + +/*** + * classDeclaration + * + */ + + classDeclaration + : qualifierList_opt CLASS className alias_opt superClass_opt "{" 
classFeatures "}" ";" + { qualifiers = val[0] + features = val[6] + # FIXME: features must not include references + result = CIM::Class.new(val[2],qualifiers,val[3],val[4],features) + } + ; + + classFeatures + : /* empty */ + { result = [] } + | classFeatures classFeature + { result = val[0] << val[1] } + ; + + classFeature + : propertyDeclaration + | methodDeclaration + | referenceDeclaration /* must have association qualifier */ + ; + + + qualifierList_opt + : /* empty */ + | qualifierList + { result = CIM::QualifierSet.new val[0] } + ; + + qualifierList + : "[" qualifier qualifiers "]" + { result = val[2] + result.unshift val[1] if val[1] } + ; + + qualifiers + : /* empty */ + { result = [] } + | qualifiers "," qualifier + { result = val[0] + result << val[2] if val[2] + } + ; + + qualifier + : qualifierName qualifierParameter_opt flavor_opt + { # Get qualifier decl + qualifier = case val[0] + when CIM::Qualifier then val[0].definition + when CIM::QualifierDeclaration then val[0] + when String then @qualifiers[val[0].downcase] + else + nil + end + raise MOF::Helper::Error.new(@name,@lineno,@line,"'#{val[0]}' is not a valid qualifier") unless qualifier + value = val[1] + raise MOF::Helper::Error.new(@name,@lineno,@line,"#{value.inspect} does not match qualifier type '#{qualifier.type}'") unless qualifier.type.matches?(value)||@style == :wmi + # Don't propagate a boolean 'false' + if qualifier.type == :boolean && value == false + result = nil + else + result = CIM::Qualifier.new(qualifier,value,val[2]) + end + } + ; + + flavor_opt + : /* empty */ + | ":" flavor + { result = CIM::QualifierFlavors.new val[1] } + ; + + qualifierParameter_opt + : /* empty */ + | qualifierParameter + ; + + qualifierParameter + : "(" constantValue ")" + { result = val[1] } + | arrayInitializer + ; + + /* CIM::Flavors */ + flavor + : AMENDED | ENABLEOVERRIDE | DISABLEOVERRIDE | RESTRICTED | TOSUBCLASS | TRANSLATABLE | TOINSTANCE + { case val[0].to_sym + when :amended, :toinstance + raise 
StyleError.new(@name,@lineno,@line,"'#{val[0]}' is not a valid flavor") unless @style == :wmi + end + } + ; + + alias_opt + : /* empty */ + | alias + ; + + superClass_opt + : /* empty */ + | superClass + ; + + className + : IDENTIFIER /* must be _ in CIM v2.x */ + { raise ParseError.new("Class name must be prefixed by '_'") unless val[0].include?("_") || @style == :wmi } + ; + + alias + : AS aliasIdentifier + { result = val[1] } + ; + + aliasIdentifier + : "$" IDENTIFIER /* NO whitespace ! */ + { result = val[1] } + ; + + superClass + : ":" className + { result = val[1] } + ; + + + propertyDeclaration + : qualifierList_opt dataType propertyName array_opt defaultValue_opt ";" + { if val[3] + type = CIM::Array.new val[3],val[1] + else + type = val[1] + end + result = CIM::Property.new(type,val[2],val[0],val[4]) + } + ; + + referenceDeclaration + : qualifierList_opt objectRef referenceName array_opt defaultValue_opt ";" + { # val[3] is array_opt; val[4] (defaultValue_opt) is passed on to CIM::Reference below + if val[3] + raise StyleError.new(@name,@lineno,@line,"Array not allowed in reference declaration") unless @style == :wmi + end + result = CIM::Reference.new(val[1],val[2],val[0],val[4]) } + ; + + methodDeclaration + : qualifierList_opt dataType methodName "(" parameterList_opt ")" ";" + { result = CIM::Method.new(val[1],val[2],val[0],val[4]) } + ; + + propertyName + : IDENTIFIER + | PROPERTY + { # tmplprov.mof has 'string Property;' + raise StyleError.new(@name,@lineno,@line,"Invalid keyword '#{val[0]}' used for property name") unless @style == :wmi + } + ; + + referenceName + : IDENTIFIER + | INDICATION + { result = "Indication" } + ; + + methodName + : IDENTIFIER + ; + + dataType + : DT_UINT8 + | DT_SINT8 + | DT_UINT16 + | DT_SINT16 + | DT_UINT32 + | DT_SINT32 + | DT_UINT64 + | DT_SINT64 + | DT_REAL32 + | DT_REAL64 + | DT_CHAR16 + | DT_STR + | DT_BOOLEAN + | DT_DATETIME + | DT_VOID + { raise StyleError.new(@name,@lineno,@line,"'void' is not a valid datatype") unless @style == :wmi } + ; + + objectRef + : className + { # WMI uses class names as
data types (without REF ?!) + raise StyleError.new(@name,@lineno,@line,"Expected 'ref' keyword after classname '#{val[0]}'") unless @style == :wmi + result = CIM::ReferenceType.new val[0] + } + + | className REF + { result = CIM::ReferenceType.new val[0] } + ; + + parameterList_opt + : /* empty */ + | parameterList + ; + + parameterList + : parameter parameters + { result = val[1].unshift val[0] } + ; + + parameters + : /* empty */ + { result = [] } + | parameters "," parameter + { result = val[0] << val[2] } + ; + + parameter + : qualifierList_opt typespec parameterName array_opt parameterValue_opt + { if val[3] + type = CIM::Array.new val[3], val[1] + else + type = val[1] + end + result = CIM::Property.new(type,val[2],val[0]) + } + ; + + typespec + : dataType + | objectRef + ; + + parameterName + : IDENTIFIER + ; + + array_opt + : /* empty */ + | array + ; + + parameterValue_opt + : /* empty */ + | defaultValue + { raise "Default parameter value not allowed in syntax style '#{@style}'" unless @style == :wmi } + ; + + array + : "[" positiveDecimalValue_opt "]" + { result = val[1] } + ; + + positiveDecimalValue_opt + : /* empty */ + { result = -1 } + | positiveDecimalValue + ; + + defaultValue_opt + : /* empty */ + | defaultValue + ; + + defaultValue + : "=" initializer + { result = val[1] } + ; + + initializer + : constantValue + | arrayInitializer + | referenceInitializer + ; + + arrayInitializer + : "{" constantValues "}" + { result = val[1] } + ; + + constantValues + : /* empty */ + | constantValue + { result = [ val[0] ] } + | constantValues "," constantValue + { result = val[0] << val[2] } + ; + + constantValue + : integerValue + | realValue + | charValue + | string + | booleanValue + | nullValue + | instance + { raise "Instance as property value not allowed in syntax style '#{@style}'" unless @style == :wmi } + ; + + integerValue + : binaryValue + | octalValue + | decimalValue + | positiveDecimalValue + | hexValue + ; + + string + : stringValue + | string
stringValue + { result = val[0] + val[1] } + ; + + referenceInitializer + : objectHandle + | aliasIdentifier + ; + + objectHandle + : namespace_opt modelPath + ; + + namespace_opt + : /* empty */ + | namespaceHandle ":" + ; + + namespaceHandle + : IDENTIFIER + ; + + /* + * Note + : structure depends on type of namespace + */ + + modelPath + : className "." keyValuePairList + ; + + keyValuePairList + : keyValuePair keyValuePairs + ; + + keyValuePairs + : /* empty */ + | keyValuePairs "," keyValuePair + ; + + keyValuePair + : keyname "=" initializer + ; + + keyname + : propertyName | referenceName + ; + +/*** + * qualifierDeclaration + * + */ + + qualifierDeclaration + /* 0 1 2 3 4 */ + : QUALIFIER qualifierName qualifierType scope defaultFlavor_opt ";" + { result = CIM::QualifierDeclaration.new( val[1], val[2][0], val[2][1], val[3], val[4]) } + ; + + defaultFlavor_opt + : /* empty */ + | defaultFlavor + ; + + qualifierName + : IDENTIFIER + | ASSOCIATION /* meta qualifier */ + | INDICATION /* meta qualifier */ + | REFERENCE /* Added in DSP0004 2.7.0 */ + | SCHEMA + ; + + /* [type, value] */ + qualifierType + : ":" dataType array_opt defaultValue_opt + { type = val[2].nil? ? 
val[1] : CIM::Array.new(val[2],val[1]) + result = [ type, val[3] ] + } + ; + + scope + : "," SCOPE "(" metaElements ")" + { result = CIM::QualifierScopes.new(val[3]) } + ; + + metaElements + : metaElement + { result = [ val[0] ] } + | metaElements "," metaElement + { result = val[0] << val[2] } + ; + + metaElement + : SCHEMA + | CLASS + | ASSOCIATION + | INDICATION + | QUALIFIER + | PROPERTY + | REFERENCE + | METHOD + | PARAMETER + | ANY + ; + + defaultFlavor + : "," FLAVOR "(" flavors ")" + { result = CIM::QualifierFlavors.new val[3] } + ; + + flavors + : flavor + { result = [ val[0] ] } + | flavors "," flavor + { result = val[0] << val[2] } + ; + +/*** + * instanceDeclaration + * + */ + + instanceDeclaration + : instance ";" + ; + + instance + : qualifierList_opt INSTANCE OF className alias_opt "{" valueInitializers "}" + ; + + valueInitializers + : valueInitializer + | valueInitializers valueInitializer + ; + + valueInitializer + : qualifierList_opt keyname "=" initializer ";" + | qualifierList_opt keyname ";" + { raise "Instance property '#{val[1]}' must have a value" unless @style == :wmi } + ; + +end # class Parser + +---- header ---- + +# parser.rb - generated by racc + +require 'strscan' +require 'rubygems' +require 'cim' +require File.join(File.dirname(__FILE__), 'result') +require File.join(File.dirname(__FILE__), 'scanner') +require File.join(File.dirname(__FILE__), 'helper') + +---- inner ---- + +# +# Initialize MOF::Parser +# MOF::Parser.new options = {} +# +# options -> Hash of options +# :debug -> boolean +# :includes -> array of include dirs +# :style -> :cim or :wmi +# +def initialize options = {} + @yydebug = options[:debug] + @includes = options[:includes] || [] + @quiet = options[:quiet] + @style = options[:style] || :cim # default to style CIM v2.2 syntax + + @lineno = 1 + @file = nil + @iconv = nil + @eol = "\n" + @fname = nil + @fstack = [] + @in_comment = false + @seen_files = [] + @qualifiers = {} +end + +# +# Make options hash from argv +#
+# returns [ files, options ] +# + + def self.argv_handler name, argv + files = [] + options = { :namespace => "" } + while argv.size > 0 + case opt = argv.shift + when "-h" + $stderr.puts "Ruby MOF compiler" + $stderr.puts "#{name} [-h] [-d] [-I ] []" + $stderr.puts "Compiles " + $stderr.puts "\t-d debug" + $stderr.puts "\t-h this help" + $stderr.puts "\t-I include dir" + $stderr.puts "\t-f force" + $stderr.puts "\t-n " + $stderr.puts "\t-o " + $stderr.puts "\t-s