mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
4c86818f47
(Sentence#each): defined. * test/ruby/test_assignment.rb: use Sentence#expand. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13253 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
668 lines
17 KiB
Ruby
668 lines
17 KiB
Ruby
# == sentence library
|
|
#
|
|
# = Features
|
|
#
|
|
# * syntax based sentences generation
|
|
# * sentence operations such as substitution.
|
|
#
|
|
# = Example
|
|
#
|
|
# Some arithmetic expressions using "+", "-", "*" and "/" are generated as follows.
|
|
#
|
|
# require 'sentence'
|
|
# Sentence.each({
|
|
# :exp => [["num"],
|
|
# [:exp, "+", :exp],
|
|
# [:exp, "-", :exp],
|
|
# [:exp, "*", :exp],
|
|
# [:exp, "/", :exp]]
|
|
# }, :exp, 2) {|sent| p sent }
|
|
# #=>
|
|
# #<Sentence: "num">
|
|
# #<Sentence: ("num") "+" ("num")>
|
|
# #<Sentence: ("num") "+" (("num") "+" ("num"))>
|
|
# #<Sentence: ("num") "+" (("num") "-" ("num"))>
|
|
# #<Sentence: ("num") "+" (("num") "*" ("num"))>
|
|
# #<Sentence: ("num") "+" (("num") "/" ("num"))>
|
|
# #<Sentence: (("num") "+" ("num")) "+" ("num")>
|
|
# ...
|
|
#
|
|
# Sentence.each takes 3 arguments.
|
|
# The first argument is the syntax for the expressions.
|
|
# The second argument, :exp, is a generating nonterminal.
|
|
# The third argument, 2, limits derivation to restrict results finitely.
|
|
#
|
|
# Some arithmetic expressions including parenthesis can be generated as follows.
|
|
#
|
|
# syntax = {
|
|
# :factor => [["n"],
|
|
# ["(", :exp, ")"]],
|
|
# :term => [[:factor],
|
|
# [:term, "*", :factor],
|
|
# [:term, "/", :factor]],
|
|
# :exp => [[:term],
|
|
# [:exp, "+", :term],
|
|
# [:exp, "-", :term]]
|
|
# }
|
|
# Sentence.each(syntax, :exp, 2) {|sent| p sent }
|
|
# #=>
|
|
# #<Sentence: (("n"))>
|
|
# #<Sentence: (("(" ((("n"))) ")"))>
|
|
# #<Sentence: (("(" ((("(" ((("n"))) ")"))) ")"))>
|
|
# #<Sentence: (("(" (((("n")) "*" ("n"))) ")"))>
|
|
# #<Sentence: (("(" (((("n")) "/" ("n"))) ")"))>
|
|
# #<Sentence: (("(" (((("n"))) "+" (("n"))) ")"))>
|
|
# #<Sentence: (("(" (((("n"))) "-" (("n"))) ")"))>
|
|
# #<Sentence: ((("n")) "*" ("n"))>
|
|
# #<Sentence: ((("n")) "*" ("(" ((("n"))) ")"))>
|
|
# ...
|
|
#
|
|
# Sentence#to_s can be used to concatenate strings
|
|
# in a sentence:
|
|
#
|
|
# Sentence.each(syntax, :exp, 2) {|sent| p sent.to_s }
|
|
# #=>
|
|
# "n"
|
|
# "(n)"
|
|
# "((n))"
|
|
# "(n*n)"
|
|
# "(n/n)"
|
|
# "(n+n)"
|
|
# "(n-n)"
|
|
# "n*n"
|
|
# "n*(n)"
|
|
# ...
|
|
#
|
|
|
|
# Sentence() instantiates a sentence object.
|
|
#
|
|
# Sentence("foo", "bar")
|
|
# #=> #<Sentence: "foo" "bar">
|
|
#
|
|
# Sentence("foo", ["bar", "baz"])
|
|
# #=> #<Sentence: "foo" ("bar" "baz")>
|
|
#
|
|
def Sentence(*ary)
|
|
Sentence.new(ary)
|
|
end
|
|
|
|
# Sentence class represents a tree with string leaves.
|
|
#
|
|
class Sentence
|
|
# _ary_ represents a tree.
|
|
# It should be a possibly nested array which contains strings.
|
|
#
|
|
# Note that _ary_ is not copied.
|
|
# Don't modify _ary_ after the sentence object is instantiated.
|
|
#
|
|
# Sentence.new(["a", "pen"])
|
|
# #<Sentence: "a" "pen">
|
|
#
|
|
# Sentence.new(["I", "have", ["a", "pen"]])
|
|
# #<Sentence: "I" "have" ("a" "pen")>
|
|
#
|
|
def initialize(ary)
|
|
@sent = ary
|
|
end
|
|
|
|
# returns a string which is concatenation of all strings.
|
|
# No separator is used.
|
|
#
|
|
# Sentence("2", "+", "3").to_s
|
|
# "2+3"
|
|
#
|
|
# Sentence("2", "+", ["3", "*", "5"]).to_s
|
|
# "2+3*5"
|
|
#
|
|
def to_s
|
|
@sent.join('')
|
|
end
|
|
|
|
# returns a string which is concatenation of all strings separated by _sep_.
|
|
# If _sep_ is not given, single space is used.
|
|
#
|
|
# Sentence("I", "have", ["a", "pen"]).join
|
|
# "I have a pen"
|
|
#
|
|
# Sentence("I", "have", ["a", "pen"]).join("/")
|
|
# "I/have/a/pen"
|
|
#
|
|
# Sentence("a", [], "b").join("/")
|
|
# "a/b"
|
|
#
|
|
def join(sep=' ')
|
|
@sent.flatten.join(sep)
|
|
end
|
|
|
|
# returns a tree as a nested array.
|
|
#
|
|
# Note that the result is not copied.
|
|
# Don't modify the result.
|
|
#
|
|
# Sentence(["foo", "bar"], "baz").to_a
|
|
# #=> [["foo", "bar"], "baz"]
|
|
#
|
|
def to_a
|
|
@sent
|
|
end
|
|
|
|
# returns <i>i</i>th element as a sentence or string.
|
|
#
|
|
# s = Sentence(["foo", "bar"], "baz")
|
|
# s #=> #<Sentence: ("foo" "bar") "baz">
|
|
# s[0] #=> #<Sentence: "foo" "bar">
|
|
# s[1] #=> "baz"
|
|
#
|
|
def [](i)
|
|
e = @sent[i]
|
|
e.respond_to?(:to_ary) ? Sentence.new(e) : e
|
|
end
|
|
|
|
# returns the number of top level elements.
|
|
#
|
|
# Sentence.new(%w[foo bar]).length
|
|
# #=> 2
|
|
#
|
|
# Sentence(%w[2 * 7], "+", %w[3 * 5]).length
|
|
# #=> 3
|
|
#
|
|
def length
|
|
@sent.length
|
|
end
|
|
|
|
# iterates over children.
|
|
#
|
|
# Sentence(%w[2 * 7], "+", %w[3 * 5]).each {|v| p v }
|
|
# #=>
|
|
# #<Sentence: "2" "*" "7">
|
|
# "+"
|
|
# #<Sentence: "3" "*" "5">
|
|
#
|
|
def each # :yield: element
|
|
@sent.each_index {|i|
|
|
yield self[i]
|
|
}
|
|
end
|
|
include Enumerable
|
|
|
|
def inspect
|
|
"#<#{self.class}: #{inner_inspect(@sent, '')}>"
|
|
end
|
|
|
|
# :stopdoc:
|
|
def inner_inspect(ary, r)
|
|
first = true
|
|
ary.each {|obj|
|
|
r << ' ' if !first
|
|
first = false
|
|
if obj.respond_to? :to_ary
|
|
r << '('
|
|
inner_inspect(obj, r)
|
|
r << ')'
|
|
else
|
|
r << obj.inspect
|
|
end
|
|
}
|
|
r
|
|
end
|
|
# :startdoc:
|
|
|
|
# returns new sentence object which
|
|
# _target_ is substituted by the block.
|
|
#
|
|
# Sentence#subst invokes <tt>_target_ === _string_</tt> for each
|
|
# string in the sentence.
|
|
# The strings which === returns true are substituted by the block.
|
|
# The block is invoked with the substituting string.
|
|
#
|
|
# Sentence.new(%w[2 + 3]).subst("+") { "*" }
|
|
# #<Sentence: "2" "*" "3">
|
|
#
|
|
# Sentence.new(%w[2 + 3]).subst(/\A\d+\z/) {|s| ((s.to_i)*2).to_s }
|
|
# #=> #<Sentence: "4" "+" "6">
|
|
#
|
|
def subst(target, &b) # :yield: string
|
|
Sentence.new(subst_rec(@sent, target, &b))
|
|
end
|
|
|
|
# :stopdoc:
|
|
def subst_rec(obj, target, &b)
|
|
if obj.respond_to? :to_ary
|
|
a = []
|
|
obj.each {|e| a << subst_rec(e, target, &b) }
|
|
a
|
|
elsif target === obj
|
|
yield obj
|
|
else
|
|
obj
|
|
end
|
|
end
|
|
# :startdoc:
|
|
|
|
# find a subsentence and return it.
|
|
# The block is invoked for each subsentence in preorder manner.
|
|
# The first subsentence which the block returns true is returned.
|
|
#
|
|
# Sentence(%w[2 * 7], "+", %w[3 * 5]).find_subtree {|s| s[1] == "*" }
|
|
# #=> #<Sentence: "2" "*" "7">
|
|
#
|
|
def find_subtree(&b) # :yield: sentence
|
|
find_subtree_rec(@sent, &b)
|
|
end
|
|
|
|
# :stopdoc:
|
|
def find_subtree_rec(obj, &b)
|
|
if obj.respond_to? :to_ary
|
|
s = Sentence.new(obj)
|
|
if b.call s
|
|
return s
|
|
else
|
|
obj.each {|e|
|
|
r = find_subtree_rec(e, &b)
|
|
return r if r
|
|
}
|
|
end
|
|
end
|
|
nil
|
|
end
|
|
# :startdoc:
|
|
|
|
# returns a new sentence object which expands according to the condition
|
|
# given by the block.
|
|
#
|
|
# The block is invoked for each subsentence.
|
|
# The subsentences which the block returns true are
|
|
# expanded into parent.
|
|
#
|
|
# s = Sentence(%w[2 * 7], "+", %w[3 * 5])
|
|
# #=> #<Sentence: ("2" "*" "7") "+" ("3" "*" "5")>
|
|
#
|
|
# s.expand { true }
|
|
# #=> #<Sentence: "2" "*" "7" "+" "3" "*" "5">
|
|
#
|
|
# s.expand {|s| s[0] == "3" }
|
|
# #=> #<Sentence: (("2" "*" "7") "+" "3" "*" "5")>
|
|
#
|
|
def expand(&b) # :yield: sentence
|
|
Sentence.new(expand_rec(@sent, &b))
|
|
end
|
|
|
|
# :stopdoc:
|
|
def expand_rec(obj, r=[], &b)
|
|
if obj.respond_to? :to_ary
|
|
obj.each {|o|
|
|
s = Sentence.new(o)
|
|
if b.call s
|
|
expand_rec(o, r, &b)
|
|
else
|
|
a = []
|
|
expand_rec(o, a, &b)
|
|
r << a
|
|
end
|
|
}
|
|
else
|
|
r << obj
|
|
end
|
|
r
|
|
end
|
|
# :startdoc:
|
|
|
|
# Sentence.each generates sentences
|
|
# by deriving the start symbol _sym_ using _syntax_.
|
|
# The derivation is restricted by an positive integer _limit_ to
|
|
# avoid infinite generation.
|
|
#
|
|
# Sentence.each yields the block with a generated sentence.
|
|
#
|
|
# Sentence.each({
|
|
# :exp => [["n"],
|
|
# [:exp, "+", :exp],
|
|
# [:exp, "*", :exp]]
|
|
# }, :exp, 1) {|sent| p sent }
|
|
# #=>
|
|
# #<Sentence: "n">
|
|
# #<Sentence: ("n") "+" ("n")>
|
|
# #<Sentence: ("n") "*" ("n")>
|
|
#
|
|
# Sentence.each({
|
|
# :exp => [["n"],
|
|
# [:exp, "+", :exp],
|
|
# [:exp, "*", :exp]]
|
|
# }, :exp, 2) {|sent| p sent }
|
|
# #=>
|
|
# #<Sentence: "n">
|
|
# #<Sentence: ("n") "+" ("n")>
|
|
# #<Sentence: ("n") "+" (("n") "+" ("n"))>
|
|
# #<Sentence: ("n") "+" (("n") "*" ("n"))>
|
|
# #<Sentence: (("n") "+" ("n")) "+" ("n")>
|
|
# #<Sentence: (("n") "*" ("n")) "+" ("n")>
|
|
# #<Sentence: ("n") "*" ("n")>
|
|
# #<Sentence: ("n") "*" (("n") "+" ("n"))>
|
|
# #<Sentence: ("n") "*" (("n") "*" ("n"))>
|
|
# #<Sentence: (("n") "+" ("n")) "*" ("n")>
|
|
# #<Sentence: (("n") "*" ("n")) "*" ("n")>
|
|
#
|
|
def Sentence.each(syntax, sym, limit)
|
|
Gen.new(syntax).each_tree(sym, limit) {|tree|
|
|
yield Sentence.new(tree)
|
|
}
|
|
end
|
|
|
|
# Sentence.expand_syntax returns an expanded syntax:
|
|
# * No rule derives to empty sequence
|
|
# * Underivable rule simplified
|
|
# * No channel rule
|
|
# * Symbols which has zero or one choices are not appered in rhs.
|
|
#
|
|
# Note that the rules which can derive empty and non-empty
|
|
# sequences are modified to derive only non-empty sequences.
|
|
#
|
|
# Sentence.expand_syntax({
|
|
# :underivable1 => [],
|
|
# :underivable2 => [[:underivable1]],
|
|
# :underivable3 => [[:underivable3]],
|
|
# :empty_only1 => [[]],
|
|
# :empty_only2 => [[:just_empty1, :just_empty1]],
|
|
# :empty_or_not => [[], ["foo"]],
|
|
# :empty_or_not_2 => [[:empty_or_not, :empty_or_not]],
|
|
# :empty_or_not_3 => [[:empty_or_not, :empty_or_not, :empty_or_not]],
|
|
# :empty_or_not_4 => [[:empty_or_not_2, :empty_or_not_2]],
|
|
# :channel1 => [[:channeled_data]],
|
|
# :channeled_data => [["a", "b"], ["c", "d"]],
|
|
# :single_choice => [["single", "choice"]],
|
|
# :single_choice_2 => [[:single_choice, :single_choice]],
|
|
# })
|
|
# #=>
|
|
# {
|
|
# :underivable1=>[], # underivable rules are simplified to [].
|
|
# :underivable2=>[],
|
|
# :underivable3=>[],
|
|
# :empty_only1=>[], # derivation to empty sequence are removed.
|
|
# :empty_only2=>[],
|
|
# :empty_or_not=>[["foo"]], # empty sequences are removed too.
|
|
# :empty_or_not_2=>[["foo"], ["foo", "foo"]],
|
|
# :empty_or_not_3=>[["foo"], ["foo", "foo"], ["foo", "foo", "foo"]],
|
|
# :empty_or_not_4=> [["foo"], ["foo", "foo"], [:empty_or_not_2, :empty_or_not_2]],
|
|
# :channel1=>[["a", "b"], ["c", "d"]], # channel rules are removed.
|
|
# :channeled_data=>[["a", "b"], ["c", "d"]],
|
|
# :single_choice=>[["single", "choice"]], # single choice rules are expanded.
|
|
# :single_choice_2=>[["single", "choice", "single", "choice"]],
|
|
# }
|
|
#
|
|
# Sentence.expand_syntax({
|
|
# :factor => [["n"],
|
|
# ["(", :exp, ")"]],
|
|
# :term => [[:factor],
|
|
# [:term, "*", :factor],
|
|
# [:term, "/", :factor]],
|
|
# :exp => [[:term],
|
|
# [:exp, "+", :term],
|
|
# [:exp, "-", :term]]
|
|
# })
|
|
# #=>
|
|
# {:exp=> [["n"],
|
|
# ["(", :exp, ")"],
|
|
# [:exp, "+", :term],
|
|
# [:exp, "-", :term],
|
|
# [:term, "*", :factor],
|
|
# [:term, "/", :factor]],
|
|
# :factor=> [["n"],
|
|
# ["(", :exp, ")"]],
|
|
# :term=> [["n"],
|
|
# ["(", :exp, ")"],
|
|
# [:term, "*", :factor],
|
|
# [:term, "/", :factor]]
|
|
# }
|
|
#
|
|
def Sentence.expand_syntax(syntax)
|
|
Sentence::Gen.expand_syntax(syntax)
|
|
end
|
|
|
|
# :stopdoc:
|
|
class Gen
|
|
def Gen.each_tree(syntax, sym, limit, &b)
|
|
Gen.new(syntax).each_tree(sym, limit, &b)
|
|
end
|
|
|
|
def Gen.each_string(syntax, sym, limit, &b)
|
|
Gen.new(syntax).each_string(sym, limit, &b)
|
|
end
|
|
|
|
def initialize(syntax)
|
|
@syntax = syntax
|
|
end
|
|
|
|
def self.expand_syntax(syntax)
|
|
syntax = simplify_underivable_rules(syntax)
|
|
syntax = simplify_emptyonly_rules(syntax)
|
|
syntax = make_rules_no_empseq(syntax)
|
|
syntax = expand_channel_rules(syntax)
|
|
|
|
syntax = expand_noalt_rules(syntax)
|
|
syntax = reorder_rules(syntax)
|
|
end
|
|
|
|
def self.simplify_underivable_rules(syntax)
|
|
deribable_syms = {}
|
|
changed = true
|
|
while changed
|
|
changed = false
|
|
syntax.each {|sym, rules|
|
|
next if deribable_syms[sym]
|
|
rules.each {|rhs|
|
|
if rhs.all? {|e| String === e || deribable_syms[e] }
|
|
deribable_syms[sym] = true
|
|
changed = true
|
|
break
|
|
end
|
|
}
|
|
}
|
|
end
|
|
result = {}
|
|
syntax.each {|sym, rules|
|
|
if deribable_syms[sym]
|
|
rules2 = []
|
|
rules.each {|rhs|
|
|
rules2 << rhs if rhs.all? {|e| String === e || deribable_syms[e] }
|
|
}
|
|
result[sym] = rules2.uniq
|
|
else
|
|
result[sym] = []
|
|
end
|
|
}
|
|
result
|
|
end
|
|
|
|
def self.simplify_emptyonly_rules(syntax)
|
|
justempty_syms = {}
|
|
changed = true
|
|
while changed
|
|
changed = false
|
|
syntax.each {|sym, rules|
|
|
next if justempty_syms[sym]
|
|
if !rules.empty? && rules.all? {|rhs| rhs.all? {|e| justempty_syms[e] } }
|
|
justempty_syms[sym] = true
|
|
changed = true
|
|
end
|
|
}
|
|
end
|
|
result = {}
|
|
syntax.each {|sym, rules|
|
|
result[sym] = rules.map {|rhs| rhs.reject {|e| justempty_syms[e] } }.uniq
|
|
}
|
|
result
|
|
end
|
|
|
|
def self.expand_emptyable_syms(rhs, emptyable_syms)
|
|
if rhs.empty?
|
|
yield []
|
|
else
|
|
first = rhs[0]
|
|
rest = rhs[1..-1]
|
|
if emptyable_syms[first]
|
|
expand_emptyable_syms(rest, emptyable_syms) {|rhs2|
|
|
yield [first] + rhs2
|
|
yield rhs2
|
|
}
|
|
else
|
|
expand_emptyable_syms(rest, emptyable_syms) {|rhs2|
|
|
yield [first] + rhs2
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def self.make_rules_no_empseq(syntax)
|
|
emptyable_syms = {}
|
|
changed = true
|
|
while changed
|
|
changed = false
|
|
syntax.each {|sym, rules|
|
|
next if emptyable_syms[sym]
|
|
rules.each {|rhs|
|
|
if rhs.all? {|e| emptyable_syms[e] }
|
|
emptyable_syms[sym] = true
|
|
changed = true
|
|
break
|
|
end
|
|
}
|
|
}
|
|
end
|
|
result = {}
|
|
syntax.each {|sym, rules|
|
|
rules2 = []
|
|
rules.each {|rhs|
|
|
expand_emptyable_syms(rhs, emptyable_syms) {|rhs2|
|
|
next if rhs2.empty?
|
|
rules2 << rhs2
|
|
}
|
|
}
|
|
result[sym] = rules2.uniq
|
|
}
|
|
result
|
|
end
|
|
|
|
def self.expand_channel_rules(syntax)
|
|
channel_rules = {}
|
|
syntax.each {|sym, rules|
|
|
channel_rules[sym] = {sym=>true}
|
|
rules.each {|rhs|
|
|
if rhs.length == 1 && Symbol === rhs[0]
|
|
channel_rules[sym][rhs[0]] = true
|
|
end
|
|
}
|
|
}
|
|
changed = true
|
|
while changed
|
|
changed = false
|
|
channel_rules.each {|sym, set|
|
|
n1 = set.size
|
|
set.keys.each {|s|
|
|
set.update(channel_rules[s])
|
|
}
|
|
n2 = set.size
|
|
changed = true if n1 < n2
|
|
}
|
|
end
|
|
result = {}
|
|
syntax.each {|sym, rules|
|
|
rules2 = []
|
|
channel_rules[sym].each_key {|s|
|
|
syntax[s].each {|rhs|
|
|
unless rhs.length == 1 && Symbol === rhs[0]
|
|
rules2 << rhs
|
|
end
|
|
}
|
|
}
|
|
result[sym] = rules2.uniq
|
|
}
|
|
result
|
|
end
|
|
|
|
def self.expand_noalt_rules(syntax)
|
|
noalt_syms = {}
|
|
syntax.each {|sym, rules|
|
|
if rules.length == 1
|
|
noalt_syms[sym] = true
|
|
end
|
|
}
|
|
result = {}
|
|
syntax.each {|sym, rules|
|
|
rules2 = []
|
|
rules.each {|rhs|
|
|
rhs2 = []
|
|
rhs.each {|e|
|
|
if noalt_syms[e]
|
|
rhs2.concat syntax[e][0]
|
|
else
|
|
rhs2 << e
|
|
end
|
|
}
|
|
rules2 << rhs2
|
|
}
|
|
result[sym] = rules2.uniq
|
|
}
|
|
result
|
|
end
|
|
|
|
def self.reorder_rules(syntax)
|
|
result = {}
|
|
syntax.each {|sym, rules|
|
|
result[sym] = rules.sort_by {|rhs|
|
|
[rhs.find_all {|e| Symbol === e }.length, rhs.length]
|
|
}
|
|
}
|
|
result
|
|
end
|
|
|
|
def each_tree(sym, limit)
|
|
generate_from_sym(sym, limit) {|_, tree|
|
|
yield tree
|
|
}
|
|
nil
|
|
end
|
|
|
|
def each_string(sym, limit)
|
|
generate_from_sym(sym, limit) {|_, tree|
|
|
yield [tree].join('')
|
|
}
|
|
nil
|
|
end
|
|
|
|
def generate_from_sym(sym, limit, &b)
|
|
return if limit < 0
|
|
if String === sym
|
|
yield limit, sym
|
|
else
|
|
rules = @syntax[sym]
|
|
raise "undefined rule: #{sym}" if !rules
|
|
rules.each {|rhs|
|
|
if rhs.length == 1 || rules.length == 1
|
|
limit1 = limit
|
|
else
|
|
limit1 = limit-1
|
|
end
|
|
generate_from_rhs(rhs, limit1, &b)
|
|
}
|
|
end
|
|
nil
|
|
end
|
|
|
|
def generate_from_rhs(rhs, limit)
|
|
return if limit < 0
|
|
if rhs.empty?
|
|
yield limit, []
|
|
else
|
|
generate_from_sym(rhs[0], limit) {|limit1, child|
|
|
generate_from_rhs(rhs[1..-1], limit1) {|limit2, arr|
|
|
yield limit2, [child, *arr]
|
|
}
|
|
}
|
|
end
|
|
nil
|
|
end
|
|
end
|
|
# :startdoc:
|
|
|
|
end
|
|
|