Merge branch 'module-processor' of git://github.com/medcat/ast
This commit is contained in:
commit
b4d331fab5
|
@ -8,5 +8,5 @@ This is a design choice. It does create some pressure on
|
|||
garbage collector, but completely eliminates all concurrency
|
||||
and aliasing problems.
|
||||
|
||||
See also {AST::Node}, {AST::Processor} and {AST::Sexp} for additional
|
||||
recommendations and design patterns.
|
||||
See also {AST::Node}, {AST::Processor::Mixin} and {AST::Sexp} for
|
||||
additional recommendations and design patterns.
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
# garbage collector, but completely eliminates all concurrency
|
||||
# and aliasing problems.
|
||||
#
|
||||
# See also {AST::Node}, {AST::Processor} and {AST::Sexp} for additional
|
||||
# recommendations and design patterns.
|
||||
# See also {AST::Node}, {AST::Processor::Mixin} and {AST::Sexp} for
|
||||
# additional recommendations and design patterns.
|
||||
#
|
||||
module AST
|
||||
require 'ast/node'
|
||||
|
|
|
@ -1,266 +1,12 @@
|
|||
module AST
|
||||
# Processor is a class which helps transforming one AST into another.
|
||||
# In a nutshell, the {#process} method accepts a {Node} and dispatches
|
||||
# it to a handler corresponding to its type, and returns a (possibly)
|
||||
# updated variant of the node.
|
||||
# This class includes {AST::Processor::Mixin}; however, it is
|
||||
# deprecated, since the module defines all of the behaviors that
|
||||
# the processor includes. Any new libraries should use
|
||||
# {AST::Processor::Mixin} instead of subclassing this.
|
||||
#
|
||||
# Processor has a set of associated design patterns. They are best
|
||||
# explained with a concrete example. Let's define a simple arithmetic
|
||||
# language and an AST format for it:
|
||||
#
|
||||
# Terminals (AST nodes which do not have other AST nodes inside):
|
||||
#
|
||||
# * `(integer <int-literal>)`,
|
||||
#
|
||||
# Nonterminals (AST nodes with other nodes as children):
|
||||
#
|
||||
# * `(add <node> <node>)`,
|
||||
# * `(multiply <node> <node>)`,
|
||||
# * `(divide <node> <node>)`,
|
||||
# * `(negate <node>)`,
|
||||
# * `(store <node> <string-literal>)`: stores value of `<node>` into a variable named `<string-literal>`,
|
||||
# * `(load <string-literal>)`: loads value of a variable named `<string-literal>`,
|
||||
# * `(each <node> ...): computes each of the `<node>`s and prints the result.
|
||||
#
|
||||
# All AST nodes have the same Ruby class, and therefore they don't
|
||||
# know how to traverse themselves. (A solution which dynamically checks the
|
||||
# type of children is possible, but is slow and error-prone.) So, a subclass
|
||||
# of Processor which knows how to traverse the entire tree should be defined.
|
||||
# Such subclass has a handler for each nonterminal node which recursively
|
||||
# processes children nodes:
|
||||
#
|
||||
# require 'ast'
|
||||
#
|
||||
# class ArithmeticsProcessor < AST::Processor
|
||||
# # This method traverses any binary operators such as (add) or (multiply).
|
||||
# def process_binary_op(node)
|
||||
# # Children aren't decomposed automatically; it is suggested to use Ruby
|
||||
# # multiple assignment expansion, as it is very convenient here.
|
||||
# left_expr, right_expr = *node
|
||||
#
|
||||
# # AST::Node#updated won't change node type if nil is passed as a first
|
||||
# # argument, which allows to reuse the same handler for multiple node types
|
||||
# # using `alias' (below).
|
||||
# node.updated(nil, [
|
||||
# process(left_expr),
|
||||
# process(right_expr)
|
||||
# ])
|
||||
# end
|
||||
# alias on_add process_binary_op
|
||||
# alias on_multiply process_binary_op
|
||||
# alias on_divide process_binary_op
|
||||
#
|
||||
# def on_negate(node)
|
||||
# # It is also possible to use #process_all for more compact code
|
||||
# # if every child is a Node.
|
||||
# node.updated(nil, process_all(node))
|
||||
# end
|
||||
#
|
||||
# def on_store(node)
|
||||
# expr, variable_name = *node
|
||||
#
|
||||
# # Note that variable_name is not a Node and thus isn't passed to #process.
|
||||
# node.updated(nil, [
|
||||
# process(expr),
|
||||
# variable_name
|
||||
# ])
|
||||
# end
|
||||
#
|
||||
# # (load) is effectively a terminal node, and so it does not need
|
||||
# # an explicit handler, as the following is the default behavior.
|
||||
# def on_load(node)
|
||||
# nil
|
||||
# end
|
||||
#
|
||||
# def on_each(node)
|
||||
# node.updated(nil, process_all(node))
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Let's test our ArithmeticsProcessor:
|
||||
#
|
||||
# include AST::Sexp
|
||||
# expr = s(:add, s(:integer, 2), s(:integer, 2))
|
||||
#
|
||||
# p ArithmeticsProcessor.new.process(expr) == expr # => true
|
||||
#
|
||||
# As expected, it does not change anything at all. This isn't actually
|
||||
# very useful, so let's now define a Calculator, which will compute the
|
||||
# expression values:
|
||||
#
|
||||
# # This Processor folds nonterminal nodes and returns an (integer)
|
||||
# # terminal node.
|
||||
# class ArithmeticsCalculator < ArithmeticsProcessor
|
||||
# def compute_op(node)
|
||||
# # First, node children are processed and then unpacked to local
|
||||
# # variables.
|
||||
# nodes = process_all(node)
|
||||
#
|
||||
# if nodes.all? { |node| node.type == :integer }
|
||||
# # If each of those nodes represents a literal, we can fold this
|
||||
# # node!
|
||||
# values = nodes.map { |node| node.children.first }
|
||||
# AST::Node.new(:integer, [
|
||||
# yield(values)
|
||||
# ])
|
||||
# else
|
||||
# # Otherwise, we can just leave the current node in the tree and
|
||||
# # only update it with processed children nodes, which can be
|
||||
# # partially folded.
|
||||
# node.updated(nil, nodes)
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# def on_add(node)
|
||||
# compute_op(node) { |left, right| left + right }
|
||||
# end
|
||||
#
|
||||
# def on_multiply(node)
|
||||
# compute_op(node) { |left, right| left * right }
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Let's check:
|
||||
#
|
||||
# p ArithmeticsCalculator.new.process(expr) # => (integer 4)
|
||||
#
|
||||
# Excellent, the calculator works! Now, a careful reader could notice that
|
||||
# the ArithmeticsCalculator does not know how to divide numbers. What if we
|
||||
# pass an expression with division to it?
|
||||
#
|
||||
# expr_with_division = \
|
||||
# s(:add,
|
||||
# s(:integer, 1),
|
||||
# s(:divide,
|
||||
# s(:add, s(:integer, 8), s(:integer, 4)),
|
||||
# s(:integer, 3))) # 1 + (8 + 4) / 3
|
||||
#
|
||||
# folded_expr_with_division = ArithmeticsCalculator.new.process(expr_with_division)
|
||||
# p folded_expr_with_division
|
||||
# # => (add
|
||||
# # (integer 1)
|
||||
# # (divide
|
||||
# # (integer 12)
|
||||
# # (integer 3)))
|
||||
#
|
||||
# As you can see, the expression was folded _partially_: the inner `(add)` node which
|
||||
# could be computed was folded to `(integer 12)`, the `(divide)` node is left as-is
|
||||
# because there is no computing handler for it, and the root `(add)` node was also left
|
||||
# as it is because some of its children were not literals.
|
||||
#
|
||||
# Note that this partial folding is only possible because the _data_ format, i.e.
|
||||
# the format in which the computed values of the nodes are represented, is the same as
|
||||
# the AST itself.
|
||||
#
|
||||
# Let's extend our ArithmeticsCalculator class further.
|
||||
#
|
||||
# class ArithmeticsCalculator
|
||||
# def on_divide(node)
|
||||
# compute_op(node) { |left, right| left / right }
|
||||
# end
|
||||
#
|
||||
# def on_negate(node)
|
||||
# # Note how #compute_op works regardless of the operator arity.
|
||||
# compute_op(node) { |value| -value }
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Now, let's apply our renewed ArithmeticsCalculator to a partial result of previous
|
||||
# evaluation:
|
||||
#
|
||||
# p ArithmeticsCalculator.new.process(expr_with_division) # => (integer 5)
|
||||
#
|
||||
# Five! Excellent. This is also pretty much how CRuby 1.8 executed its programs.
|
||||
#
|
||||
# Now, let's do some automated bug searching. Division by zero is an error, right?
|
||||
# So if we could detect that someone has divided by zero before the program is even
|
||||
# run, that could save some debugging time.
|
||||
#
|
||||
# class DivisionByZeroVerifier < ArithmeticsProcessor
|
||||
# class VerificationFailure < Exception; end
|
||||
#
|
||||
# def on_divide(node)
|
||||
# # You need to process the children to handle nested divisions
|
||||
# # such as:
|
||||
# # (divide
|
||||
# # (integer 1)
|
||||
# # (divide (integer 1) (integer 0))
|
||||
# left, right = process_all(node)
|
||||
#
|
||||
# if right.type == :integer &&
|
||||
# right.children.first == 0
|
||||
# raise VerificationFailure, "Ouch! This code divides by zero."
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# def divides_by_zero?(ast)
|
||||
# process(ast)
|
||||
# false
|
||||
# rescue VerificationFailure
|
||||
# true
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# nice_expr = \
|
||||
# s(:divide,
|
||||
# s(:add, s(:integer, 10), s(:integer, 2)),
|
||||
# s(:integer, 4))
|
||||
#
|
||||
# p DivisionByZeroVerifier.new.divides_by_zero?(nice_expr)
|
||||
# # => false. Good.
|
||||
#
|
||||
# bad_expr = \
|
||||
# s(:add, s(:integer, 10),
|
||||
# s(:divide, s(:integer, 1), s(:integer, 0)))
|
||||
#
|
||||
# p DivisionByZeroVerifier.new.divides_by_zero?(bad_expr)
|
||||
# # => true. WHOOPS. DO NOT RUN THIS.
|
||||
#
|
||||
# Of course, this won't detect more complex cases... unless you use some partial
|
||||
# evaluation before! The possibilites are endless. Have fun.
|
||||
# @deprecated Use {AST::Processor::Mixin} instead.
|
||||
class Processor
|
||||
# Dispatches `node`. If a node has type `:foo`, then a handler named
|
||||
# `on_foo` is invoked with one argument, the `node`; if there isn't
|
||||
# such a handler, {#handler_missing} is invoked with the same argument.
|
||||
#
|
||||
# If the handler returns `nil`, `node` is returned; otherwise, the return
|
||||
# value of the handler is passed along.
|
||||
#
|
||||
# @param [AST::Node, nil] node
|
||||
# @return [AST::Node, nil]
|
||||
def process(node)
|
||||
return if node.nil?
|
||||
|
||||
node = node.to_ast
|
||||
|
||||
# Invoke a specific handler
|
||||
on_handler = :"on_#{node.type}"
|
||||
if respond_to? on_handler
|
||||
new_node = send on_handler, node
|
||||
else
|
||||
new_node = handler_missing(node)
|
||||
end
|
||||
|
||||
node = new_node if new_node
|
||||
|
||||
node
|
||||
end
|
||||
|
||||
# {#process}es each node from `nodes` and returns an array of results.
|
||||
#
|
||||
# @param [Array<AST::Node>] nodes
|
||||
# @return [Array<AST::Node>]
|
||||
def process_all(nodes)
|
||||
nodes.to_a.map do |node|
|
||||
process node
|
||||
end
|
||||
end
|
||||
|
||||
# Default handler. Does nothing.
|
||||
#
|
||||
# @param [AST::Node] node
|
||||
# @return [AST::Node, nil]
|
||||
def handler_missing(node)
|
||||
end
|
||||
require 'ast/processor/mixin'
|
||||
include Mixin
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,288 @@
|
|||
module AST
|
||||
class Processor
|
||||
# The processor module is a module which helps transforming one
|
||||
# AST into another. In a nutshell, the {#process} method accepts
|
||||
# a {Node} and dispatches it to a handler corresponding to its
|
||||
# type, and returns a (possibly) updated variant of the node.
|
||||
#
|
||||
# The processor module has a set of associated design patterns.
|
||||
# They are best explained with a concrete example. Let's define a
|
||||
# simple arithmetic language and an AST format for it:
|
||||
#
|
||||
# Terminals (AST nodes which do not have other AST nodes inside):
|
||||
#
|
||||
# * `(integer <int-literal>)`,
|
||||
#
|
||||
# Nonterminals (AST nodes with other nodes as children):
|
||||
#
|
||||
# * `(add <node> <node>)`,
|
||||
# * `(multiply <node> <node>)`,
|
||||
# * `(divide <node> <node>)`,
|
||||
# * `(negate <node>)`,
|
||||
# * `(store <node> <string-literal>)`: stores value of `<node>`
|
||||
# into a variable named `<string-literal>`,
|
||||
# * `(load <string-literal>)`: loads value of a variable named
|
||||
# `<string-literal>`,
|
||||
# * `(each <node> ...): computes each of the `<node>`s and
|
||||
# prints the result.
|
||||
#
|
||||
# All AST nodes have the same Ruby class, and therefore they don't
|
||||
# know how to traverse themselves. (A solution which dynamically
|
||||
# checks the type of children is possible, but is slow and
|
||||
# error-prone.) So, a class including the module which knows how
|
||||
# to traverse the entire tree should be defined. Such classes
|
||||
# have a handler for each nonterminal node which recursively
|
||||
# processes children nodes:
|
||||
#
|
||||
# require 'ast'
|
||||
#
|
||||
# class ArithmeticsProcessor
|
||||
# include AST::Processor::Module
|
||||
# # This method traverses any binary operators such as (add)
|
||||
# # or (multiply).
|
||||
# def process_binary_op(node)
|
||||
# # Children aren't decomposed automatically; it is
|
||||
# # suggested to use Ruby multiple assignment expansion,
|
||||
# # as it is very convenient here.
|
||||
# left_expr, right_expr = *node
|
||||
#
|
||||
# # AST::Node#updated won't change node type if nil is
|
||||
# # passed as a first argument, which allows to reuse the
|
||||
# # same handler for multiple node types using `alias'
|
||||
# # (below).
|
||||
# node.updated(nil, [
|
||||
# process(left_expr),
|
||||
# process(right_expr)
|
||||
# ])
|
||||
# end
|
||||
# alias_method :on_add, :process_binary_op
|
||||
# alias_method :on_multiply, :process_binary_op
|
||||
# alias_method :on_divide, :process_binary_op
|
||||
#
|
||||
# def on_negate(node)
|
||||
# # It is also possible to use #process_all for more
|
||||
# # compact code if every child is a Node.
|
||||
# node.updated(nil, process_all(node))
|
||||
# end
|
||||
#
|
||||
# def on_store(node)
|
||||
# expr, variable_name = *node
|
||||
#
|
||||
# # Note that variable_name is not a Node and thus isn't
|
||||
# # passed to #process.
|
||||
# node.updated(nil, [
|
||||
# process(expr),
|
||||
# variable_name
|
||||
# ])
|
||||
# end
|
||||
#
|
||||
# # (load) is effectively a terminal node, and so it does
|
||||
# # not need an explicit handler, as the following is the
|
||||
# # default behavior. Essentially, for any nodes that don't
|
||||
# # have a defined handler, the node remains unchanged.
|
||||
# def on_load(node)
|
||||
# nil
|
||||
# end
|
||||
#
|
||||
# def on_each(node)
|
||||
# node.updated(nil, process_all(node))
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Let's test our ArithmeticsProcessor:
|
||||
#
|
||||
# include AST::Sexp
|
||||
# expr = s(:add, s(:integer, 2), s(:integer, 2))
|
||||
#
|
||||
# p ArithmeticsProcessor.new.process(expr) == expr # => true
|
||||
#
|
||||
# As expected, it does not change anything at all. This isn't
|
||||
# actually very useful, so let's now define a Calculator, which
|
||||
# will compute the expression values:
|
||||
#
|
||||
# # This Processor folds nonterminal nodes and returns an
|
||||
# # (integer) terminal node.
|
||||
# class ArithmeticsCalculator < ArithmeticsProcessor
|
||||
# def compute_op(node)
|
||||
# # First, node children are processed and then unpacked
|
||||
# # to local variables.
|
||||
# nodes = process_all(node)
|
||||
#
|
||||
# if nodes.all? { |node| node.type == :integer }
|
||||
# # If each of those nodes represents a literal, we can
|
||||
# # fold this node!
|
||||
# values = nodes.map { |node| node.children.first }
|
||||
# AST::Node.new(:integer, [
|
||||
# yield(values)
|
||||
# ])
|
||||
# else
|
||||
# # Otherwise, we can just leave the current node in the
|
||||
# # tree and only update it with processed children
|
||||
# # nodes, which can be partially folded.
|
||||
# node.updated(nil, nodes)
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# def on_add(node)
|
||||
# compute_op(node) { |left, right| left + right }
|
||||
# end
|
||||
#
|
||||
# def on_multiply(node)
|
||||
# compute_op(node) { |left, right| left * right }
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Let's check:
|
||||
#
|
||||
# p ArithmeticsCalculator.new.process(expr) # => (integer 4)
|
||||
#
|
||||
# Excellent, the calculator works! Now, a careful reader could
|
||||
# notice that the ArithmeticsCalculator does not know how to
|
||||
# divide numbers. What if we pass an expression with division to
|
||||
# it?
|
||||
#
|
||||
# expr_with_division = \
|
||||
# s(:add,
|
||||
# s(:integer, 1),
|
||||
# s(:divide,
|
||||
# s(:add, s(:integer, 8), s(:integer, 4)),
|
||||
# s(:integer, 3))) # 1 + (8 + 4) / 3
|
||||
#
|
||||
# folded_expr_with_division = ArithmeticsCalculator.new.process(expr_with_division)
|
||||
# p folded_expr_with_division
|
||||
# # => (add
|
||||
# # (integer 1)
|
||||
# # (divide
|
||||
# # (integer 12)
|
||||
# # (integer 3)))
|
||||
#
|
||||
# As you can see, the expression was folded _partially_: the inner
|
||||
# `(add)` node which could be computed was folded to
|
||||
# `(integer 12)`, the `(divide)` node is left as-is because there
|
||||
# is no computing handler for it, and the root `(add)` node was
|
||||
# also left as it is because some of its children were not
|
||||
# literals.
|
||||
#
|
||||
# Note that this partial folding is only possible because the
|
||||
# _data_ format, i.e. the format in which the computed values of
|
||||
# the nodes are represented, is the same as the AST itself.
|
||||
#
|
||||
# Let's extend our ArithmeticsCalculator class further.
|
||||
#
|
||||
# class ArithmeticsCalculator
|
||||
# def on_divide(node)
|
||||
# compute_op(node) { |left, right| left / right }
|
||||
# end
|
||||
#
|
||||
# def on_negate(node)
|
||||
# # Note how #compute_op works regardless of the operator
|
||||
# # arity.
|
||||
# compute_op(node) { |value| -value }
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# Now, let's apply our renewed ArithmeticsCalculator to a partial
|
||||
# result of previous evaluation:
|
||||
#
|
||||
# p ArithmeticsCalculator.new.process(expr_with_division) # => (integer 5)
|
||||
#
|
||||
# Five! Excellent. This is also pretty much how CRuby 1.8 executed
|
||||
# its programs.
|
||||
#
|
||||
# Now, let's do some automated bug searching. Division by zero is
|
||||
# an error, right? So if we could detect that someone has divided
|
||||
# by zero before the program is even run, that could save some
|
||||
# debugging time.
|
||||
#
|
||||
# class DivisionByZeroVerifier < ArithmeticsProcessor
|
||||
# class VerificationFailure < Exception; end
|
||||
#
|
||||
# def on_divide(node)
|
||||
# # You need to process the children to handle nested divisions
|
||||
# # such as:
|
||||
# # (divide
|
||||
# # (integer 1)
|
||||
# # (divide (integer 1) (integer 0))
|
||||
# left, right = process_all(node)
|
||||
#
|
||||
# if right.type == :integer &&
|
||||
# right.children.first == 0
|
||||
# raise VerificationFailure, "Ouch! This code divides by zero."
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# def divides_by_zero?(ast)
|
||||
# process(ast)
|
||||
# false
|
||||
# rescue VerificationFailure
|
||||
# true
|
||||
# end
|
||||
# end
|
||||
#
|
||||
# nice_expr = \
|
||||
# s(:divide,
|
||||
# s(:add, s(:integer, 10), s(:integer, 2)),
|
||||
# s(:integer, 4))
|
||||
#
|
||||
# p DivisionByZeroVerifier.new.divides_by_zero?(nice_expr)
|
||||
# # => false. Good.
|
||||
#
|
||||
# bad_expr = \
|
||||
# s(:add, s(:integer, 10),
|
||||
# s(:divide, s(:integer, 1), s(:integer, 0)))
|
||||
#
|
||||
# p DivisionByZeroVerifier.new.divides_by_zero?(bad_expr)
|
||||
# # => true. WHOOPS. DO NOT RUN THIS.
|
||||
#
|
||||
# Of course, this won't detect more complex cases... unless you
|
||||
# use some partial evaluation before! The possibilites are
|
||||
# endless. Have fun.
|
||||
module Mixin
|
||||
# Dispatches `node`. If a node has type `:foo`, then a handler
|
||||
# named `on_foo` is invoked with one argument, the `node`; if
|
||||
# there isn't such a handler, {#handler_missing} is invoked
|
||||
# with the same argument.
|
||||
#
|
||||
# If the handler returns `nil`, `node` is returned; otherwise,
|
||||
# the return value of the handler is passed along.
|
||||
#
|
||||
# @param [AST::Node, nil] node
|
||||
# @return [AST::Node, nil]
|
||||
def process(node)
|
||||
return if node.nil?
|
||||
|
||||
node = node.to_ast
|
||||
|
||||
# Invoke a specific handler
|
||||
on_handler = :"on_#{node.type}"
|
||||
if respond_to? on_handler
|
||||
new_node = send on_handler, node
|
||||
else
|
||||
new_node = handler_missing(node)
|
||||
end
|
||||
|
||||
node = new_node if new_node
|
||||
|
||||
node
|
||||
end
|
||||
|
||||
# {#process}es each node from `nodes` and returns an array of
|
||||
# results.
|
||||
#
|
||||
# @param [Array<AST::Node>] nodes
|
||||
# @return [Array<AST::Node>]
|
||||
def process_all(nodes)
|
||||
nodes.to_a.map do |node|
|
||||
process node
|
||||
end
|
||||
end
|
||||
|
||||
# Default handler. Does nothing.
|
||||
#
|
||||
# @param [AST::Node] node
|
||||
# @return [AST::Node, nil]
|
||||
def handler_missing(node)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue