Self-compiler: array slice literals.

2010-02-09 19:30:28 -05:00 · 2010-02-09 19:30:28 -05:00 · 91a7102f11
parent a451e90374
commit 91a7102f11
5 changed files with 220 additions and 110 deletions
--- a/lib/coffee_script/lexer.js
+++ b/lib/coffee_script/lexer.js
@ -284,8 +284,7 @@
  // Helpers =============================================================
  // Add a token to the results, taking note of the line number.
  lex.prototype.token = function token(tag, value) {
-    return this.tokens.push([tag, value]);
-    // this.tokens.push([tag, Value.new(value, @line)])
+    return this.tokens.push([tag, value, this.line]);
  };
  // Look at a tag in the current token stream.
  lex.prototype.tag = function tag(index, tag) {
--- a/lib/coffee_script/nodes.js
+++ b/lib/coffee_script/nodes.js
@ -1,5 +1,5 @@
 (function(){
-  var AccessorNode, CallNode, CommentNode, Expressions, ExtendsNode, IndexNode, LiteralNode, Node, ReturnNode, TAB, TRAILING_WHITESPACE, ThisNode, ValueNode, any, compact, del, dup, flatten, inherit, merge, statement;
+  var AccessorNode, CallNode, CommentNode, Expressions, ExtendsNode, IndexNode, LiteralNode, Node, RangeNode, ReturnNode, SliceNode, TAB, TRAILING_WHITESPACE, ThisNode, ValueNode, any, compact, del, dup, flatten, inherit, merge, statement;
  var __hasProp = Object.prototype.hasOwnProperty;
  process.mixin(require('./scope'));
  // The abstract base class for all CoffeeScript nodes.
@ -690,10 +690,68 @@
  // A this-reference, using '@'.
  ThisNode = (exports.ThisNode = inherit(Node, {
    constructor: function constructor(property) {
-      return this.property = property || null;
+      this.property = property || null;
+      return this;
    },
    compile_node: function compile_node(o) {
      return 'this' + (this.property ? '.' + this.property : '');
    }
  }));
+  // A range literal. Ranges can be used to extract portions (slices) of arrays,
+  // or to specify a range for list comprehensions.
+  RangeNode = (exports.RangeNode = inherit(Node, {
+    constructor: function constructor(from, to, exclusive) {
+      this.from = from;
+      this.to = to;
+      this.children = [from, to];
+      this.exclusive = !!exclusive;
+      return this;
+    },
+    compile_variables: function compile_variables(o) {
+      this.indent = o.indent;
+      this.from_var = o.scope.free_variable();
+      this.to_var = o.scope.free_variable();
+      return this.from_var + ' = ' + this.from.compile(o) + '; ' + this.to_var + ' = ' + this.to.compile(o) + ";\n" + this.idt();
+    },
+    // Compile the range as the source of a comprehension, producing the three
+    // clauses of a JavaScript "for" header: "init; test; update". Reads the
+    // from_var and to_var names that compile_variables assigns. When the
+    // options carry no index, the range is expanded into an array instead.
+    compile_node: function compile_node(o) {
+      var compare, equals, idx, incr, intro, step, vars;
+      if (!(o.index)) {
+        return this.compile_array(o);
+      }
+      idx = del(o, 'index');
+      step = del(o, 'step');
+      // Start the index at the cached "from" value (mirrors nodes.rb).
+      vars = idx + '=' + this.from_var;
+      // The step is optional: compile it when given, otherwise count by one.
+      step = step ? step.compile(o) : '1';
+      equals = this.exclusive ? '' : '=';
+      // Shared prefix that picks the ascending or descending branch at runtime.
+      intro = '(' + this.from_var + ' <= ' + this.to_var + ' ? ' + idx;
+      compare = intro + ' <' + equals + ' ' + this.to_var + ' : ' + idx + ' >' + equals + ' ' + this.to_var + ')';
+      incr = intro + ' += ' + step + ' : ' + idx + ' -= ' + step + ')';
+      return vars + '; ' + compare + '; ' + incr;
+    },
+    // Expand the range into the equivalent array, if it's not being used as
+    // part of a comprehension, slice, or splice.
+    // TODO: This generates pretty ugly code ... shrink it.
+    compile_array: function compile_array(o) {
+      var arr, body;
+      body = Expressions.wrap(new LiteralNode('i'));
+      arr = Expressions.wrap(new ForNode(body, {
+        source: (new ValueNode(this))
+      }, 'i'));
+      return (new ParentheticalNode(new CallNode(new CodeNode([], arr)))).compile(o);
+    }
+  }));
+  // An array slice literal. Unlike JavaScript's Array#slice, the second parameter
+  // specifies the index of the end of the slice, just as the first parameter
+  // is the index of the beginning.
+  SliceNode = (exports.SliceNode = inherit(Node, {
+    constructor: function constructor(range) {
+      this.children = [(this.range = range)];
+      return this;
+    },
+    compile_node: function compile_node(o) {
+      var from, plus_part, to;
+      from = this.range.from.compile(o);
+      to = this.range.to.compile(o);
+      plus_part = this.range.exclusive ? '' : ' + 1';
+      return ".slice(" + from + ', ' + to + plus_part + ')';
+    }
+  }));
 })();
--- a/lib/coffee_script/nodes.rb
+++ b/lib/coffee_script/nodes.rb
@ -439,7 +439,7 @@ module CoffeeScript
  end

  # A range literal. Ranges can be used to extract portions (slices) of arrays,
-  # or to specify a range for array comprehensions.
+  # or to specify a range for list comprehensions.
  class RangeNode < Node
    children :from, :to

@ -464,8 +464,9 @@ module CoffeeScript
      vars      = "#{idx}=#{@from_var}"
      step      = step ? step.compile(o) : '1'
      equals    = @exclusive ? '' : '='
-      compare   = "(#{@from_var} <= #{@to_var} ? #{idx} <#{equals} #{@to_var} : #{idx} >#{equals} #{@to_var})"
-      incr      = "(#{@from_var} <= #{@to_var} ? #{idx} += #{step} : #{idx} -= #{step})"
+      intro     = "(#{@from_var} <= #{@to_var} ? #{idx}"
+      compare   = "#{intro} <#{equals} #{@to_var} : #{idx} >#{equals} #{@to_var})"
+      incr      = "#{intro} += #{step} : #{idx} -= #{step})"
      write("#{vars}; #{compare}; #{incr}")
    end

--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -59,186 +59,185 @@ CALLABLE: ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING']

 # Scan by attempting to match tokens one character at a time. Slow and steady.
 lex::tokenize: (code) ->
-  this.code    : code       # Cleanup code by remove extra line breaks, TODO: chomp
-  this.i       : 0          # Current character position we're parsing
-  this.line    : 1          # The current line.
-  this.indent  : 0          # The current indent level.
-  this.indents : []         # The stack of all indent levels we are currently within.
-  this.tokens  : []         # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
-  this.spaced  : null       # The last token that has a space following it.
-  while this.i < this.code.length
-    this.chunk: this.code.slice(this.i)
-    this.extract_next_token()
-  this.close_indentation()
-  (new Rewriter()).rewrite this.tokens
+  @code    : code       # Cleanup code by remove extra line breaks, TODO: chomp
+  @i       : 0          # Current character position we're parsing
+  @line    : 1          # The current line.
+  @indent  : 0          # The current indent level.
+  @indents : []         # The stack of all indent levels we are currently within.
+  @tokens  : []         # Collection of all parsed tokens in the form [TOKEN_TYPE, value, line]
+  @spaced  : null       # The last token that has a space following it.
+  while @i < @code.length
+    @chunk: @code.slice(@i)
+    @extract_next_token()
+  @close_indentation()
+  (new Rewriter()).rewrite @tokens

 # At every position, run through this list of attempted matches,
 # short-circuiting if any of them succeed.
 lex::extract_next_token: ->
-  return if this.identifier_token()
-  return if this.number_token()
-  return if this.heredoc_token()
-  return if this.string_token()
-  return if this.js_token()
-  return if this.regex_token()
-  return if this.indent_token()
-  return if this.comment_token()
-  return if this.whitespace_token()
-  return    this.literal_token()
+  return if @identifier_token()
+  return if @number_token()
+  return if @heredoc_token()
+  return if @string_token()
+  return if @js_token()
+  return if @regex_token()
+  return if @indent_token()
+  return if @comment_token()
+  return if @whitespace_token()
+  return    @literal_token()

 # Tokenizers ==========================================================

 # Matches identifying literals: variables, keywords, method names, etc.
 lex::identifier_token: ->
-  return false unless id: this.match IDENTIFIER, 1
+  return false unless id: @match IDENTIFIER, 1
  # Keywords are special identifiers tagged with their own name,
  # 'if' will result in an ['IF', "if"] token.
  tag: if KEYWORDS.indexOf(id) >= 0 then id.toUpperCase() else 'IDENTIFIER'
-  tag: 'LEADING_WHEN' if tag is 'WHEN' and (this.tag() is 'OUTDENT' or this.tag() is 'INDENT')
-  this.tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and this.value() is '::'
-  if tag is 'IDENTIFIER' and this.value() is '.' and !(this.value(2) is '.')
-    if this.tag(2) is '?'
-      this.tag(1, 'SOAK_ACCESS')
-      this.tokens.splice(-2, 1)
+  tag: 'LEADING_WHEN' if tag is 'WHEN' and (@tag() is 'OUTDENT' or @tag() is 'INDENT')
+  @tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and @value() is '::'
+  if tag is 'IDENTIFIER' and @value() is '.' and !(@value(2) is '.')
+    if @tag(2) is '?'
+      @tag(1, 'SOAK_ACCESS')
+      @tokens.splice(-2, 1)
    else
-      this.tag(1, 'PROPERTY_ACCESS')
-  this.token(tag, id)
-  this.i += id.length
+      @tag(1, 'PROPERTY_ACCESS')
+  @token(tag, id)
+  @i += id.length
  true

 # Matches numbers, including decimals, hex, and exponential notation.
 lex::number_token: ->
-  return false unless number: this.match NUMBER, 1
-  this.token 'NUMBER', number
-  this.i += number.length
+  return false unless number: @match NUMBER, 1
+  @token 'NUMBER', number
+  @i += number.length
  true

 # Matches strings, including multi-line strings.
 lex::string_token: ->
-  return false unless string: this.match STRING, 1
+  return false unless string: @match STRING, 1
  escaped: string.replace STRING_NEWLINES, " \\\n"
-  this.token 'STRING', escaped
-  this.line += this.count string, "\n"
-  this.i += string.length
+  @token 'STRING', escaped
+  @line += @count string, "\n"
+  @i += string.length
  true

 # Matches heredocs, adjusting indentation to the correct level.
 lex::heredoc_token: ->
-  return false unless match = this.chunk.match(HEREDOC)
+  return false unless match = @chunk.match(HEREDOC)
  doc: match[2] or match[4]
  indent: doc.match(HEREDOC_INDENT).sort()[0]
  doc: doc.replace(new RegExp("^" + indent, 'g'), '')
          .replace(MULTILINER, "\\n")
          .replace('"', '\\"')
-  this.token 'STRING', '"' + doc + '"'
-  this.line += this.count match[1], "\n"
-  this.i += match[1].length
+  @token 'STRING', '"' + doc + '"'
+  @line += @count match[1], "\n"
+  @i += match[1].length
  true

 # Matches interpolated JavaScript.
 lex::js_token: ->
-  return false unless script: this.match JS, 1
-  this.token 'JS', script.replace(JS_CLEANER, '')
-  this.i += script.length
+  return false unless script: @match JS, 1
+  @token 'JS', script.replace(JS_CLEANER, '')
+  @i += script.length
  true

 # Matches regular expression literals.
 lex::regex_token: ->
-  return false unless regex: this.match REGEX, 1
-  return false if NOT_REGEX.indexOf(this.tag()) >= 0
-  this.token 'REGEX', regex
-  this.i += regex.length
+  return false unless regex: @match REGEX, 1
+  return false if NOT_REGEX.indexOf(@tag()) >= 0
+  @token 'REGEX', regex
+  @i += regex.length
  true

 # Matches and consumes comments.
 lex::comment_token: ->
-  return false unless comment: this.match COMMENT, 1
-  this.line += (comment.match(MULTILINER) or []).length
-  this.token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
-  this.token 'TERMINATOR', "\n"
-  this.i += comment.length
+  return false unless comment: @match COMMENT, 1
+  @line += (comment.match(MULTILINER) or []).length
+  @token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
+  @token 'TERMINATOR', "\n"
+  @i += comment.length
  true

 # Record tokens for indentation differing from the previous line.
 lex::indent_token: ->
-  return false unless indent: this.match MULTI_DENT, 1
-  this.line += indent.match(MULTILINER).length
-  this.i    += indent.length
-  next_character: this.chunk.match(MULTI_DENT)[4]
-  no_newlines: next_character is '.' or (this.value().match(NO_NEWLINE) and this.tokens[this.tokens.length - 2][0] isnt '.' and not this.value().match(CODE))
-  return this.suppress_newlines(indent) if no_newlines
+  return false unless indent: @match MULTI_DENT, 1
+  @line += indent.match(MULTILINER).length
+  @i    += indent.length
+  next_character: @chunk.match(MULTI_DENT)[4]
+  no_newlines: next_character is '.' or (@value().match(NO_NEWLINE) and @tokens[@tokens.length - 2][0] isnt '.' and not @value().match(CODE))
+  return @suppress_newlines(indent) if no_newlines
  size: indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length
-  return this.newline_token(indent) if size is this.indent
-  if size > this.indent
-    diff: size - this.indent
-    this.token 'INDENT', diff
-    this.indents.push diff
+  return @newline_token(indent) if size is @indent
+  if size > @indent
+    diff: size - @indent
+    @token 'INDENT', diff
+    @indents.push diff
  else
-    this.outdent_token this.indent - size
-  this.indent: size
+    @outdent_token @indent - size
+  @indent: size
  true

 # Record an outdent token or tokens, if we're moving back inwards past
 # multiple recorded indents.
 lex::outdent_token: (move_out) ->
-  while move_out > 0 and this.indents.length
-    last_indent: this.indents.pop()
-    this.token 'OUTDENT', last_indent
+  while move_out > 0 and @indents.length
+    last_indent: @indents.pop()
+    @token 'OUTDENT', last_indent
    move_out -= last_indent
-  this.token 'TERMINATOR', "\n"
+  @token 'TERMINATOR', "\n"
  true

 # Matches and consumes non-meaningful whitespace.
 lex::whitespace_token: ->
-  return false unless space: this.match WHITESPACE, 1
-  this.spaced: this.value()
-  this.i += space.length
+  return false unless space: @match WHITESPACE, 1
+  @spaced: @value()
+  @i += space.length
  true

 # Multiple newlines get merged together.
 # Use a trailing \ to escape newlines.
 lex::newline_token: (newlines) ->
-  this.token 'TERMINATOR', "\n" unless this.value() is "\n"
+  @token 'TERMINATOR', "\n" unless @value() is "\n"
  true

 # Tokens to explicitly escape newlines are removed once their job is done.
 lex::suppress_newlines: (newlines) ->
-  this.tokens.pop() if this.value() is "\\"
+  @tokens.pop() if @value() is "\\"
  true

 # We treat all other single characters as a token. Eg.: ( ) , . !
 # Multi-character operators are also literal tokens, so that Racc can assign
 # the proper order of operations.
 lex::literal_token: ->
-  match: this.chunk.match(OPERATOR)
+  match: @chunk.match(OPERATOR)
  value: match and match[1]
-  this.tag_parameters() if value and value.match(CODE)
-  value ||= this.chunk.substr(0, 1)
+  @tag_parameters() if value and value.match(CODE)
+  value ||= @chunk.substr(0, 1)
  tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
  tag: 'TERMINATOR' if value == ';'
-  if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0
+  if @value() isnt @spaced and CALLABLE.indexOf(@tag()) >= 0
    tag: 'CALL_START'  if value is '('
    tag: 'INDEX_START' if value is '['
-  this.token tag, value
-  this.i += value.length
+  @token tag, value
+  @i += value.length
  true

 # Helpers =============================================================

 # Add a token to the results, taking note of the line number.
 lex::token: (tag, value) ->
-  this.tokens.push([tag, value])
-  # this.tokens.push([tag, Value.new(value, @line)])
+  @tokens.push([tag, value, @line])

 # Look at a tag in the current token stream.
 lex::tag: (index, tag) ->
-  return unless tok: this.tokens[this.tokens.length - (index or 1)]
+  return unless tok: @tokens[@tokens.length - (index or 1)]
  return tok[0]: tag if tag?
  tok[0]

 # Look at a value in the current token stream.
 lex::value: (index, val) ->
-  return unless tok: this.tokens[this.tokens.length - (index or 1)]
+  return unless tok: @tokens[@tokens.length - (index or 1)]
  return tok[1]: val if val?
  tok[1]

@ -254,7 +253,7 @@ lex::count: (string, letter) ->
 # Attempt to match a string against the current chunk, returning the indexed
 # match.
 lex::match: (regex, index) ->
-  return false unless m: this.chunk.match(regex)
+  return false unless m: @chunk.match(regex)
  if m then m[index] else false

 # A source of ambiguity in our grammar was parameter lists in function
@ -262,11 +261,11 @@ lex::match: (regex, index) ->
 # parameter identifiers in order to avoid this. Also, parameter lists can
 # make use of splats.
 lex::tag_parameters: ->
-  return if this.tag() isnt ')'
+  return if @tag() isnt ')'
  i: 0
  while true
    i += 1
-    tok: this.tokens[this.tokens.length - i]
+    tok: @tokens[@tokens.length - i]
    return if not tok
    switch tok[0]
      when 'IDENTIFIER' then tok[0]: 'PARAM'
@ -277,4 +276,4 @@ lex::tag_parameters: ->
 # Close up all remaining open blocks. IF the first token is an indent,
 # axe it.
 lex::close_indentation: ->
-  this.outdent_token(this.indent)
+  @outdent_token(@indent)
--- a/src/nodes.coffee
+++ b/src/nodes.coffee
@ -38,7 +38,6 @@ exports.IfNode            : -> @name: this.constructor.name; @values: arguments

 exports.Expressions.wrap  : (values) -> @values: values

-
 # Some helper functions

 # Tabs are two spaces for pretty printing.
@ -101,7 +100,6 @@ statement: (klass, only) ->
  klass::is_statement:       -> true
  (klass::is_statement_only:  -> true) if only

-
 # The abstract base class for all CoffeeScript nodes.
 # All nodes implement a "compile_node" method, which performs the
 # code generation for that node. To compile a node, call the "compile"
@ -151,7 +149,6 @@ Node::is_statement:       -> false
 Node::is_statement_only:  -> false
 Node::top_sensitive:      -> false

-
 # A collection of nodes, each one representing an expression.
 Expressions: exports.Expressions: inherit Node, {

@ -233,7 +230,6 @@ Expressions.wrap: (nodes) ->

 statement Expressions

-
 # Literals are static values that can be passed through directly into
 # JavaScript without translation, eg.: strings, numbers, true, false, null...
 LiteralNode: exports.LiteralNode: inherit Node, {
@ -257,7 +253,6 @@ LiteralNode: exports.LiteralNode: inherit Node, {

 LiteralNode::is_statement_only: LiteralNode::is_statement

-
 # Return an expression, or wrap it in a closure and return it.
 ReturnNode: exports.ReturnNode: inherit Node, {

@ -274,7 +269,6 @@ ReturnNode: exports.ReturnNode: inherit Node, {

 statement ReturnNode, true

-
 # A value, indexed or dotted into, or vanilla.
 ValueNode: exports.ValueNode: inherit Node, {

@ -341,7 +335,6 @@ ValueNode: exports.ValueNode: inherit Node, {

 }

-
 # Pass through CoffeeScript comments into JavaScript comments at the
 # same position.
 CommentNode: exports.CommentNode: inherit Node, {
@ -358,7 +351,6 @@ CommentNode: exports.CommentNode: inherit Node, {

 statement CommentNode

-
 # Node for a function invocation. Takes care of converting super() calls into
 # calls against the prototype's function of the same name.
 CallNode: exports.CallNode: inherit Node, {
@ -415,7 +407,6 @@ CallNode: exports.CallNode: inherit Node, {

 }

-
 # Node to extend an object's prototype with an ancestor object.
 # After goog.inherits from the Closure Library.
 ExtendsNode: exports.ExtendsNode: inherit Node, {
@ -441,7 +432,6 @@ ExtendsNode: exports.ExtendsNode: inherit Node, {

 statement ExtendsNode

-
 # A dotted accessor into a part of a value, or the :: shorthand for
 # an accessor into the object's prototype.
 AccessorNode: exports.AccessorNode: inherit Node, {
@ -458,7 +448,6 @@ AccessorNode: exports.AccessorNode: inherit Node, {

 }

-
 # An indexed accessor into a part of an array or object.
 IndexNode: exports.IndexNode: inherit Node, {

@ -471,18 +460,82 @@ IndexNode: exports.IndexNode: inherit Node, {

 }

-
 # A this-reference, using '@'.
 ThisNode: exports.ThisNode: inherit Node, {

  constructor: (property) ->
    @property: property or null
+    this

  compile_node: (o) ->
    'this' + (if @property then '.' + @property else '')

 }

+# A range literal. Ranges can be used to extract portions (slices) of arrays,
+# or to specify a range for list comprehensions.
+RangeNode: exports.RangeNode: inherit Node, {
+
+  constructor: (from, to, exclusive) ->
+    @from:      from
+    @to:        to
+    @children:  [from, to]
+    @exclusive: !!exclusive
+    this
+
+  compile_variables: (o) ->
+    @indent:   o.indent
+    @from_var: o.scope.free_variable()
+    @to_var:   o.scope.free_variable()
+    @from_var + ' = ' + @from.compile(o) + '; ' + @to_var + ' = ' + @to.compile(o) + ";\n" + @idt()
+
+  # Compile the range as the source of a comprehension, producing the three
+  # clauses of a JavaScript "for" header: "init; test; update". Reads the
+  # from_var and to_var names that compile_variables assigns. When the
+  # options carry no index, the range is expanded into an array instead.
+  compile_node: (o) ->
+    return    @compile_array(o) unless o.index
+    idx:      del o, 'index'
+    step:     del o, 'step'
+    # Start the index at the cached "from" value (mirrors nodes.rb).
+    vars:     idx + '=' + @from_var
+    # The step is optional: compile it when given, otherwise count by one.
+    step:     if step then step.compile(o) else '1'
+    equals:   if @exclusive then '' else '='
+    # Shared prefix that picks the ascending or descending branch at runtime.
+    intro:    '(' + @from_var + ' <= ' + @to_var + ' ? ' + idx
+    compare:  intro + ' <' + equals + ' ' + @to_var + ' : ' + idx + ' >' + equals + ' ' + @to_var + ')'
+    incr:     intro + ' += ' + step + ' : ' + idx + ' -= ' + step + ')'
+    vars + '; ' + compare + '; ' + incr
+
+  # Expand the range into the equivalent array, if it's not being used as
+  # part of a comprehension, slice, or splice.
+  # TODO: This generates pretty ugly code ... shrink it.
+  compile_array: (o) ->
+    body: Expressions.wrap(new LiteralNode 'i')
+    arr:  Expressions.wrap(new ForNode(body, {source: (new ValueNode(this))}, 'i'))
+    (new ParentheticalNode(new CallNode(new CodeNode([], arr)))).compile(o)
+
+}
+
+# An array slice literal. Unlike JavaScript's Array#slice, the second parameter
+# specifies the index of the end of the slice, just as the first parameter
+# is the index of the beginning.
+SliceNode: exports.SliceNode: inherit Node, {
+
+  constructor: (range) ->
+    @children: [@range: range]
+    this
+
+  compile_node: (o) ->
+    from:       @range.from.compile(o)
+    to:         @range.to.compile(o)
+    plus_part:  if @range.exclusive then '' else ' + 1'
+    ".slice(" + from + ', ' + to + plus_part + ')'
+
+}
+
+
+
+
+
+
+
+
+
+
+