Self-compiler: array slice literals.

This commit is contained in:
Jeremy Ashkenas 2010-02-09 19:30:28 -05:00
parent a451e90374
commit 91a7102f11
5 changed files with 220 additions and 110 deletions

View File

@ -284,8 +284,7 @@
// Helpers =============================================================
// Add a token to the results, taking note of the line number.
lex.prototype.token = function token(tag, value) {
return this.tokens.push([tag, value]);
// this.tokens.push([tag,, @line)])
return this.tokens.push([tag, value, this.line]);
// Look at a tag in the current token stream.
lex.prototype.tag = function tag(index, tag) {

View File

@ -1,5 +1,5 @@
var AccessorNode, CallNode, CommentNode, Expressions, ExtendsNode, IndexNode, LiteralNode, Node, ReturnNode, TAB, TRAILING_WHITESPACE, ThisNode, ValueNode, any, compact, del, dup, flatten, inherit, merge, statement;
var AccessorNode, CallNode, CommentNode, Expressions, ExtendsNode, IndexNode, LiteralNode, Node, RangeNode, ReturnNode, SliceNode, TAB, TRAILING_WHITESPACE, ThisNode, ValueNode, any, compact, del, dup, flatten, inherit, merge, statement;
var __hasProp = Object.prototype.hasOwnProperty;
// The abstract base class for all CoffeeScript nodes.
@ -690,10 +690,68 @@
// A this-reference, using '@'.
ThisNode = (exports.ThisNode = inherit(Node, {
  // Remember the accessed property, or null when none was given.
  constructor: function constructor(property) {
    this.property = property || null;
    return this;
  },
  // Emit `this`, followed by `.property` when a property was stored.
  compile_node: function compile_node(o) {
    return 'this' + (this.property ? '.' + this.property : '');
  }
}));
// A range literal. Ranges can be used to extract portions (slices) of arrays,
// or to specify a range for list comprehensions.
RangeNode = (exports.RangeNode = inherit(Node, {
  // Store both endpoints and whether the upper bound is excluded.
  constructor: function constructor(from, to, exclusive) {
    this.from = from;
    this.to = to;
    this.children = [from, to];
    this.exclusive = !!exclusive;
    return this;
  },
  // Reserve two scope variables for the endpoints and return the statement
  // that assigns the compiled `from` and `to` expressions to them.
  compile_variables: function compile_variables(o) {
    this.indent = o.indent;
    this.from_var = o.scope.free_variable();
    this.to_var = o.scope.free_variable();
    return this.from_var + ' = ' + this.from.compile(o) + '; ' + this.to_var + ' = ' + this.to.compile(o) + ";\n" + this.idt();
  },
  // With `o.index` set, return the "init; compare; increment" clauses of a
  // for loop over the range; otherwise expand the range into an array.
  compile_node: function compile_node(o) {
    var compare, equals, idx, incr, intro, step, vars;
    if (!(o.index)) {
      return this.compile_array(o);
    }
    idx = del(o, 'index');
    step = del(o, 'step');
    // Initialise the loop index from the range start.
    vars = idx + '=' + this.from_var;
    // A missing step means "advance by one"; otherwise compile the step node.
    step = step ? step.compile(o) : '1';
    equals = this.exclusive ? '' : '=';
    // The direction (ascending or descending) is decided at runtime by
    // comparing the two endpoints, so both clauses share this prefix.
    intro = '(' + this.from_var + ' <= ' + this.to_var + ' ? ' + idx;
    compare = intro + ' <' + equals + ' ' + this.to_var + ' : ' + idx + ' >' + equals + ' ' + this.to_var + ')';
    incr = intro + ' += ' + step + ' : ' + idx + ' -= ' + step + ')';
    return vars + '; ' + compare + '; ' + incr;
  },
  // Expand the range into the equivalent array, if it's not being used as
  // part of a comprehension, slice, or splice.
  // TODO: This generates pretty ugly code ... shrink it.
  // NOTE(review): ForNode, ParentheticalNode and CodeNode do not appear in the
  // visible `var` list for this file -- confirm they are defined before use.
  compile_array: function compile_array(o) {
    var arr, body;
    body = Expressions.wrap(new LiteralNode('i'));
    arr = Expressions.wrap(new ForNode(body, {
      source: (new ValueNode(this))
    }, 'i'));
    return (new ParentheticalNode(new CallNode(new CodeNode([], arr)))).compile(o);
  }
}));
// An array slice literal. Unlike JavaScript's Array#slice, the second parameter
// specifies the index of the end of the slice, just as the first parameter
// is the index of the beginning.
SliceNode = (exports.SliceNode = inherit(Node, {
  // Wrap the range whose endpoints delimit the slice.
  constructor: function constructor(range) {
    this.children = [(this.range = range)];
    return this;
  },
  // Emit a `.slice(from, to)` call; an inclusive range adds ` + 1` to the
  // end argument.
  compile_node: function compile_node(o) {
    var from, plus_part, to;
    from = this.range.from.compile(o);
    to = this.range.to.compile(o);
    plus_part = this.range.exclusive ? '' : ' + 1';
    return ".slice(" + from + ', ' + to + plus_part + ')';
  }
}));

View File

@ -439,7 +439,7 @@ module CoffeeScript
# A range literal. Ranges can be used to extract portions (slices) of arrays,
# or to specify a range for array comprehensions.
# or to specify a range for list comprehensions.
class RangeNode < Node
children :from, :to
@ -464,8 +464,9 @@ module CoffeeScript
vars = "#{idx}=#{@from_var}"
step = step ? step.compile(o) : '1'
equals = @exclusive ? '' : '='
compare = "(#{@from_var} <= #{@to_var} ? #{idx} <#{equals} #{@to_var} : #{idx} >#{equals} #{@to_var})"
incr = "(#{@from_var} <= #{@to_var} ? #{idx} += #{step} : #{idx} -= #{step})"
intro = "(#{@from_var} <= #{@to_var} ? #{idx}"
compare = "#{intro} <#{equals} #{@to_var} : #{idx} >#{equals} #{@to_var})"
incr = "#{intro} += #{step} : #{idx} -= #{step})"
write("#{vars}; #{compare}; #{incr}")

View File

@ -59,186 +59,185 @@ CALLABLE: ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING']
# Scan by attempting to match tokens one character at a time. Slow and steady.
lex::tokenize: (code) ->
this.code : code # Cleanup code by remove extra line breaks, TODO: chomp
this.i : 0 # Current character position we're parsing
this.line : 1 # The current line.
this.indent : 0 # The current indent level.
this.indents : [] # The stack of all indent levels we are currently within.
this.tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
this.spaced : null # The last token that has a space following it.
while this.i < this.code.length
this.chunk: this.code.slice(this.i)
(new Rewriter()).rewrite this.tokens
@code : code # Cleanup code by remove extra line breaks, TODO: chomp
@i : 0 # Current character position we're parsing
@line : 1 # The current line.
@indent : 0 # The current indent level.
@indents : [] # The stack of all indent levels we are currently within.
@tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
@spaced : null # The last token that has a space following it.
while @i < @code.length
@chunk: @code.slice(@i)
(new Rewriter()).rewrite @tokens
# At every position, run through this list of attempted matches,
# short-circuiting if any of them succeed.
lex::extract_next_token: ->
return if this.identifier_token()
return if this.number_token()
return if this.heredoc_token()
return if this.string_token()
return if this.js_token()
return if this.regex_token()
return if this.indent_token()
return if this.comment_token()
return if this.whitespace_token()
return this.literal_token()
return if @identifier_token()
return if @number_token()
return if @heredoc_token()
return if @string_token()
return if @js_token()
return if @regex_token()
return if @indent_token()
return if @comment_token()
return if @whitespace_token()
return @literal_token()
# Tokenizers ==========================================================
# Matches identifying literals: variables, keywords, method names, etc.
lex::identifier_token: ->
return false unless id: this.match IDENTIFIER, 1
return false unless id: @match IDENTIFIER, 1
# Keywords are special identifiers tagged with their own name,
# 'if' will result in an ['IF', "if"] token.
tag: if KEYWORDS.indexOf(id) >= 0 then id.toUpperCase() else 'IDENTIFIER'
tag: 'LEADING_WHEN' if tag is 'WHEN' and (this.tag() is 'OUTDENT' or this.tag() is 'INDENT')
this.tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and this.value() is '::'
if tag is 'IDENTIFIER' and this.value() is '.' and !(this.value(2) is '.')
if this.tag(2) is '?'
this.tag(1, 'SOAK_ACCESS')
this.tokens.splice(-2, 1)
tag: 'LEADING_WHEN' if tag is 'WHEN' and (@tag() is 'OUTDENT' or @tag() is 'INDENT')
@tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and @value() is '::'
if tag is 'IDENTIFIER' and @value() is '.' and !(@value(2) is '.')
if @tag(2) is '?'
@tag(1, 'SOAK_ACCESS')
@tokens.splice(-2, 1)
this.tag(1, 'PROPERTY_ACCESS')
this.token(tag, id)
this.i += id.length
@token(tag, id)
@i += id.length
# Matches numbers, including decimals, hex, and exponential notation.
lex::number_token: ->
return false unless number: this.match NUMBER, 1
this.token 'NUMBER', number
this.i += number.length
return false unless number: @match NUMBER, 1
@token 'NUMBER', number
@i += number.length
# Matches strings, including multi-line strings.
lex::string_token: ->
return false unless string: this.match STRING, 1
return false unless string: @match STRING, 1
escaped: string.replace STRING_NEWLINES, " \\\n"
this.token 'STRING', escaped
this.line += this.count string, "\n"
this.i += string.length
@token 'STRING', escaped
@line += @count string, "\n"
@i += string.length
# Matches heredocs, adjusting indentation to the correct level.
lex::heredoc_token: ->
return false unless match = this.chunk.match(HEREDOC)
return false unless match = @chunk.match(HEREDOC)
doc: match[2] or match[4]
indent: doc.match(HEREDOC_INDENT).sort()[0]
doc: doc.replace(new RegExp("^" + indent, 'g'), '')
.replace(MULTILINER, "\\n")
.replace('"', '\\"')
this.token 'STRING', '"' + doc + '"'
this.line += this.count match[1], "\n"
this.i += match[1].length
@token 'STRING', '"' + doc + '"'
@line += @count match[1], "\n"
@i += match[1].length
# Matches interpolated JavaScript.
lex::js_token: ->
return false unless script: this.match JS, 1
this.token 'JS', script.replace(JS_CLEANER, '')
this.i += script.length
return false unless script: @match JS, 1
@token 'JS', script.replace(JS_CLEANER, '')
@i += script.length
# Matches regular expression literals.
lex::regex_token: ->
return false unless regex: this.match REGEX, 1
return false if NOT_REGEX.indexOf(this.tag()) >= 0
this.token 'REGEX', regex
this.i += regex.length
return false unless regex: @match REGEX, 1
return false if NOT_REGEX.indexOf(@tag()) >= 0
@token 'REGEX', regex
@i += regex.length
# Matches and consumes comments.
lex::comment_token: ->
return false unless comment: this.match COMMENT, 1
this.line += (comment.match(MULTILINER) or []).length
this.token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
this.token 'TERMINATOR', "\n"
this.i += comment.length
return false unless comment: @match COMMENT, 1
@line += (comment.match(MULTILINER) or []).length
@token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
@token 'TERMINATOR', "\n"
@i += comment.length
# Record tokens for indentation differing from the previous line.
lex::indent_token: ->
return false unless indent: this.match MULTI_DENT, 1
this.line += indent.match(MULTILINER).length
this.i += indent.length
next_character: this.chunk.match(MULTI_DENT)[4]
no_newlines: next_character is '.' or (this.value().match(NO_NEWLINE) and this.tokens[this.tokens.length - 2][0] isnt '.' and not this.value().match(CODE))
return this.suppress_newlines(indent) if no_newlines
return false unless indent: @match MULTI_DENT, 1
@line += indent.match(MULTILINER).length
@i += indent.length
next_character: @chunk.match(MULTI_DENT)[4]
no_newlines: next_character is '.' or (@value().match(NO_NEWLINE) and @tokens[@tokens.length - 2][0] isnt '.' and not @value().match(CODE))
return @suppress_newlines(indent) if no_newlines
size: indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length
return this.newline_token(indent) if size is this.indent
if size > this.indent
diff: size - this.indent
this.token 'INDENT', diff
this.indents.push diff
return @newline_token(indent) if size is @indent
if size > @indent
diff: size - @indent
@token 'INDENT', diff
@indents.push diff
this.outdent_token this.indent - size
this.indent: size
@outdent_token @indent - size
@indent: size
# Record an outdent token or tokens, if we're moving back inwards past
# multiple recorded indents.
lex::outdent_token: (move_out) ->
while move_out > 0 and this.indents.length
last_indent: this.indents.pop()
this.token 'OUTDENT', last_indent
while move_out > 0 and @indents.length
last_indent: @indents.pop()
@token 'OUTDENT', last_indent
move_out -= last_indent
this.token 'TERMINATOR', "\n"
@token 'TERMINATOR', "\n"
# Matches and consumes non-meaningful whitespace.
lex::whitespace_token: ->
return false unless space: this.match WHITESPACE, 1
this.spaced: this.value()
this.i += space.length
return false unless space: @match WHITESPACE, 1
@spaced: @value()
@i += space.length
# Multiple newlines get merged together.
# Use a trailing \ to escape newlines.
lex::newline_token: (newlines) ->
this.token 'TERMINATOR', "\n" unless this.value() is "\n"
@token 'TERMINATOR', "\n" unless @value() is "\n"
# Tokens to explicitly escape newlines are removed once their job is done.
lex::suppress_newlines: (newlines) ->
this.tokens.pop() if this.value() is "\\"
@tokens.pop() if @value() is "\\"
# We treat all other single characters as a token. Eg.: ( ) , . !
# Multi-character operators are also literal tokens, so that Racc can assign
# the proper order of operations.
lex::literal_token: ->
match: this.chunk.match(OPERATOR)
match: @chunk.match(OPERATOR)
value: match and match[1]
this.tag_parameters() if value and value.match(CODE)
value ||= this.chunk.substr(0, 1)
@tag_parameters() if value and value.match(CODE)
value ||= @chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
tag: 'TERMINATOR' if value == ';'
if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0
if @value() isnt @spaced and CALLABLE.indexOf(@tag()) >= 0
tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '['
this.token tag, value
this.i += value.length
@token tag, value
@i += value.length
# Helpers =============================================================
# Add a token to the results, taking note of the line number.
lex::token: (tag, value) ->
this.tokens.push([tag, value])
# this.tokens.push([tag,, @line)])
@tokens.push([tag, value, @line])
# Look at a tag in the current token stream.
lex::tag: (index, tag) ->
return unless tok: this.tokens[this.tokens.length - (index or 1)]
return unless tok: @tokens[@tokens.length - (index or 1)]
return tok[0]: tag if tag?
# Look at a value in the current token stream.
lex::value: (index, val) ->
return unless tok: this.tokens[this.tokens.length - (index or 1)]
return unless tok: @tokens[@tokens.length - (index or 1)]
return tok[1]: val if val?
@ -254,7 +253,7 @@ lex::count: (string, letter) ->
# Attempt to match a string against the current chunk, returning the indexed
# match.
lex::match: (regex, index) ->
return false unless m: this.chunk.match(regex)
return false unless m: @chunk.match(regex)
if m then m[index] else false
# A source of ambiguity in our grammar was parameter lists in function
@ -262,11 +261,11 @@ lex::match: (regex, index) ->
# parameter identifiers in order to avoid this. Also, parameter lists can
# make use of splats.
lex::tag_parameters: ->
return if this.tag() isnt ')'
return if @tag() isnt ')'
i: 0
while true
i += 1
tok: this.tokens[this.tokens.length - i]
tok: @tokens[@tokens.length - i]
return if not tok
switch tok[0]
when 'IDENTIFIER' then tok[0]: 'PARAM'
@ -277,4 +276,4 @@ lex::tag_parameters: ->
# Close up all remaining open blocks. If the first token is an indent,
# axe it.
lex::close_indentation: ->

View File

@ -38,7 +38,6 @@ exports.IfNode : -> @name:; @values: arguments
exports.Expressions.wrap : (values) -> @values: values
# Some helper functions
# Tabs are two spaces for pretty printing.
@ -101,7 +100,6 @@ statement: (klass, only) ->
klass::is_statement: -> true
(klass::is_statement_only: -> true) if only
# The abstract base class for all CoffeeScript nodes.
# All nodes implement a "compile_node" method, which performs the
# code generation for that node. To compile a node, call the "compile"
@ -151,7 +149,6 @@ Node::is_statement: -> false
Node::is_statement_only: -> false
Node::top_sensitive: -> false
# A collection of nodes, each one representing an expression.
Expressions: exports.Expressions: inherit Node, {
@ -233,7 +230,6 @@ Expressions.wrap: (nodes) ->
statement Expressions
# Literals are static values that can be passed through directly into
# JavaScript without translation, eg.: strings, numbers, true, false, null...
LiteralNode: exports.LiteralNode: inherit Node, {
@ -257,7 +253,6 @@ LiteralNode: exports.LiteralNode: inherit Node, {
LiteralNode::is_statement_only: LiteralNode::is_statement
# Return an expression, or wrap it in a closure and return it.
ReturnNode: exports.ReturnNode: inherit Node, {
@ -274,7 +269,6 @@ ReturnNode: exports.ReturnNode: inherit Node, {
statement ReturnNode, true
# A value, indexed or dotted into, or vanilla.
ValueNode: exports.ValueNode: inherit Node, {
@ -341,7 +335,6 @@ ValueNode: exports.ValueNode: inherit Node, {
# Pass through CoffeeScript comments into JavaScript comments at the
# same position.
CommentNode: exports.CommentNode: inherit Node, {
@ -358,7 +351,6 @@ CommentNode: exports.CommentNode: inherit Node, {
statement CommentNode
# Node for a function invocation. Takes care of converting super() calls into
# calls against the prototype's function of the same name.
CallNode: exports.CallNode: inherit Node, {
@ -415,7 +407,6 @@ CallNode: exports.CallNode: inherit Node, {
# Node to extend an object's prototype with an ancestor object.
# After goog.inherits from the Closure Library.
ExtendsNode: exports.ExtendsNode: inherit Node, {
@ -441,7 +432,6 @@ ExtendsNode: exports.ExtendsNode: inherit Node, {
statement ExtendsNode
# A dotted accessor into a part of a value, or the :: shorthand for
# an accessor into the object's prototype.
AccessorNode: exports.AccessorNode: inherit Node, {
@ -458,7 +448,6 @@ AccessorNode: exports.AccessorNode: inherit Node, {
# An indexed accessor into a part of an array or object.
IndexNode: exports.IndexNode: inherit Node, {
@ -471,18 +460,82 @@ IndexNode: exports.IndexNode: inherit Node, {
# A this-reference, using '@'.
ThisNode: exports.ThisNode: inherit Node, {
# Remember the accessed property, or null when none was given.
constructor: (property) ->
@property: property or null
# Emit 'this', followed by '.property' when a property was stored.
compile_node: (o) ->
'this' + (if @property then '.' + @property else '')
# A range literal. Ranges can be used to extract portions (slices) of arrays,
# or to specify a range for list comprehensions.
RangeNode: exports.RangeNode: inherit Node, {

  # Store both endpoints and whether the upper bound is excluded.
  constructor: (from, to, exclusive) ->
    @from:      from
    @to:        to
    @children:  [from, to]
    @exclusive: !!exclusive

  # Reserve two scope variables for the endpoints and return the statement
  # that assigns the compiled 'from' and 'to' expressions to them.
  compile_variables: (o) ->
    @indent:   o.indent
    @from_var: o.scope.free_variable()
    @to_var:   o.scope.free_variable()
    @from_var + ' = ' + @from.compile(o) + '; ' + @to_var + ' = ' + @to.compile(o) + ";\n" + @idt()

  # With o.index set, return the "init; compare; increment" clauses of a for
  # loop over the range; otherwise expand the range into an array.
  compile_node: (o) ->
    return    @compile_array(o) unless o.index
    idx:      del o, 'index'
    step:     del o, 'step'
    # Initialise the loop index from the range start.
    vars:     idx + '=' + @from_var
    # A missing step means "advance by one"; otherwise compile the step node.
    step:     if step then step.compile(o) else '1'
    equals:   if @exclusive then '' else '='
    # The direction is decided at runtime by comparing the two endpoints, so
    # both clauses share this prefix.
    intro:    '(' + @from_var + ' <= ' + @to_var + ' ? ' + idx
    compare:  intro + ' <' + equals + ' ' + @to_var + ' : ' + idx + ' >' + equals + ' ' + @to_var + ')'
    incr:     intro + ' += ' + step + ' : ' + idx + ' -= ' + step + ')'
    vars + '; ' + compare + '; ' + incr

  # Expand the range into the equivalent array, if it's not being used as
  # part of a comprehension, slice, or splice.
  # TODO: This generates pretty ugly code ... shrink it.
  compile_array: (o) ->
    body: Expressions.wrap(new LiteralNode 'i')
    arr:  Expressions.wrap(new ForNode(body, {source: (new ValueNode(this))}, 'i'))
    (new ParentheticalNode(new CallNode(new CodeNode([], arr)))).compile(o)

}
# An array slice literal. Unlike JavaScript's Array#slice, the second parameter
# specifies the index of the end of the slice, just as the first parameter
# is the index of the beginning.
SliceNode: exports.SliceNode: inherit Node, {

  # Wrap the range whose endpoints delimit the slice.
  constructor: (range) ->
    @children: [@range: range]

  # Emit a '.slice(from, to)' call; an inclusive range adds ' + 1' to the
  # end argument.
  compile_node: (o) ->
    from:       @range.from.compile(o)
    to:         @range.to.compile(o)
    plus_part:  if @range.exclusive then '' else ' + 1'
    ".slice(" + from + ', ' + to + plus_part + ')'

}