From 91a7102f113174b73312e65b4d007961a50565e5 Mon Sep 17 00:00:00 2001 From: Jeremy Ashkenas Date: Tue, 9 Feb 2010 19:30:28 -0500 Subject: [PATCH] Self-compiler: array slice literals. --- lib/coffee_script/lexer.js | 3 +- lib/coffee_script/nodes.js | 62 ++++++++++++- lib/coffee_script/nodes.rb | 7 +- src/lexer.coffee | 181 ++++++++++++++++++------------------- src/nodes.coffee | 77 +++++++++++++--- 5 files changed, 220 insertions(+), 110 deletions(-) diff --git a/lib/coffee_script/lexer.js b/lib/coffee_script/lexer.js index a79a18b1..48885f27 100644 --- a/lib/coffee_script/lexer.js +++ b/lib/coffee_script/lexer.js @@ -284,8 +284,7 @@ // Helpers ============================================================= // Add a token to the results, taking note of the line number. lex.prototype.token = function token(tag, value) { - return this.tokens.push([tag, value]); - // this.tokens.push([tag, Value.new(value, @line)]) + return this.tokens.push([tag, value, this.line]); }; // Look at a tag in the current token stream. lex.prototype.tag = function tag(index, tag) { diff --git a/lib/coffee_script/nodes.js b/lib/coffee_script/nodes.js index b816f433..94b997ab 100644 --- a/lib/coffee_script/nodes.js +++ b/lib/coffee_script/nodes.js @@ -1,5 +1,5 @@ (function(){ - var AccessorNode, CallNode, CommentNode, Expressions, ExtendsNode, IndexNode, LiteralNode, Node, ReturnNode, TAB, TRAILING_WHITESPACE, ThisNode, ValueNode, any, compact, del, dup, flatten, inherit, merge, statement; + var AccessorNode, CallNode, CommentNode, Expressions, ExtendsNode, IndexNode, LiteralNode, Node, RangeNode, ReturnNode, SliceNode, TAB, TRAILING_WHITESPACE, ThisNode, ValueNode, any, compact, del, dup, flatten, inherit, merge, statement; var __hasProp = Object.prototype.hasOwnProperty; process.mixin(require('./scope')); // The abstract base class for all CoffeeScript nodes. @@ -690,10 +690,68 @@ // A this-reference, using '@'. ThisNode = (exports.ThisNode = inherit(Node, { constructor: function constructor(property) { - return this.property = property || null; + this.property = property || null; + return this; }, compile_node: function compile_node(o) { return 'this' + (this.property ? '.' + this.property : ''); } })); + // A range literal. Ranges can be used to extract portions (slices) of arrays, + // or to specify a range for list comprehensions. + RangeNode = (exports.RangeNode = inherit(Node, { + constructor: function constructor(from, to, exclusive) { + this.from = from; + this.to = to; + this.children = [from, to]; + this.exclusive = !!exclusive; + return this; + }, + compile_variables: function compile_variables(o) { + this.indent = o.indent; + this.from_var = o.scope.free_variable(); + this.to_var = o.scope.free_variable(); + return this.from_var + ' = ' + this.from.compile(o) + '; ' + this.to_var + ' = ' + this.to.compile(o) + ";\n" + this.idt(); + }, + compile_node: function compile_node(o) { + var compare, equals, idx, incr, intro, step; + if (!(o.index)) { + return this.compile_array(o); + } + idx = del(o, 'index'); + step = del(o, 'step'); + equals = this.exclusive ? '' : '='; + intro = '(' + this.from_var + ' <= ' + this.to_var + ' ? ' + idx; + compare = intro + ' <' + equals + ' ' + this.to_var + ' : ' + idx + ' >' + equals + ' ' + this.to_var + ')'; + incr = intro + ' += ' + step + ' : ' + idx + ' -= ' + step + ')'; + return vars + '; ' + compare + '; ' + incr; + }, + // Expand the range into the equivalent array, if it's not being used as + // part of a comprehension, slice, or splice. + // TODO: This generates pretty ugly code ... shrink it. + compile_array: function compile_array(o) { + var arr, body; + body = Expressions.wrap(new LiteralNode('i')); + arr = Expressions.wrap(new ForNode(body, { + source: (new ValueNode(this)) + }, 'i')); + return (new ParentheticalNode(new CallNode(new CodeNode([], arr)))).compile(o); + } + })); + // An array slice literal. Unlike JavaScript's Array#slice, the second parameter + // specifies the index of the end of the slice (just like the first parameter) + // is the index of the beginning. + SliceNode = (exports.SliceNode = inherit(Node, { + constructor: function constructor(range) { + this.children = [(this.range = range)]; + return this; + }, + compile_node: function compile_node(o) { + var from, plus_part, to; + from = this.range.from.compile(o); + to = this.range.to.compile(o); + plus_part = this.range.exclusive ? '' : ' + 1'; + return ".slice(" + from + ', ' + to + plus_part + ')'; + } + })); })(); \ No newline at end of file diff --git a/lib/coffee_script/nodes.rb b/lib/coffee_script/nodes.rb index 0d160e17..da680813 100644 --- a/lib/coffee_script/nodes.rb +++ b/lib/coffee_script/nodes.rb @@ -439,7 +439,7 @@ module CoffeeScript end # A range literal. Ranges can be used to extract portions (slices) of arrays, - # or to specify a range for array comprehensions. + # or to specify a range for list comprehensions. class RangeNode < Node children :from, :to @@ -464,8 +464,9 @@ module CoffeeScript vars = "#{idx}=#{@from_var}" step = step ? step.compile(o) : '1' equals = @exclusive ? '' : '=' - compare = "(#{@from_var} <= #{@to_var} ? #{idx} <#{equals} #{@to_var} : #{idx} >#{equals} #{@to_var})" - incr = "(#{@from_var} <= #{@to_var} ? #{idx} += #{step} : #{idx} -= #{step})" + intro = "(#{@from_var} <= #{@to_var} ? #{idx}" + compare = "#{intro} <#{equals} #{@to_var} : #{idx} >#{equals} #{@to_var})" + incr = "#{intro} += #{step} : #{idx} -= #{step})" write("#{vars}; #{compare}; #{incr}") end diff --git a/src/lexer.coffee b/src/lexer.coffee index e8b6852e..1b389bda 100644 --- a/src/lexer.coffee +++ b/src/lexer.coffee @@ -59,186 +59,185 @@ CALLABLE: ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'] # Scan by attempting to match tokens one character at a time. Slow and steady. lex::tokenize: (code) -> - this.code : code # Cleanup code by remove extra line breaks, TODO: chomp - this.i : 0 # Current character position we're parsing - this.line : 1 # The current line. - this.indent : 0 # The current indent level. - this.indents : [] # The stack of all indent levels we are currently within. - this.tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value] - this.spaced : null # The last token that has a space following it. - while this.i < this.code.length - this.chunk: this.code.slice(this.i) - this.extract_next_token() - this.close_indentation() - (new Rewriter()).rewrite this.tokens + @code : code # Cleanup code by remove extra line breaks, TODO: chomp + @i : 0 # Current character position we're parsing + @line : 1 # The current line. + @indent : 0 # The current indent level. + @indents : [] # The stack of all indent levels we are currently within. + @tokens : [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value] + @spaced : null # The last token that has a space following it. + while @i < @code.length + @chunk: @code.slice(@i) + @extract_next_token() + @close_indentation() + (new Rewriter()).rewrite @tokens # At every position, run through this list of attempted matches, # short-circuiting if any of them succeed. lex::extract_next_token: -> - return if this.identifier_token() - return if this.number_token() - return if this.heredoc_token() - return if this.string_token() - return if this.js_token() - return if this.regex_token() - return if this.indent_token() - return if this.comment_token() - return if this.whitespace_token() - return this.literal_token() + return if @identifier_token() + return if @number_token() + return if @heredoc_token() + return if @string_token() + return if @js_token() + return if @regex_token() + return if @indent_token() + return if @comment_token() + return if @whitespace_token() + return @literal_token() # Tokenizers ========================================================== # Matches identifying literals: variables, keywords, method names, etc. lex::identifier_token: -> - return false unless id: this.match IDENTIFIER, 1 + return false unless id: @match IDENTIFIER, 1 # Keywords are special identifiers tagged with their own name, # 'if' will result in an ['IF', "if"] token. tag: if KEYWORDS.indexOf(id) >= 0 then id.toUpperCase() else 'IDENTIFIER' - tag: 'LEADING_WHEN' if tag is 'WHEN' and (this.tag() is 'OUTDENT' or this.tag() is 'INDENT') - this.tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and this.value() is '::' - if tag is 'IDENTIFIER' and this.value() is '.' and !(this.value(2) is '.') - if this.tag(2) is '?' - this.tag(1, 'SOAK_ACCESS') - this.tokens.splice(-2, 1) + tag: 'LEADING_WHEN' if tag is 'WHEN' and (@tag() is 'OUTDENT' or @tag() is 'INDENT') + @tag(-1, 'PROTOTYPE_ACCESS') if tag is 'IDENTIFIER' and @value() is '::' + if tag is 'IDENTIFIER' and @value() is '.' and !(@value(2) is '.') + if @tag(2) is '?' + @tag(1, 'SOAK_ACCESS') + @tokens.splice(-2, 1) else - this.tag(1, 'PROPERTY_ACCESS') - this.token(tag, id) - this.i += id.length + @tag(1, 'PROPERTY_ACCESS') + @token(tag, id) + @i += id.length true # Matches numbers, including decimals, hex, and exponential notation. lex::number_token: -> - return false unless number: this.match NUMBER, 1 - this.token 'NUMBER', number - this.i += number.length + return false unless number: @match NUMBER, 1 + @token 'NUMBER', number + @i += number.length true # Matches strings, including multi-line strings. lex::string_token: -> - return false unless string: this.match STRING, 1 + return false unless string: @match STRING, 1 escaped: string.replace STRING_NEWLINES, " \\\n" - this.token 'STRING', escaped - this.line += this.count string, "\n" - this.i += string.length + @token 'STRING', escaped + @line += @count string, "\n" + @i += string.length true # Matches heredocs, adjusting indentation to the correct level. lex::heredoc_token: -> - return false unless match = this.chunk.match(HEREDOC) + return false unless match = @chunk.match(HEREDOC) doc: match[2] or match[4] indent: doc.match(HEREDOC_INDENT).sort()[0] doc: doc.replace(new RegExp("^" + indent, 'g'), '') .replace(MULTILINER, "\\n") .replace('"', '\\"') - this.token 'STRING', '"' + doc + '"' - this.line += this.count match[1], "\n" - this.i += match[1].length + @token 'STRING', '"' + doc + '"' + @line += @count match[1], "\n" + @i += match[1].length true # Matches interpolated JavaScript. lex::js_token: -> - return false unless script: this.match JS, 1 - this.token 'JS', script.replace(JS_CLEANER, '') - this.i += script.length + return false unless script: @match JS, 1 + @token 'JS', script.replace(JS_CLEANER, '') + @i += script.length true # Matches regular expression literals. lex::regex_token: -> - return false unless regex: this.match REGEX, 1 - return false if NOT_REGEX.indexOf(this.tag()) >= 0 - this.token 'REGEX', regex - this.i += regex.length + return false unless regex: @match REGEX, 1 + return false if NOT_REGEX.indexOf(@tag()) >= 0 + @token 'REGEX', regex + @i += regex.length true # Matches and conumes comments. lex::comment_token: -> - return false unless comment: this.match COMMENT, 1 - this.line += (comment.match(MULTILINER) or []).length - this.token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER) - this.token 'TERMINATOR', "\n" - this.i += comment.length + return false unless comment: @match COMMENT, 1 + @line += (comment.match(MULTILINER) or []).length + @token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER) + @token 'TERMINATOR', "\n" + @i += comment.length true # Record tokens for indentation differing from the previous line. lex::indent_token: -> - return false unless indent: this.match MULTI_DENT, 1 - this.line += indent.match(MULTILINER).length - this.i += indent.length - next_character: this.chunk.match(MULTI_DENT)[4] - no_newlines: next_character is '.' or (this.value().match(NO_NEWLINE) and this.tokens[this.tokens.length - 2][0] isnt '.' and not this.value().match(CODE)) - return this.suppress_newlines(indent) if no_newlines + return false unless indent: @match MULTI_DENT, 1 + @line += indent.match(MULTILINER).length + @i += indent.length + next_character: @chunk.match(MULTI_DENT)[4] + no_newlines: next_character is '.' or (@value().match(NO_NEWLINE) and @tokens[@tokens.length - 2][0] isnt '.' and not @value().match(CODE)) + return @suppress_newlines(indent) if no_newlines size: indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length - return this.newline_token(indent) if size is this.indent - if size > this.indent - diff: size - this.indent - this.token 'INDENT', diff - this.indents.push diff + return @newline_token(indent) if size is @indent + if size > @indent + diff: size - @indent + @token 'INDENT', diff + @indents.push diff else - this.outdent_token this.indent - size - this.indent: size + @outdent_token @indent - size + @indent: size true # Record an oudent token or tokens, if we're moving back inwards past # multiple recorded indents. lex::outdent_token: (move_out) -> - while move_out > 0 and this.indents.length - last_indent: this.indents.pop() - this.token 'OUTDENT', last_indent + while move_out > 0 and @indents.length + last_indent: @indents.pop() + @token 'OUTDENT', last_indent move_out -= last_indent - this.token 'TERMINATOR', "\n" + @token 'TERMINATOR', "\n" true # Matches and consumes non-meaningful whitespace. lex::whitespace_token: -> - return false unless space: this.match WHITESPACE, 1 - this.spaced: this.value() - this.i += space.length + return false unless space: @match WHITESPACE, 1 + @spaced: @value() + @i += space.length true # Multiple newlines get merged together. # Use a trailing \ to escape newlines. lex::newline_token: (newlines) -> - this.token 'TERMINATOR', "\n" unless this.value() is "\n" + @token 'TERMINATOR', "\n" unless @value() is "\n" true # Tokens to explicitly escape newlines are removed once their job is done. lex::suppress_newlines: (newlines) -> - this.tokens.pop() if this.value() is "\\" + @tokens.pop() if @value() is "\\" true # We treat all other single characters as a token. Eg.: ( ) , . ! # Multi-character operators are also literal tokens, so that Racc can assign # the proper order of operations. lex::literal_token: -> - match: this.chunk.match(OPERATOR) + match: @chunk.match(OPERATOR) value: match and match[1] - this.tag_parameters() if value and value.match(CODE) - value ||= this.chunk.substr(0, 1) + @tag_parameters() if value and value.match(CODE) + value ||= @chunk.substr(0, 1) tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value tag: 'TERMINATOR' if value == ';' - if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0 + if @value() isnt @spaced and CALLABLE.indexOf(@tag()) >= 0 tag: 'CALL_START' if value is '(' tag: 'INDEX_START' if value is '[' - this.token tag, value - this.i += value.length + @token tag, value + @i += value.length true # Helpers ============================================================= # Add a token to the results, taking note of the line number. lex::token: (tag, value) -> - this.tokens.push([tag, value]) - # this.tokens.push([tag, Value.new(value, @line)]) + @tokens.push([tag, value, @line]) # Look at a tag in the current token stream. lex::tag: (index, tag) -> - return unless tok: this.tokens[this.tokens.length - (index or 1)] + return unless tok: @tokens[@tokens.length - (index or 1)] return tok[0]: tag if tag? tok[0] # Look at a value in the current token stream. lex::value: (index, val) -> - return unless tok: this.tokens[this.tokens.length - (index or 1)] + return unless tok: @tokens[@tokens.length - (index or 1)] return tok[1]: val if val? tok[1] @@ -254,7 +253,7 @@ lex::count: (string, letter) -> # Attempt to match a string against the current chunk, returning the indexed # match. lex::match: (regex, index) -> - return false unless m: this.chunk.match(regex) + return false unless m: @chunk.match(regex) if m then m[index] else false # A source of ambiguity in our grammar was parameter lists in function @@ -262,11 +261,11 @@ lex::match: (regex, index) -> # parameter identifiers in order to avoid this. Also, parameter lists can # make use of splats. lex::tag_parameters: -> - return if this.tag() isnt ')' + return if @tag() isnt ')' i: 0 while true i += 1 - tok: this.tokens[this.tokens.length - i] + tok: @tokens[@tokens.length - i] return if not tok switch tok[0] when 'IDENTIFIER' then tok[0]: 'PARAM' @@ -277,4 +276,4 @@ lex::tag_parameters: -> # Close up all remaining open blocks. IF the first token is an indent, # axe it. lex::close_indentation: -> - this.outdent_token(this.indent) + @outdent_token(@indent) diff --git a/src/nodes.coffee b/src/nodes.coffee index c058d144..89208fba 100644 --- a/src/nodes.coffee +++ b/src/nodes.coffee @@ -38,7 +38,6 @@ exports.IfNode : -> @name: this.constructor.name; @values: arguments exports.Expressions.wrap : (values) -> @values: values - # Some helper functions # Tabs are two spaces for pretty printing. @@ -101,7 +100,6 @@ statement: (klass, only) -> klass::is_statement: -> true (klass::is_statement_only: -> true) if only - # The abstract base class for all CoffeeScript nodes. # All nodes are implement a "compile_node" method, which performs the # code generation for that node. To compile a node, call the "compile" @@ -151,7 +149,6 @@ Node::is_statement: -> false Node::is_statement_only: -> false Node::top_sensitive: -> false - # A collection of nodes, each one representing an expression. Expressions: exports.Expressions: inherit Node, { @@ -233,7 +230,6 @@ Expressions.wrap: (nodes) -> statement Expressions - # Literals are static values that can be passed through directly into # JavaScript without translation, eg.: strings, numbers, true, false, null... LiteralNode: exports.LiteralNode: inherit Node, { @@ -257,7 +253,6 @@ LiteralNode: exports.LiteralNode: inherit Node, { LiteralNode::is_statement_only: LiteralNode::is_statement - # Return an expression, or wrap it in a closure and return it. ReturnNode: exports.ReturnNode: inherit Node, { @@ -274,7 +269,6 @@ ReturnNode: exports.ReturnNode: inherit Node, { statement ReturnNode, true - # A value, indexed or dotted into, or vanilla. ValueNode: exports.ValueNode: inherit Node, { @@ -341,7 +335,6 @@ ValueNode: exports.ValueNode: inherit Node, { } - # Pass through CoffeeScript comments into JavaScript comments at the # same position. CommentNode: exports.CommentNode: inherit Node, { @@ -358,7 +351,6 @@ CommentNode: exports.CommentNode: inherit Node, { statement CommentNode - # Node for a function invocation. Takes care of converting super() calls into # calls against the prototype's function of the same name. CallNode: exports.CallNode: inherit Node, { @@ -415,7 +407,6 @@ CallNode: exports.CallNode: inherit Node, { } - # Node to extend an object's prototype with an ancestor object. # After goog.inherits from the Closure Library. ExtendsNode: exports.ExtendsNode: inherit Node, { @@ -441,7 +432,6 @@ ExtendsNode: exports.ExtendsNode: inherit Node, { statement ExtendsNode - # A dotted accessor into a part of a value, or the :: shorthand for # an accessor into the object's prototype. AccessorNode: exports.AccessorNode: inherit Node, { @@ -458,7 +448,6 @@ AccessorNode: exports.AccessorNode: inherit Node, { } - # An indexed accessor into a part of an array or object. IndexNode: exports.IndexNode: inherit Node, { @@ -471,18 +460,82 @@ IndexNode: exports.IndexNode: inherit Node, { } - # A this-reference, using '@'. ThisNode: exports.ThisNode: inherit Node, { constructor: (property) -> @property: property or null + this compile_node: (o) -> 'this' + (if @property then '.' + @property else '') } +# A range literal. Ranges can be used to extract portions (slices) of arrays, +# or to specify a range for list comprehensions. +RangeNode: exports.RangeNode: inherit Node, { + + constructor: (from, to, exclusive) -> + @from: from + @to: to + @children: [from, to] + @exclusive: !!exclusive + this + + compile_variables: (o) -> + @indent: o.indent + @from_var: o.scope.free_variable() + @to_var: o.scope.free_variable() + @from_var + ' = ' + @from.compile(o) + '; ' + @to_var + ' = ' + @to.compile(o) + ";\n" + @idt() + + compile_node: (o) -> + return @compile_array(o) unless o.index + idx: del o, 'index' + step: del o, 'step' + equals: if @exclusive then '' else '=' + intro: '(' + @from_var + ' <= ' + @to_var + ' ? ' + idx + compare: intro + ' <' + equals + ' ' + @to_var + ' : ' + idx + ' >' + equals + ' ' + @to_var + ')' + incr: intro + ' += ' + step + ' : ' + idx + ' -= ' + step + ')' + vars + '; ' + compare + '; ' + incr + + # Expand the range into the equivalent array, if it's not being used as + # part of a comprehension, slice, or splice. + # TODO: This generates pretty ugly code ... shrink it. + compile_array: (o) -> + body: Expressions.wrap(new LiteralNode 'i') + arr: Expressions.wrap(new ForNode(body, {source: (new ValueNode(this))}, 'i')) + (new ParentheticalNode(new CallNode(new CodeNode([], arr)))).compile(o) + +} + +# An array slice literal. Unlike JavaScript's Array#slice, the second parameter +# specifies the index of the end of the slice (just like the first parameter) +# is the index of the beginning. +SliceNode: exports.SliceNode: inherit Node, { + + constructor: (range) -> + @children: [@range: range] + this + + compile_node: (o) -> + from: @range.from.compile(o) + to: @range.to.compile(o) + plus_part: if @range.exclusive then '' else ' + 1' + ".slice(" + from + ', ' + to + plus_part + ')' + +} + + + + + + + + + + +