waypoint -- it's beginning to parser

This commit is contained in:
Jeremy Ashkenas 2010-02-07 15:15:36 -05:00
parent 7ec0a8d653
commit 56499984ca
8 changed files with 839 additions and 926 deletions

View File

@ -1,6 +1,5 @@
(function(){
var ASSIGNMENT, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex, sys;
sys = require('sys');
var ASSIGNMENT, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
Rewriter = require('./rewriter').Rewriter;
// The lexer reads a stream of CoffeeScript and divvies it up into tagged
// tokens. A minor bit of the ambiguity in the grammar has been avoided by
@ -57,7 +56,6 @@
this.chunk = this.code.slice(this.i);
this.extract_next_token();
}
// sys.puts "original stream: " + this.tokens if process.ENV['VERBOSE']
this.close_indentation();
return (new Rewriter()).rewrite(this.tokens);
};
@ -188,7 +186,7 @@
}
this.line += comment.match(MULTILINER).length;
this.token('COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
this.token("\n", "\n");
this.token('TERMINATOR', "\n");
this.i += comment.length;
return true;
};
@ -228,7 +226,7 @@
this.token('OUTDENT', last_indent);
move_out -= last_indent;
}
this.token("\n", "\n");
this.token('TERMINATOR', "\n");
return true;
};
// Matches and consumes non-meaningful whitespace.
@ -245,7 +243,7 @@
// Use a trailing \ to escape newlines.
lex.prototype.newline_token = function newline_token(newlines) {
  // Emit exactly one TERMINATOR token for a line break. The legacy "\n"-tagged
  // token is no longer produced: the rewriter's IMPLICIT_END / SINGLE_CLOSERS
  // lists and the grammar rules only recognise the 'TERMINATOR' tag.
  // `this.value()` is presumably the value of the most recent token (confirm
  // against its definition); when that is already a newline nothing is added,
  // so consecutive line breaks do not stack up terminators.
  // `newlines` is accepted for interface compatibility but is not read here.
  // Always returns true.
  if (this.value() !== "\n") {
    this.token('TERMINATOR', "\n");
  }
  return true;
};
@ -268,6 +266,9 @@
}
value = value || this.chunk.substr(0, 1);
tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
if (value === ';') {
tag = 'TERMINATOR';
}
if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
if (value === '(') {
tag = 'CALL_START';
@ -309,13 +310,13 @@
return tok[1];
};
// Count the occurrences of a character in a string.
// `string` is the text to scan; `letter` is the character (or substring) to
// look for. Returns the number of positions at which `letter` is found.
lex.prototype.count = function count(string, letter) {
  var num, pos;
  num = 0;
  // An empty search string matches at every index and indexOf never returns
  // -1 for it, so the loop below would never terminate; report no matches.
  if (letter === '') {
    return 0;
  }
  pos = string.indexOf(letter);
  while (pos !== -1) {
    // Tally into the local counter. The previous code incremented `count`,
    // which is this function's own name, and then returned it.
    num += 1;
    pos = string.indexOf(letter, pos + 1);
  }
  return num;
};

View File

@ -5,6 +5,9 @@
__a = this.values = arguments;
return Node === this.constructor ? this : __a;
};
// Record `values` on the receiver (`this`) and hand the same value back.
exports.Node.wrap = function wrap(values) {
  this.values = values;
  return values;
};
exports.Expressions = exports.Node;
exports.LiteralNode = exports.Node;
exports.ReturnNode = exports.Node;

View File

@ -2,6 +2,7 @@
var Parser, __a, __b, __c, __d, __e, __f, bnf, grammar, name, non_terminal, o, operators, option, parser, part, tokens, unwrap;
var __hasProp = Object.prototype.hasOwnProperty;
Parser = require('jison').Parser;
process.mixin(require('./nodes'));
// DSL ===================================================================
// Detect functions: [
unwrap = /function\s*\(\)\s*\{\s*return\s*([\s\S]*);\s*\}/;
@ -22,33 +23,45 @@
// All parsing will end in this rule, being the trunk of the AST.
Root: [o("", function() {
return new Expressions();
}), o("Terminator", function() {
}), o("TERMINATOR", function() {
return new Expressions();
}), o("Expressions"), o("Block Terminator")
}), o("Expressions"), o("Block TERMINATOR")
],
// Any list of expressions or method body, separated by line breaks or semis.
Expressions: [o("Expression", function() {
return Expressions.wrap([$1]);
}), o("Expressions Terminator Expression", function() {
}), o("Expressions TERMINATOR Expression", function() {
return $1.push($3);
}), o("Expressions Terminator")
}), o("Expressions TERMINATOR")
],
// All types of expressions in our language. The basic unit of CoffeeScript
// is the expression.
Expression: [o("Value"), o("Call"), o("Code"), o("Operation"), o("Assign"), o("If"), o("Try"), o("Throw"), o("Return"), o("While"), o("For"), o("Switch"), o("Extends"), o("Splat"), o("Existence"), o("Comment")],
// A block of expressions. Note that the Rewriter will convert some postfix
// forms into blocks for us, by altering the token stream.
Block: [o("INDENT Expressions OUTDENT", function() {
return $2;
}), o("INDENT OUTDENT", function() {
return new Expressions();
})
Expression: [o("Value"),
// o "Call"
// o "Code"
// o "Operation"
// o "Assign"
// o "If"
// o "Try"
// o "Throw"
// o "Return"
// o "While"
// o "For"
// o "Switch"
// o "Extends"
// o "Splat"
// o "Existence"
// o "Comment"
],
// Tokens that can terminate an expression.
Terminator: [o("\n"), o(";")],
// # A block of expressions. Note that the Rewriter will convert some postfix
// # forms into blocks for us, by altering the token stream.
// Block: [
// o "INDENT Expressions OUTDENT", -> $2
// o "INDENT OUTDENT", -> new Expressions()
// ]
// All hard-coded values. These can be printed straight to JavaScript.
Literal: [o("NUMBER", function() {
return new LiteralNode($1);
return new LiteralNode(yytext);
}), o("STRING", function() {
return new LiteralNode($1);
}), o("JS", function() {
@ -75,487 +88,387 @@
return new LiteralNode(false);
})
],
// Assignment to a variable (or index).
Assign: [o("Value ASSIGN Expression", function() {
return new AssignNode($1, $3);
})
],
// Assignment within an object literal (can be quoted).
AssignObj: [o("IDENTIFIER ASSIGN Expression", function() {
return new AssignNode(new ValueNode($1), $3, 'object');
}), o("STRING ASSIGN Expression", function() {
return new AssignNode(new ValueNode(new LiteralNode($1)), $3, 'object');
}), o("Comment")
],
// A return statement.
Return: [o("RETURN Expression", function() {
return new ReturnNode($2);
}), o("RETURN", function() {
return new ReturnNode(new ValueNode(new LiteralNode('null')));
})
],
// A comment.
Comment: [o("COMMENT", function() {
return new CommentNode($1);
})
],
// Arithmetic and logical operators
// For Ruby's Operator precedence, see: [
// https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
Operation: [o("! Expression", function() {
return new OpNode($1, $2);
}), o("!! Expression", function() {
return new OpNode($1, $2);
}), o("- Expression", function() {
return new OpNode($1, $2);
}), o("+ Expression", function() {
return new OpNode($1, $2);
}), o("NOT Expression", function() {
return new OpNode($1, $2);
}), o("~ Expression", function() {
return new OpNode($1, $2);
}), o("-- Expression", function() {
return new OpNode($1, $2);
}), o("++ Expression", function() {
return new OpNode($1, $2);
}), o("DELETE Expression", function() {
return new OpNode($1, $2);
}), o("TYPEOF Expression", function() {
return new OpNode($1, $2);
}), o("Expression --", function() {
return new OpNode($2, $1, null, true);
}), o("Expression ++", function() {
return new OpNode($2, $1, null, true);
}), o("Expression * Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression / Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression % Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression + Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression - Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression << Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression >> Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression >>> Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression & Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression | Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression ^ Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression <= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression < Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression > Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression >= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression == Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression != Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression IS Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression ISNT Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression && Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression || Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression AND Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression OR Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression ? Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression -= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression += Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression /= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression *= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression %= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression ||= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression &&= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression ?= Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression INSTANCEOF Expression", function() {
return new OpNode($2, $1, $3);
}), o("Expression IN Expression", function() {
return new OpNode($2, $1, $3);
})
],
// Try abbreviated expressions to make the grammar build faster:
// UnaryOp: [
// o "!"
// o "!!"
// o "NOT"
// o "~"
// o "--"
// o "++"
// o "DELETE"
// o "TYPEOF"
// # Assignment to a variable (or index).
// Assign: [
// o "Value ASSIGN Expression", -> new AssignNode($1, $3)
// ]
//
// BinaryOp: [
// o "*"
// o "/"
// o "%"
// o "+"
// o "-"
// o "<<"
// o ">>"
// o ">>>"
// o "&"
// o "|"
// o "^"
// o "<="
// o "<"
// o ">"
// o ">="
// o "=="
// o "!="
// o "IS"
// o "ISNT"
// o "&&"
// o "||"
// o "AND"
// o "OR"
// o "?"
// o "-="
// o "+="
// o "/="
// o "*="
// o "%="
// o "||="
// o "&&="
// o "?="
// o "INSTANCEOF"
// o "IN"
// # Assignment within an object literal (can be quoted).
// AssignObj: [
// o "IDENTIFIER ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
// o "STRING ASSIGN Expression", -> new AssignNode(new ValueNode(new LiteralNode($1)), $3, 'object')
// o "Comment"
// ]
//
// # A return statement.
// Return: [
// o "RETURN Expression", -> new ReturnNode($2)
// o "RETURN", -> new ReturnNode(new ValueNode(new LiteralNode('null')))
// ]
//
// # A comment.
// Comment: [
// o "COMMENT", -> new CommentNode($1)
// ]
//
// # Arithmetic and logical operators
// # For Ruby's Operator precedence, see: [
// # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
// Operation: [
// o "Expression BinaryOp Expression", -> new OpNode($2, $1, $3)
// o "UnaryOp Expression", -> new OpNode($1, $2)
// o "! Expression", -> new OpNode($1, $2)
// o "!! Expression", -> new OpNode($1, $2)
// o "- Expression", -> new OpNode($1, $2)
// o "+ Expression", -> new OpNode($1, $2)
// o "NOT Expression", -> new OpNode($1, $2)
// o "~ Expression", -> new OpNode($1, $2)
// o "-- Expression", -> new OpNode($1, $2)
// o "++ Expression", -> new OpNode($1, $2)
// o "DELETE Expression", -> new OpNode($1, $2)
// o "TYPEOF Expression", -> new OpNode($1, $2)
// o "Expression --", -> new OpNode($2, $1, null, true)
// o "Expression ++", -> new OpNode($2, $1, null, true)
//
// o "Expression * Expression", -> new OpNode($2, $1, $3)
// o "Expression / Expression", -> new OpNode($2, $1, $3)
// o "Expression % Expression", -> new OpNode($2, $1, $3)
//
// o "Expression + Expression", -> new OpNode($2, $1, $3)
// o "Expression - Expression", -> new OpNode($2, $1, $3)
//
// o "Expression << Expression", -> new OpNode($2, $1, $3)
// o "Expression >> Expression", -> new OpNode($2, $1, $3)
// o "Expression >>> Expression", -> new OpNode($2, $1, $3)
//
// o "Expression & Expression", -> new OpNode($2, $1, $3)
// o "Expression | Expression", -> new OpNode($2, $1, $3)
// o "Expression ^ Expression", -> new OpNode($2, $1, $3)
//
// o "Expression <= Expression", -> new OpNode($2, $1, $3)
// o "Expression < Expression", -> new OpNode($2, $1, $3)
// o "Expression > Expression", -> new OpNode($2, $1, $3)
// o "Expression >= Expression", -> new OpNode($2, $1, $3)
//
// o "Expression == Expression", -> new OpNode($2, $1, $3)
// o "Expression != Expression", -> new OpNode($2, $1, $3)
// o "Expression IS Expression", -> new OpNode($2, $1, $3)
// o "Expression ISNT Expression", -> new OpNode($2, $1, $3)
//
// o "Expression && Expression", -> new OpNode($2, $1, $3)
// o "Expression || Expression", -> new OpNode($2, $1, $3)
// o "Expression AND Expression", -> new OpNode($2, $1, $3)
// o "Expression OR Expression", -> new OpNode($2, $1, $3)
// o "Expression ? Expression", -> new OpNode($2, $1, $3)
//
// o "Expression -= Expression", -> new OpNode($2, $1, $3)
// o "Expression += Expression", -> new OpNode($2, $1, $3)
// o "Expression /= Expression", -> new OpNode($2, $1, $3)
// o "Expression *= Expression", -> new OpNode($2, $1, $3)
// o "Expression %= Expression", -> new OpNode($2, $1, $3)
// o "Expression ||= Expression", -> new OpNode($2, $1, $3)
// o "Expression &&= Expression", -> new OpNode($2, $1, $3)
// o "Expression ?= Expression", -> new OpNode($2, $1, $3)
//
// o "Expression INSTANCEOF Expression", -> new OpNode($2, $1, $3)
// o "Expression IN Expression", -> new OpNode($2, $1, $3)
// ]
//
// # Try abbreviated expressions to make the grammar build faster:
//
// # UnaryOp: [
// # o "!"
// # o "!!"
// # o "NOT"
// # o "~"
// # o "--"
// # o "++"
// # o "DELETE"
// # o "TYPEOF"
// # ]
// #
// # BinaryOp: [
// # o "*"
// # o "/"
// # o "%"
// # o "+"
// # o "-"
// # o "<<"
// # o ">>"
// # o ">>>"
// # o "&"
// # o "|"
// # o "^"
// # o "<="
// # o "<"
// # o ">"
// # o ">="
// # o "=="
// # o "!="
// # o "IS"
// # o "ISNT"
// # o "&&"
// # o "||"
// # o "AND"
// # o "OR"
// # o "?"
// # o "-="
// # o "+="
// # o "/="
// # o "*="
// # o "%="
// # o "||="
// # o "&&="
// # o "?="
// # o "INSTANCEOF"
// # o "IN"
// # ]
// #
// # Operation: [
// # o "Expression BinaryOp Expression", -> new OpNode($2, $1, $3)
// # o "UnaryOp Expression", -> new OpNode($1, $2)
// # ]
//
// # The existence operator.
// Existence: [
// o "Expression ?", -> new ExistenceNode($1)
// ]
//
// # Function definition.
// Code: [
// o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode($2, $5, $4)
// o "FuncGlyph Block", -> new CodeNode([], $2, $1)
// ]
//
// # The symbols to signify functions, and bound functions.
// FuncGlyph: [
// o "->", -> 'func'
// o "=>", -> 'boundfunc'
// ]
//
// # The parameters to a function definition.
// ParamList: [
// o "Param", -> [$1]
// o "ParamList , Param", -> $1.push($3)
// ]
//
// # A Parameter (or ParamSplat) in a function definition.
// Param: [
// o "PARAM"
// o "PARAM . . .", -> new SplatNode($1)
// ]
//
// # A regular splat.
// Splat: [
// o "Expression . . .", -> new SplatNode($1)
// ]
// The existence operator.
Existence: [o("Expression ?", function() {
return new ExistenceNode($1);
})
],
// Function definition.
Code: [o("PARAM_START ParamList PARAM_END FuncGlyph Block", function() {
return new CodeNode($2, $5, $4);
}), o("FuncGlyph Block", function() {
return new CodeNode([], $2, $1);
})
],
// The symbols to signify functions, and bound functions.
FuncGlyph: [o("->", function() {
return 'func';
}), o("=>", function() {
return 'boundfunc';
})
],
// The parameters to a function definition.
ParamList: [o("Param", function() {
return [$1];
}), o("ParamList , Param", function() {
return $1.push($3);
})
],
// A Parameter (or ParamSplat) in a function definition.
Param: [o("PARAM"), o("PARAM . . .", function() {
return new SplatNode($1);
})
],
// A regular splat.
Splat: [o("Expression . . .", function() {
return new SplatNode($1);
})
],
// Expressions that can be treated as values.
Value: [o("IDENTIFIER", function() {
return new ValueNode($1);
return new ValueNode(yytext);
}), o("Literal", function() {
return new ValueNode($1);
}), o("Array", function() {
return new ValueNode($1);
}), o("Object", function() {
return new ValueNode($1);
}), o("Parenthetical", function() {
return new ValueNode($1);
}), o("Range", function() {
return new ValueNode($1);
}), o("Value Accessor", function() {
return $1.push($2);
}), o("Invocation Accessor", function() {
return new ValueNode($1, [$2]);
})
],
// Accessing into an object or array, through dot or index notation.
Accessor: [o("PROPERTY_ACCESS IDENTIFIER", function() {
return new AccessorNode($2);
}), o("PROTOTYPE_ACCESS IDENTIFIER", function() {
return new AccessorNode($2, 'prototype');
}), o("SOAK_ACCESS IDENTIFIER", function() {
return new AccessorNode($2, 'soak');
}), o("Index"), o("Slice", function() {
return new SliceNode($1);
})
],
// Indexing into an object or array.
Index: [o("INDEX_START Expression INDEX_END", function() {
return new IndexNode($2);
})
],
// An object literal.
Object: [o("{ AssignList }", function() {
return new ObjectNode($2);
})
],
// Assignment within an object literal (comma or newline separated).
AssignList: [o("", function() {
return [];
}), o("AssignObj", function() {
return [$1];
}), o("AssignList , AssignObj", function() {
return $1.push($3);
}), o("AssignList Terminator AssignObj", function() {
return $1.push($3);
}), o("AssignList , Terminator AssignObj", function() {
return $1.push($4);
}), o("INDENT AssignList OUTDENT", function() {
return $2;
})
],
// All flavors of function call (instantiation, super, and regular).
Call: [o("Invocation", function() {
return $1;
}), o("NEW Invocation", function() {
return $2.new_instance();
}), o("Super", function() {
return $1;
})
],
// Extending an object's prototype.
Extends: [o("Value EXTENDS Value", function() {
return new ExtendsNode($1, $3);
})
],
// A generic function invocation.
Invocation: [o("Value Arguments", function() {
return new CallNode($1, $2);
}), o("Invocation Arguments", function() {
return new CallNode($1, $2);
})
],
// The list of arguments to a function invocation.
Arguments: [o("CALL_START ArgList CALL_END", function() {
return $2;
})
],
// Calling super.
Super: [o("SUPER CALL_START ArgList CALL_END", function() {
return new CallNode('super', $3);
})
],
// The range literal.
Range: [o("[ Expression . . Expression ]", function() {
return new RangeNode($2, $5);
}), o("[ Expression . . . Expression ]", function() {
return new RangeNode($2, $6, true);
})
],
// The slice literal.
Slice: [o("INDEX_START Expression . . Expression INDEX_END", function() {
return new RangeNode($2, $5);
}), o("INDEX_START Expression . . . Expression INDEX_END", function() {
return new RangeNode($2, $6, true);
})
],
// The array literal.
Array: [o("[ ArgList ]", function() {
return new ArrayNode($2);
})
],
// A list of arguments to a method call, or as the contents of an array.
ArgList: [o("", function() {
return [];
}), o("Expression", function() {
return val;
}), o("INDENT Expression", function() {
return [$2];
}), o("ArgList , Expression", function() {
return $1.push($3);
}), o("ArgList Terminator Expression", function() {
return $1.push($3);
}), o("ArgList , Terminator Expression", function() {
return $1.push($4);
}), o("ArgList , INDENT Expression", function() {
return $1.push($4);
}), o("ArgList OUTDENT", function() {
return $1;
})
],
// Just simple, comma-separated, required arguments (no fancy syntax).
SimpleArgs: [o("Expression", function() {
return $1;
}), o("SimpleArgs , Expression", function() {
return ([$1].push($3)).reduce(function(a, b) {
return a.concat(b);
});
})
],
// Try/catch/finally exception handling blocks.
Try: [o("TRY Block Catch", function() {
return new TryNode($2, $3[0], $3[1]);
}), o("TRY Block FINALLY Block", function() {
return new TryNode($2, nil, nil, $4);
}), o("TRY Block Catch FINALLY Block", function() {
return new TryNode($2, $3[0], $3[1], $5);
})
],
// A catch clause.
Catch: [o("CATCH IDENTIFIER Block", function() {
return [$2, $3];
})
],
// Throw an exception.
Throw: [o("THROW Expression", function() {
return new ThrowNode($2);
})
],
// Parenthetical expressions.
Parenthetical: [o("( Expression )", function() {
return new ParentheticalNode($2);
})
],
// The while loop. (there is no do..while).
While: [o("WHILE Expression Block", function() {
return new WhileNode($2, $3);
}), o("WHILE Expression", function() {
return new WhileNode($2, nil);
}), o("Expression WHILE Expression", function() {
return new WhileNode($3, Expressions.wrap($1));
})
],
// Array comprehensions, including guard and current index.
// Looks a little confusing, check nodes.rb for the arguments to ForNode.
For: [o("Expression FOR ForVariables ForSource", function() {
return new ForNode($1, $4, $3[0], $3[1]);
}), o("FOR ForVariables ForSource Block", function() {
return new ForNode($4, $3, $2[0], $2[1]);
})
],
// An array comprehension has variables for the current element and index.
ForVariables: [o("IDENTIFIER", function() {
return [$1];
}), o("IDENTIFIER , IDENTIFIER", function() {
return [$1, $3];
})
],
// The source of the array comprehension can optionally be filtered.
ForSource: [o("IN Expression", function() {
return {
source: $2
};
}), o("OF Expression", function() {
return {
source: $2,
object: true
};
}), o("ForSource WHEN Expression", function() {
$1.filter = $3;
return $1;
}), o("ForSource BY Expression", function() {
$1.step = $3;
return $1;
})
],
// Switch/When blocks.
Switch: [o("SWITCH Expression INDENT Whens OUTDENT", function() {
return $4.rewrite_condition($2);
}), o("SWITCH Expression INDENT Whens ELSE Block OUTDENT", function() {
return $4.rewrite_condition($2).add_else($6);
})
],
// The inner list of whens.
Whens: [o("When", function() {
return $1;
}), o("Whens When", function() {
return $1.push($2);
})
],
// An individual when.
When: [o("LEADING_WHEN SimpleArgs Block", function() {
return new IfNode($2, $3, nil, {
statement: true
});
}), o("LEADING_WHEN SimpleArgs Block Terminator", function() {
return new IfNode($2, $3, nil, {
statement: true
});
}), o("Comment Terminator When", function() {
return $3.add_comment($1);
})
],
// The most basic form of "if".
IfBlock: [o("IF Expression Block", function() {
return new IfNode($2, $3);
})
],
// An elsif portion of an if-else block.
ElsIf: [o("ELSE IfBlock", function() {
return $2.force_statement();
})
],
// Multiple elsifs can be chained together.
ElsIfs: [o("ElsIf", function() {
return $1;
}), o("ElsIfs ElsIf", function() {
return $1.add_else($2);
})
],
// Terminating else bodies are strictly optional.
ElseBody: [o("", function() {
return null;
}), o("ELSE Block", function() {
return $2;
})
],
// All the alternatives for ending an if-else block.
IfEnd: [o("ElseBody", function() {
return $1;
}), o("ElsIfs ElseBody", function() {
return $1.add_else($2);
})
],
// The full complement of if blocks, including postfix one-liner ifs and unlesses.
If: [o("IfBlock IfEnd", function() {
return $1.add_else($2);
}), o("Expression IF Expression", function() {
return new IfNode($3, Expressions.wrap($1), nil, {
statement: true
});
}), o("Expression UNLESS Expression", function() {
return new IfNode($3, Expressions.wrap($1), nil, {
statement: true,
invert: true
});
})
}),
// o "Array", -> new ValueNode($1)
// o "Object", -> new ValueNode($1)
// o "Parenthetical", -> new ValueNode($1)
// o "Range", -> new ValueNode($1)
// o "Value Accessor", -> $1.push($2)
// o "Invocation Accessor", -> new ValueNode($1, [$2])
]
// # Accessing into an object or array, through dot or index notation.
// Accessor: [
// o "PROPERTY_ACCESS IDENTIFIER", -> new AccessorNode($2)
// o "PROTOTYPE_ACCESS IDENTIFIER", -> new AccessorNode($2, 'prototype')
// o "SOAK_ACCESS IDENTIFIER", -> new AccessorNode($2, 'soak')
// o "Index"
// o "Slice", -> new SliceNode($1)
// ]
//
// # Indexing into an object or array.
// Index: [
// o "INDEX_START Expression INDEX_END", -> new IndexNode($2)
// ]
//
// # An object literal.
// Object: [
// o "{ AssignList }", -> new ObjectNode($2)
// ]
//
// # Assignment within an object literal (comma or newline separated).
// AssignList: [
// o "", -> []
// o "AssignObj", -> [$1]
// o "AssignList , AssignObj", -> $1.push $3
// o "AssignList TERMINATOR AssignObj", -> $1.push $3
// o "AssignList , TERMINATOR AssignObj", -> $1.push $4
// o "INDENT AssignList OUTDENT", -> $2
// ]
//
// # All flavors of function call (instantiation, super, and regular).
// Call: [
// o "Invocation", -> $1
// o "NEW Invocation", -> $2.new_instance()
// o "Super", -> $1
// ]
//
// # Extending an object's prototype.
// Extends: [
// o "Value EXTENDS Value", -> new ExtendsNode($1, $3)
// ]
//
// # A generic function invocation.
// Invocation: [
// o "Value Arguments", -> new CallNode($1, $2)
// o "Invocation Arguments", -> new CallNode($1, $2)
// ]
//
// # The list of arguments to a function invocation.
// Arguments: [
// o "CALL_START ArgList CALL_END", -> $2
// ]
//
// # Calling super.
// Super: [
// o "SUPER CALL_START ArgList CALL_END", -> new CallNode('super', $3)
// ]
//
// # The range literal.
// Range: [
// o "[ Expression . . Expression ]", -> new RangeNode($2, $5)
// o "[ Expression . . . Expression ]", -> new RangeNode($2, $6, true)
// ]
//
// # The slice literal.
// Slice: [
// o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode($2, $5)
// o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode($2, $6, true)
// ]
//
// # The array literal.
// Array: [
// o "[ ArgList ]", -> new ArrayNode($2)
// ]
//
// # A list of arguments to a method call, or as the contents of an array.
// ArgList: [
// o "", -> []
// o "Expression", -> val
// o "INDENT Expression", -> [$2]
// o "ArgList , Expression", -> $1.push $3
// o "ArgList TERMINATOR Expression", -> $1.push $3
// o "ArgList , TERMINATOR Expression", -> $1.push $4
// o "ArgList , INDENT Expression", -> $1.push $4
// o "ArgList OUTDENT", -> $1
// ]
//
// # Just simple, comma-separated, required arguments (no fancy syntax).
// SimpleArgs: [
// o "Expression", -> $1
// o "SimpleArgs , Expression", ->
// ([$1].push($3)).reduce (a, b) -> a.concat(b)
// ]
//
// # Try/catch/finally exception handling blocks.
// Try: [
// o "TRY Block Catch", -> new TryNode($2, $3[0], $3[1])
// o "TRY Block FINALLY Block", -> new TryNode($2, nil, nil, $4)
// o "TRY Block Catch FINALLY Block", -> new TryNode($2, $3[0], $3[1], $5)
// ]
//
// # A catch clause.
// Catch: [
// o "CATCH IDENTIFIER Block", -> [$2, $3]
// ]
//
// # Throw an exception.
// Throw: [
// o "THROW Expression", -> new ThrowNode($2)
// ]
//
// # Parenthetical expressions.
// Parenthetical: [
// o "( Expression )", -> new ParentheticalNode($2)
// ]
//
// # The while loop. (there is no do..while).
// While: [
// o "WHILE Expression Block", -> new WhileNode($2, $3)
// o "WHILE Expression", -> new WhileNode($2, nil)
// o "Expression WHILE Expression", -> new WhileNode($3, Expressions.wrap($1))
// ]
//
// # Array comprehensions, including guard and current index.
// # Looks a little confusing, check nodes.rb for the arguments to ForNode.
// For: [
// o "Expression FOR ForVariables ForSource", -> new ForNode($1, $4, $3[0], $3[1])
// o "FOR ForVariables ForSource Block", -> new ForNode($4, $3, $2[0], $2[1])
// ]
//
// # An array comprehension has variables for the current element and index.
// ForVariables: [
// o "IDENTIFIER", -> [$1]
// o "IDENTIFIER , IDENTIFIER", -> [$1, $3]
// ]
//
// # The source of the array comprehension can optionally be filtered.
// ForSource: [
// o "IN Expression", -> {source: $2}
// o "OF Expression", -> {source: $2, object: true}
// o "ForSource WHEN Expression", -> $1.filter: $3; $1
// o "ForSource BY Expression", -> $1.step: $3; $1
// ]
//
// # Switch/When blocks.
// Switch: [
// o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition($2)
// o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else($6)
// ]
//
// # The inner list of whens.
// Whens: [
// o "When", -> $1
// o "Whens When", -> $1.push $2
// ]
//
// # An individual when.
// When: [
// o "LEADING_WHEN SimpleArgs Block", -> new IfNode($2, $3, nil, {statement: true})
// o "LEADING_WHEN SimpleArgs Block TERMINATOR", -> new IfNode($2, $3, nil, {statement: true})
// o "Comment TERMINATOR When", -> $3.add_comment($1)
// ]
//
// # The most basic form of "if".
// IfBlock: [
// o "IF Expression Block", -> new IfNode($2, $3)
// ]
//
// # An elsif portion of an if-else block.
// ElsIf: [
// o "ELSE IfBlock", -> $2.force_statement()
// ]
//
// # Multiple elsifs can be chained together.
// ElsIfs: [
// o "ElsIf", -> $1
// o "ElsIfs ElsIf", -> $1.add_else($2)
// ]
//
// # Terminating else bodies are strictly optional.
// ElseBody: [
// o "", -> null
// o "ELSE Block", -> $2
// ]
//
// # All the alternatives for ending an if-else block.
// IfEnd: [
// o "ElseBody", -> $1
// o "ElsIfs ElseBody", -> $1.add_else($2)
// ]
//
// # The full complement of if blocks, including postfix one-liner ifs and unlesses.
// If: [
// o "IfBlock IfEnd", -> $1.add_else($2)
// o "Expression IF Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true})
// o "Expression UNLESS Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true, invert: true})
// ]
};
// Helpers ==============================================================
// Make the Jison parser.
@ -588,7 +501,8 @@
// Build the Jison parser from the token list, the BNF rule table and the
// operator table. 'Root' is named as the start symbol, matching the grammar
// rule that every parse ends in. Debug output is switched off.
parser = new Parser({
  tokens: tokens,
  bnf: bnf,
  operators: operators,
  startSymbol: 'Root'
}, {
  debug: false
});
@ -598,7 +512,7 @@
var token;
token = this.tokens[this.pos] || [""];
this.pos += 1;
// this.yylineno: token and token[1] and token[1][1]
this.yylineno = token[2];
this.yytext = token[1];
return token[0];
},

View File

@ -29,7 +29,7 @@
EXPRESSION_CLOSE = ['CATCH', 'WHEN', 'ELSE', 'FINALLY'].concat(EXPRESSION_TAIL);
// Tokens pairs that, in immediate succession, indicate an implicit call.
IMPLICIT_FUNC = ['IDENTIFIER', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END'];
IMPLICIT_END = ['IF', 'UNLESS', 'FOR', 'WHILE', "\n", 'OUTDENT'];
IMPLICIT_END = ['IF', 'UNLESS', 'FOR', 'WHILE', 'TERMINATOR', 'OUTDENT'];
IMPLICIT_CALL = ['IDENTIFIER', 'NUMBER', 'STRING', 'JS', 'REGEX', 'NEW', 'PARAM_START', 'TRY', 'DELETE', 'TYPEOF', 'SWITCH', 'ARGUMENTS', 'TRUE', 'FALSE', 'YES', 'NO', 'ON', 'OFF', '!', '!!', 'NOT', '->', '=>', '[', '(', '{'];
// The inverse mappings of token pairs we're trying to fix up.
INVERSES = {
@ -43,7 +43,7 @@
// Single-line flavors of block expressions that have unclosed endings.
// The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS = ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN'];
SINGLE_CLOSERS = ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START'];
SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START'];
// Rewrite the token stream in multiple passes, one logical filter at
// a time. This could certainly be changed into a single pass through the
// stream, with a big ol' efficient switch, but it's much nicer like this.
@ -64,14 +64,14 @@
// Allow the return value of the block to tell us how many tokens to move
// forwards (or backwards) in the stream, to make sure we don't miss anything
// as the stream changes length under our feet.
re.prototype.scan_tokens = function scan_tokens(yield) {
re.prototype.scan_tokens = function scan_tokens(block) {
var i, move;
i = 0;
while (true) {
if (!(this.tokens[i])) {
break;
}
move = yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i);
move = block(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i);
i += move;
}
return true;
@ -91,12 +91,12 @@
this.tokens.splice(i + 2, 1);
this.tokens.splice(i - 2, 1);
return 0;
} else if (prev[0] === "\n" && after[0] === 'INDENT') {
} else if (prev[0] === 'TERMINATOR' && after[0] === 'INDENT') {
this.tokens.splice(i + 2, 1);
this.tokens[i - 1] = after;
return 1;
} else if (prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
this.tokens.splice(i, 0, ["\n", "\n"]);
} else if (prev[0] !== 'TERMINATOR' && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]);
return 2;
} else {
return 1;
@ -110,7 +110,7 @@
// Leading newlines would introduce an ambiguity in the grammar, so we
// dispatch them here.
// Removes the first token of this.tokens when its tag (element 0) is the
// line-break tag -- "\n" before this change, 'TERMINATOR' after it -- and
// returns the removed token; otherwise falls through and returns undefined.
// NOTE(review): this.tokens[0] is indexed unconditionally, so an empty token
// list would throw here -- presumably the lexer always emits at least one
// token; confirm against the caller.
re.prototype.remove_leading_newlines = function remove_leading_newlines() {
if (this.tokens[0][0] === "\n") {
if (this.tokens[0][0] === 'TERMINATOR') {
return this.tokens.shift();
}
};
@ -119,7 +119,7 @@
re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() {
return this.scan_tokens((function(__this) {
var __func = function(prev, token, post, i) {
if (!(post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0 && token[0] === "\n")) {
if (!(post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0 && token[0] === 'TERMINATOR')) {
return 1;
}
this.tokens.splice(i, 1);

View File

@ -1,4 +1,3 @@
sys: require 'sys'
Rewriter: require('./rewriter').Rewriter
# The lexer reads a stream of CoffeeScript and divvys it up into tagged
@ -70,7 +69,6 @@ lex::tokenize: (code) ->
while this.i < this.code.length
this.chunk: this.code.slice(this.i)
this.extract_next_token()
# sys.puts "original stream: " + this.tokens if process.ENV['VERBOSE']
this.close_indentation()
(new Rewriter()).rewrite this.tokens
@ -157,7 +155,7 @@ lex::comment_token: ->
return false unless comment: this.match COMMENT, 1
this.line += comment.match(MULTILINER).length
this.token 'COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER)
this.token "\n", "\n"
this.token 'TERMINATOR', "\n"
this.i += comment.length
true
@ -187,7 +185,7 @@ lex::outdent_token: (move_out) ->
last_indent: this.indents.pop()
this.token 'OUTDENT', last_indent
move_out -= last_indent
this.token "\n", "\n"
this.token 'TERMINATOR', "\n"
true
# Matches and consumes non-meaningful whitespace.
@ -200,7 +198,7 @@ lex::whitespace_token: ->
# Multiple newlines get merged together.
# Use a trailing \ to escape newlines.
lex::newline_token: (newlines) ->
this.token "\n", "\n" unless this.value() is "\n"
this.token 'TERMINATOR', "\n" unless this.value() is "\n"
true
# Tokens to explicitly escape newlines are removed once their job is done.
@ -217,6 +215,7 @@ lex::literal_token: ->
this.tag_parameters() if value and value.match(CODE)
value ||= this.chunk.substr(0, 1)
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
tag: 'TERMINATOR' if value == ';'
if this.value() isnt this.spaced and CALLABLE.indexOf(this.tag()) >= 0
tag: 'CALL_START' if value is '('
tag: 'INDEX_START' if value is '['
@ -244,12 +243,12 @@ lex::value: (index, val) ->
tok[1]
# Count the occurrences of a character in a string.
lex::count: (string, letter) ->
  # Returns how many times `letter` appears in `string` (0 when it never does).
  # An empty `letter` matches at every position, so indexOf would never report
  # -1 and the loop below could not terminate; treat it as "no occurrences".
  return 0 unless letter
  # `num` is the running tally and the return value.
  num: 0
  pos: string.indexOf(letter)
  while pos isnt -1
    num += 1
    # Resume the search one position past the previous hit.
    pos: string.indexOf(letter, pos + 1)
  num
# Attempt to match a string against the current chunk, returning the indexed

View File

@ -1,4 +1,5 @@
exports.Node: -> this.values: arguments
exports.Node: -> @values: arguments
exports.Node.wrap: (values) -> @values: values
exports.Expressions : exports.Node
exports.LiteralNode : exports.Node

View File

@ -1,4 +1,5 @@
Parser: require('jison').Parser
process.mixin require './nodes'
# DSL ===================================================================
@ -45,55 +46,49 @@ grammar: {
# All parsing will end in this rule, being the trunk of the AST.
Root: [
o "", -> new Expressions()
o "Terminator", -> new Expressions()
o "TERMINATOR", -> new Expressions()
o "Expressions"
o "Block Terminator"
o "Block TERMINATOR"
]
# Any list of expressions or method body, separated by line breaks or semicolons.
Expressions: [
o "Expression", -> Expressions.wrap([$1])
o "Expressions Terminator Expression", -> $1.push($3)
o "Expressions Terminator"
o "Expressions TERMINATOR Expression", -> $1.push($3)
o "Expressions TERMINATOR"
]
# All types of expressions in our language. The basic unit of CoffeeScript
# is the expression.
Expression: [
o "Value"
o "Call"
o "Code"
o "Operation"
o "Assign"
o "If"
o "Try"
o "Throw"
o "Return"
o "While"
o "For"
o "Switch"
o "Extends"
o "Splat"
o "Existence"
o "Comment"
# o "Call"
# o "Code"
# o "Operation"
# o "Assign"
# o "If"
# o "Try"
# o "Throw"
# o "Return"
# o "While"
# o "For"
# o "Switch"
# o "Extends"
# o "Splat"
# o "Existence"
# o "Comment"
]
# A block of expressions. Note that the Rewriter will convert some postfix
# forms into blocks for us, by altering the token stream.
Block: [
o "INDENT Expressions OUTDENT", -> $2
o "INDENT OUTDENT", -> new Expressions()
]
# Tokens that can terminate an expression.
Terminator: [
o "\n"
o ";"
]
# # A block of expressions. Note that the Rewriter will convert some postfix
# # forms into blocks for us, by altering the token stream.
# Block: [
# o "INDENT Expressions OUTDENT", -> $2
# o "INDENT OUTDENT", -> new Expressions()
# ]
# All hard-coded values. These can be printed straight to JavaScript.
Literal: [
o "NUMBER", -> new LiteralNode($1)
o "NUMBER", -> new LiteralNode(yytext)
o "STRING", -> new LiteralNode($1)
o "JS", -> new LiteralNode($1)
o "REGEX", -> new LiteralNode($1)
@ -108,387 +103,387 @@ grammar: {
o "OFF", -> new LiteralNode(false)
]
# Assignment to a variable (or index).
Assign: [
o "Value ASSIGN Expression", -> new AssignNode($1, $3)
]
# Assignment within an object literal (can be quoted).
AssignObj: [
o "IDENTIFIER ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
o "STRING ASSIGN Expression", -> new AssignNode(new ValueNode(new LiteralNode($1)), $3, 'object')
o "Comment"
]
# A return statement.
Return: [
o "RETURN Expression", -> new ReturnNode($2)
o "RETURN", -> new ReturnNode(new ValueNode(new LiteralNode('null')))
]
# A comment.
Comment: [
o "COMMENT", -> new CommentNode($1)
]
# Arithmetic and logical operators
# For Ruby's operator precedence, see:
# https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
Operation: [
o "! Expression", -> new OpNode($1, $2)
o "!! Expression", -> new OpNode($1, $2)
o "- Expression", -> new OpNode($1, $2)
o "+ Expression", -> new OpNode($1, $2)
o "NOT Expression", -> new OpNode($1, $2)
o "~ Expression", -> new OpNode($1, $2)
o "-- Expression", -> new OpNode($1, $2)
o "++ Expression", -> new OpNode($1, $2)
o "DELETE Expression", -> new OpNode($1, $2)
o "TYPEOF Expression", -> new OpNode($1, $2)
o "Expression --", -> new OpNode($2, $1, null, true)
o "Expression ++", -> new OpNode($2, $1, null, true)
o "Expression * Expression", -> new OpNode($2, $1, $3)
o "Expression / Expression", -> new OpNode($2, $1, $3)
o "Expression % Expression", -> new OpNode($2, $1, $3)
o "Expression + Expression", -> new OpNode($2, $1, $3)
o "Expression - Expression", -> new OpNode($2, $1, $3)
o "Expression << Expression", -> new OpNode($2, $1, $3)
o "Expression >> Expression", -> new OpNode($2, $1, $3)
o "Expression >>> Expression", -> new OpNode($2, $1, $3)
o "Expression & Expression", -> new OpNode($2, $1, $3)
o "Expression | Expression", -> new OpNode($2, $1, $3)
o "Expression ^ Expression", -> new OpNode($2, $1, $3)
o "Expression <= Expression", -> new OpNode($2, $1, $3)
o "Expression < Expression", -> new OpNode($2, $1, $3)
o "Expression > Expression", -> new OpNode($2, $1, $3)
o "Expression >= Expression", -> new OpNode($2, $1, $3)
o "Expression == Expression", -> new OpNode($2, $1, $3)
o "Expression != Expression", -> new OpNode($2, $1, $3)
o "Expression IS Expression", -> new OpNode($2, $1, $3)
o "Expression ISNT Expression", -> new OpNode($2, $1, $3)
o "Expression && Expression", -> new OpNode($2, $1, $3)
o "Expression || Expression", -> new OpNode($2, $1, $3)
o "Expression AND Expression", -> new OpNode($2, $1, $3)
o "Expression OR Expression", -> new OpNode($2, $1, $3)
o "Expression ? Expression", -> new OpNode($2, $1, $3)
o "Expression -= Expression", -> new OpNode($2, $1, $3)
o "Expression += Expression", -> new OpNode($2, $1, $3)
o "Expression /= Expression", -> new OpNode($2, $1, $3)
o "Expression *= Expression", -> new OpNode($2, $1, $3)
o "Expression %= Expression", -> new OpNode($2, $1, $3)
o "Expression ||= Expression", -> new OpNode($2, $1, $3)
o "Expression &&= Expression", -> new OpNode($2, $1, $3)
o "Expression ?= Expression", -> new OpNode($2, $1, $3)
o "Expression INSTANCEOF Expression", -> new OpNode($2, $1, $3)
o "Expression IN Expression", -> new OpNode($2, $1, $3)
]
# Try abbreviated expressions to make the grammar build faster:
# UnaryOp: [
# o "!"
# o "!!"
# o "NOT"
# o "~"
# o "--"
# o "++"
# o "DELETE"
# o "TYPEOF"
# # Assignment to a variable (or index).
# Assign: [
# o "Value ASSIGN Expression", -> new AssignNode($1, $3)
# ]
#
# BinaryOp: [
# o "*"
# o "/"
# o "%"
# o "+"
# o "-"
# o "<<"
# o ">>"
# o ">>>"
# o "&"
# o "|"
# o "^"
# o "<="
# o "<"
# o ">"
# o ">="
# o "=="
# o "!="
# o "IS"
# o "ISNT"
# o "&&"
# o "||"
# o "AND"
# o "OR"
# o "?"
# o "-="
# o "+="
# o "/="
# o "*="
# o "%="
# o "||="
# o "&&="
# o "?="
# o "INSTANCEOF"
# o "IN"
# # Assignment within an object literal (can be quoted).
# AssignObj: [
# o "IDENTIFIER ASSIGN Expression", -> new AssignNode(new ValueNode($1), $3, 'object')
# o "STRING ASSIGN Expression", -> new AssignNode(new ValueNode(new LiteralNode($1)), $3, 'object')
# o "Comment"
# ]
#
# # A return statement.
# Return: [
# o "RETURN Expression", -> new ReturnNode($2)
# o "RETURN", -> new ReturnNode(new ValueNode(new LiteralNode('null')))
# ]
#
# # A comment.
# Comment: [
# o "COMMENT", -> new CommentNode($1)
# ]
#
# # Arithmetic and logical operators
# # For Ruby's operator precedence, see:
# # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
# Operation: [
# o "Expression BinaryOp Expression", -> new OpNode($2, $1, $3)
# o "UnaryOp Expression", -> new OpNode($1, $2)
# o "! Expression", -> new OpNode($1, $2)
# o "!! Expression", -> new OpNode($1, $2)
# o "- Expression", -> new OpNode($1, $2)
# o "+ Expression", -> new OpNode($1, $2)
# o "NOT Expression", -> new OpNode($1, $2)
# o "~ Expression", -> new OpNode($1, $2)
# o "-- Expression", -> new OpNode($1, $2)
# o "++ Expression", -> new OpNode($1, $2)
# o "DELETE Expression", -> new OpNode($1, $2)
# o "TYPEOF Expression", -> new OpNode($1, $2)
# o "Expression --", -> new OpNode($2, $1, null, true)
# o "Expression ++", -> new OpNode($2, $1, null, true)
#
# o "Expression * Expression", -> new OpNode($2, $1, $3)
# o "Expression / Expression", -> new OpNode($2, $1, $3)
# o "Expression % Expression", -> new OpNode($2, $1, $3)
#
# o "Expression + Expression", -> new OpNode($2, $1, $3)
# o "Expression - Expression", -> new OpNode($2, $1, $3)
#
# o "Expression << Expression", -> new OpNode($2, $1, $3)
# o "Expression >> Expression", -> new OpNode($2, $1, $3)
# o "Expression >>> Expression", -> new OpNode($2, $1, $3)
#
# o "Expression & Expression", -> new OpNode($2, $1, $3)
# o "Expression | Expression", -> new OpNode($2, $1, $3)
# o "Expression ^ Expression", -> new OpNode($2, $1, $3)
#
# o "Expression <= Expression", -> new OpNode($2, $1, $3)
# o "Expression < Expression", -> new OpNode($2, $1, $3)
# o "Expression > Expression", -> new OpNode($2, $1, $3)
# o "Expression >= Expression", -> new OpNode($2, $1, $3)
#
# o "Expression == Expression", -> new OpNode($2, $1, $3)
# o "Expression != Expression", -> new OpNode($2, $1, $3)
# o "Expression IS Expression", -> new OpNode($2, $1, $3)
# o "Expression ISNT Expression", -> new OpNode($2, $1, $3)
#
# o "Expression && Expression", -> new OpNode($2, $1, $3)
# o "Expression || Expression", -> new OpNode($2, $1, $3)
# o "Expression AND Expression", -> new OpNode($2, $1, $3)
# o "Expression OR Expression", -> new OpNode($2, $1, $3)
# o "Expression ? Expression", -> new OpNode($2, $1, $3)
#
# o "Expression -= Expression", -> new OpNode($2, $1, $3)
# o "Expression += Expression", -> new OpNode($2, $1, $3)
# o "Expression /= Expression", -> new OpNode($2, $1, $3)
# o "Expression *= Expression", -> new OpNode($2, $1, $3)
# o "Expression %= Expression", -> new OpNode($2, $1, $3)
# o "Expression ||= Expression", -> new OpNode($2, $1, $3)
# o "Expression &&= Expression", -> new OpNode($2, $1, $3)
# o "Expression ?= Expression", -> new OpNode($2, $1, $3)
#
# o "Expression INSTANCEOF Expression", -> new OpNode($2, $1, $3)
# o "Expression IN Expression", -> new OpNode($2, $1, $3)
# ]
#
# # Try abbreviated expressions to make the grammar build faster:
#
# # UnaryOp: [
# # o "!"
# # o "!!"
# # o "NOT"
# # o "~"
# # o "--"
# # o "++"
# # o "DELETE"
# # o "TYPEOF"
# # ]
# #
# # BinaryOp: [
# # o "*"
# # o "/"
# # o "%"
# # o "+"
# # o "-"
# # o "<<"
# # o ">>"
# # o ">>>"
# # o "&"
# # o "|"
# # o "^"
# # o "<="
# # o "<"
# # o ">"
# # o ">="
# # o "=="
# # o "!="
# # o "IS"
# # o "ISNT"
# # o "&&"
# # o "||"
# # o "AND"
# # o "OR"
# # o "?"
# # o "-="
# # o "+="
# # o "/="
# # o "*="
# # o "%="
# # o "||="
# # o "&&="
# # o "?="
# # o "INSTANCEOF"
# # o "IN"
# # ]
# #
# # Operation: [
# # o "Expression BinaryOp Expression", -> new OpNode($2, $1, $3)
# # o "UnaryOp Expression", -> new OpNode($1, $2)
# # ]
#
# # The existence operator.
# Existence: [
# o "Expression ?", -> new ExistenceNode($1)
# ]
#
# # Function definition.
# Code: [
# o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode($2, $5, $4)
# o "FuncGlyph Block", -> new CodeNode([], $2, $1)
# ]
#
# # The symbols to signify functions, and bound functions.
# FuncGlyph: [
# o "->", -> 'func'
# o "=>", -> 'boundfunc'
# ]
#
# # The parameters to a function definition.
# ParamList: [
# o "Param", -> [$1]
# o "ParamList , Param", -> $1.push($3)
# ]
#
# # A Parameter (or ParamSplat) in a function definition.
# Param: [
# o "PARAM"
# o "PARAM . . .", -> new SplatNode($1)
# ]
#
# # A regular splat.
# Splat: [
# o "Expression . . .", -> new SplatNode($1)
# ]
# The existence operator.
Existence: [
o "Expression ?", -> new ExistenceNode($1)
]
# Function definition.
Code: [
o "PARAM_START ParamList PARAM_END FuncGlyph Block", -> new CodeNode($2, $5, $4)
o "FuncGlyph Block", -> new CodeNode([], $2, $1)
]
# The symbols to signify functions, and bound functions.
FuncGlyph: [
o "->", -> 'func'
o "=>", -> 'boundfunc'
]
# The parameters to a function definition.
ParamList: [
o "Param", -> [$1]
o "ParamList , Param", -> $1.push($3)
]
# A Parameter (or ParamSplat) in a function definition.
Param: [
o "PARAM"
o "PARAM . . .", -> new SplatNode($1)
]
# A regular splat.
Splat: [
o "Expression . . .", -> new SplatNode($1)
]
# Expressions that can be treated as values.
Value: [
o "IDENTIFIER", -> new ValueNode($1)
o "IDENTIFIER", -> new ValueNode(yytext)
o "Literal", -> new ValueNode($1)
o "Array", -> new ValueNode($1)
o "Object", -> new ValueNode($1)
o "Parenthetical", -> new ValueNode($1)
o "Range", -> new ValueNode($1)
o "Value Accessor", -> $1.push($2)
o "Invocation Accessor", -> new ValueNode($1, [$2])
# o "Array", -> new ValueNode($1)
# o "Object", -> new ValueNode($1)
# o "Parenthetical", -> new ValueNode($1)
# o "Range", -> new ValueNode($1)
# o "Value Accessor", -> $1.push($2)
# o "Invocation Accessor", -> new ValueNode($1, [$2])
]
# Accessing into an object or array, through dot or index notation.
Accessor: [
o "PROPERTY_ACCESS IDENTIFIER", -> new AccessorNode($2)
o "PROTOTYPE_ACCESS IDENTIFIER", -> new AccessorNode($2, 'prototype')
o "SOAK_ACCESS IDENTIFIER", -> new AccessorNode($2, 'soak')
o "Index"
o "Slice", -> new SliceNode($1)
]
# Indexing into an object or array.
Index: [
o "INDEX_START Expression INDEX_END", -> new IndexNode($2)
]
# An object literal.
Object: [
o "{ AssignList }", -> new ObjectNode($2)
]
# Assignment within an object literal (comma or newline separated).
AssignList: [
o "", -> []
o "AssignObj", -> [$1]
o "AssignList , AssignObj", -> $1.push $3
o "AssignList Terminator AssignObj", -> $1.push $3
o "AssignList , Terminator AssignObj", -> $1.push $4
o "INDENT AssignList OUTDENT", -> $2
]
# All flavors of function call (instantiation, super, and regular).
Call: [
o "Invocation", -> $1
o "NEW Invocation", -> $2.new_instance()
o "Super", -> $1
]
# Extending an object's prototype.
Extends: [
o "Value EXTENDS Value", -> new ExtendsNode($1, $3)
]
# A generic function invocation.
Invocation: [
o "Value Arguments", -> new CallNode($1, $2)
o "Invocation Arguments", -> new CallNode($1, $2)
]
# The list of arguments to a function invocation.
Arguments: [
o "CALL_START ArgList CALL_END", -> $2
]
# Calling super.
Super: [
o "SUPER CALL_START ArgList CALL_END", -> new CallNode('super', $3)
]
# The range literal.
Range: [
o "[ Expression . . Expression ]", -> new RangeNode($2, $5)
o "[ Expression . . . Expression ]", -> new RangeNode($2, $6, true)
]
# The slice literal.
Slice: [
o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode($2, $5)
o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode($2, $6, true)
]
# The array literal.
Array: [
o "[ ArgList ]", -> new ArrayNode($2)
]
# A list of arguments to a method call, or as the contents of an array.
ArgList: [
o "", -> []
o "Expression", -> val
o "INDENT Expression", -> [$2]
o "ArgList , Expression", -> $1.push $3
o "ArgList Terminator Expression", -> $1.push $3
o "ArgList , Terminator Expression", -> $1.push $4
o "ArgList , INDENT Expression", -> $1.push $4
o "ArgList OUTDENT", -> $1
]
# Just simple, comma-separated, required arguments (no fancy syntax).
SimpleArgs: [
o "Expression", -> $1
o "SimpleArgs , Expression", ->
([$1].push($3)).reduce (a, b) -> a.concat(b)
]
# Try/catch/finally exception handling blocks.
Try: [
o "TRY Block Catch", -> new TryNode($2, $3[0], $3[1])
o "TRY Block FINALLY Block", -> new TryNode($2, nil, nil, $4)
o "TRY Block Catch FINALLY Block", -> new TryNode($2, $3[0], $3[1], $5)
]
# A catch clause.
Catch: [
o "CATCH IDENTIFIER Block", -> [$2, $3]
]
# Throw an exception.
Throw: [
o "THROW Expression", -> new ThrowNode($2)
]
# Parenthetical expressions.
Parenthetical: [
o "( Expression )", -> new ParentheticalNode($2)
]
# The while loop. (there is no do..while).
While: [
o "WHILE Expression Block", -> new WhileNode($2, $3)
o "WHILE Expression", -> new WhileNode($2, nil)
o "Expression WHILE Expression", -> new WhileNode($3, Expressions.wrap($1))
]
# Array comprehensions, including guard and current index.
# Looks a little confusing, check nodes.rb for the arguments to ForNode.
For: [
o "Expression FOR ForVariables ForSource", -> new ForNode($1, $4, $3[0], $3[1])
o "FOR ForVariables ForSource Block", -> new ForNode($4, $3, $2[0], $2[1])
]
# An array comprehension has variables for the current element and index.
ForVariables: [
o "IDENTIFIER", -> [$1]
o "IDENTIFIER , IDENTIFIER", -> [$1, $3]
]
# The source of the array comprehension can optionally be filtered.
ForSource: [
o "IN Expression", -> {source: $2}
o "OF Expression", -> {source: $2, object: true}
o "ForSource WHEN Expression", -> $1.filter: $3; $1
o "ForSource BY Expression", -> $1.step: $3; $1
]
# Switch/When blocks.
Switch: [
o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition($2)
o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else($6)
]
# The inner list of whens.
Whens: [
o "When", -> $1
o "Whens When", -> $1.push $2
]
# An individual when.
When: [
o "LEADING_WHEN SimpleArgs Block", -> new IfNode($2, $3, nil, {statement: true})
o "LEADING_WHEN SimpleArgs Block Terminator", -> new IfNode($2, $3, nil, {statement: true})
o "Comment Terminator When", -> $3.add_comment($1)
]
# The most basic form of "if".
IfBlock: [
o "IF Expression Block", -> new IfNode($2, $3)
]
# An elsif portion of an if-else block.
ElsIf: [
o "ELSE IfBlock", -> $2.force_statement()
]
# Multiple elsifs can be chained together.
ElsIfs: [
o "ElsIf", -> $1
o "ElsIfs ElsIf", -> $1.add_else($2)
]
# Terminating else bodies are strictly optional.
ElseBody: [
o "", -> null
o "ELSE Block", -> $2
]
# All the alternatives for ending an if-else block.
IfEnd: [
o "ElseBody", -> $1
o "ElsIfs ElseBody", -> $1.add_else($2)
]
# The full complement of if blocks, including postfix one-liner ifs and unlesses.
If: [
o "IfBlock IfEnd", -> $1.add_else($2)
o "Expression IF Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true})
o "Expression UNLESS Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true, invert: true})
]
# # Accessing into an object or array, through dot or index notation.
# Accessor: [
# o "PROPERTY_ACCESS IDENTIFIER", -> new AccessorNode($2)
# o "PROTOTYPE_ACCESS IDENTIFIER", -> new AccessorNode($2, 'prototype')
# o "SOAK_ACCESS IDENTIFIER", -> new AccessorNode($2, 'soak')
# o "Index"
# o "Slice", -> new SliceNode($1)
# ]
#
# # Indexing into an object or array.
# Index: [
# o "INDEX_START Expression INDEX_END", -> new IndexNode($2)
# ]
#
# # An object literal.
# Object: [
# o "{ AssignList }", -> new ObjectNode($2)
# ]
#
# # Assignment within an object literal (comma or newline separated).
# AssignList: [
# o "", -> []
# o "AssignObj", -> [$1]
# o "AssignList , AssignObj", -> $1.push $3
# o "AssignList TERMINATOR AssignObj", -> $1.push $3
# o "AssignList , TERMINATOR AssignObj", -> $1.push $4
# o "INDENT AssignList OUTDENT", -> $2
# ]
#
# # All flavors of function call (instantiation, super, and regular).
# Call: [
# o "Invocation", -> $1
# o "NEW Invocation", -> $2.new_instance()
# o "Super", -> $1
# ]
#
# # Extending an object's prototype.
# Extends: [
# o "Value EXTENDS Value", -> new ExtendsNode($1, $3)
# ]
#
# # A generic function invocation.
# Invocation: [
# o "Value Arguments", -> new CallNode($1, $2)
# o "Invocation Arguments", -> new CallNode($1, $2)
# ]
#
# # The list of arguments to a function invocation.
# Arguments: [
# o "CALL_START ArgList CALL_END", -> $2
# ]
#
# # Calling super.
# Super: [
# o "SUPER CALL_START ArgList CALL_END", -> new CallNode('super', $3)
# ]
#
# # The range literal.
# Range: [
# o "[ Expression . . Expression ]", -> new RangeNode($2, $5)
# o "[ Expression . . . Expression ]", -> new RangeNode($2, $6, true)
# ]
#
# # The slice literal.
# Slice: [
# o "INDEX_START Expression . . Expression INDEX_END", -> new RangeNode($2, $5)
# o "INDEX_START Expression . . . Expression INDEX_END", -> new RangeNode($2, $6, true)
# ]
#
# # The array literal.
# Array: [
# o "[ ArgList ]", -> new ArrayNode($2)
# ]
#
# # A list of arguments to a method call, or as the contents of an array.
# ArgList: [
# o "", -> []
# o "Expression", -> val
# o "INDENT Expression", -> [$2]
# o "ArgList , Expression", -> $1.push $3
# o "ArgList TERMINATOR Expression", -> $1.push $3
# o "ArgList , TERMINATOR Expression", -> $1.push $4
# o "ArgList , INDENT Expression", -> $1.push $4
# o "ArgList OUTDENT", -> $1
# ]
#
# # Just simple, comma-separated, required arguments (no fancy syntax).
# SimpleArgs: [
# o "Expression", -> $1
# o "SimpleArgs , Expression", ->
# ([$1].push($3)).reduce (a, b) -> a.concat(b)
# ]
#
# # Try/catch/finally exception handling blocks.
# Try: [
# o "TRY Block Catch", -> new TryNode($2, $3[0], $3[1])
# o "TRY Block FINALLY Block", -> new TryNode($2, nil, nil, $4)
# o "TRY Block Catch FINALLY Block", -> new TryNode($2, $3[0], $3[1], $5)
# ]
#
# # A catch clause.
# Catch: [
# o "CATCH IDENTIFIER Block", -> [$2, $3]
# ]
#
# # Throw an exception.
# Throw: [
# o "THROW Expression", -> new ThrowNode($2)
# ]
#
# # Parenthetical expressions.
# Parenthetical: [
# o "( Expression )", -> new ParentheticalNode($2)
# ]
#
# # The while loop. (there is no do..while).
# While: [
# o "WHILE Expression Block", -> new WhileNode($2, $3)
# o "WHILE Expression", -> new WhileNode($2, nil)
# o "Expression WHILE Expression", -> new WhileNode($3, Expressions.wrap($1))
# ]
#
# # Array comprehensions, including guard and current index.
# # Looks a little confusing, check nodes.rb for the arguments to ForNode.
# For: [
# o "Expression FOR ForVariables ForSource", -> new ForNode($1, $4, $3[0], $3[1])
# o "FOR ForVariables ForSource Block", -> new ForNode($4, $3, $2[0], $2[1])
# ]
#
# # An array comprehension has variables for the current element and index.
# ForVariables: [
# o "IDENTIFIER", -> [$1]
# o "IDENTIFIER , IDENTIFIER", -> [$1, $3]
# ]
#
# # The source of the array comprehension can optionally be filtered.
# ForSource: [
# o "IN Expression", -> {source: $2}
# o "OF Expression", -> {source: $2, object: true}
# o "ForSource WHEN Expression", -> $1.filter: $3; $1
# o "ForSource BY Expression", -> $1.step: $3; $1
# ]
#
# # Switch/When blocks.
# Switch: [
# o "SWITCH Expression INDENT Whens OUTDENT", -> $4.rewrite_condition($2)
# o "SWITCH Expression INDENT Whens ELSE Block OUTDENT", -> $4.rewrite_condition($2).add_else($6)
# ]
#
# # The inner list of whens.
# Whens: [
# o "When", -> $1
# o "Whens When", -> $1.push $2
# ]
#
# # An individual when.
# When: [
# o "LEADING_WHEN SimpleArgs Block", -> new IfNode($2, $3, nil, {statement: true})
# o "LEADING_WHEN SimpleArgs Block TERMINATOR", -> new IfNode($2, $3, nil, {statement: true})
# o "Comment TERMINATOR When", -> $3.add_comment($1)
# ]
#
# # The most basic form of "if".
# IfBlock: [
# o "IF Expression Block", -> new IfNode($2, $3)
# ]
#
# # An elsif portion of an if-else block.
# ElsIf: [
# o "ELSE IfBlock", -> $2.force_statement()
# ]
#
# # Multiple elsifs can be chained together.
# ElsIfs: [
# o "ElsIf", -> $1
# o "ElsIfs ElsIf", -> $1.add_else($2)
# ]
#
# # Terminating else bodies are strictly optional.
# ElseBody: [
# o "", -> null
# o "ELSE Block", -> $2
# ]
#
# # All the alternatives for ending an if-else block.
# IfEnd: [
# o "ElseBody", -> $1
# o "ElsIfs ElseBody", -> $1.add_else($2)
# ]
#
# # The full complement of if blocks, including postfix one-liner ifs and unlesses.
# If: [
# o "IfBlock IfEnd", -> $1.add_else($2)
# o "Expression IF Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true})
# o "Expression UNLESS Expression", -> new IfNode($3, Expressions.wrap($1), nil, {statement: true, invert: true})
# ]
}
@ -506,14 +501,14 @@ for name, non_terminal of grammar
option[1] = "return " + option[1]
option
tokens: tokens.join(" ")
parser: new Parser({tokens: tokens, bnf: bnf, operators: operators}, {debug: false})
parser: new Parser({tokens: tokens, bnf: bnf, operators: operators, startSymbol: 'Root'}, {debug: false})
# Thin wrapper around the real lexer
parser.lexer: {
lex: ->
token: this.tokens[this.pos] or [""]
this.pos += 1
# this.yylineno: token and token[1] and token[1][1]
this.yylineno: token[2]
this.yytext: token[1]
token[0]
setInput: (tokens) ->

View File

@ -18,7 +18,7 @@ EXPRESSION_CLOSE: ['CATCH', 'WHEN', 'ELSE', 'FINALLY'].concat(EXPRESSION_TAIL)
# Token pairs that, in immediate succession, indicate an implicit call.
IMPLICIT_FUNC: ['IDENTIFIER', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END']
IMPLICIT_END: ['IF', 'UNLESS', 'FOR', 'WHILE', "\n", 'OUTDENT']
IMPLICIT_END: ['IF', 'UNLESS', 'FOR', 'WHILE', 'TERMINATOR', 'OUTDENT']
IMPLICIT_CALL: ['IDENTIFIER', 'NUMBER', 'STRING', 'JS', 'REGEX', 'NEW', 'PARAM_START',
'TRY', 'DELETE', 'TYPEOF', 'SWITCH', 'ARGUMENTS',
'TRUE', 'FALSE', 'YES', 'NO', 'ON', 'OFF', '!', '!!', 'NOT',
@ -33,7 +33,7 @@ for pair in BALANCED_PAIRS
# Single-line flavors of block expressions that have unclosed endings.
# The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS: ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN']
SINGLE_CLOSERS: ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START']
SINGLE_CLOSERS: ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START']
# Rewrite the token stream in multiple passes, one logical filter at
# a time. This could certainly be changed into a single pass through the
@ -55,11 +55,11 @@ re::rewrite: (tokens) ->
# Allow the return value of the block to tell us how many tokens to move
# forwards (or backwards) in the stream, to make sure we don't miss anything
# as the stream changes length under our feet.
# Walks this.tokens from index 0, calling the supplied callback once per
# position. The callback parameter is named `yield` before this change and
# `block` after it.
re::scan_tokens: (yield) ->
re::scan_tokens: (block) ->
i: 0
while true
# Stop at the first missing (falsy) slot, i.e. once i runs off the stream.
break unless this.tokens[i]
# The callback receives (previous token, current token, next token, index);
# the neighbours are undefined at either end of the stream.
move: yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
move: block(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
# The callback's return value is the step applied to the index, so it must
# be a number; 0 re-examines the same position.
i += move
# Always evaluates to true once the walk finishes.
true
@ -77,12 +77,12 @@ re::adjust_comments: ->
this.tokens.splice(i + 2, 1)
this.tokens.splice(i - 2, 1)
return 0
else if prev[0] is "\n" and after[0] is 'INDENT'
else if prev[0] is 'TERMINATOR' and after[0] is 'INDENT'
this.tokens.splice(i + 2, 1)
this.tokens[i - 1]: after
return 1
else if prev[0] isnt "\n" and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT'
this.tokens.splice(i, 0, ["\n", "\n"])
else if prev[0] isnt 'TERMINATOR' and prev[0] isnt 'INDENT' and prev[0] isnt 'OUTDENT'
this.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]])
return 2
else
return 1
@ -90,13 +90,13 @@ re::adjust_comments: ->
# Leading newlines would introduce an ambiguity in the grammar, so we
# dispatch them here.
# Shifts the first token off this.tokens when its tag (element 0) is the
# line-break tag -- "\n" before this change, 'TERMINATOR' after it.
# NOTE(review): this.tokens[0] is indexed unconditionally, so an empty token
# list would throw here -- presumably the lexer always emits at least one
# token; confirm against the caller.
re::remove_leading_newlines: ->
this.tokens.shift() if this.tokens[0][0] is "\n"
this.tokens.shift() if this.tokens[0][0] is 'TERMINATOR'
# Some blocks occur in the middle of expressions -- when we're expecting
# this, remove their trailing newlines.
re::remove_mid_expression_newlines: ->
this.scan_tokens (prev, token, post, i) =>
return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is "\n"
return 1 unless post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0 and token[0] is 'TERMINATOR'
this.tokens.splice(i, 1)
return 0