refactored operator parsing

This commit is contained in:
satyr 2010-10-26 03:56:02 +09:00
parent 0d6d221568
commit ad79e142ca
8 changed files with 44 additions and 73 deletions

View File

@ -163,13 +163,13 @@
}), o('This')
],
Accessor: [
o('PROPERTY_ACCESS Identifier', function() {
o('. Identifier', function() {
return new Accessor($2);
}), o('PROTOTYPE_ACCESS Identifier', function() {
}), o(':: Identifier', function() {
return new Accessor($2, 'prototype');
}), o('::', function() {
return new Accessor(new Literal('prototype'));
}), o('SOAK_ACCESS Identifier', function() {
}), o('?. Identifier', function() {
return new Accessor($2, 'soak');
}), o('Index')
],
@ -590,7 +590,7 @@
})
]
};
operators = [['left', 'CALL_START', 'CALL_END'], ['nonassoc', '++', '--'], ['left', '?'], ['right', 'UNARY'], ['left', 'MATH'], ['left', '+', '-'], ['left', 'SHIFT'], ['left', 'RELATION'], ['left', 'COMPARE'], ['left', 'LOGIC'], ['left', '.'], ['nonassoc', 'INDENT', 'OUTDENT'], ['right', '=', ':', 'COMPOUND_ASSIGN', 'RETURN'], ['right', 'WHEN', 'LEADING_WHEN', 'FORIN', 'FOROF', 'FROM', 'TO', 'BY', 'THROW', 'IF', 'UNLESS', 'ELSE', 'FOR', 'WHILE', 'UNTIL', 'LOOP', 'SUPER', 'CLASS', 'EXTENDS'], ['right', 'POST_IF', 'POST_UNLESS']];
operators = [['left', '.', '?.', '::'], ['left', 'CALL_START', 'CALL_END'], ['nonassoc', '++', '--'], ['left', '?'], ['right', 'UNARY'], ['left', 'MATH'], ['left', '+', '-'], ['left', 'SHIFT'], ['left', 'RELATION'], ['left', 'COMPARE'], ['left', 'LOGIC'], ['nonassoc', 'INDENT', 'OUTDENT'], ['right', '=', ':', 'COMPOUND_ASSIGN', 'RETURN'], ['right', 'WHEN', 'LEADING_WHEN', 'FORIN', 'FOROF', 'FROM', 'TO', 'BY', 'THROW', 'IF', 'UNLESS', 'ELSE', 'FOR', 'WHILE', 'UNTIL', 'LOOP', 'SUPER', 'CLASS', 'EXTENDS'], ['right', 'POST_IF', 'POST_UNLESS']];
tokens = [];
for (name in grammar) {
alternatives = grammar[name];

View File

@ -1,5 +1,5 @@
(function() {
var ASSIGNED, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NEXT_CHARACTER, NEXT_ELLIPSIS, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, _ref, compact, count, last, op, starts;
var ASSIGNED, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMPARE, COMPOUND_ASSIGN, HEREDOC, HEREDOC_INDENT, HEREGEX, HEREGEX_OMIT, IDENTIFIER, INDEXABLE, JSTOKEN, JS_FORBIDDEN, JS_KEYWORDS, LEADING_SPACES, LINE_BREAK, LINE_CONTINUER, LOGIC, Lexer, MATH, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, RELATION, RESERVED, Rewriter, SHIFT, SIMPLESTR, TRAILING_SPACES, UNARY, WHITESPACE, _ref, compact, count, last, op, starts;
var __indexOf = Array.prototype.indexOf || function(item) {
for (var i = 0, l = this.length; i < l; i++) {
if (this[i] === item) return i;
@ -38,7 +38,7 @@
return (new Rewriter).rewrite(this.tokens);
};
Lexer.prototype.identifierToken = function() {
var _ref2, colon, forcedIdentifier, id, input, match, tag;
var _ref2, _ref3, colon, forcedIdentifier, id, input, match, prev, tag;
if (!(match = IDENTIFIER.exec(this.chunk))) {
return 0;
}
@ -58,11 +58,11 @@
this.token('TO', id);
return id.length;
}
forcedIdentifier = colon || this.tagAccessor();
forcedIdentifier = colon || (prev = last(this.tokens)) && !prev.spaced && ((_ref2 = prev[0]) === '.' || _ref2 === '?.' || _ref2 === '@' || _ref2 === '::');
tag = 'IDENTIFIER';
if (__indexOf.call(JS_KEYWORDS, id) >= 0 || !forcedIdentifier && __indexOf.call(COFFEE_KEYWORDS, id) >= 0) {
tag = id.toUpperCase();
if (tag === 'WHEN' && (_ref2 = this.tag(), __indexOf.call(LINE_BREAK, _ref2) >= 0)) {
if (tag === 'WHEN' && (_ref3 = this.tag(), __indexOf.call(LINE_BREAK, _ref3) >= 0)) {
tag = 'LEADING_WHEN';
} else if (tag === 'FOR') {
this.seenFor = true;
@ -108,9 +108,6 @@
return 0;
}
number = match[0];
if (this.tag() === '.' && number.charAt(0) === '.') {
return 0;
}
this.token('NUMBER', number);
return number.length;
};
@ -242,7 +239,7 @@
return heregex.length;
};
Lexer.prototype.lineToken = function() {
var diff, indent, match, nextCharacter, noNewlines, prev, size;
var diff, indent, match, noNewlines, prev, size;
if (!(match = MULTI_DENT.exec(this.chunk))) {
return 0;
}
@ -250,8 +247,7 @@
this.line += count(indent, '\n');
prev = last(this.tokens, 1);
size = indent.length - 1 - indent.lastIndexOf('\n');
nextCharacter = NEXT_CHARACTER.exec(this.chunk)[1];
noNewlines = ((nextCharacter === '.' || nextCharacter === ',') && !NEXT_ELLIPSIS.test(this.chunk)) || this.unfinished();
noNewlines = this.unfinished();
if (size - this.indebt === this.indent) {
if (noNewlines) {
this.suppressNewlines();
@ -383,25 +379,6 @@
this.token(tag, value);
return value.length;
};
// Inspect the most recent entry of `this.tokens` and, when it is an unspaced
// accessor operator, retag it with a dedicated accessor tag.
//
// Returns false when there is no previous token or it is marked `spaced`.
// Returns true when the previous token's value is `::`, or is `.` and
// `this.value(1)` is not also `.`. In every other case it returns whether
// the previous token's tag is `@`.
//
// NOTE(review): `this.tag` and `this.value` are defined outside this view.
// Presumably index 0 addresses the newest token and index 1 the one before
// it, and a second argument to `this.tag` overwrites that token's tag.
// Confirm against their definitions.
Lexer.prototype.tagAccessor = function() {
var prev;
// Nothing to retag without a previous token, or when it is flagged `spaced`.
if (!(prev = last(this.tokens)) || prev.spaced) {
return false;
}
if (prev[1] === '::') {
this.tag(0, 'PROTOTYPE_ACCESS');
// A `.` counts only when `this.value(1)` is not also `.`.
} else if (prev[1] === '.' && this.value(1) !== '.') {
if (this.tag(1) === '?') {
this.tag(0, 'SOAK_ACCESS');
// Remove the second-to-last entry of `this.tokens`, presumably the `?`
// token just matched, so only the retagged token remains.
this.tokens.splice(-2, 1);
} else {
this.tag(0, 'PROPERTY_ACCESS');
}
} else {
// Not `::` or a qualifying `.`: the result depends only on an `@` tag.
return prev[0] === '@';
}
return true;
};
Lexer.prototype.sanitizeHeredoc = function(doc, options) {
var _ref2, attempt, herecomment, indent, match;
indent = options.indent, herecomment = options.herecomment;
@ -570,7 +547,7 @@
};
Lexer.prototype.unfinished = function() {
var prev, value;
return (prev = last(this.tokens, 1)) && prev[0] !== '.' && (value = this.value()) && !value.reserved && NO_NEWLINE.test(value) && !CODE.test(value) && !ASSIGNED.test(this.chunk);
return LINE_CONTINUER.test(this.chunk) || (prev = last(this.tokens, 1)) && prev[0] !== '.' && (value = this.value()) && !value.reserved && NO_NEWLINE.test(value) && !CODE.test(value) && !ASSIGNED.test(this.chunk);
};
Lexer.prototype.escapeLines = function(str, heredoc) {
return str.replace(MULTILINER, heredoc ? '\\n' : '');
@ -607,7 +584,7 @@
IDENTIFIER = /^([$A-Za-z_][$\w]*)([^\n\S]*:(?!:))?/;
NUMBER = /^0x[\da-f]+|^(?:\d+(\.\d+)?|\.\d+)(?:e[+-]?\d+)?/i;
HEREDOC = /^("""|''')([\s\S]*?)(?:\n[ \t]*)?\1/;
OPERATOR = /^(?:-[-=>]?|\+[+=]?|\.{3}|[*&|\/%=<>^:!?]+)/;
OPERATOR = /^(?:[-=]>|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>])\2=?|\?\.|\.{3})/;
WHITESPACE = /^[ \t]+/;
COMMENT = /^###([^#][\s\S]*?)(?:###[ \t]*\n|(?:###)?$)|^(?:\s*#(?!##[^#]).*)+/;
CODE = /^[-=]>/;
@ -620,8 +597,7 @@
MULTILINER = /\n/g;
HEREDOC_INDENT = /\n+([ \t]*)/g;
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/;
NEXT_CHARACTER = /^\s*(\S?)/;
NEXT_ELLIPSIS = /^\s*\.{3}/;
LINE_CONTINUER = /^\s*(?:,|\??\.(?!\.)|::)/;
LEADING_SPACES = /^\s+/;
TRAILING_SPACES = /\s+$/;
NO_NEWLINE = /^(?:[-+*&|\/%=<>!.\\][<>=&|]*|and|or|is(?:nt)?|n(?:ot|ew)|delete|typeof|instanceof)$/;

File diff suppressed because one or more lines are too long

View File

@ -213,7 +213,7 @@
if (tag === 'IF' || tag === 'ELSE' || tag === 'UNLESS' || tag === '->' || tag === '=>') {
seenSingle = true;
}
if (tag === 'PROPERTY_ACCESS' && this.tag(i - 1) === 'OUTDENT') {
if ((tag === '.' || tag === '?.' || tag === '::') && this.tag(i - 1) === 'OUTDENT') {
return true;
}
return !token.generated && this.tag(i - 1) !== ',' && __indexOf.call(IMPLICIT_END, tag) >= 0 && (tag !== 'INDENT' || (this.tag(i - 2) !== 'CLASS' && (_ref3 = this.tag(i - 1), __indexOf.call(IMPLICIT_BLOCK, _ref3) < 0) && !((post = this.tokens[i + 1]) && post.generated && post[0] === '{')));

View File

@ -237,10 +237,10 @@ grammar =
# The general group of accessors into an object, by property, by prototype
# or by array index or slice.
Accessor: [
o 'PROPERTY_ACCESS Identifier', -> new Accessor $2
o 'PROTOTYPE_ACCESS Identifier', -> new Accessor $2, 'prototype'
o '. Identifier', -> new Accessor $2
o ':: Identifier', -> new Accessor $2, 'prototype'
o '::', -> new Accessor new Literal 'prototype'
o 'SOAK_ACCESS Identifier', -> new Accessor $2, 'soak'
o '?. Identifier', -> new Accessor $2, 'soak'
o 'Index'
]
@ -557,6 +557,7 @@ grammar =
#
# (2 + 3) * 4
operators = [
['left', '.', '?.', '::']
['left', 'CALL_START', 'CALL_END']
['nonassoc', '++', '--']
['left', '?']
@ -567,7 +568,6 @@ operators = [
['left', 'RELATION']
['left', 'COMPARE']
['left', 'LOGIC']
['left', '.']
['nonassoc', 'INDENT', 'OUTDENT']
['right', '=', ':', 'COMPOUND_ASSIGN', 'RETURN']
['right', 'WHEN', 'LEADING_WHEN', 'FORIN', 'FOROF', 'FROM', 'TO', 'BY',

View File

@ -86,7 +86,8 @@ exports.Lexer = class Lexer
@seenFrom = no
@token 'TO', id
return id.length
forcedIdentifier = colon or @tagAccessor()
forcedIdentifier = colon or
(prev = last @tokens) and not prev.spaced and prev[0] in ['.', '?.', '@', '::']
tag = 'IDENTIFIER'
if id in JS_KEYWORDS or
not forcedIdentifier and id in COFFEE_KEYWORDS
@ -134,7 +135,6 @@ exports.Lexer = class Lexer
numberToken: ->
return 0 unless match = NUMBER.exec @chunk
number = match[0]
return 0 if @tag() is '.' and number.charAt(0) is '.'
@token 'NUMBER', number
number.length
@ -240,8 +240,7 @@ exports.Lexer = class Lexer
@line += count indent, '\n'
prev = last @tokens, 1
size = indent.length - 1 - indent.lastIndexOf '\n'
nextCharacter = NEXT_CHARACTER.exec(@chunk)[1]
noNewlines = (nextCharacter in ['.', ','] and not NEXT_ELLIPSIS.test(@chunk)) or @unfinished()
noNewlines = @unfinished()
if size - @indebt is @indent
if noNewlines then @suppressNewlines() else @newlineToken()
return indent.length
@ -343,23 +342,6 @@ exports.Lexer = class Lexer
# Token Manipulators
# ------------------
# As we consume a new `IDENTIFIER`, look at the previous token to determine
# if it's a special kind of accessor. Return `true` if any type of accessor
# is the previous token.
#
# Returns `false` when there is no previous token or it is marked `spaced`.
# A previous value of `::` is retagged `PROTOTYPE_ACCESS`. A previous value
# of `.` (when `@value(1)` is not also `.`) is retagged `SOAK_ACCESS` if
# `@tag(1)` is `?`, otherwise `PROPERTY_ACCESS`. Anything else returns
# whether the previous tag is `@`.
#
# NOTE(review): `@tag` and `@value` are defined outside this view. Presumably
# index 0 is the newest token and index 1 the one before it. Confirm.
tagAccessor: ->
return false if not (prev = last @tokens) or prev.spaced
if prev[1] is '::'
@tag 0, 'PROTOTYPE_ACCESS'
else if prev[1] is '.' and @value(1) isnt '.'
if @tag(1) is '?'
@tag 0, 'SOAK_ACCESS'
# Remove the second-to-last entry of `@tokens`, presumably the `?` token
# just matched, so only the retagged token remains.
@tokens.splice -2, 1
else
@tag 0, 'PROPERTY_ACCESS'
else
return prev[0] is '@'
true
# Sanitize a heredoc or herecomment by
# erasing all external indentation on the left-hand side.
sanitizeHeredoc: (doc, options) ->
@ -490,7 +472,8 @@ exports.Lexer = class Lexer
# Are we in the midst of an unfinished expression?
unfinished: ->
(prev = last @tokens, 1) and prev[0] isnt '.' and
LINE_CONTINUER.test(@chunk) or
(prev = last @tokens, 1) and prev[0] isnt '.' and
(value = @value()) and not value.reserved and
NO_NEWLINE.test(value) and not CODE.test(value) and not ASSIGNED.test(@chunk)
@ -551,7 +534,16 @@ IDENTIFIER = /// ^
///
NUMBER = /^0x[\da-f]+|^(?:\d+(\.\d+)?|\.\d+)(?:e[+-]?\d+)?/i
HEREDOC = /^("""|''')([\s\S]*?)(?:\n[ \t]*)?\1/
OPERATOR = /// ^ (?: -[-=>]? | \+[+=]? | \.{3} | [*&|/%=<>^:!?]+ ) ///
OPERATOR = /// ^
(?: [-=]> # function
| [-+*/%<>&|^!?=]= # compound assign / compare
| >>>=? # zero-fill right shift
| ([-+:])\1 # doubles
| ([&|<>])\2=? # logic / shift
| \?\. # soak access
| \.{3} # splat
)
///
WHITESPACE = /^[ \t]+/
COMMENT = /^###([^#][\s\S]*?)(?:###[ \t]*\n|(?:###)?$)|^(?:\s*#(?!##[^#]).*)+/
CODE = /^[-=]>/
@ -580,8 +572,7 @@ HEREGEX_OMIT = /\s+(?:#.*)?/g
MULTILINER = /\n/g
HEREDOC_INDENT = /\n+([ \t]*)/g
ASSIGNED = /^\s*@?[$A-Za-z_][$\w]*[ \t]*?[:=][^:=>]/
NEXT_CHARACTER = /^\s*(\S?)/
NEXT_ELLIPSIS = /^\s*\.{3}/
LINE_CONTINUER = /// ^ \s* (?: , | \??\.(?!\.) | :: ) ///
LEADING_SPACES = /^\s+/
TRAILING_SPACES = /\s+$/
NO_NEWLINE = /// ^

View File

@ -178,7 +178,7 @@ class exports.Rewriter
return yes if not seenSingle and token.fromThen
[tag] = token
seenSingle = yes if tag in ['IF', 'ELSE', 'UNLESS', '->', '=>']
return yes if tag is 'PROPERTY_ACCESS' and @tag(i - 1) is 'OUTDENT'
return yes if tag in ['.', '?.', '::'] and @tag(i - 1) is 'OUTDENT'
not token.generated and @tag(i - 1) isnt ',' and tag in IMPLICIT_END and
(tag isnt 'INDENT' or
(@tag(i - 2) isnt 'CLASS' and @tag(i - 1) not in IMPLICIT_BLOCK and

View File

@ -54,3 +54,7 @@ greeting = id(
""")
ok greeting is "Hello"
ok not Date
::
?.foo, '`?.` and `::` should also continue lines'