mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
rewriter is halfway done, and working
This commit is contained in:
parent
84feab3492
commit
557cdbba71
5 changed files with 259 additions and 57 deletions
|
@ -266,7 +266,7 @@
|
|||
}
|
||||
value = value || this.chunk.substr(0, 1);
|
||||
tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
|
||||
if (this.value() && this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) {
|
||||
if (this.value() && !this.value().spaced && CALLABLE.indexOf(this.tag() >= 0)) {
|
||||
if (value === '(') {
|
||||
tag = 'CALL_START';
|
||||
}
|
||||
|
|
|
@ -49,12 +49,12 @@
|
|||
re.prototype.rewrite = function rewrite(tokens) {
|
||||
this.tokens = tokens;
|
||||
this.adjust_comments();
|
||||
// this.remove_leading_newlines()
|
||||
// this.remove_mid_expression_newlines()
|
||||
// this.move_commas_outside_outdents()
|
||||
// this.close_open_calls_and_indexes()
|
||||
this.remove_leading_newlines();
|
||||
this.remove_mid_expression_newlines();
|
||||
this.move_commas_outside_outdents();
|
||||
this.close_open_calls_and_indexes();
|
||||
// this.add_implicit_parentheses()
|
||||
// this.add_implicit_indentation()
|
||||
this.add_implicit_indentation();
|
||||
// this.ensure_balance(BALANCED_PAIRS)
|
||||
// this.rewrite_closing_parens()
|
||||
return this.tokens;
|
||||
|
@ -78,27 +78,149 @@
|
|||
// Massage newlines and indentations so that comments don't have to be
// correctly indented, or appear on their own line.
//
// Scans the token stream and, for every COMMENT token, inspects the direct
// predecessor (prev) and the tokens two positions away on either side:
//   * INDENT/OUTDENT (or OUTDENT/INDENT) pair of equal size wrapped around the
//     comment: both are removed, and the scan re-checks the current index.
//   * newline before and INDENT two tokens after: the INDENT is hoisted into
//     the slot of the preceding newline.
//   * anything other than a newline/INDENT/OUTDENT directly before the
//     comment: a newline token is inserted in front of it.
// Returns whatever scan_tokens returns.
re.prototype.adjust_comments = function adjust_comments() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var after, before;
    if (token[0] !== 'COMMENT') {
      return 1;
    }
    before = self.tokens[i - 2];
    after = self.tokens[i + 2];
    if (before && after && ((before[0] === 'INDENT' && after[0] === 'OUTDENT') || (before[0] === 'OUTDENT' && after[0] === 'INDENT')) && before[1] === after[1]) {
      // Remove the later token first so the earlier index stays valid.
      self.tokens.splice(i + 2, 1);
      self.tokens.splice(i - 2, 1);
      return 0;
    } else if (prev && after && prev[0] === "\n" && after[0] === 'INDENT') {
      // prev/after are undefined at the edges of the stream (comment is the
      // first token, or within two tokens of the end); guard before reading.
      self.tokens.splice(i + 2, 1);
      self.tokens[i - 1] = after;
      return 1;
    } else if (prev && prev[0] !== "\n" && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
      self.tokens.splice(i, 0, ["\n", "\n"]);
      return 2;
    } else {
      return 1;
    }
  });
};
|
||||
// Leading newlines would introduce an ambiguity in the grammar, so we
// dispatch them here.
//
// Strips every newline token from the front of this.tokens. An empty token
// stream is left untouched instead of throwing. Returns the first token that
// was removed, or undefined when the stream did not start with a newline.
re.prototype.remove_leading_newlines = function remove_leading_newlines() {
  var first, removed;
  while (this.tokens.length > 0 && this.tokens[0][0] === "\n") {
    removed = this.tokens.shift();
    if (first === undefined) {
      first = removed;
    }
  }
  return first;
};
|
||||
// Some blocks occur in the middle of expressions -- when we're expecting
// this, remove their trailing newlines.
//
// A newline token is dropped when the token right after it is listed in
// EXPRESSION_CLOSE; the scan then re-examines the same index.
re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var closes_expression = post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0;
    if (closes_expression && token[0] === "\n") {
      self.tokens.splice(i, 1);
      return 0;
    }
    return 1;
  });
};
|
||||
// Make sure that we don't accidentally break trailing commas, which need
// to go on the outside of expression closers.
//
// Whenever an OUTDENT token directly follows a ',' token the two are swapped,
// so the comma ends up after the OUTDENT. Because the scan then advances onto
// the relocated comma, a run of consecutive OUTDENTs pushes the comma past all
// of them. (The previous splice(i, 1, token) replaced the OUTDENT with itself
// and therefore never moved anything.)
re.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    if (prev && token[0] === 'OUTDENT' && prev[0] === ',') {
      // Replace the [',', OUTDENT] pair with [OUTDENT, ','].
      self.tokens.splice(i - 1, 2, token, prev);
    }
    return 1;
  });
};
|
||||
// We've tagged the opening parenthesis of a method call, and the opening
// bracket of an indexing operation. Match them with their close.
//
// Two stacks of counters are kept, one for parentheses and one for brackets.
// CALL_START / INDEX_START push a fresh counter; a plain '(' or '[' bumps the
// counter on top of the stack. A closer decrements the top counter when it is
// non-zero (it closes a plain opener); when the top counter is zero the closer
// belongs to the tagged opener, so the counter is popped and the token is
// retagged in place as CALL_END / INDEX_END.
re.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() {
  var parens = [0];
  var brackets = [0];
  return this.scan_tokens(function(prev, token, post, i) {
    switch (token[0]) {
      case 'CALL_START':
        parens.push(0);
        break;
      case 'INDEX_START':
        brackets.push(0);
        break;
      case '(':
        // JavaScript arrays have no negative indexing: address the top
        // of the stack explicitly.
        parens[parens.length - 1] += 1;
        break;
      case '[':
        brackets[brackets.length - 1] += 1;
        break;
      case ')':
        if (parens[parens.length - 1] === 0) {
          parens.pop();
          token[0] = 'CALL_END';
        } else {
          parens[parens.length - 1] -= 1;
        }
        break;
      case ']':
        if (brackets[brackets.length - 1] === 0) {
          brackets.pop();
          token[0] = 'INDEX_END';
        } else {
          brackets[brackets.length - 1] -= 1;
        }
        break;
    }
    return 1;
  });
};
|
||||
// Because our grammar is LALR(1), it can't handle some single-line
// expressions that lack ending delimiters. Use the lexer to add the implicit
// blocks, so it doesn't need to.
// ')' can close a single-line block, but we need to make sure it's balanced.
//
// For every token listed in SINGLE_LINERS that is not already followed by an
// INDENT (and is not the ELSE of an ELSE IF), an ['INDENT', 2] token is
// inserted after it and a matching ['OUTDENT', 2] is inserted in front of the
// first token that ends the line: a SINGLE_CLOSERS token, an unbalanced ')',
// or the end of the stream. A comma directly before that point stays outside
// the block. A THEN token is removed once its block has been wrapped.
re.prototype.add_implicit_indentation = function add_implicit_indentation() {
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var idx, insertion, parens, post_tag, starter, tok, tok_tag;
    if (SINGLE_LINERS.indexOf(token[0]) < 0) {
      return 1;
    }
    // post is undefined when the single-liner is the final token.
    post_tag = post ? post[0] : undefined;
    if (post_tag === 'INDENT' || (token[0] === 'ELSE' && post_tag === 'IF')) {
      return 1;
    }
    starter = token[0];
    self.tokens.splice(i + 1, 0, ['INDENT', 2]);
    idx = i + 1;
    parens = 0;
    while (true) {
      idx += 1;
      tok = self.tokens[idx];
      // tok is undefined once we run off the end of the stream; read its
      // tag defensively so the ELSE check below cannot throw.
      tok_tag = tok ? tok[0] : undefined;
      if ((!tok || SINGLE_CLOSERS.indexOf(tok_tag) >= 0 || (tok_tag === ')' && parens === 0)) && !(starter === 'ELSE' && tok_tag === 'ELSE')) {
        insertion = self.tokens[idx - 1][0] === "," ? idx - 1 : idx;
        self.tokens.splice(insertion, 0, ['OUTDENT', 2]);
        break;
      }
      if (tok_tag === '(') {
        parens += 1;
      }
      if (tok_tag === ')') {
        parens -= 1;
      }
    }
    if (token[0] !== 'THEN') {
      return 1;
    }
    // THEN only marks the start of the block; drop it and rescan this index.
    self.tokens.splice(i, 1);
    return 0;
  });
};
|
||||
})();
|
|
@ -151,6 +151,30 @@ module CoffeeScript
|
|||
end
|
||||
end
|
||||
|
||||
# Methods may be optionally called without parentheses, for simple cases.
# Insert the implicit parentheses here, so that the parser doesn't have to
# deal with them.
#
# open_calls is a stack with one counter per indentation level; each counter
# holds the number of implicit calls opened at that level and not yet closed.
def add_implicit_parentheses
  open_calls = [0]
  scan_tokens do |prev, token, post, i|
    tag = token[0]
    open_calls.push(0) if tag == :INDENT
    if tag == :OUTDENT
      # Calls still open inside the finished block carry over to the outer level.
      inner = open_calls.pop
      open_calls[-1] += inner
    end
    pending = open_calls.last
    if pending > 0 && (IMPLICIT_END.include?(tag) || post.nil?)
      at = tag == :OUTDENT ? i + 1 : i
      pending.times { @tokens.insert(at, [:CALL_END, Value.new(')', token[1].line)]) }
      open_calls[-1] = 0
      next pending + 1
    end
    next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(tag)
    @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
    open_calls[-1] += 1
    next 2
  end
end
|
||||
|
||||
# Because our grammar is LALR(1), it can't handle some single-line
|
||||
# expressions that lack ending delimiters. Use the lexer to add the implicit
|
||||
# blocks, so it doesn't need to.
|
||||
|
@ -183,30 +207,6 @@ module CoffeeScript
|
|||
end
|
||||
end
|
||||
|
||||
# Methods may be optionally called without parentheses, for simple cases.
|
||||
# Insert the implicit parentheses here, so that the parser doesn't have to
|
||||
# deal with them.
|
||||
def add_implicit_parentheses
|
||||
stack = [0]
|
||||
scan_tokens do |prev, token, post, i|
|
||||
stack.push(0) if token[0] == :INDENT
|
||||
if token[0] == :OUTDENT
|
||||
last = stack.pop
|
||||
stack[-1] += last
|
||||
end
|
||||
if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)
|
||||
idx = token[0] == :OUTDENT ? i + 1 : i
|
||||
stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
|
||||
size, stack[-1] = stack[-1] + 1, 0
|
||||
next size
|
||||
end
|
||||
next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
|
||||
@tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
|
||||
stack[-1] += 1
|
||||
next 2
|
||||
end
|
||||
end
|
||||
|
||||
# Ensure that all listed pairs of tokens are correctly balanced throughout
|
||||
# the course of the token stream.
|
||||
def ensure_balance(*pairs)
|
||||
|
|
|
@ -216,7 +216,7 @@ lex::literal_token: ->
|
|||
this.tag_parameters() if value and value.match(CODE)
|
||||
value ||= this.chunk.substr(0, 1)
|
||||
tag: if value.match(ASSIGNMENT) then 'ASSIGN' else value
|
||||
if this.value() and this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
|
||||
if this.value() and !this.value().spaced and CALLABLE.indexOf(this.tag() >= 0)
|
||||
tag: 'CALL_START' if value is '('
|
||||
tag: 'INDEX_START' if value is '['
|
||||
this.token tag, value
|
||||
|
|
|
@ -41,12 +41,12 @@ SINGLE_CLOSERS: ["\n", 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'P
|
|||
re::rewrite: (tokens) ->
|
||||
this.tokens: tokens
|
||||
this.adjust_comments()
|
||||
# this.remove_leading_newlines()
|
||||
# this.remove_mid_expression_newlines()
|
||||
# this.move_commas_outside_outdents()
|
||||
# this.close_open_calls_and_indexes()
|
||||
this.remove_leading_newlines()
|
||||
this.remove_mid_expression_newlines()
|
||||
this.move_commas_outside_outdents()
|
||||
this.close_open_calls_and_indexes()
|
||||
# this.add_implicit_parentheses()
|
||||
# this.add_implicit_indentation()
|
||||
this.add_implicit_indentation()
|
||||
# this.ensure_balance(BALANCED_PAIRS)
|
||||
# this.rewrite_closing_parens()
|
||||
this.tokens
|
||||
|
@ -56,7 +56,7 @@ re::rewrite: (tokens) ->
|
|||
# forwards (or backwards) in the stream, to make sure we don't miss anything
|
||||
# as the stream changes length under our feet.
|
||||
re::scan_tokens: (yield) ->
|
||||
i = 0
|
||||
i: 0
|
||||
while true
|
||||
break unless this.tokens[i]
|
||||
move: yield(this.tokens[i - 1], this.tokens[i], this.tokens[i + 1], i)
|
||||
|
@ -66,7 +66,7 @@ re::scan_tokens: (yield) ->
|
|||
# Massage newlines and indentations so that comments don't have to be
|
||||
# correctly indented, or appear on their own line.
|
||||
re::adjust_comments: ->
|
||||
this.scan_tokens (prev, token, post, i) ->
|
||||
this.scan_tokens (prev, token, post, i) =>
|
||||
return 1 unless token[0] is 'COMMENT'
|
||||
before: this.tokens[i - 2]
|
||||
after: this.tokens[i + 2]
|
||||
|
@ -87,6 +87,86 @@ re::adjust_comments: ->
|
|||
else
|
||||
return 1
|
||||
|
||||
# Leading newlines would introduce an ambiguity in the grammar, so we
# dispatch them here.
#
# Strips every newline token from the front of the stream and tolerates an
# empty stream. Returns the first removed token, or null if none was removed.
re::remove_leading_newlines: ->
  first: null
  while true
    head: this.tokens[0]
    break unless head and head[0] is "\n"
    removed: this.tokens.shift()
    first ||= removed
  first
|
||||
|
||||
# Some blocks occur in the middle of expressions -- when we're expecting
# this, remove their trailing newlines.
#
# A newline is dropped when the token after it is listed in EXPRESSION_CLOSE;
# the scan then re-examines the same index.
re::remove_mid_expression_newlines: ->
  this.scan_tokens (prev, token, post, i) =>
    closes_expression: post and EXPRESSION_CLOSE.indexOf(post[0]) >= 0
    if closes_expression and token[0] is "\n"
      this.tokens.splice(i, 1)
      return 0
    return 1
|
||||
|
||||
# Make sure that we don't accidentally break trailing commas, which need
# to go on the outside of expression closers.
#
# When an OUTDENT directly follows a ',', swap the pair so the comma lands
# after the OUTDENT. (splice(i, 1, token) only replaced the OUTDENT with
# itself, so nothing ever moved.)
re::move_commas_outside_outdents: ->
  this.scan_tokens (prev, token, post, i) =>
    if prev and token[0] is 'OUTDENT' and prev[0] is ','
      this.tokens.splice(i - 1, 2, token, prev)
    return 1
|
||||
|
||||
# We've tagged the opening parenthesis of a method call, and the opening
# bracket of an indexing operation. Match them with their close.
#
# parens and brackets are stacks of counters: CALL_START / INDEX_START push a
# new counter, a plain opener bumps the top counter, and a closer either
# decrements a non-zero top counter or -- when it is zero -- pops it and is
# retagged as CALL_END / INDEX_END. The top of the stack has to be addressed
# as [length - 1]; [-1] is just a property named "-1" in JavaScript, and pop
# needs its call parentheses to actually run.
re::close_open_calls_and_indexes: ->
  parens: [0]
  brackets: [0]
  this.scan_tokens (prev, token, post, i) =>
    switch token[0]
      when 'CALL_START' then parens.push(0)
      when 'INDEX_START' then brackets.push(0)
      when '(' then parens[parens.length - 1] += 1
      when '[' then brackets[brackets.length - 1] += 1
      when ')'
        if parens[parens.length - 1] is 0
          parens.pop()
          token[0]: 'CALL_END'
        else
          parens[parens.length - 1] -= 1
      when ']'
        if brackets[brackets.length - 1] is 0
          brackets.pop()
          token[0]: 'INDEX_END'
        else
          brackets[brackets.length - 1] -= 1
    return 1
|
||||
|
||||
# Because our grammar is LALR(1), it can't handle some single-line
# expressions that lack ending delimiters. Use the lexer to add the implicit
# blocks, so it doesn't need to.
# ')' can close a single-line block, but we need to make sure it's balanced.
#
# The tags of the following token (post) and of the lookahead token (tok) are
# read defensively: both are undefined at the end of the stream, which
# previously raised instead of closing the block there.
re::add_implicit_indentation: ->
  this.scan_tokens (prev, token, post, i) =>
    return 1 unless SINGLE_LINERS.indexOf(token[0]) >= 0
    post_tag: post and post[0]
    return 1 if post_tag is 'INDENT' or (token[0] is 'ELSE' and post_tag is 'IF')
    starter: token[0]
    this.tokens.splice(i + 1, 0, ['INDENT', 2])
    idx: i + 1
    parens: 0
    while true
      idx += 1
      tok: this.tokens[idx]
      tok_tag: tok and tok[0]
      if (not tok or SINGLE_CLOSERS.indexOf(tok_tag) >= 0 or
          (tok_tag is ')' and parens is 0)) and
          not (starter is 'ELSE' and tok_tag is 'ELSE')
        insertion: if this.tokens[idx - 1][0] is "," then idx - 1 else idx
        this.tokens.splice(insertion, 0, ['OUTDENT', 2])
        break
      parens += 1 if tok_tag is '('
      parens -= 1 if tok_tag is ')'
    return 1 unless token[0] is 'THEN'
    this.tokens.splice(i, 1)
    return 0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue