first draft of whitespace-sensitive method calls and indexes.

commit aa93d3c387 (parent ab4a4a5580)
Author: Jeremy Ashkenas
Date:   2010-01-26 20:59:52 -05:00

6 changed files with 1445 additions and 1388 deletions
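
What "whitespace-sensitive" means here, in short: an opening parenthesis that hugs a callable value is now lexed as the start of a method call, while the same character after a space keeps its ordinary grouping meaning. A minimal sketch of the intended distinction, assuming the lexer is exposed as CoffeeScript::Lexer (the unit test at the bottom of this diff drives it as @lex.tokenize(code)) and that the library loads via require 'coffee_script':

    require 'coffee_script'                # assumed load path
    lex = CoffeeScript::Lexer.new          # assumed class name

    p lex.tokenize("object.method(args)")  # '(' hugs a callable value -> tagged :CALL_START / :CALL_END
    p lex.tokenize("object.method (args)") # a space intervenes        -> the '(' stays a plain "(" token

Square brackets get the same treatment: a '[' directly after a value opens an index, while a '[' after a space opens an array literal (see the grammar and lexer hunks below).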


@@ -6,6 +6,7 @@ token NUMBER STRING REGEX
 token TRUE FALSE YES NO ON OFF
 token IDENTIFIER PROPERTY_ACCESS PROTOTYPE_ACCESS SOAK_ACCESS
 token CODE PARAM_START PARAM PARAM_END NEW RETURN
+token CALL_START CALL_END INDEX_START INDEX_END
 token TRY CATCH FINALLY THROW
 token BREAK CONTINUE
 token FOR IN OF BY WHEN WHILE
@@ -246,12 +247,12 @@ rule
   | PROTOTYPE_ACCESS IDENTIFIER { result = AccessorNode.new(val[1], :prototype) }
   | SOAK_ACCESS IDENTIFIER { result = AccessorNode.new(val[1], :soak) }
   | Index { result = val[0] }
-  | Range { result = SliceNode.new(val[0]) }
+  | Slice { result = SliceNode.new(val[0]) }
   ;
 
   # Indexing into an object or array.
   Index:
-    "[" Expression "]" { result = IndexNode.new(val[1]) }
+    INDEX_START Expression INDEX_END { result = IndexNode.new(val[1]) }
   ;
 
   # An object literal.
@@ -290,13 +291,13 @@ rule
   # The list of arguments to a function invocation.
   Arguments:
-    "(" ArgList ")" { result = val[1] }
-  | "(" ArgList ")" Code { result = val[1] << val[3] }
+    CALL_START ArgList CALL_END { result = val[1] }
+  | CALL_START ArgList CALL_END Code { result = val[1] << val[3] }
   ;
 
   # Calling super.
   Super:
-    SUPER "(" ArgList ")" { result = CallNode.new(Value.new('super'), val[2]) }
+    SUPER CALL_START ArgList CALL_END { result = CallNode.new(Value.new('super'), val[2]) }
   ;
 
   # The range literal.
@@ -307,6 +308,14 @@ rule
     "." "." "." Expression "]" { result = RangeNode.new(val[1], val[5], true) }
   ;
 
+  # The slice literal.
+  Slice:
+    INDEX_START Expression "." "."
+    Expression INDEX_END { result = RangeNode.new(val[1], val[4]) }
+  | INDEX_START Expression "." "." "."
+    Expression INDEX_END { result = RangeNode.new(val[1], val[5], true) }
+  ;
+
   # The array literal.
   Array:
     "[" ArgList "]" { result = ArrayNode.new(val[1]) }


@@ -50,6 +50,9 @@ module CoffeeScript
       :FALSE, :NULL, :TRUE
     ]
 
+    # Tokens which could legitimately be invoked or indexed.
+    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]
+
     # Scan by attempting to match tokens one character at a time. Slow and steady.
     def tokenize(code)
       @code = code.chomp # Cleanup code by remove extra line breaks
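
CALLABLE is the whitelist of tags after which a bare '(' or '[' gets retagged. Including the closing tokens ')', ']', '}' and :STRING is what keeps chained calls and indexes working, e.g. (same assumptions as the sketches above):

    lex = CoffeeScript::Lexer.new
    p lex.tokenize("handlers[name](event)")  # ']' is CALLABLE, so the following '(' becomes :CALL_START
    p lex.tokenize("make_counter()()")       # ')' is CALLABLE, so the second pair chains another call
    p lex.tokenize('"hello"[0]')             # :STRING is CALLABLE, so the '[' becomes :INDEX_START
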
@@ -58,6 +61,7 @@ module CoffeeScript
       @indent = 0 # The current indent level.
       @indents = [] # The stack of all indent levels we are currently within.
       @tokens = [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
+      @spaced = nil # The last value that has a space following it.
       while @i < @code.length
         @chunk = @code[@i..-1]
         extract_next_token
@@ -190,6 +194,7 @@ module CoffeeScript
 
     # Matches and consumes non-meaningful whitespace.
     def whitespace_token
       return false unless whitespace = @chunk[WHITESPACE, 1]
+      @spaced = last_value
       @i += whitespace.length
     end
@@ -214,6 +219,10 @@ module CoffeeScript
       tag_parameters if value && value.match(CODE)
       value ||= @chunk[0,1]
       tag = value.match(ASSIGNMENT) ? :ASSIGN : value
+      if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
+        tag = :CALL_START if value == '('
+        tag = :INDEX_START if value == '['
+      end
       token(tag, value)
       @i += value.length
     end
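
Taken together, the lexer hunks add one piece of state (@spaced, the last value that had whitespace after it, compared by object identity) and one decision point when a literal '(' or '[' shows up. A standalone distillation of that check, not the project's API:

    # Standalone sketch; CALLABLE is copied from the lexer above, the rest is illustrative.
    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]

    # prev_tag / prev_value describe the token just before the '(' or '[';
    # spaced_value mirrors @spaced: the last value that had whitespace after it.
    def retag(char, prev_tag, prev_value, spaced_value)
      return char if spaced_value.equal?(prev_value)  # mirrors !@spaced.equal?(last_value) above
      return char unless CALLABLE.include?(prev_tag)
      case char
      when '(' then :CALL_START
      when '[' then :INDEX_START
      else char
      end
    end

    name = 'puts_it'
    retag('(', :IDENTIFIER, name, nil)   #=> :CALL_START  ("puts_it(..." -- no space before the paren)
    retag('(', :IDENTIFIER, name, name)  #=> "("          ("puts_it (..." -- the identifier was the spaced value)
    retag('[', ')', ')', nil)            #=> :INDEX_START (indexing whatever the previous call returned)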

File diff suppressed because it is too large.


@@ -6,7 +6,8 @@ module CoffeeScript
   class Rewriter
 
     # Tokens that must be balanced.
-    BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], [:INDENT, :OUTDENT], [:PARAM_START, :PARAM_END]]
+    BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], [:INDENT, :OUTDENT],
+                      [:PARAM_START, :PARAM_END], [:CALL_START, :CALL_END], [:INDEX_START, :INDEX_END]]
 
     # Tokens that signal the start of a balanced pair.
     EXPRESSION_START = BALANCED_PAIRS.map {|pair| pair.first }
@@ -45,6 +46,7 @@ module CoffeeScript
       remove_leading_newlines
       remove_mid_expression_newlines
       move_commas_outside_outdents
+      close_open_calls_and_indexes
       add_implicit_parentheses
       add_implicit_indentation
       ensure_balance(*BALANCED_PAIRS)
@@ -119,6 +121,35 @@
       end
     end
 
+    # We've tagged the opening parenthesis of a method call, and the opening
+    # bracket of an indexing operation. Match them with their close.
+    def close_open_calls_and_indexes
+      parens, brackets = [0], [0]
+      scan_tokens do |prev, token, post, i|
+        case token[0]
+        when :CALL_START then parens.push(0)
+        when :INDEX_START then brackets.push(0)
+        when '(' then parens[-1] += 1
+        when '[' then brackets[-1] += 1
+        when ')'
+          if parens.last == 0
+            parens.pop
+            token[0] = :CALL_END
+          else
+            parens[-1] -= 1
+          end
+        when ']'
+          if brackets.last == 0
+            brackets.pop
+            token[0] = :INDEX_END
+          else
+            brackets[-1] -= 1
+          end
+        end
+        next 1
+      end
+    end
+
     # Because our grammar is LALR(1), it can't handle some single-line
     # expressions that lack ending delimiters. Use the lexer to add the implicit
     # blocks, so it doesn't need to.
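
The new pass keeps one counter stack per delimiter kind: :CALL_START / :INDEX_START push a fresh counter, ordinary '(' and '[' bump the top one, and a closer is only retagged when the top counter is zero (meaning it belongs to the special opener rather than to a nested plain pair). A standalone sketch of the same idea over bare tags, rather than the rewriter's [tag, value] token arrays:

    # Standalone sketch of the counter-stack matching; bare tags only, illustrative.
    def close_opens(tags)
      parens, brackets = [0], [0]
      tags.map do |tag|
        case tag
        when :CALL_START
          parens.push(0)
          tag
        when :INDEX_START
          brackets.push(0)
          tag
        when '('
          parens[-1] += 1
          tag
        when '['
          brackets[-1] += 1
          tag
        when ')'
          if parens.last.zero?
            parens.pop
            :CALL_END          # this ')' closes a :CALL_START
          else
            parens[-1] -= 1    # it closes an ordinary '(' opened inside the call
            tag
          end
        when ']'
          if brackets.last.zero?
            brackets.pop
            :INDEX_END
          else
            brackets[-1] -= 1
            tag
          end
        else
          tag
        end
      end
    end

    # Roughly what the lexer would hand over for:  fn(list[i], (a + b))
    tags = [:IDENTIFIER, :CALL_START, :IDENTIFIER, :INDEX_START, :IDENTIFIER, ']',
            ',', '(', :IDENTIFIER, '+', :IDENTIFIER, ')', ')']
    p close_opens(tags)
    # The lone ']' and the final ')' come back as :INDEX_END and :CALL_END;
    # the grouping parens around (a + b) stay plain.
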
@@ -165,12 +196,12 @@ module CoffeeScript
         if (stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)) &&
             !(token[0] == :PARAM_START && prev[0] == ',')
           idx = token[0] == :OUTDENT ? i + 1 : i
-          stack.last.times { @tokens.insert(idx, [')', Value.new(')', token[1].line)]) }
+          stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
           size, stack[-1] = stack[-1] + 1, 0
           next size
         end
         next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
-        @tokens.insert(i, ['(', Value.new('(', token[1].line)])
+        @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
         stack[-1] += 1
         next token[0] == :PARAM_START ? 1 : 2
       end
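
With the last hunk, implicit parentheses are inserted as :CALL_START / :CALL_END as well (still carrying '(' and ')' as their values), so paren-less calls reach the parser through the same Arguments production as explicit ones. Illustrative only, since IMPLICIT_FUNC and IMPLICIT_CALL are defined outside this hunk: assuming :IDENTIFIER and :STRING are members, and that tokenize runs the Rewriter (as the updated unit test below implies), something like

    lex = CoffeeScript::Lexer.new      # same assumptions as the earlier sketches
    p lex.tokenize('print "hello"')
    # expected tag sequence, roughly: :IDENTIFIER, :CALL_START, :STRING, :CALL_END, "\n"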


@@ -1 +1 @@
[[:COMMENT, [" The cornerstone, an each implementation.", " Handles objects implementing forEach, arrays, and raw objects."]], ["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [:ASSIGN, ":"], [:PARAM_START, "("], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], [:PARAM_END, ")"], ["->", "->"], [:INDENT, 2], [:IDENTIFIER, "index"], [:ASSIGN, ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], [:INDENT, 2], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], [:INDENT, 2], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["(", "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [")", ")"], [:OUTDENT, 2], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], [:ELSE, "else"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], ["[", "["], [:IDENTIFIER, "key"], ["]", "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OUTDENT, 2], [:OUTDENT, 2], [:CATCH, "catch"], [:IDENTIFIER, "e"], [:INDENT, 2], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:ISNT, "isnt"], [:IDENTIFIER, "breaker"], [:OUTDENT, 2], ["\n", "\n"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], ["\n", "\n"]]
[[:COMMENT, [" The cornerstone, an each implementation.", " Handles objects implementing forEach, arrays, and raw objects."]], ["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [:ASSIGN, ":"], [:PARAM_START, "("], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], [:PARAM_END, ")"], ["->", "->"], [:INDENT, 2], [:IDENTIFIER, "index"], [:ASSIGN, ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], [:INDENT, 2], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], [:INDENT, 2], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], [:CALL_START, "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [:CALL_END, ")"], [:OUTDENT, 2], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], [:CALL_START, "("], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], [:CALL_START, "("], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], [:CALL_START, "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], [:ELSE, "else"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], [:CALL_START, "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], [:INDEX_START, "["], [:IDENTIFIER, "key"], [:INDEX_END, "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], [:CALL_START, "("], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:OUTDENT, 2], [:OUTDENT, 2], [:CATCH, "catch"], [:IDENTIFIER, "e"], [:INDENT, 2], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:ISNT, "isnt"], [:IDENTIFIER, "breaker"], [:OUTDENT, 2], ["\n", "\n"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], ["\n", "\n"]]


@@ -34,8 +34,8 @@ class LexerTest < Test::Unit::TestCase
   def test_lexing_if_statement
     code = "clap_your_hands() if happy"
-    assert @lex.tokenize(code) == [[:IDENTIFIER, "clap_your_hands"], ["(", "("],
-      [")", ")"], [:IF, "if"], [:IDENTIFIER, "happy"], ["\n", "\n"]]
+    assert @lex.tokenize(code) == [[:IDENTIFIER, "clap_your_hands"], [:CALL_START, "("],
+      [:CALL_END, ")"], [:IF, "if"], [:IDENTIFIER, "happy"], ["\n", "\n"]]
   end
 
   def test_lexing_comment