first draft of whitespace-sensitive method calls and indexes.

commit aa93d3c387 (parent ab4a4a5580)
Author: Jeremy Ashkenas
Date:   2010-01-26 20:59:52 -05:00

6 changed files with 1445 additions and 1388 deletions
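
What "whitespace-sensitive" means here, in short: an opening parenthesis that hugs a callable value is now lexed as the start of a method call, while the same character after a space keeps its ordinary grouping meaning. A minimal sketch of the intended distinction, assuming the lexer is exposed as CoffeeScript::Lexer (the unit test at the bottom of this diff drives it as @lex.tokenize(code)) and that the library loads via require 'coffee_script':

    require 'coffee_script'                # assumed load path
    lex = CoffeeScript::Lexer.new          # assumed class name

    p lex.tokenize("object.method(args)")  # '(' hugs a callable value -> tagged :CALL_START / :CALL_END
    p lex.tokenize("object.method (args)") # a space intervenes        -> the '(' stays a plain "(" token

Square brackets get the same treatment: a '[' directly after a value opens an index, while a '[' after a space opens an array literal (see the grammar and lexer hunks below).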


@@ -6,6 +6,7 @@ token NUMBER STRING REGEX
 token TRUE FALSE YES NO ON OFF
 token IDENTIFIER PROPERTY_ACCESS PROTOTYPE_ACCESS SOAK_ACCESS
 token CODE PARAM_START PARAM PARAM_END NEW RETURN
+token CALL_START CALL_END INDEX_START INDEX_END
 token TRY CATCH FINALLY THROW
 token BREAK CONTINUE
 token FOR IN OF BY WHEN WHILE
@@ -246,12 +247,12 @@ rule
   | PROTOTYPE_ACCESS IDENTIFIER { result = AccessorNode.new(val[1], :prototype) }
   | SOAK_ACCESS IDENTIFIER { result = AccessorNode.new(val[1], :soak) }
   | Index { result = val[0] }
-  | Range { result = SliceNode.new(val[0]) }
+  | Slice { result = SliceNode.new(val[0]) }
   ;
 
   # Indexing into an object or array.
   Index:
-    "[" Expression "]" { result = IndexNode.new(val[1]) }
+    INDEX_START Expression INDEX_END { result = IndexNode.new(val[1]) }
   ;
 
   # An object literal.
@@ -290,13 +291,13 @@ rule
   # The list of arguments to a function invocation.
   Arguments:
-    "(" ArgList ")" { result = val[1] }
-  | "(" ArgList ")" Code { result = val[1] << val[3] }
+    CALL_START ArgList CALL_END { result = val[1] }
+  | CALL_START ArgList CALL_END Code { result = val[1] << val[3] }
   ;
 
   # Calling super.
   Super:
-    SUPER "(" ArgList ")" { result = CallNode.new(Value.new('super'), val[2]) }
+    SUPER CALL_START ArgList CALL_END { result = CallNode.new(Value.new('super'), val[2]) }
   ;
 
   # The range literal.
@@ -307,6 +308,14 @@ rule
     "." "." "." Expression "]" { result = RangeNode.new(val[1], val[5], true) }
   ;
 
+  # The slice literal.
+  Slice:
+    INDEX_START Expression "." "."
+    Expression INDEX_END { result = RangeNode.new(val[1], val[4]) }
+  | INDEX_START Expression "." "." "."
+    Expression INDEX_END { result = RangeNode.new(val[1], val[5], true) }
+  ;
+
   # The array literal.
   Array:
     "[" ArgList "]" { result = ArrayNode.new(val[1]) }


@@ -50,6 +50,9 @@ module CoffeeScript
       :FALSE, :NULL, :TRUE
     ]
 
+    # Tokens which could legitimately be invoked or indexed.
+    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]
+
     # Scan by attempting to match tokens one character at a time. Slow and steady.
     def tokenize(code)
       @code = code.chomp # Cleanup code by remove extra line breaks
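
CALLABLE is the whitelist of tags after which a bare '(' or '[' gets retagged. Including the closing tokens ')', ']', '}' and :STRING is what keeps chained calls and indexes working, e.g. (same assumptions as the sketches above):

    lex = CoffeeScript::Lexer.new
    p lex.tokenize("handlers[name](event)")  # ']' is CALLABLE, so the following '(' becomes :CALL_START
    p lex.tokenize("make_counter()()")       # ')' is CALLABLE, so the second pair chains another call
    p lex.tokenize('"hello"[0]')             # :STRING is CALLABLE, so the '[' becomes :INDEX_START
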
@@ -58,6 +61,7 @@ module CoffeeScript
       @indent = 0 # The current indent level.
       @indents = [] # The stack of all indent levels we are currently within.
       @tokens = [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
+      @spaced = nil # The last value that has a space following it.
       while @i < @code.length
         @chunk = @code[@i..-1]
         extract_next_token
@@ -190,6 +194,7 @@ module CoffeeScript
 
     # Matches and consumes non-meaningful whitespace.
     def whitespace_token
       return false unless whitespace = @chunk[WHITESPACE, 1]
+      @spaced = last_value
       @i += whitespace.length
     end
@@ -214,6 +219,10 @@ module CoffeeScript
       tag_parameters if value && value.match(CODE)
       value ||= @chunk[0,1]
       tag = value.match(ASSIGNMENT) ? :ASSIGN : value
+      if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
+        tag = :CALL_START if value == '('
+        tag = :INDEX_START if value == '['
+      end
       token(tag, value)
       @i += value.length
     end
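
Taken together, the lexer hunks add one piece of state (@spaced, the last value that had whitespace after it, compared by object identity) and one decision point when a literal '(' or '[' shows up. A standalone distillation of that check, not the project's API:

    # Standalone sketch; CALLABLE is copied from the lexer above, the rest is illustrative.
    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]

    # prev_tag / prev_value describe the token just before the '(' or '[';
    # spaced_value mirrors @spaced: the last value that had whitespace after it.
    def retag(char, prev_tag, prev_value, spaced_value)
      return char if spaced_value.equal?(prev_value)  # mirrors !@spaced.equal?(last_value) above
      return char unless CALLABLE.include?(prev_tag)
      case char
      when '(' then :CALL_START
      when '[' then :INDEX_START
      else char
      end
    end

    name = 'puts_it'
    retag('(', :IDENTIFIER, name, nil)   #=> :CALL_START  ("puts_it(..." -- no space before the paren)
    retag('(', :IDENTIFIER, name, name)  #=> "("          ("puts_it (..." -- the identifier was the spaced value)
    retag('[', ')', ')', nil)            #=> :INDEX_START (indexing whatever the previous call returned)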

File diff suppressed because it is too large.


@@ -6,7 +6,8 @@ module CoffeeScript
   class Rewriter
 
     # Tokens that must be balanced.
-    BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], [:INDENT, :OUTDENT], [:PARAM_START, :PARAM_END]]
+    BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], [:INDENT, :OUTDENT],
+                      [:PARAM_START, :PARAM_END], [:CALL_START, :CALL_END], [:INDEX_START, :INDEX_END]]
 
     # Tokens that signal the start of a balanced pair.
     EXPRESSION_START = BALANCED_PAIRS.map {|pair| pair.first }
@@ -45,6 +46,7 @@ module CoffeeScript
       remove_leading_newlines
       remove_mid_expression_newlines
       move_commas_outside_outdents
+      close_open_calls_and_indexes
       add_implicit_parentheses
       add_implicit_indentation
       ensure_balance(*BALANCED_PAIRS)
@@ -119,6 +121,35 @@
       end
     end
 
+    # We've tagged the opening parenthesis of a method call, and the opening
+    # bracket of an indexing operation. Match them with their close.
+    def close_open_calls_and_indexes
+      parens, brackets = [0], [0]
+      scan_tokens do |prev, token, post, i|
+        case token[0]
+        when :CALL_START then parens.push(0)
+        when :INDEX_START then brackets.push(0)
+        when '(' then parens[-1] += 1
+        when '[' then brackets[-1] += 1
+        when ')'
+          if parens.last == 0
+            parens.pop
+            token[0] = :CALL_END
+          else
+            parens[-1] -= 1
+          end
+        when ']'
+          if brackets.last == 0
+            brackets.pop
+            token[0] = :INDEX_END
+          else
+            brackets[-1] -= 1
+          end
+        end
+        next 1
+      end
+    end
+
     # Because our grammar is LALR(1), it can't handle some single-line
     # expressions that lack ending delimiters. Use the lexer to add the implicit
     # blocks, so it doesn't need to.
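
The new pass keeps one counter stack per delimiter kind: :CALL_START / :INDEX_START push a fresh counter, ordinary '(' and '[' bump the top one, and a closer is only retagged when the top counter is zero (meaning it belongs to the special opener rather than to a nested plain pair). A standalone sketch of the same idea over bare tags, rather than the rewriter's [tag, value] token arrays:

    # Standalone sketch of the counter-stack matching; bare tags only, illustrative.
    def close_opens(tags)
      parens, brackets = [0], [0]
      tags.map do |tag|
        case tag
        when :CALL_START
          parens.push(0)
          tag
        when :INDEX_START
          brackets.push(0)
          tag
        when '('
          parens[-1] += 1
          tag
        when '['
          brackets[-1] += 1
          tag
        when ')'
          if parens.last.zero?
            parens.pop
            :CALL_END          # this ')' closes a :CALL_START
          else
            parens[-1] -= 1    # it closes an ordinary '(' opened inside the call
            tag
          end
        when ']'
          if brackets.last.zero?
            brackets.pop
            :INDEX_END
          else
            brackets[-1] -= 1
            tag
          end
        else
          tag
        end
      end
    end

    # Roughly what the lexer would hand over for:  fn(list[i], (a + b))
    tags = [:IDENTIFIER, :CALL_START, :IDENTIFIER, :INDEX_START, :IDENTIFIER, ']',
            ',', '(', :IDENTIFIER, '+', :IDENTIFIER, ')', ')']
    p close_opens(tags)
    # The lone ']' and the final ')' come back as :INDEX_END and :CALL_END;
    # the grouping parens around (a + b) stay plain.
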
@@ -165,12 +196,12 @@ module CoffeeScript
         if (stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)) &&
             !(token[0] == :PARAM_START && prev[0] == ',')
           idx = token[0] == :OUTDENT ? i + 1 : i
-          stack.last.times { @tokens.insert(idx, [')', Value.new(')', token[1].line)]) }
+          stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
           size, stack[-1] = stack[-1] + 1, 0
           next size
         end
         next 1 unless IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
-        @tokens.insert(i, ['(', Value.new('(', token[1].line)])
+        @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
         stack[-1] += 1
         next token[0] == :PARAM_START ? 1 : 2
       end
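
With the last hunk, implicit parentheses are inserted as :CALL_START / :CALL_END as well (still carrying '(' and ')' as their values), so paren-less calls reach the parser through the same Arguments production as explicit ones. Illustrative only, since IMPLICIT_FUNC and IMPLICIT_CALL are defined outside this hunk: assuming :IDENTIFIER and :STRING are members, and that tokenize runs the Rewriter (as the updated unit test below implies), something like

    lex = CoffeeScript::Lexer.new      # same assumptions as the earlier sketches
    p lex.tokenize('print "hello"')
    # expected tag sequence, roughly: :IDENTIFIER, :CALL_START, :STRING, :CALL_END, "\n"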


@@ -1 +1 @@
[[:COMMENT, [" The cornerstone, an each implementation.", " Handles objects implementing forEach, arrays, and raw objects."]], ["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [:ASSIGN, ":"], [:PARAM_START, "("], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], [:PARAM_END, ")"], ["->", "->"], [:INDENT, 2], [:IDENTIFIER, "index"], [:ASSIGN, ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], [:INDENT, 2], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], [:INDENT, 2], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], ["(", "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [")", ")"], [:OUTDENT, 2], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], [:ELSE, "else"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], ["(", "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], ["[", "["], [:IDENTIFIER, "key"], ["]", "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [")", ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], ["(", "("], [:IDENTIFIER, "obj"], [")", ")"], [:OUTDENT, 2], [:OUTDENT, 2], [:CATCH, "catch"], [:IDENTIFIER, "e"], [:INDENT, 2], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:ISNT, "isnt"], [:IDENTIFIER, "breaker"], [:OUTDENT, 2], ["\n", "\n"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], ["\n", "\n"]]
[[:COMMENT, [" The cornerstone, an each implementation.", " Handles objects implementing forEach, arrays, and raw objects."]], ["\n", "\n"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "each"], [:ASSIGN, ":"], [:PARAM_START, "("], [:PARAM, "obj"], [",", ","], [:PARAM, "iterator"], [",", ","], [:PARAM, "context"], [:PARAM_END, ")"], ["->", "->"], [:INDENT, 2], [:IDENTIFIER, "index"], [:ASSIGN, ":"], [:NUMBER, "0"], ["\n", "\n"], [:TRY, "try"], [:INDENT, 2], [:IF, "if"], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], [:INDENT, 2], [:IDENTIFIER, "obj"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "forEach"], [:CALL_START, "("], [:IDENTIFIER, "iterator"], [",", ","], [:IDENTIFIER, "context"], [:CALL_END, ")"], [:OUTDENT, 2], [:ELSE, "else"], [:IF, "if"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArray"], [:CALL_START, "("], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:OR, "or"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "isArguments"], [:CALL_START, "("], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], [:CALL_START, "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [",", ","], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:FOR, "for"], [:IDENTIFIER, "item"], [",", ","], [:IDENTIFIER, "i"], [:IN, "in"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], [:ELSE, "else"], [:INDENT, 2], [:IDENTIFIER, "iterator"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "call"], [:CALL_START, "("], [:IDENTIFIER, "context"], [",", ","], [:IDENTIFIER, "obj"], [:INDEX_START, "["], [:IDENTIFIER, "key"], [:INDEX_END, "]"], [",", ","], [:IDENTIFIER, "key"], [",", ","], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:FOR, "for"], [:IDENTIFIER, "key"], [:IN, "in"], [:IDENTIFIER, "_"], [:PROPERTY_ACCESS, "."], [:IDENTIFIER, "keys"], [:CALL_START, "("], [:IDENTIFIER, "obj"], [:CALL_END, ")"], [:OUTDENT, 2], [:OUTDENT, 2], [:CATCH, "catch"], [:IDENTIFIER, "e"], [:INDENT, 2], [:THROW, "throw"], [:IDENTIFIER, "e"], [:IF, "if"], [:IDENTIFIER, "e"], [:ISNT, "isnt"], [:IDENTIFIER, "breaker"], [:OUTDENT, 2], ["\n", "\n"], [:IDENTIFIER, "obj"], [:OUTDENT, 2], ["\n", "\n"]]


@@ -34,8 +34,8 @@ class LexerTest < Test::Unit::TestCase
   def test_lexing_if_statement
     code = "clap_your_hands() if happy"
-    assert @lex.tokenize(code) == [[:IDENTIFIER, "clap_your_hands"], ["(", "("],
-      [")", ")"], [:IF, "if"], [:IDENTIFIER, "happy"], ["\n", "\n"]]
+    assert @lex.tokenize(code) == [[:IDENTIFIER, "clap_your_hands"], [:CALL_START, "("],
+      [:CALL_END, ")"], [:IF, "if"], [:IDENTIFIER, "happy"], ["\n", "\n"]]
   end
 
   def test_lexing_comment