AST: Track exclusive end line/column (#5156)

* updated grammar (with patched Jison)

* passing tests

* updated grammar

* updated grammar

* updated grammar

* updated grammar

* cleanup

* refactor
This commit is contained in:
Julian Rosse 2019-02-08 00:55:11 -05:00 committed by Geoffrey Booth
parent 42402da526
commit 6a8e6a4078
12 changed files with 355 additions and 306 deletions

View File

@ -48,7 +48,7 @@
// is added to the first parameter passed in, and the parameter is returned.
// If the parameter is not a node, it will just be passed through unaffected.
getAddDataToNodeFunctionString = function(first, last, forceUpdateLocation = true) {
return `yy.addDataToNode(yy, @${first}, ${last ? `@${last}` : 'null'}, ${forceUpdateLocation ? 'true' : 'false'})`;
return `yy.addDataToNode(yy, @${first}, ${first[0] === '$' ? '$$' : '$'}${first}, ${last ? `@${last}, ${last[0] === '$' ? '$$' : '$'}${last}` : 'null, null'}, ${forceUpdateLocation ? 'true' : 'false'})`;
};
returnsLoc = /^LOC/.test(action);
action = action.replace(/LOC\(([0-9]*)\)/g, getAddDataToNodeFunctionString('$1'));
@ -1114,11 +1114,15 @@
RangeDots: [
o('..',
function() {
return 'inclusive';
return {
exclusive: false
};
}),
o('...',
function() {
return 'exclusive';
return {
exclusive: true
};
})
],
// The CoffeeScript range literal.
@ -1127,13 +1131,13 @@
function() {
return new Range($2,
$4,
$3);
$3.exclusive ? 'exclusive' : 'inclusive');
}),
o('[ ExpressionLine RangeDots Expression ]',
function() {
return new Range($2,
$4,
$3);
$3.exclusive ? 'exclusive' : 'inclusive');
})
],
// Array slice literals.
@ -1142,37 +1146,37 @@
function() {
return new Range($1,
$3,
$2);
$2.exclusive ? 'exclusive' : 'inclusive');
}),
o('Expression RangeDots',
function() {
return new Range($1,
null,
$2);
$2.exclusive ? 'exclusive' : 'inclusive');
}),
o('ExpressionLine RangeDots Expression',
function() {
return new Range($1,
$3,
$2);
$2.exclusive ? 'exclusive' : 'inclusive');
}),
o('ExpressionLine RangeDots',
function() {
return new Range($1,
null,
$2);
$2.exclusive ? 'exclusive' : 'inclusive');
}),
o('RangeDots Expression',
function() {
return new Range(null,
$2,
$1);
$1.exclusive ? 'exclusive' : 'inclusive');
}),
o('RangeDots',
function() {
return new Range(null,
null,
$1);
$1.exclusive ? 'exclusive' : 'inclusive');
})
],
// The **ArgList** is the list of objects passed into a function call

View File

@ -158,6 +158,8 @@
first_column: first.first_column,
last_line: last.last_line,
last_column: last.last_column,
last_line_exclusive: last.last_line_exclusive,
last_column_exclusive: last.last_column_exclusive,
range: [first.range[0], last.range[1]]
};
}
@ -199,12 +201,15 @@
// This returns a function which takes an object as a parameter, and if that
// object is an AST node, updates that object's locationData.
// The object is returned either way.
exports.addDataToNode = function(parserState, first, last, forceUpdateLocation = true) {
exports.addDataToNode = function(parserState, firstLocationData, firstValue, lastLocationData, lastValue, forceUpdateLocation = true) {
return function(obj) {
var objHash, ref1;
var locationData, objHash, ref1, ref2, ref3;
// Add location data.
if (((obj != null ? obj.updateLocationDataIfMissing : void 0) != null) && (first != null)) {
obj.updateLocationDataIfMissing(buildLocationData(first, last), forceUpdateLocation);
locationData = buildLocationData((ref1 = firstValue != null ? firstValue.locationData : void 0) != null ? ref1 : firstLocationData, (ref2 = lastValue != null ? lastValue.locationData : void 0) != null ? ref2 : lastLocationData);
if (((obj != null ? obj.updateLocationDataIfMissing : void 0) != null) && (firstLocationData != null)) {
obj.updateLocationDataIfMissing(locationData, forceUpdateLocation);
} else {
obj.locationData = locationData;
}
// Add comments, building the dictionary of token data if it hasn't been
// built yet.
@ -213,7 +218,7 @@
}
if (obj.locationData != null) {
objHash = buildLocationHash(obj.locationData);
if (((ref1 = parserState.tokenData[objHash]) != null ? ref1.comments : void 0) != null) {
if (((ref3 = parserState.tokenData[objHash]) != null ? ref3.comments : void 0) != null) {
attachCommentsToNode(parserState.tokenData[objHash].comments, obj);
}
}

View File

@ -7,7 +7,7 @@
// [tag, value, locationData]
// where locationData is {first_line, first_column, last_line, last_column}, which is a
// where locationData is {first_line, first_column, last_line, last_column, last_line_exclusive, last_column_exclusive}, which is a
// format that can be fed directly into [Jison](https://github.com/zaach/jison). These
// are read by jison in the `parser.lexer` function defined in coffeescript.coffee.
var BOM, BOOL, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_ALIAS_MAP, COFFEE_KEYWORDS, COMMENT, COMPARABLE_LEFT_SIDE, COMPARE, COMPOUND_ASSIGN, CSX_ATTRIBUTE, CSX_FRAGMENT_IDENTIFIER, CSX_IDENTIFIER, CSX_INTERPOLATION, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HERE_JSTOKEN, IDENTIFIER, INDENTABLE_CLOSERS, INDEXABLE, INSIDE_CSX, INVERSES, JSTOKEN, JS_KEYWORDS, LINE_BREAK, LINE_CONTINUER, Lexer, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, Rewriter, SHIFT, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_SINGLE, STRING_START, TRAILING_SPACES, UNARY, UNARY_MATH, UNFINISHED, VALID_FLAGS, WHITESPACE, addTokenData, attachCommentsToNode, compact, count, invertLiterate, isForFrom, isUnassignable, key, locationDataToString, merge, repeat, replaceUnicodeCodePointEscapes, starts, throwSyntaxError,
@ -1212,6 +1212,7 @@
} else {
lastToken[2].last_column += closingDelimiter.length;
}
lastToken[2].last_column_exclusive += closingDelimiter.length;
if (lastToken[1].length === 0) {
lastToken[2].last_column -= 1;
lastToken[2].range[1] -= 1;
@ -1299,6 +1300,8 @@
first_column: lparen[2].first_column,
last_line: lastToken[2].last_line,
last_column: lastToken[2].last_column,
last_line_exclusive: lastToken[2].last_line_exclusive,
last_column_exclusive: lastToken[2].last_column_exclusive,
range: [lparen[2].range[0],
lastToken[2].range[1]]
}
@ -1310,6 +1313,8 @@
first_column: lastToken[2].last_column,
last_line: lastToken[2].last_line,
last_column: lastToken[2].last_column,
last_line_exclusive: lastToken[2].last_line_exclusive,
last_column_exclusive: lastToken[2].last_column_exclusive,
range: lastToken[2].range
};
}
@ -1373,6 +1378,7 @@
// so if last_column == first_column, then we're looking at a character of length 1.
lastCharacter = length > 0 ? length - 1 : 0;
[locationData.last_line, locationData.last_column, endOffset] = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter);
[locationData.last_line_exclusive, locationData.last_column_exclusive] = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter + 1);
locationData.range[1] = length > 0 ? endOffset + 1 : endOffset;
return locationData;
}

View File

@ -7483,13 +7483,19 @@
first_column: locationDataA.first_column
}, justLeading ? {
last_line: locationDataA.last_line,
last_column: locationDataA.last_column
last_column: locationDataA.last_column,
last_line_exclusive: locationDataA.last_line_exclusive,
last_column_exclusive: locationDataA.last_column_exclusive
} : isLocationDataEndGreater(locationDataA, locationDataB) ? {
last_line: locationDataA.last_line,
last_column: locationDataA.last_column
last_column: locationDataA.last_column,
last_line_exclusive: locationDataA.last_line_exclusive,
last_column_exclusive: locationDataA.last_column_exclusive
} : {
last_line: locationDataB.last_line,
last_column: locationDataB.last_column
last_column: locationDataB.last_column,
last_line_exclusive: locationDataB.last_line_exclusive,
last_column_exclusive: locationDataB.last_column_exclusive
}, {
range: [justEnding ? locationDataA.range[0] : lesser(locationDataA.range[0], locationDataB.range[0]), justLeading ? locationDataA.range[1] : greater(locationDataA.range[1], locationDataB.range[1])]
});
@ -7522,7 +7528,7 @@
};
// Convert Jison-style node class location data to Babel-style location data
jisonLocationDataToAstLocationData = function({first_line, first_column, last_line, last_column, range}) {
jisonLocationDataToAstLocationData = function({first_line, first_column, last_line_exclusive, last_column_exclusive, range}) {
return {
loc: {
start: {
@ -7530,8 +7536,8 @@
column: first_column
},
end: {
line: last_line + 1,
column: last_column + 1
line: last_line_exclusive + 1,
column: last_column_exclusive
}
},
range: [range[0], range[1]],

File diff suppressed because it is too large Load Diff

View File

@ -749,6 +749,8 @@
first_column: column,
last_line: line,
last_column: column,
last_line_exclusive: line,
last_column_exclusive: column,
range: [rangeIndex, rangeIndex]
};
return 1;
@ -770,6 +772,8 @@
first_column: prevLocationData.last_column,
last_line: prevLocationData.last_line,
last_column: prevLocationData.last_column,
last_line_exclusive: prevLocationData.last_line_exclusive,
last_column_exclusive: prevLocationData.last_column_exclusive,
range: prevLocationData.range
};
return 1;

View File

@ -45,7 +45,7 @@ o = (patternString, action, options) ->
# is added to the first parameter passed in, and the parameter is returned.
# If the parameter is not a node, it will just be passed through unaffected.
getAddDataToNodeFunctionString = (first, last, forceUpdateLocation = yes) ->
"yy.addDataToNode(yy, @#{first}, #{if last then "@#{last}" else 'null'}, #{if forceUpdateLocation then 'true' else 'false'})"
"yy.addDataToNode(yy, @#{first}, #{if first[0] is '$' then '$$' else '$'}#{first}, #{if last then "@#{last}, #{if last[0] is '$' then '$$' else '$'}#{last}" else 'null, null'}, #{if forceUpdateLocation then 'true' else 'false'})"
returnsLoc = /^LOC/.test action
action = action.replace /LOC\(([0-9]*)\)/g, getAddDataToNodeFunctionString('$1')
@ -550,24 +550,24 @@ grammar =
# Inclusive and exclusive range dots.
RangeDots: [
o '..', -> 'inclusive'
o '...', -> 'exclusive'
o '..', -> exclusive: no
o '...', -> exclusive: yes
]
# The CoffeeScript range literal.
Range: [
o '[ Expression RangeDots Expression ]', -> new Range $2, $4, $3
o '[ ExpressionLine RangeDots Expression ]', -> new Range $2, $4, $3
o '[ Expression RangeDots Expression ]', -> new Range $2, $4, if $3.exclusive then 'exclusive' else 'inclusive'
o '[ ExpressionLine RangeDots Expression ]', -> new Range $2, $4, if $3.exclusive then 'exclusive' else 'inclusive'
]
# Array slice literals.
Slice: [
o 'Expression RangeDots Expression', -> new Range $1, $3, $2
o 'Expression RangeDots', -> new Range $1, null, $2
o 'ExpressionLine RangeDots Expression', -> new Range $1, $3, $2
o 'ExpressionLine RangeDots', -> new Range $1, null, $2
o 'RangeDots Expression', -> new Range null, $2, $1
o 'RangeDots', -> new Range null, null, $1
o 'Expression RangeDots Expression', -> new Range $1, $3, if $2.exclusive then 'exclusive' else 'inclusive'
o 'Expression RangeDots', -> new Range $1, null, if $2.exclusive then 'exclusive' else 'inclusive'
o 'ExpressionLine RangeDots Expression', -> new Range $1, $3, if $2.exclusive then 'exclusive' else 'inclusive'
o 'ExpressionLine RangeDots', -> new Range $1, null, if $2.exclusive then 'exclusive' else 'inclusive'
o 'RangeDots Expression', -> new Range null, $2, if $1.exclusive then 'exclusive' else 'inclusive'
o 'RangeDots', -> new Range null, null, if $1.exclusive then 'exclusive' else 'inclusive'
]
# The **ArgList** is the list of objects passed into a function call

View File

@ -107,6 +107,8 @@ buildLocationData = (first, last) ->
first_column: first.first_column
last_line: last.last_line
last_column: last.last_column
last_line_exclusive: last.last_line_exclusive
last_column_exclusive: last.last_column_exclusive
range: [
first.range[0]
last.range[1]
@ -136,11 +138,14 @@ buildTokenDataDictionary = (parserState) ->
# This returns a function which takes an object as a parameter, and if that
# object is an AST node, updates that object's locationData.
# The object is returned either way.
exports.addDataToNode = (parserState, first, last, forceUpdateLocation = yes) ->
exports.addDataToNode = (parserState, firstLocationData, firstValue, lastLocationData, lastValue, forceUpdateLocation = yes) ->
(obj) ->
# Add location data.
if obj?.updateLocationDataIfMissing? and first?
obj.updateLocationDataIfMissing buildLocationData(first, last), forceUpdateLocation
locationData = buildLocationData(firstValue?.locationData ? firstLocationData, lastValue?.locationData ? lastLocationData)
if obj?.updateLocationDataIfMissing? and firstLocationData?
obj.updateLocationDataIfMissing locationData, forceUpdateLocation
else
obj.locationData = locationData
# Add comments, building the dictionary of token data if it hasn't been
# built yet.

View File

@ -5,7 +5,7 @@
#
# [tag, value, locationData]
#
# where locationData is {first_line, first_column, last_line, last_column}, which is a
# where locationData is {first_line, first_column, last_line, last_column, last_line_exclusive, last_column_exclusive}, which is a
# format that can be fed directly into [Jison](https://github.com/zaach/jison). These
# are read by jison in the `parser.lexer` function defined in coffeescript.coffee.
@ -851,6 +851,7 @@ exports.Lexer = class Lexer
lastToken[2].last_column = closingDelimiter.length - 1
else
lastToken[2].last_column += closingDelimiter.length
lastToken[2].last_column_exclusive += closingDelimiter.length
if lastToken[1].length is 0
lastToken[2].last_column -= 1
lastToken[2].range[1] -= 1
@ -903,10 +904,12 @@ exports.Lexer = class Lexer
if lparen
[..., lastToken] = tokens
lparen.origin = ['STRING', null,
first_line: lparen[2].first_line
first_column: lparen[2].first_column
last_line: lastToken[2].last_line
last_column: lastToken[2].last_column
first_line: lparen[2].first_line
first_column: lparen[2].first_column
last_line: lastToken[2].last_line
last_column: lastToken[2].last_column
last_line_exclusive: lastToken[2].last_line_exclusive
last_column_exclusive: lastToken[2].last_column_exclusive
range: [
lparen[2].range[0]
lastToken[2].range[1]
@ -915,11 +918,13 @@ exports.Lexer = class Lexer
lparen[2] = lparen.origin[2]
rparen = @token 'STRING_END', ')'
rparen[2] =
first_line: lastToken[2].last_line
first_column: lastToken[2].last_column
last_line: lastToken[2].last_line
last_column: lastToken[2].last_column
range: lastToken[2].range
first_line: lastToken[2].last_line
first_column: lastToken[2].last_column
last_line: lastToken[2].last_line
last_column: lastToken[2].last_column
last_line_exclusive: lastToken[2].last_line_exclusive
last_column_exclusive: lastToken[2].last_column_exclusive
range: lastToken[2].range
# Pairs up a closing token, ensuring that all listed pairs of tokens are
# correctly balanced throughout the course of the token stream.
@ -973,6 +978,8 @@ exports.Lexer = class Lexer
lastCharacter = if length > 0 then (length - 1) else 0
[locationData.last_line, locationData.last_column, endOffset] =
@getLineAndColumnFromChunk offsetInChunk + lastCharacter
[locationData.last_line_exclusive, locationData.last_column_exclusive] =
@getLineAndColumnFromChunk offsetInChunk + lastCharacter + 1
locationData.range[1] = if length > 0 then endOffset + 1 else endOffset
locationData

View File

@ -4944,15 +4944,21 @@ exports.mergeLocationData = mergeLocationData = (locationDataA, locationDataB, {
first_column: locationDataA.first_column
,
if justLeading
last_line: locationDataA.last_line
last_column: locationDataA.last_column
last_line: locationDataA.last_line
last_column: locationDataA.last_column
last_line_exclusive: locationDataA.last_line_exclusive
last_column_exclusive: locationDataA.last_column_exclusive
else
if isLocationDataEndGreater locationDataA, locationDataB
last_line: locationDataA.last_line
last_column: locationDataA.last_column
last_line: locationDataA.last_line
last_column: locationDataA.last_column
last_line_exclusive: locationDataA.last_line_exclusive
last_column_exclusive: locationDataA.last_column_exclusive
else
last_line: locationDataB.last_line
last_column: locationDataB.last_column
last_line: locationDataB.last_line
last_column: locationDataB.last_column
last_line_exclusive: locationDataB.last_line_exclusive
last_column_exclusive: locationDataB.last_column_exclusive
,
range: [
if justEnding
@ -5023,15 +5029,15 @@ exports.mergeAstLocationData = mergeAstLocationData = (nodeA, nodeB, {justLeadin
greater nodeA.end, nodeB.end
# Convert Jison-style node class location data to Babel-style location data
jisonLocationDataToAstLocationData = ({first_line, first_column, last_line, last_column, range}) ->
jisonLocationDataToAstLocationData = ({first_line, first_column, last_line_exclusive, last_column_exclusive, range}) ->
return
loc:
start:
line: first_line + 1
column: first_column
end:
line: last_line + 1
column: last_column + 1
line: last_line_exclusive + 1
column: last_column_exclusive
range: [
range[0]
range[1]

View File

@ -520,10 +520,12 @@ exports.Rewriter = class Rewriter
line = column = 0
rangeIndex = 0
token[2] = {
first_line: line
first_column: column
last_line: line
last_column: column
first_line: line
first_column: column
last_line: line
last_column: column
last_line_exclusive: line
last_column_exclusive: column
range: [rangeIndex, rangeIndex]
}
return 1
@ -538,11 +540,13 @@ exports.Rewriter = class Rewriter
(token.generated and token[0] is '}')
prevLocationData = tokens[i - 1][2]
token[2] =
first_line: prevLocationData.last_line
first_column: prevLocationData.last_column
last_line: prevLocationData.last_line
last_column: prevLocationData.last_column
range: prevLocationData.range
first_line: prevLocationData.last_line
first_column: prevLocationData.last_column
last_line: prevLocationData.last_line
last_column: prevLocationData.last_column
last_line_exclusive: prevLocationData.last_line_exclusive
last_column_exclusive: prevLocationData.last_column_exclusive
range: prevLocationData.range
return 1
# Because our grammar is LALR(1), it can't handle some single-line

View File

@ -3511,8 +3511,7 @@ test "AST location data as expected for Code node", ->
column: 2
end:
line: 1
# column: 2 TODO: make this accurate/should match range?
column: 3
column: 2
start: 0
end: 2
range: [0, 2]
@ -3522,5 +3521,4 @@ test "AST location data as expected for Code node", ->
column: 0
end:
line: 1
# column: 2
column: 3
column: 2