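Making the lexer stricter about regex flags: a candidate regex literal must now be followed either by 1–4 flag characters (`i`, `m`, `g`, `y`) ending at a word boundary or by a non-word character; otherwise it is not lexed as a regex (so `a()/id - b()/id` is treated as division).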

2010-05-14 09:14:41 -04:00 · 2010-05-14 09:14:41 -04:00 · 8136c5f3de
parent 45669e08c6
commit 8136c5f3de
3 changed files with 21 additions and 7 deletions
--- a/lib/lexer.js
+++ b/lib/lexer.js
@ -1,5 +1,5 @@
 (function(){
-  var ACCESSORS, ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, CONVERSIONS, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_ESCAPE, REGEX_FLAGS, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts;
+  var ACCESSORS, ASSIGNMENT, CALLABLE, CODE, COFFEE_ALIASES, COFFEE_KEYWORDS, COMMENT, COMMENT_CLEANER, CONVERSIONS, HALF_ASSIGNMENTS, HEREDOC, HEREDOC_INDENT, IDENTIFIER, INTERPOLATION, JS_CLEANER, JS_FORBIDDEN, JS_KEYWORDS, KEYWORDS, LAST_DENT, LAST_DENTS, LINE_BREAK, Lexer, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX_END, REGEX_ESCAPE, REGEX_INTERPOLATION, REGEX_START, RESERVED, Rewriter, STRING_NEWLINES, WHITESPACE, _a, _b, _c, balanced_string, compact, count, helpers, include, starts;
  var __slice = Array.prototype.slice;
  // The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt
  // matches against the beginning of the source code. When a match is found,
@ -239,7 +239,7 @@
    // JavaScript and Ruby, borrow slash balancing from `@balanced_token`, and
    // borrow interpolation from `@interpolate_string`.
    Lexer.prototype.regex_token = function regex_token() {
-      var flags, regex, str;
+      var end, flags, regex, str;
      if (!(this.chunk.match(REGEX_START))) {
        return false;
      }
@ -249,7 +249,12 @@
      if (!(regex = this.balanced_token(['/', '/']))) {
        return false;
      }
-      regex += (flags = this.chunk.substr(regex.length).match(REGEX_FLAGS));
+      if (!(end = this.chunk.substr(regex.length).match(REGEX_END))) {
+        return false;
+      }
+      if (end[2]) {
+        regex += (flags = end[2]);
+      }
      if (regex.match(REGEX_INTERPOLATION)) {
        str = regex.substring(1).split('/')[0];
        str = str.replace(REGEX_ESCAPE, function(escaped) {
@ -662,7 +667,7 @@
  // Regex-matching-regexes.
  REGEX_START = /^\/[^\/ ]/;
  REGEX_INTERPOLATION = /([^\\]\$[a-zA-Z_@]|[^\\]\$\{.*[^\\]\})/;
-  REGEX_FLAGS = /^[imgy]{0,4}/;
+  REGEX_END = /^(([imgy]{1,4})\b|\W)/;
  REGEX_ESCAPE = /\\[^\$]/g;
  // Token cleaning regexes.
  JS_CLEANER = /(^`|`$)/g;
--- a/src/lexer.coffee
+++ b/src/lexer.coffee
@ -165,7 +165,8 @@ exports.Lexer: class Lexer
    return false unless @chunk.match REGEX_START
    return false if     include NOT_REGEX, @tag()
    return false unless regex: @balanced_token ['/', '/']
-    regex: + (flags: @chunk.substr(regex.length).match REGEX_FLAGS)
+    return false unless end: @chunk.substr(regex.length).match REGEX_END
+    regex: + flags: end[2] if end[2]
    if regex.match REGEX_INTERPOLATION
      str: regex.substring(1).split('/')[0]
      str: str.replace REGEX_ESCAPE, (escaped) -> '\\' + escaped
@ -493,7 +494,7 @@ ASSIGNMENT    : /^(:|=)$/
 # Regex-matching-regexes.
 REGEX_START        : /^\/[^\/ ]/
 REGEX_INTERPOLATION: /([^\\]\$[a-zA-Z_@]|[^\\]\$\{.*[^\\]\})/
-REGEX_FLAGS        : /^[imgy]{0,4}/
+REGEX_END          : /^(([imgy]{1,4})\b|\W)/
 REGEX_ESCAPE       : /\\[^\$]/g

 # Token cleaning regexes.
--- a/test/test_regexps.coffee
+++ b/test/test_regexps.coffee
@ -12,4 +12,12 @@ g: 1

 ok y / x/g is 2

-ok 'http://google.com'.match(/:\/\/goog/)
+ok 'http://google.com'.match(/:\/\/goog/)
+
+obj: {
+  width:  -> 10
+  height: -> 20
+}
+id: 2
+
+ok (obj.width()/id - obj.height()/id) is -5