Bug 1304737 - Do not ignore trailing .* on matchOnly RegExp execution. r=h4writer, a=ritu
author Tooru Fujisawa <arai_a@mac.com>
Sat, 24 Sep 2016 05:10:55 +0900
changeset 350421 2704e305065955cf0f5e448202fa3df1014c0bae
parent 350420 835bdb83d00506514f0b68b62afa3dfc481079cb
child 350422 d1baa416e1b9c1a3729153301b4b3ef1786236ba
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers h4writer, ritu
bugs 1304737
milestone 50.0
Bug 1304737 - Do not ignore trailing .* on matchOnly RegExp execution. r=h4writer, a=ritu
js/src/irregexp/RegExpParser.cpp
js/src/irregexp/RegExpParser.h
js/src/tests/ecma_6/RegExp/test-trailing.js
js/src/vm/RegExpObject.cpp
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -1827,33 +1827,37 @@ RegExpParser<CharT>::ParseDisjunction()
 
 template class irregexp::RegExpParser<Latin1Char>;
 template class irregexp::RegExpParser<char16_t>;
 
 template <typename CharT>
 static bool
 ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
              bool multiline, bool match_only, bool unicode, bool ignore_case,
-             RegExpCompileData* data)
+             bool global, bool sticky, RegExpCompileData* data)
 {
     if (match_only) {
         // Try to strip a leading '.*' from the RegExp, but only if it is not
         // followed by a '?' (which will affect how the .* is parsed). This
         // pattern will affect the captures produced by the RegExp, but not
         // whether there is a match or not.
         if (length >= 3 && chars[0] == '.' && chars[1] == '*' && chars[2] != '?') {
             chars += 2;
             length -= 2;
         }
 
         // Try to strip a trailing '.*' from the RegExp, which as above will
         // affect the captures but not whether there is a match. Only do this
-        // when there are no other meta characters in the RegExp, so that we
-        // are sure this will not affect how the RegExp is parsed.
+        // when the following conditions are met:
+        //   1. there are no other meta characters in the RegExp, so that we
+        //      are sure this will not affect how the RegExp is parsed
+        //   2. global and sticky flags are not set, as lastIndex needs to be
+        //      set properly on global or sticky match
         if (length >= 3 && !HasRegExpMetaChars(chars, length - 2) &&
+            !global && !sticky &&
             chars[length - 2] == '.' && chars[length - 1] == '*')
         {
             length -= 2;
         }
     }
 
     RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
     data->tree = parser.ParsePattern();
@@ -1864,24 +1868,24 @@ ParsePattern(frontend::TokenStream& ts, 
     data->contains_anchor = parser.contains_anchor();
     data->capture_count = parser.captures_started();
     return true;
 }
 
 bool
 irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
                        bool multiline, bool match_only, bool unicode, bool ignore_case,
-                       RegExpCompileData* data)
+                       bool global, bool sticky, RegExpCompileData* data)
 {
     JS::AutoCheckCannotGC nogc;
     return str->hasLatin1Chars()
            ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
-                            multiline, match_only, unicode, ignore_case, data)
+                            multiline, match_only, unicode, ignore_case, global, sticky, data)
            : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
-                            multiline, match_only, unicode, ignore_case, data);
+                            multiline, match_only, unicode, ignore_case, global, sticky, data);
 }
 
 template <typename CharT>
 static bool
 ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
                    bool unicode)
 {
     LifoAllocScope scope(&alloc);
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -39,17 +39,17 @@ namespace frontend {
     class TokenStream;
 }
 
 namespace irregexp {
 
 bool
 ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
              bool multiline, bool match_only, bool unicode, bool ignore_case,
-             RegExpCompileData* data);
+             bool global, bool sticky, RegExpCompileData* data);
 
 bool
 ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
                    bool unicode);
 
 // A BufferedVector is an automatically growing list, just like (and backed
 // by) a Vector, that is optimized for the case of adding and removing
 // a single element. The last element added is stored outside the backing list,
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/test-trailing.js
@@ -0,0 +1,31 @@
+var BUGNUMBER = 1304737;
+var summary = "Trailing .* should not be ignored on matchOnly match.";
+
+print(BUGNUMBER + ": " + summary);
+
+function test(r, lastIndexIsZero) {
+    r.lastIndex = 0;
+    r.test("foo");
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : 3);
+
+    r.lastIndex = 0;
+    r.test("foo\nbar");
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : 3);
+
+    var input = "foo" + ".bar".repeat(20000);
+    r.lastIndex = 0;
+    r.test(input);
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : input.length);
+
+    r.lastIndex = 0;
+    r.test(input + "\nbaz");
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : input.length);
+}
+
+test(/f.*/, true);
+test(/f.*/g, false);
+test(/f.*/y, false);
+test(/f.*/gy, false);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -573,17 +573,18 @@ RegExpShared::compile(JSContext* cx, Han
     CompileOptions options(cx);
     TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
 
     LifoAllocScope scope(&cx->tempLifoAlloc());
 
     /* Parse the pattern. */
     irregexp::RegExpCompileData data;
     if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
-                                multiline(), mode == MatchOnly, unicode(), ignoreCase(), &data))
+                                multiline(), mode == MatchOnly, unicode(), ignoreCase(),
+                                global(), sticky(), &data))
     {
         return false;
     }
 
     this->parenCount = data.capture_count;
 
     irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input,
                                                          false /* global() */,