Bug 1304737 - Do not ignore trailing .* on matchOnly RegExp execution. r=h4writer
author Tooru Fujisawa <arai_a@mac.com>
Sat, 24 Sep 2016 05:10:55 +0900
changeset 315184 f3800637757590adbf18a09c18b9351a2d04fa76
parent 315183 13fe99de32f645984008a2ddc31923093a9fa43a
child 315185 9f3a85f50cff5d0f3a7c32c7ad9c703b8ed7d031
push id 30744
push user ihsiao@mozilla.com
push date Mon, 26 Sep 2016 10:35:40 +0000
treeherder mozilla-central@c55bcb7c777e [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers h4writer
bugs 1304737
milestone 52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1304737 - Do not ignore trailing .* on matchOnly RegExp execution. r=h4writer
js/src/irregexp/RegExpParser.cpp
js/src/irregexp/RegExpParser.h
js/src/tests/ecma_6/RegExp/test-trailing.js
js/src/vm/RegExpObject.cpp
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -1827,33 +1827,37 @@ RegExpParser<CharT>::ParseDisjunction()
 
 template class irregexp::RegExpParser<Latin1Char>;
 template class irregexp::RegExpParser<char16_t>;
 
 template <typename CharT>
 static bool
 ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
              bool multiline, bool match_only, bool unicode, bool ignore_case,
-             RegExpCompileData* data)
+             bool global, bool sticky, RegExpCompileData* data)
 {
     if (match_only) {
         // Try to strip a leading '.*' from the RegExp, but only if it is not
         // followed by a '?' (which will affect how the .* is parsed). This
         // pattern will affect the captures produced by the RegExp, but not
         // whether there is a match or not.
         if (length >= 3 && chars[0] == '.' && chars[1] == '*' && chars[2] != '?') {
             chars += 2;
             length -= 2;
         }
 
         // Try to strip a trailing '.*' from the RegExp, which as above will
         // affect the captures but not whether there is a match. Only do this
-        // when there are no other meta characters in the RegExp, so that we
-        // are sure this will not affect how the RegExp is parsed.
+        // when the following conditions are met:
+        //   1. there are no other meta characters in the RegExp, so that we
+        //      are sure this will not affect how the RegExp is parsed
+        //   2. global and sticky flags are not set, as lastIndex needs to be
+        //      set properly on global or sticky match
         if (length >= 3 && !HasRegExpMetaChars(chars, length - 2) &&
+            !global && !sticky &&
             chars[length - 2] == '.' && chars[length - 1] == '*')
         {
             length -= 2;
         }
     }
 
     RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
     data->tree = parser.ParsePattern();
@@ -1864,24 +1868,24 @@ ParsePattern(frontend::TokenStream& ts, 
     data->contains_anchor = parser.contains_anchor();
     data->capture_count = parser.captures_started();
     return true;
 }
 
 bool
 irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
                        bool multiline, bool match_only, bool unicode, bool ignore_case,
-                       RegExpCompileData* data)
+                       bool global, bool sticky, RegExpCompileData* data)
 {
     JS::AutoCheckCannotGC nogc;
     return str->hasLatin1Chars()
            ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
-                            multiline, match_only, unicode, ignore_case, data)
+                            multiline, match_only, unicode, ignore_case, global, sticky, data)
            : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
-                            multiline, match_only, unicode, ignore_case, data);
+                            multiline, match_only, unicode, ignore_case, global, sticky, data);
 }
 
 template <typename CharT>
 static bool
 ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
                    bool unicode)
 {
     LifoAllocScope scope(&alloc);
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -39,17 +39,17 @@ namespace frontend {
     class TokenStream;
 }
 
 namespace irregexp {
 
 bool
 ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
              bool multiline, bool match_only, bool unicode, bool ignore_case,
-             RegExpCompileData* data);
+             bool global, bool sticky, RegExpCompileData* data);
 
 bool
 ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
                    bool unicode);
 
 // A BufferedVector is an automatically growing list, just like (and backed
 // by) a Vector, that is optimized for the case of adding and removing
 // a single element. The last element added is stored outside the backing list,
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/test-trailing.js
@@ -0,0 +1,31 @@
+var BUGNUMBER = 1304737;
+var summary = "Trailing .* should not be ignored on matchOnly match.";
+
+print(BUGNUMBER + ": " + summary);
+
+function test(r, lastIndexIsZero) {
+    r.lastIndex = 0;
+    r.test("foo");
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : 3);
+
+    r.lastIndex = 0;
+    r.test("foo\nbar");
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : 3);
+
+    var input = "foo" + ".bar".repeat(20000);
+    r.lastIndex = 0;
+    r.test(input);
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : input.length);
+
+    r.lastIndex = 0;
+    r.test(input + "\nbaz");
+    assertEq(r.lastIndex, lastIndexIsZero ? 0 : input.length);
+}
+
+test(/f.*/, true);
+test(/f.*/g, false);
+test(/f.*/y, false);
+test(/f.*/gy, false);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -573,17 +573,18 @@ RegExpShared::compile(JSContext* cx, Han
     CompileOptions options(cx);
     TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
 
     LifoAllocScope scope(&cx->tempLifoAlloc());
 
     /* Parse the pattern. */
     irregexp::RegExpCompileData data;
     if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
-                                multiline(), mode == MatchOnly, unicode(), ignoreCase(), &data))
+                                multiline(), mode == MatchOnly, unicode(), ignoreCase(),
+                                global(), sticky(), &data))
     {
         return false;
     }
 
     this->parenCount = data.capture_count;
 
     irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input,
                                                          false /* global() */,