Bug 773687 - Fix assertion pattern in RegExp with sticky flag. r=till
authorTooru Fujisawa <arai_a@mac.com>
Sun, 20 Sep 2015 00:00:36 +0900
changeset 263644 a567df6edc075cddf0d3c66d95c7a1e0cdd067ac
parent 263643 79f59da627671ffde98e3d27cff648c6d92c2251
child 263645 a43a384dbaeae57ae3bd483ffd9592a91ce56731
push id65389
push userarai_a@mac.com
push dateTue, 22 Sep 2015 07:01:55 +0000
treeherdermozilla-inbound@a567df6edc07 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerstill
bugs773687
milestone44.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 773687 - Fix assertion pattern in RegExp with sticky flag. r=till
js/src/irregexp/RegExpEngine.cpp
js/src/irregexp/RegExpEngine.h
js/src/jsstr.cpp
js/src/jsstr.h
js/src/tests/ecma_6/RegExp/sticky.js
js/src/vm/MatchPairs.h
js/src/vm/RegExpObject.cpp
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -1646,17 +1646,17 @@ IsNativeRegExpEnabled(JSContext* cx)
 #else
     return cx->runtime()->options().nativeRegExp();
 #endif
 }
 
 RegExpCode
 irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
                          HandleLinearString sample, bool is_global, bool ignore_case,
-                         bool is_ascii, bool match_only, bool force_bytecode)
+                         bool is_ascii, bool match_only, bool force_bytecode, bool sticky)
 {
     if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
         JS_ReportError(cx, "regexp too big");
         return RegExpCode();
     }
 
     LifoAlloc& alloc = cx->tempLifoAlloc();
     RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii, match_only);
@@ -1672,17 +1672,17 @@ irregexp::CompilePattern(JSContext* cx, 
 
     // Wrap the body of the regexp in capture #0.
     RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
                                                       0,
                                                       &compiler,
                                                       compiler.accept());
     RegExpNode* node = captured_body;
     bool is_end_anchored = data->tree->IsAnchoredAtEnd();
-    bool is_start_anchored = data->tree->IsAnchoredAtStart();
+    bool is_start_anchored = sticky || data->tree->IsAnchoredAtStart();
     int max_length = data->tree->max_match();
     if (!is_start_anchored) {
         // Add a .*? at the beginning, outside the body capture, unless
         // this expression is anchored at the beginning.
         RegExpNode* loop_node =
             RegExpQuantifier::ToNode(0,
                                      RegExpTree::kInfinity,
                                      false,
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -83,17 +83,17 @@ struct RegExpCode
     void destroy() {
         js_free(byteCode);
     }
 };
 
 RegExpCode
 CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* data,
                HandleLinearString sample,  bool is_global, bool ignore_case,
-               bool is_ascii, bool match_only, bool force_bytecode);
+               bool is_ascii, bool match_only, bool force_bytecode, bool sticky);
 
 // Note: this may return RegExpRunStatus_Error if an interrupt was requested
 // while the code was executing.
 template <typename CharT>
 RegExpRunStatus
 ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const CharT* chars, size_t start,
             size_t length, MatchPairs* matches);
 
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -1734,18 +1734,18 @@ js::str_lastIndexOf(JSContext* cx, unsig
         else
             res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
     }
 
     args.rval().setInt32(res);
     return true;
 }
 
-static bool
-HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
+bool
+js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
 {
     MOZ_ASSERT(start + pat->length() <= text->length());
 
     size_t patLen = pat->length();
 
     AutoCheckCannotGC nogc;
     if (text->hasLatin1Chars()) {
         const Latin1Char* textChars = text->latin1Chars(nogc) + start;
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@@ -214,16 +214,20 @@ StringEqualsAscii(JSLinearString* str, c
 
 /* Return true if the string contains a pattern anywhere inside it. */
 extern bool
 StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen);
 
 extern int
 StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start);
 
+/* Return true if the string contains a pattern at |start|. */
+extern bool
+HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start);
+
 template <typename CharT>
 extern bool
 HasRegExpMetaChars(const CharT* chars, size_t length);
 
 extern bool
 StringHasRegExpMetaChars(JSLinearString* str);
 
 template <typename Char1, typename Char2>
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/sticky.js
@@ -0,0 +1,126 @@
+var BUGNUMBER = 773687;
+var summary = 'sticky flag should not break assertion behavior.';
+
+print(BUGNUMBER + ": " + summary);
+
+function test(re, text, expectations) {
+  // Sanity check for test data itself.
+  assertEq(expectations.length, text.length + 1);
+
+  for (var i = 0; i < expectations.length; i++) {
+    var result = expectations[i];
+
+    re.lastIndex = i;
+    var match = re.exec(text);
+    if (result === null) {
+      assertEq(re.lastIndex, 0);
+      assertEq(match, null);
+    } else {
+      assertEq(re.lastIndex, result.lastIndex);
+      assertEq(match !== null, true);
+      assertEq(match.length, result.matches.length);
+      for (var j = 0; j < result.matches.length; j++)
+        assertEq(match[j], result.matches[j]);
+      assertEq(match.index, result.index);
+    }
+  }
+}
+
+// simple text
+test(/bc/y, "abcabd", [
+  null,
+  { lastIndex: 3, matches: ["bc"], index: 1 },
+  null,
+  null,
+  null,
+  null,
+  null,
+]);
+
+// complex pattern
+test(/bc|c|d/y, "abcabd", [
+  null,
+  { lastIndex: 3, matches: ["bc"], index: 1 },
+  { lastIndex: 3, matches: ["c"], index: 2 },
+  null,
+  null,
+  { lastIndex: 6, matches: ["d"], index: 5 },
+  null,
+]);
+
+test(/.*(bc|c|d)/y, "abcabd", [
+  { lastIndex: 6, matches: ["abcabd", "d"], index: 0 },
+  { lastIndex: 6, matches: ["bcabd", "d"], index: 1 },
+  { lastIndex: 6, matches: ["cabd", "d"], index: 2 },
+  { lastIndex: 6, matches: ["abd", "d"], index: 3 },
+  { lastIndex: 6, matches: ["bd", "d"], index: 4 },
+  { lastIndex: 6, matches: ["d", "d"], index: 5 },
+  null,
+]);
+
+test(/.*?(bc|c|d)/y, "abcabd", [
+  { lastIndex: 3, matches: ["abc", "bc"], index: 0 },
+  { lastIndex: 3, matches: ["bc", "bc"], index: 1 },
+  { lastIndex: 3, matches: ["c", "c"], index: 2 },
+  { lastIndex: 6, matches: ["abd", "d"], index: 3 },
+  { lastIndex: 6, matches: ["bd", "d"], index: 4 },
+  { lastIndex: 6, matches: ["d", "d"], index: 5 },
+  null,
+]);
+
+test(/(bc|.*c|d)/y, "abcabd", [
+  { lastIndex: 3, matches: ["abc", "abc"], index: 0 },
+  { lastIndex: 3, matches: ["bc", "bc"], index: 1 },
+  { lastIndex: 3, matches: ["c", "c"], index: 2 },
+  null,
+  null,
+  { lastIndex: 6, matches: ["d", "d"], index: 5 },
+  null,
+]);
+
+// ^ assertions
+test(/^/y, "abcabc", [
+  { lastIndex: 0, matches: [""], index: 0 },
+  null,
+  null,
+  null,
+  null,
+  null,
+  null,
+]);
+
+test(/^a/my, "abc\nabc", [
+  { lastIndex: 1, matches: ["a"], index: 0 },
+  null,
+  null,
+  null,
+  { lastIndex: 5, matches: ["a"], index: 4 },
+  null,
+  null,
+  null,
+]);
+
+// \b assertions
+test(/\b/y, "abc bc", [
+  { lastIndex: 0, matches: [""], index: 0 },
+  null,
+  null,
+  { lastIndex: 3, matches: [""], index: 3 },
+  { lastIndex: 4, matches: [""], index: 4 },
+  null,
+  { lastIndex: 6, matches: [""], index: 6 },
+]);
+
+// \B assertions
+test(/\B/y, "abc bc", [
+  null,
+  { lastIndex: 1, matches: [""], index: 1 },
+  { lastIndex: 2, matches: [""], index: 2 },
+  null,
+  null,
+  { lastIndex: 5, matches: [""], index: 5 },
+  null,
+]);
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
--- a/js/src/vm/MatchPairs.h
+++ b/js/src/vm/MatchPairs.h
@@ -74,17 +74,16 @@ class MatchPairs
     friend class RegExpStatics;
 
     /* MatchPair buffer allocator: set pairs_ and pairCount_. */
     virtual bool allocOrExpandArray(size_t pairCount) = 0;
 
     bool initArrayFrom(MatchPairs& copyFrom);
     void forgetArray() { pairs_ = nullptr; }
 
-    void displace(size_t disp);
     void checkAgainst(size_t inputLength) {
 #ifdef DEBUG
         for (size_t i = 0; i < pairCount_; i++) {
             const MatchPair& p = (*this)[i];
             MOZ_ASSERT(p.check());
             if (p.isUndefined())
                 continue;
             MOZ_ASSERT(size_t(p.limit) <= inputLength);
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -142,29 +142,16 @@ MatchPairs::initArrayFrom(MatchPairs& co
     if (!allocOrExpandArray(copyFrom.pairCount()))
         return false;
 
     PodCopy(pairs_, copyFrom.pairs_, pairCount_);
 
     return true;
 }
 
-void
-MatchPairs::displace(size_t disp)
-{
-    if (disp == 0)
-        return;
-
-    for (size_t i = 0; i < pairCount_; i++) {
-        MOZ_ASSERT(pairs_[i].check());
-        pairs_[i].start += (pairs_[i].start < 0) ? 0 : disp;
-        pairs_[i].limit += (pairs_[i].limit < 0) ? 0 : disp;
-    }
-}
-
 bool
 ScopedMatchPairs::allocOrExpandArray(size_t pairCount)
 {
     /* Array expansion is forbidden, but array reuse is acceptable. */
     if (pairCount_) {
         MOZ_ASSERT(pairs_);
         MOZ_ASSERT(pairCount_ == pairCount);
         return true;
@@ -575,42 +562,18 @@ RegExpShared::trace(JSTracer* trc)
 
 bool
 RegExpShared::compile(JSContext* cx, HandleLinearString input,
                       CompilationMode mode, ForceByteCodeEnum force)
 {
     TraceLoggerThread* logger = TraceLoggerForMainThread(cx->runtime());
     AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile);
 
-    if (!sticky()) {
-        RootedAtom pattern(cx, source);
-        return compile(cx, pattern, input, mode, force);
-    }
-
-    /*
-     * The sticky case we implement hackily by prepending a caret onto the front
-     * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
-     */
-    static const char prefix[] = {'^', '(', '?', ':'};
-    static const char postfix[] = {')'};
-
-    using mozilla::ArrayLength;
-    StringBuffer sb(cx);
-    if (!sb.reserve(ArrayLength(prefix) + source->length() + ArrayLength(postfix)))
-        return false;
-    sb.infallibleAppend(prefix, ArrayLength(prefix));
-    if (!sb.append(source))
-        return false;
-    sb.infallibleAppend(postfix, ArrayLength(postfix));
-
-    RootedAtom fakeySource(cx, sb.finishAtom());
-    if (!fakeySource)
-        return false;
-
-    return compile(cx, fakeySource, input, mode, force);
+    RootedAtom pattern(cx, source);
+    return compile(cx, pattern, input, mode, force);
 }
 
 bool
 RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString input,
                       CompilationMode mode, ForceByteCodeEnum force)
 {
     if (!ignoreCase() && !StringHasRegExpMetaChars(pattern))
         canStringMatch = true;
@@ -630,17 +593,18 @@ RegExpShared::compile(JSContext* cx, Han
 
     this->parenCount = data.capture_count;
 
     irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input,
                                                          false /* global() */,
                                                          ignoreCase(),
                                                          input->hasLatin1Chars(),
                                                          mode == MatchOnly,
-                                                         force == ForceByteCode);
+                                                         force == ForceByteCode,
+                                                         sticky());
     if (code.empty())
         return false;
 
     MOZ_ASSERT(!code.jitCode || !code.byteCode);
     MOZ_ASSERT_IF(force == ForceByteCode, code.byteCode);
 
     RegExpCompilation& compilation = this->compilation(mode, input->hasLatin1Chars());
     if (code.jitCode)
@@ -676,64 +640,67 @@ RegExpShared::execute(JSContext* cx, Han
      * Ensure sufficient memory for output vector.
      * No need to initialize it. The RegExp engine fills them in on a match.
      */
     if (matches && !matches->allocOrExpandArray(pairCount())) {
         ReportOutOfMemory(cx);
         return RegExpRunStatus_Error;
     }
 
-    /*
-     * |displacement| emulates sticky mode by matching from this offset
-     * into the char buffer and subtracting the delta off at the end.
-     */
-    size_t charsOffset = 0;
     size_t length = input->length();
-    size_t origLength = length;
-    size_t displacement = 0;
-
-    if (sticky()) {
-        displacement = start;
-        charsOffset += displacement;
-        length -= displacement;
-        start = 0;
-    }
 
     // Reset the Irregexp backtrack stack if it grows during execution.
     irregexp::RegExpStackScope stackScope(cx->runtime());
 
     if (canStringMatch) {
         MOZ_ASSERT(pairCount() == 1);
-        int res = StringFindPattern(input, source, start + charsOffset);
+        size_t sourceLength = source->length();
+        if (sticky()) {
+            // First part checks size_t overflow.
+            if (sourceLength + start < sourceLength || sourceLength + start > length)
+                return RegExpRunStatus_Success_NotFound;
+            if (!HasSubstringAt(input, source, start))
+                return RegExpRunStatus_Success_NotFound;
+
+            if (matches) {
+                (*matches)[0].start = start;
+                (*matches)[0].limit = start + sourceLength;
+
+                matches->checkAgainst(length);
+            }
+            return RegExpRunStatus_Success;
+        }
+
+        int res = StringFindPattern(input, source, start);
         if (res == -1)
             return RegExpRunStatus_Success_NotFound;
 
         if (matches) {
             (*matches)[0].start = res;
-            (*matches)[0].limit = res + source->length();
+            (*matches)[0].limit = res + sourceLength;
 
-            matches->checkAgainst(origLength);
+            matches->checkAgainst(length);
         }
         return RegExpRunStatus_Success;
     }
 
     do {
         jit::JitCode* code = compilation(mode, input->hasLatin1Chars()).jitCode;
         if (!code)
             break;
 
         RegExpRunStatus result;
         {
             AutoTraceLog logJIT(logger, TraceLogger_IrregexpExecute);
             AutoCheckCannotGC nogc;
             if (input->hasLatin1Chars()) {
-                const Latin1Char* chars = input->latin1Chars(nogc) + charsOffset;
+                const Latin1Char* chars = input->latin1Chars(nogc);
                 result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
             } else {
-                const char16_t* chars = input->twoByteChars(nogc) + charsOffset;
+                const char16_t* chars = input->twoByteChars(nogc);
                 result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
             }
         }
 
         if (result == RegExpRunStatus_Error) {
             // An 'Error' result is returned if a stack overflow guard or
             // interrupt guard failed. If CheckOverRecursed doesn't throw, break
             // out and retry the regexp in the bytecode interpreter, which can
@@ -744,47 +711,43 @@ RegExpShared::execute(JSContext* cx, Han
             break;
         }
 
         if (result == RegExpRunStatus_Success_NotFound)
             return RegExpRunStatus_Success_NotFound;
 
         MOZ_ASSERT(result == RegExpRunStatus_Success);
 
-        if (matches) {
-            matches->displace(displacement);
-            matches->checkAgainst(origLength);
-        }
+        if (matches)
+            matches->checkAgainst(length);
         return RegExpRunStatus_Success;
     } while (false);
 
     // Compile bytecode for the RegExp if necessary.
     if (!compileIfNecessary(cx, input, mode, ForceByteCode))
         return RegExpRunStatus_Error;
 
     uint8_t* byteCode = compilation(mode, input->hasLatin1Chars()).byteCode;
     AutoTraceLog logInterpreter(logger, TraceLogger_IrregexpExecute);
 
     AutoStableStringChars inputChars(cx);
     if (!inputChars.init(cx, input))
         return RegExpRunStatus_Error;
 
     RegExpRunStatus result;
     if (inputChars.isLatin1()) {
-        const Latin1Char* chars = inputChars.latin1Range().start().get() + charsOffset;
+        const Latin1Char* chars = inputChars.latin1Range().start().get();
         result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
     } else {
-        const char16_t* chars = inputChars.twoByteRange().start().get() + charsOffset;
+        const char16_t* chars = inputChars.twoByteRange().start().get();
         result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
     }
 
-    if (result == RegExpRunStatus_Success && matches) {
-        matches->displace(displacement);
-        matches->checkAgainst(origLength);
-    }
+    if (result == RegExpRunStatus_Success && matches)
+        matches->checkAgainst(length);
     return result;
 }
 
 size_t
 RegExpShared::sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf)
 {
     size_t n = mallocSizeOf(this);