Bug 965712: Part 1 - Use our string matching logic for regexps when possible, r=luke
☠☠ backed out by 44e189f0fd67 ☠ ☠
author Hannes Verschore <hv1989@gmail.com>
Wed, 30 Apr 2014 16:42:25 +0200
changeset 181425 7796f1b424872b11f26be040da31570ea1967b7f
parent 181424 30f939a41a22939ce0f205d670b555de4f5d376b
child 181426 00edef0582f1fc689226c6bb9181ad4e43332fa6
push id 272
push user pvanderbeken@mozilla.com
push date Mon, 05 May 2014 16:31:18 +0000
reviewers luke
bugs 965712
milestone 32.0a1
Bug 965712: Part 1 - Use our string matching logic for regexps when possible, r=luke
js/src/jsstr.cpp
js/src/jsstr.h
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -1168,16 +1168,23 @@ static const size_t sRopeMatchThresholdR
 
 bool
 js::StringHasPattern(const jschar *text, uint32_t textlen,
                      const jschar *pat, uint32_t patlen)
 {
     return StringMatch(text, textlen, pat, patlen) != -1;
 }
 
+int
+js::StringFindPattern(const jschar *text, uint32_t textlen,
+                      const jschar *pat, uint32_t patlen)
+{
+    return StringMatch(text, textlen, pat, patlen);
+}
+
 // When an algorithm does not need a string represented as a single linear
 // array of characters, this range utility may be used to traverse the string a
 // sequence of linear arrays of characters. This avoids flattening ropes.
 class StringSegmentRange
 {
     // If malloc() shows up in any profiles from this vector, we can add a new
     // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
     AutoStringVector stack;
@@ -1732,16 +1739,22 @@ HasRegExpMetaChars(const jschar *chars, 
 {
     for (size_t i = 0; i < length; ++i) {
         if (IsRegExpMetaChar(chars[i]))
             return true;
     }
     return false;
 }
 
+bool
+js::StringHasRegExpMetaChars(const jschar *chars, size_t length)
+{
+    return HasRegExpMetaChars(chars, length);
+}
+
 namespace {
 
 /*
  * StringRegExpGuard factors logic out of String regexp operations.
  *
  * |optarg| indicates in which argument position RegExp flags will be found, if
  * present. This is a Mozilla extension and not part of any ECMA spec.
  */
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@@ -208,16 +208,28 @@ CompareAtoms(JSAtom *atom1, JSAtom *atom
 extern bool
 StringEqualsAscii(JSLinearString *str, const char *asciiBytes);
 
 /* Return true if the string contains a pattern anywhere inside it. */
 extern bool
 StringHasPattern(const jschar *text, uint32_t textlen,
                  const jschar *pat, uint32_t patlen);
 
+/*
+ * Return the index in |text| at which |pat| is found, or -1 if |text| does not
+ * contain |pat|.
+ */
+extern int
+StringFindPattern(const jschar *text, uint32_t textlen,
+                  const jschar *pat, uint32_t patlen);
+
+/* Return true if any of the |length| characters is a regexp meta character. */
+extern bool
+StringHasRegExpMetaChars(const jschar *chars, size_t length);
+
 } /* namespace js */
 
 extern size_t
 js_strlen(const jschar *s);
 
 extern int32_t
 js_strcmp(const jschar *lhs, const jschar *rhs);
 
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -3,16 +3,18 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "vm/RegExpObject.h"
 
 #include "mozilla/MemoryReporting.h"
 
+#include "jsstr.h"
+
 #include "frontend/TokenStream.h"
 #include "vm/MatchPairs.h"
 #include "vm/RegExpStatics.h"
 #include "vm/StringBuffer.h"
 #include "vm/TraceLogging.h"
 #include "vm/Xdr.h"
 #include "yarr/YarrSyntaxChecker.h"
 
@@ -372,17 +374,17 @@ RegExpObject::toString(JSContext *cx) co
         return nullptr;
 
     return sb.finishString();
 }
 
 /* RegExpShared */
 
 RegExpShared::RegExpShared(JSAtom *source, RegExpFlag flags, uint64_t gcNumber)
-  : source(source), flags(flags), parenCount(0),
+  : source(source), flags(flags), parenCount(0), canStringMatch(false),
 #if ENABLE_YARR_JIT
     codeBlock(),
 #endif
     bytecode(nullptr), activeUseCount(0), gcNumberWhenUsed(gcNumber)
 {}
 
 RegExpShared::~RegExpShared()
 {
@@ -433,16 +435,19 @@ RegExpShared::checkSyntax(ExclusiveConte
 
     reportYarrError(cx, tokenStream, error);
     return false;
 }
 
 bool
 RegExpShared::compile(JSContext *cx, bool matchOnly)
 {
+    TraceLogger *logger = TraceLoggerForMainThread(cx->runtime());
+    AutoTraceLog logCompile(logger, TraceLogger::YarrCompile);
+
     if (!sticky())
         return compile(cx, *source, matchOnly);
 
     /*
      * The sticky case we implement hackily by prepending a caret onto the front
      * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
      */
     static const jschar prefix[] = {'^', '(', '?', ':'};
@@ -461,16 +466,26 @@ RegExpShared::compile(JSContext *cx, boo
         return false;
 
     return compile(cx, *fakeySource, matchOnly);
 }
 
 bool
 RegExpShared::compile(JSContext *cx, JSLinearString &pattern, bool matchOnly)
 {
+    /*
+     * A pattern without meta characters can be matched with a plain string
+     * search instead of Yarr. The string search is only handed the text and
+     * the pattern, so it cannot honor ignoreCase; those regexps need Yarr.
+     */
+    if (!ignoreCase() && !StringHasRegExpMetaChars(pattern.chars(), pattern.length())) {
+        canStringMatch = true;
+        return true;
+    }
+
     /* Parse the pattern. */
     ErrorCode yarrError;
     YarrPattern yarrPattern(pattern, ignoreCase(), multiline(), &yarrError);
     if (yarrError) {
         reportYarrError(cx, nullptr, yarrError);
         return false;
     }
     this->parenCount = yarrPattern.m_numSubpatterns;
@@ -502,41 +512,38 @@ RegExpShared::compile(JSContext *cx, JSL
 
     bytecode = byteCompile(yarrPattern, bumpAlloc).get();
     return true;
 }
 
 bool
 RegExpShared::compileIfNecessary(JSContext *cx)
 {
-    if (hasCode() || hasBytecode())
+    if (hasCode() || hasBytecode() || canStringMatch)
         return true;
     return compile(cx, false);
 }
 
 bool
 RegExpShared::compileMatchOnlyIfNecessary(JSContext *cx)
 {
-    if (hasMatchOnlyCode() || hasBytecode())
+    if (hasMatchOnlyCode() || hasBytecode() || canStringMatch)
         return true;
     return compile(cx, true);
 }
 
 RegExpRunStatus
 RegExpShared::execute(JSContext *cx, const jschar *chars, size_t length,
                       size_t *lastIndex, MatchPairs &matches)
 {
     TraceLogger *logger = TraceLoggerForMainThread(cx->runtime());
 
-    {
-        /* Compile the code at point-of-use. */
-        AutoTraceLog logCompile(logger, TraceLogger::YarrCompile);
-        if (!compileIfNecessary(cx))
-            return RegExpRunStatus_Error;
-    }
+    /* Compile the code at point-of-use. */
+    if (!compileIfNecessary(cx))
+        return RegExpRunStatus_Error;
 
     /* Ensure sufficient memory for output vector. */
     if (!matches.initArray(pairCount()))
         return RegExpRunStatus_Error;
 
     /*
      * |displacement| emulates sticky mode by matching from this offset
      * into the char buffer and subtracting the delta off at the end.
@@ -550,16 +557,30 @@ RegExpShared::execute(JSContext *cx, con
         chars += displacement;
         length -= displacement;
         start = 0;
     }
 
     unsigned *outputBuf = matches.rawBuf();
     unsigned result;
 
+    if (canStringMatch) {
+        int res = StringFindPattern(chars+start, length-start, source->chars(), source->length());
+        if (res == -1)
+            return RegExpRunStatus_Success_NotFound;
+
+        outputBuf[0] = res + start;
+        outputBuf[1] = outputBuf[0] + source->length();
+
+        matches.displace(displacement);
+        matches.checkAgainst(origLength);
+        *lastIndex = matches[0].limit;
+        return RegExpRunStatus_Success;
+    }
+
 #if ENABLE_YARR_JIT
     if (codeBlock.isFallBack()) {
         AutoTraceLog logInterpret(logger, TraceLogger::YarrInterpret);
         result = JSC::Yarr::interpret(cx, bytecode, chars, length, start, outputBuf);
     } else {
         AutoTraceLog logJIT(logger, TraceLogger::YarrJIT);
         result = codeBlock.execute(chars, start, length, (int *)outputBuf).start;
     }
@@ -585,36 +606,44 @@ RegExpShared::execute(JSContext *cx, con
 }
 
 RegExpRunStatus
 RegExpShared::executeMatchOnly(JSContext *cx, const jschar *chars, size_t length,
                                size_t *lastIndex, MatchPair &match)
 {
     TraceLogger *logger = js::TraceLoggerForMainThread(cx->runtime());
 
-    {
-        /* Compile the code at point-of-use. */
-        AutoTraceLog logCompile(logger, TraceLogger::YarrCompile);
-        if (!compileMatchOnlyIfNecessary(cx))
-            return RegExpRunStatus_Error;
-    }
+    /* Compile the code at point-of-use. */
+    if (!compileMatchOnlyIfNecessary(cx))
+        return RegExpRunStatus_Error;
 
 #ifdef DEBUG
     const size_t origLength = length;
 #endif
     size_t start = *lastIndex;
     size_t displacement = 0;
 
     if (sticky()) {
         displacement = start;
         chars += displacement;
         length -= displacement;
         start = 0;
     }
 
+    if (canStringMatch) {
+        int res = StringFindPattern(chars+start, length-start, source->chars(), source->length());
+        if (res == -1)
+            return RegExpRunStatus_Success_NotFound;
+
+        match = MatchPair(res + start, res + start + source->length());
+        match.displace(displacement);
+        *lastIndex = match.limit;
+        return RegExpRunStatus_Success;
+    }
+
 #if ENABLE_YARR_JIT
     if (!codeBlock.isFallBack()) {
         AutoTraceLog logJIT(logger, TraceLogger::YarrJIT);
         MatchResult result = codeBlock.execute(chars, start, length);
         if (!result)
             return RegExpRunStatus_Success_NotFound;
 
         match = MatchPair(result.start, result.end);
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -140,16 +140,21 @@ class RegExpShared
      * Source to the RegExp, for lazy compilation.
      * The source must be rooted while activeUseCount is non-zero
      * via RegExpGuard or explicit calls to trace().
      */
     JSAtom *           source;
 
     RegExpFlag         flags;
     unsigned           parenCount;
+    /*
+     * Set by compile() when the pattern can be matched with a plain string
+     * search; execution then bypasses Yarr and no code or bytecode exists.
+     */
+    bool               canStringMatch;
 
 #if ENABLE_YARR_JIT
     /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */
     YarrCodeBlock   codeBlock;
 #endif
     BytecodePattern *bytecode;
 
     /* Lifetime-preserving variables: see class-level comment above. */