Bug 1077514 - Execute regexps in the bytecode interpreter if the initial JIT execution was interrupted. r=jandem, a=lmandel
author: Brian Hackett <bhackett1024@gmail.com>
date: Fri, 31 Oct 2014 14:27:14 -0700
changeset 225896 5238acab8176
parent 225895 631a73cdbc91
child 225897 8b1b897ca39c
push id: 4060
push user: bhackett@mozilla.com
push date: 2014-10-31 21:27 +0000
treeherder: mozilla-beta@5238acab8176 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jandem, lmandel
bugs: 1077514
milestone: 34.0
Bug 1077514 - Execute regexps in the bytecode interpreter if the initial JIT execution was interrupted. r=jandem, a=lmandel
js/src/irregexp/RegExpEngine.cpp
js/src/irregexp/RegExpEngine.h
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -1641,17 +1641,17 @@ IsNativeRegExpEnabled(JSContext *cx)
 #else
     return cx->runtime()->options().nativeRegExp();
 #endif
 }
 
 RegExpCode
 irregexp::CompilePattern(JSContext *cx, RegExpShared *shared, RegExpCompileData *data,
                          HandleLinearString sample, bool is_global, bool ignore_case,
-                         bool is_ascii)
+                         bool is_ascii, bool force_bytecode)
 {
     if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
         JS_ReportError(cx, "regexp too big");
         return RegExpCode();
     }
 
     LifoAlloc &alloc = cx->tempLifoAlloc();
     RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii);
@@ -1718,17 +1718,17 @@ irregexp::CompilePattern(JSContext *cx, 
         return RegExpCode();
     }
 
     Maybe<jit::IonContext> ctx;
     Maybe<NativeRegExpMacroAssembler> native_assembler;
     Maybe<InterpretedRegExpMacroAssembler> interpreted_assembler;
 
     RegExpMacroAssembler *assembler;
-    if (IsNativeRegExpEnabled(cx)) {
+    if (IsNativeRegExpEnabled(cx) && !force_bytecode) {
         NativeRegExpMacroAssembler::Mode mode =
             is_ascii ? NativeRegExpMacroAssembler::ASCII
                      : NativeRegExpMacroAssembler::JSCHAR;
 
         ctx.emplace(cx, (jit::TempAllocator *) nullptr);
         native_assembler.emplace(&alloc, shared, cx->runtime(), mode, (data->capture_count + 1) * 2);
         assembler = native_assembler.ptr();
     } else {
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -82,18 +82,18 @@ struct RegExpCode
 
     void destroy() {
         js_free(byteCode);
     }
 };
 
 RegExpCode
 CompilePattern(JSContext *cx, RegExpShared *shared, RegExpCompileData *data,
-               HandleLinearString sample,  bool is_global, bool ignore_case = false,
-               bool is_ascii = false);
+               HandleLinearString sample,  bool is_global, bool ignore_case,
+               bool is_ascii, bool force_bytecode);
 
 // Note: this may return RegExpRunStatus_Error if an interrupt was requested
 // while the code was executing.
 template <typename CharT>
 RegExpRunStatus
 ExecuteCode(JSContext *cx, jit::JitCode *codeBlock, const CharT *chars, size_t start,
             size_t length, MatchPairs *matches);
 
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -452,24 +452,25 @@ RegExpShared::trace(JSTracer *trc)
     if (jitCodeLatin1)
         MarkJitCode(trc, &jitCodeLatin1, "RegExpShared code Latin1");
 
     if (jitCodeTwoByte)
         MarkJitCode(trc, &jitCodeTwoByte, "RegExpShared code TwoByte");
 }
 
 bool
-RegExpShared::compile(JSContext *cx, HandleLinearString input)
+RegExpShared::compile(JSContext *cx, HandleLinearString input,
+                      ForceByteCodeEnum force)
 {
     TraceLogger *logger = TraceLoggerForMainThread(cx->runtime());
     AutoTraceLog logCompile(logger, TraceLogger::IrregexpCompile);
 
     if (!sticky()) {
         RootedAtom pattern(cx, source);
-        return compile(cx, pattern, input);
+        return compile(cx, pattern, input, force);
     }
 
     /*
      * The sticky case we implement hackily by prepending a caret onto the front
      * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
      */
     static const char prefix[] = {'^', '(', '?', ':'};
     static const char postfix[] = {')'};
@@ -482,21 +483,22 @@ RegExpShared::compile(JSContext *cx, Han
     if (!sb.append(source))
         return false;
     sb.infallibleAppend(postfix, ArrayLength(postfix));
 
     RootedAtom fakeySource(cx, sb.finishAtom());
     if (!fakeySource)
         return false;
 
-    return compile(cx, fakeySource, input);
+    return compile(cx, fakeySource, input, force);
 }
 
 bool
-RegExpShared::compile(JSContext *cx, HandleAtom pattern, HandleLinearString input)
+RegExpShared::compile(JSContext *cx, HandleAtom pattern, HandleLinearString input,
+                      ForceByteCodeEnum force)
 {
     if (!ignoreCase() && !StringHasRegExpMetaChars(pattern)) {
         canStringMatch = true;
         parenCount = 0;
         return true;
     }
 
     CompileOptions options(cx);
@@ -509,50 +511,56 @@ RegExpShared::compile(JSContext *cx, Han
     if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern, multiline(), &data))
         return false;
 
     this->parenCount = data.capture_count;
 
     irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input,
                                                          false /* global() */,
                                                          ignoreCase(),
-                                                         input->hasLatin1Chars());
+                                                         input->hasLatin1Chars(),
+                                                         force == ForceByteCode);
     if (code.empty())
         return false;
 
     JS_ASSERT(!code.jitCode || !code.byteCode);
-    if (input->hasLatin1Chars())
-        jitCodeLatin1 = code.jitCode;
-    else
-        jitCodeTwoByte = code.jitCode;
+    MOZ_ASSERT_IF(force == ForceByteCode, code.byteCode);
 
-    if (input->hasLatin1Chars())
-        byteCodeLatin1 = code.byteCode;
-    else
-        byteCodeTwoByte = code.byteCode;
+    if (code.jitCode) {
+        if (input->hasLatin1Chars())
+            jitCodeLatin1 = code.jitCode;
+        else
+            jitCodeTwoByte = code.jitCode;
+    } else {
+        if (input->hasLatin1Chars())
+            byteCodeLatin1 = code.byteCode;
+        else
+            byteCodeTwoByte = code.byteCode;
+    }
 
     return true;
 }
 
 bool
-RegExpShared::compileIfNecessary(JSContext *cx, HandleLinearString input)
+RegExpShared::compileIfNecessary(JSContext *cx, HandleLinearString input,
+                                 ForceByteCodeEnum force)
 {
-    if (isCompiled(input->hasLatin1Chars()) || canStringMatch)
+    if (isCompiled(input->hasLatin1Chars(), force) || canStringMatch)
         return true;
-    return compile(cx, input);
+    return compile(cx, input, force);
 }
 
 RegExpRunStatus
 RegExpShared::execute(JSContext *cx, HandleLinearString input, size_t *lastIndex,
                       MatchPairs &matches)
 {
     TraceLogger *logger = TraceLoggerForMainThread(cx->runtime());
 
     /* Compile the code at point-of-use. */
-    if (!compileIfNecessary(cx, input))
+    if (!compileIfNecessary(cx, input, DontForceByteCode))
         return RegExpRunStatus_Error;
 
     /*
      * Ensure sufficient memory for output vector.
      * No need to initialize it. The RegExp engine fills them in on a match.
      */
     if (!matches.allocOrExpandArray(pairCount()))
         return RegExpRunStatus_Error;
@@ -586,85 +594,90 @@ RegExpShared::execute(JSContext *cx, Han
         matches[0].start = res;
         matches[0].limit = res + source->length();
 
         matches.checkAgainst(origLength);
         *lastIndex = matches[0].limit;
         return RegExpRunStatus_Success;
     }
 
-    if (uint8_t *byteCode = maybeByteCode(input->hasLatin1Chars())) {
-        AutoTraceLog logInterpreter(logger, TraceLogger::IrregexpExecute);
-
-        AutoStableStringChars inputChars(cx);
-        if (!inputChars.init(cx, input))
-            return RegExpRunStatus_Error;
+    do {
+        jit::JitCode *code = input->hasLatin1Chars() ? jitCodeLatin1 : jitCodeTwoByte;
+        if (!code)
+            break;
 
         RegExpRunStatus result;
-        if (inputChars.isLatin1()) {
-            const Latin1Char *chars = inputChars.latin1Range().start().get() + charsOffset;
-            result = irregexp::InterpretCode(cx, byteCode, chars, start, length, &matches);
-        } else {
-            const jschar *chars = inputChars.twoByteRange().start().get() + charsOffset;
-            result = irregexp::InterpretCode(cx, byteCode, chars, start, length, &matches);
-        }
-
-        if (result == RegExpRunStatus_Success) {
-            matches.displace(displacement);
-            matches.checkAgainst(origLength);
-            *lastIndex = matches[0].limit;
-        }
-        return result;
-    }
-
-    while (true) {
-        RegExpRunStatus result;
         {
             AutoTraceLog logJIT(logger, TraceLogger::IrregexpExecute);
             AutoCheckCannotGC nogc;
             if (input->hasLatin1Chars()) {
                 const Latin1Char *chars = input->latin1Chars(nogc) + charsOffset;
-                result = irregexp::ExecuteCode(cx, jitCodeLatin1, chars, start, length, &matches);
+                result = irregexp::ExecuteCode(cx, code, chars, start, length, &matches);
             } else {
                 const jschar *chars = input->twoByteChars(nogc) + charsOffset;
-                result = irregexp::ExecuteCode(cx, jitCodeTwoByte, chars, start, length, &matches);
+                result = irregexp::ExecuteCode(cx, code, chars, start, length, &matches);
             }
         }
 
         if (result == RegExpRunStatus_Error) {
             // The RegExp engine might exit with an exception if an interrupt
-            // was requested. Check this case and retry until a clean result is
-            // obtained.
+            // was requested. If this happens, break out and retry the regexp
+            // in the bytecode interpreter, which can execute while tolerating
+            // future interrupts. Otherwise, if we keep getting interrupted we
+            // will never finish executing the regexp.
             bool interrupted;
             {
                 JSRuntime::AutoLockForInterrupt lock(cx->runtime());
                 interrupted = cx->runtime()->interrupt;
             }
 
             if (interrupted) {
                 if (!InvokeInterruptCallback(cx))
                     return RegExpRunStatus_Error;
-                continue;
+                break;
             }
 
             js_ReportOverRecursed(cx);
             return RegExpRunStatus_Error;
         }
 
-        if (result == RegExpRunStatus_Success_NotFound)
-            return RegExpRunStatus_Success_NotFound;
+        if (result == RegExpRunStatus_Success) {
+            matches.displace(displacement);
+            matches.checkAgainst(origLength);
+            *lastIndex = matches[0].limit;
+        }
+        return result;
+    } while (false);
+
+    // Compile bytecode for the RegExp if necessary.
+    if (!compileIfNecessary(cx, input, ForceByteCode))
+        return RegExpRunStatus_Error;
 
-        JS_ASSERT(result == RegExpRunStatus_Success);
-        break;
+    uint8_t *byteCode = maybeByteCode(input->hasLatin1Chars());
+    AutoTraceLog logInterpreter(logger, TraceLogger::IrregexpExecute);
+
+    AutoStableStringChars inputChars(cx);
+    if (!inputChars.init(cx, input))
+        return RegExpRunStatus_Error;
+
+    RegExpRunStatus result;
+    if (inputChars.isLatin1()) {
+        const Latin1Char *chars = inputChars.latin1Range().start().get() + charsOffset;
+        result = irregexp::InterpretCode(cx, byteCode, chars, start, length, &matches);
+    } else {
+        const jschar *chars = inputChars.twoByteRange().start().get() + charsOffset;
+        result = irregexp::InterpretCode(cx, byteCode, chars, start, length, &matches);
     }
 
-    matches.displace(displacement);
-    matches.checkAgainst(origLength);
-    *lastIndex = matches[0].limit;
-    return RegExpRunStatus_Success;
+    if (result == RegExpRunStatus_Success) {
+        matches.displace(displacement);
+        matches.checkAgainst(origLength);
+        *lastIndex = matches[0].limit;
+    }
+    return result;
 }
 
 size_t
 RegExpShared::sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf)
 {
     size_t n = mallocSizeOf(this);
 
     if (byteCodeLatin1)
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -94,16 +94,23 @@ CloneRegExpObject(JSContext *cx, JSObjec
  * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
  * than explicitly tracing them, so that the RegExpShared and any jitcode can
  * be reclaimed quicker. However, the RegExpShareds are traced through by
  * objects when we are preserving jitcode in their zone, to avoid the same
  * recompilation inefficiencies as normal Ion and baseline compilation.
  */
 class RegExpShared
 {
+  public:
+    enum ForceByteCodeEnum {
+        DontForceByteCode,
+        ForceByteCode
+    };
+
+  private:
     friend class RegExpCompartment;
     friend class RegExpStatics;
 
     typedef frontend::TokenStream TokenStream;
 
     /* Source to the RegExp, for lazy compilation. */
     HeapPtrAtom        source;
 
@@ -116,20 +123,22 @@ class RegExpShared
     HeapPtrJitCode     jitCodeTwoByte;
     uint8_t            *byteCodeLatin1;
     uint8_t            *byteCodeTwoByte;
 
     // Tables referenced by JIT code.
     Vector<uint8_t *, 0, SystemAllocPolicy> tables;
 
     /* Internal functions. */
-    bool compile(JSContext *cx, HandleLinearString input);
-    bool compile(JSContext *cx, HandleAtom pattern, HandleLinearString input);
+    bool compile(JSContext *cx, HandleLinearString input, ForceByteCodeEnum force);
+    bool compile(JSContext *cx, HandleAtom pattern, HandleLinearString input,
+                 ForceByteCodeEnum force);
 
-    bool compileIfNecessary(JSContext *cx, HandleLinearString input);
+    bool compileIfNecessary(JSContext *cx, HandleLinearString input,
+                            ForceByteCodeEnum force);
 
   public:
     RegExpShared(JSAtom *source, RegExpFlag flags);
     ~RegExpShared();
 
     /* Primary interface: run this regular expression on the given string. */
     RegExpRunStatus execute(JSContext *cx, HandleLinearString input, size_t *lastIndex,
                             MatchPairs &matches);
@@ -167,20 +176,22 @@ class RegExpShared
     }
     bool hasByteCodeTwoByte() const {
         return byteCodeTwoByte != nullptr;
     }
     uint8_t *maybeByteCode(bool latin1) const {
         return latin1 ? byteCodeLatin1 : byteCodeTwoByte;
     }
 
-    bool isCompiled(bool latin1) const {
-        if (latin1)
-            return hasJitCodeLatin1() || hasByteCodeLatin1();
-        return hasJitCodeTwoByte() || hasByteCodeTwoByte();
+    bool isCompiled(bool latin1, ForceByteCodeEnum force = DontForceByteCode) const {
+        if (force == DontForceByteCode) {
+            if (latin1 ? hasJitCodeLatin1() : hasJitCodeTwoByte())
+                return true;
+        }
+        return latin1 ? hasByteCodeLatin1() : hasByteCodeTwoByte();
     }
     bool isCompiled() const {
         return isCompiled(true) || isCompiled(false);
     }
 
     void trace(JSTracer *trc);
 
     bool marked() const { return marked_; }