Bug 1077514 - Execute regexps in the bytecode interpreter if the initial JIT execution was interrupted. r=jandem, a=lmandel
author Brian Hackett <bhackett1024@gmail.com>
Mon, 13 Oct 2014 10:46:38 -0700
changeset 225635 29275e2e7e21f9d82ce93a8ef12beec5d0632173
parent 225634 9014364077a7aee29809e83a64c6e71beaf4bf8c
child 225636 fdd0036a0ae5e5e777d5bb0bb4c3bdf312b1b893
push id 7142
push user ryanvm@gmail.com
push date Wed, 22 Oct 2014 23:06:03 +0000
treeherder mozilla-aurora@7a29843ef488 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jandem, lmandel
bugs 1077514
milestone 35.0a2
Bug 1077514 - Execute regexps in the bytecode interpreter if the initial JIT execution was interrupted. r=jandem, a=lmandel
js/src/irregexp/RegExpEngine.cpp
js/src/irregexp/RegExpEngine.h
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -1645,17 +1645,17 @@ IsNativeRegExpEnabled(JSContext *cx)
 #else
     return cx->runtime()->options().nativeRegExp();
 #endif
 }
 
 RegExpCode
 irregexp::CompilePattern(JSContext *cx, RegExpShared *shared, RegExpCompileData *data,
                          HandleLinearString sample, bool is_global, bool ignore_case,
-                         bool is_ascii, bool match_only)
+                         bool is_ascii, bool match_only, bool force_bytecode)
 {
     if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
         JS_ReportError(cx, "regexp too big");
         return RegExpCode();
     }
 
     LifoAlloc &alloc = cx->tempLifoAlloc();
     RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_ascii, match_only);
@@ -1722,17 +1722,17 @@ irregexp::CompilePattern(JSContext *cx, 
         return RegExpCode();
     }
 
     Maybe<jit::IonContext> ctx;
     Maybe<NativeRegExpMacroAssembler> native_assembler;
     Maybe<InterpretedRegExpMacroAssembler> interpreted_assembler;
 
     RegExpMacroAssembler *assembler;
-    if (IsNativeRegExpEnabled(cx)) {
+    if (IsNativeRegExpEnabled(cx) && !force_bytecode) {
         NativeRegExpMacroAssembler::Mode mode =
             is_ascii ? NativeRegExpMacroAssembler::ASCII
                      : NativeRegExpMacroAssembler::CHAR16;
 
         ctx.emplace(cx, (jit::TempAllocator *) nullptr);
         native_assembler.emplace(&alloc, shared, cx->runtime(), mode, (data->capture_count + 1) * 2);
         assembler = native_assembler.ptr();
     } else {
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -83,17 +83,17 @@ struct RegExpCode
     void destroy() {
         js_free(byteCode);
     }
 };
 
 RegExpCode
 CompilePattern(JSContext *cx, RegExpShared *shared, RegExpCompileData *data,
                HandleLinearString sample,  bool is_global, bool ignore_case,
-               bool is_ascii, bool match_only);
+               bool is_ascii, bool match_only, bool force_bytecode);
 
 // Note: this may return RegExpRunStatus_Error if an interrupt was requested
 // while the code was executing.
 template <typename CharT>
 RegExpRunStatus
 ExecuteCode(JSContext *cx, jit::JitCode *codeBlock, const CharT *chars, size_t start,
             size_t length, MatchPairs *matches);
 
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -450,24 +450,25 @@ RegExpShared::trace(JSTracer *trc)
     for (size_t i = 0; i < ArrayLength(compilationArray); i++) {
         RegExpCompilation &compilation = compilationArray[i];
         if (compilation.jitCode)
             MarkJitCode(trc, &compilation.jitCode, "RegExpShared code");
     }
 }
 
 bool
-RegExpShared::compile(JSContext *cx, HandleLinearString input, CompilationMode mode)
+RegExpShared::compile(JSContext *cx, HandleLinearString input,
+                      CompilationMode mode, ForceByteCodeEnum force)
 {
     TraceLogger *logger = TraceLoggerForMainThread(cx->runtime());
     AutoTraceLog logCompile(logger, TraceLogger::IrregexpCompile);
 
     if (!sticky()) {
         RootedAtom pattern(cx, source);
-        return compile(cx, pattern, input, mode);
+        return compile(cx, pattern, input, mode, force);
     }
 
     /*
      * The sticky case we implement hackily by prepending a caret onto the front
      * and relying on |::execute| to pseudo-slice the string when it sees a sticky regexp.
      */
     static const char prefix[] = {'^', '(', '?', ':'};
     static const char postfix[] = {')'};
@@ -480,22 +481,22 @@ RegExpShared::compile(JSContext *cx, Han
     if (!sb.append(source))
         return false;
     sb.infallibleAppend(postfix, ArrayLength(postfix));
 
     RootedAtom fakeySource(cx, sb.finishAtom());
     if (!fakeySource)
         return false;
 
-    return compile(cx, fakeySource, input, mode);
+    return compile(cx, fakeySource, input, mode, force);
 }
 
 bool
 RegExpShared::compile(JSContext *cx, HandleAtom pattern, HandleLinearString input,
-                      CompilationMode mode)
+                      CompilationMode mode, ForceByteCodeEnum force)
 {
     if (!ignoreCase() && !StringHasRegExpMetaChars(pattern)) {
         canStringMatch = true;
         parenCount = 0;
         return true;
     }
 
     CompileOptions options(cx);
@@ -512,47 +513,52 @@ RegExpShared::compile(JSContext *cx, Han
     }
 
     this->parenCount = data.capture_count;
 
     irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input,
                                                          false /* global() */,
                                                          ignoreCase(),
                                                          input->hasLatin1Chars(),
-                                                         mode == MatchOnly);
+                                                         mode == MatchOnly,
+                                                         force == ForceByteCode);
     if (code.empty())
         return false;
 
     MOZ_ASSERT(!code.jitCode || !code.byteCode);
+    MOZ_ASSERT_IF(force == ForceByteCode, code.byteCode);
 
     RegExpCompilation &compilation = this->compilation(mode, input->hasLatin1Chars());
-    compilation.jitCode = code.jitCode;
-    compilation.byteCode = code.byteCode;
+    if (code.jitCode)
+        compilation.jitCode = code.jitCode;
+    else if (code.byteCode)
+        compilation.byteCode = code.byteCode;
 
     return true;
 }
 
 bool
-RegExpShared::compileIfNecessary(JSContext *cx, HandleLinearString input, CompilationMode mode)
+RegExpShared::compileIfNecessary(JSContext *cx, HandleLinearString input,
+                                 CompilationMode mode, ForceByteCodeEnum force)
 {
-    if (isCompiled(mode, input->hasLatin1Chars()) || canStringMatch)
+    if (isCompiled(mode, input->hasLatin1Chars(), force) || canStringMatch)
         return true;
-    return compile(cx, input, mode);
+    return compile(cx, input, mode, force);
 }
 
 RegExpRunStatus
 RegExpShared::execute(JSContext *cx, HandleLinearString input, size_t start,
                       MatchPairs *matches)
 {
     TraceLogger *logger = TraceLoggerForMainThread(cx->runtime());
 
     CompilationMode mode = matches ? Normal : MatchOnly;
 
     /* Compile the code at point-of-use. */
-    if (!compileIfNecessary(cx, input, mode))
+    if (!compileIfNecessary(cx, input, mode, DontForceByteCode))
         return RegExpRunStatus_Error;
 
     /*
      * Ensure sufficient memory for output vector.
      * No need to initialize it. The RegExp engine fills them in on a match.
      */
     if (matches && !matches->allocOrExpandArray(pairCount()))
         return RegExpRunStatus_Error;
@@ -586,86 +592,93 @@ RegExpShared::execute(JSContext *cx, Han
             (*matches)[0].start = res;
             (*matches)[0].limit = res + source->length();
 
             matches->checkAgainst(origLength);
         }
         return RegExpRunStatus_Success;
     }
 
-    if (uint8_t *byteCode = compilation(mode, input->hasLatin1Chars()).byteCode) {
-        AutoTraceLog logInterpreter(logger, TraceLogger::IrregexpExecute);
-
-        AutoStableStringChars inputChars(cx);
-        if (!inputChars.init(cx, input))
-            return RegExpRunStatus_Error;
+    do {
+        jit::JitCode *code = compilation(mode, input->hasLatin1Chars()).jitCode;
+        if (!code)
+            break;
 
         RegExpRunStatus result;
-        if (inputChars.isLatin1()) {
-            const Latin1Char *chars = inputChars.latin1Range().start().get() + charsOffset;
-            result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
-        } else {
-            const char16_t *chars = inputChars.twoByteRange().start().get() + charsOffset;
-            result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
-        }
-
-        if (result == RegExpRunStatus_Success && matches) {
-            matches->displace(displacement);
-            matches->checkAgainst(origLength);
-        }
-        return result;
-    }
-
-    while (true) {
-        RegExpRunStatus result;
         {
             AutoTraceLog logJIT(logger, TraceLogger::IrregexpExecute);
             AutoCheckCannotGC nogc;
-            jit::JitCode *code = compilation(mode, input->hasLatin1Chars()).jitCode;
             if (input->hasLatin1Chars()) {
                 const Latin1Char *chars = input->latin1Chars(nogc) + charsOffset;
                 result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
             } else {
                 const char16_t *chars = input->twoByteChars(nogc) + charsOffset;
                 result = irregexp::ExecuteCode(cx, code, chars, start, length, matches);
             }
         }
 
         if (result == RegExpRunStatus_Error) {
             // The RegExp engine might exit with an exception if an interrupt
-            // was requested. Check this case and retry until a clean result is
-            // obtained.
+            // was requested. If this happens, break out and retry the regexp
+            // in the bytecode interpreter, which can execute while tolerating
+            // future interrupts. Otherwise, if we keep getting interrupted we
+            // will never finish executing the regexp.
             bool interrupted;
             {
                 JSRuntime::AutoLockForInterrupt lock(cx->runtime());
                 interrupted = cx->runtime()->interrupt;
             }
 
             if (interrupted) {
                 if (!InvokeInterruptCallback(cx))
                     return RegExpRunStatus_Error;
-                continue;
+                break;
             }
 
             js_ReportOverRecursed(cx);
             return RegExpRunStatus_Error;
         }
 
         if (result == RegExpRunStatus_Success_NotFound)
             return RegExpRunStatus_Success_NotFound;
 
         MOZ_ASSERT(result == RegExpRunStatus_Success);
-        break;
+
+        if (matches) {
+            matches->displace(displacement);
+            matches->checkAgainst(origLength);
+        }
+        return RegExpRunStatus_Success;
+    } while (false);
+
+    // Compile bytecode for the RegExp if necessary.
+    if (!compileIfNecessary(cx, input, mode, ForceByteCode))
+        return RegExpRunStatus_Error;
+
+    uint8_t *byteCode = compilation(mode, input->hasLatin1Chars()).byteCode;
+    AutoTraceLog logInterpreter(logger, TraceLogger::IrregexpExecute);
+
+    AutoStableStringChars inputChars(cx);
+    if (!inputChars.init(cx, input))
+        return RegExpRunStatus_Error;
+
+    RegExpRunStatus result;
+    if (inputChars.isLatin1()) {
+        const Latin1Char *chars = inputChars.latin1Range().start().get() + charsOffset;
+        result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
+    } else {
+        const char16_t *chars = inputChars.twoByteRange().start().get() + charsOffset;
+        result = irregexp::InterpretCode(cx, byteCode, chars, start, length, matches);
     }
 
-    if (matches) {
+    if (result == RegExpRunStatus_Success && matches) {
         matches->displace(displacement);
         matches->checkAgainst(origLength);
     }
-    return RegExpRunStatus_Success;
+    return result;
 }
 
 size_t
 RegExpShared::sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf)
 {
     size_t n = mallocSizeOf(this);
 
     for (size_t i = 0; i < ArrayLength(compilationArray); i++) {
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -101,31 +101,38 @@ CloneRegExpObject(JSContext *cx, JSObjec
 class RegExpShared
 {
   public:
     enum CompilationMode {
         Normal,
         MatchOnly
     };
 
+    enum ForceByteCodeEnum {
+        DontForceByteCode,
+        ForceByteCode
+    };
+
   private:
     friend class RegExpCompartment;
     friend class RegExpStatics;
 
     typedef frontend::TokenStream TokenStream;
 
     struct RegExpCompilation
     {
         HeapPtrJitCode jitCode;
         uint8_t *byteCode;
 
         RegExpCompilation() : byteCode(nullptr) {}
         ~RegExpCompilation() { js_free(byteCode); }
 
-        bool compiled() const { return jitCode || byteCode; }
+        bool compiled(ForceByteCodeEnum force = DontForceByteCode) const {
+            return byteCode || (force == DontForceByteCode && jitCode);
+        }
     };
 
     /* Source to the RegExp, for lazy compilation. */
     HeapPtrAtom        source;
 
     RegExpFlag         flags;
     size_t             parenCount;
     bool               canStringMatch;
@@ -140,20 +147,23 @@ class RegExpShared
         }
         MOZ_CRASH();
     }
 
     // Tables referenced by JIT code.
     Vector<uint8_t *, 0, SystemAllocPolicy> tables;
 
     /* Internal functions. */
-    bool compile(JSContext *cx, HandleLinearString input, CompilationMode mode);
-    bool compile(JSContext *cx, HandleAtom pattern, HandleLinearString input, CompilationMode mode);
+    bool compile(JSContext *cx, HandleLinearString input,
+                 CompilationMode mode, ForceByteCodeEnum force);
+    bool compile(JSContext *cx, HandleAtom pattern, HandleLinearString input,
+                 CompilationMode mode, ForceByteCodeEnum force);
 
-    bool compileIfNecessary(JSContext *cx, HandleLinearString input, CompilationMode mode);
+    bool compileIfNecessary(JSContext *cx, HandleLinearString input,
+                            CompilationMode mode, ForceByteCodeEnum force);
 
     const RegExpCompilation &compilation(CompilationMode mode, bool latin1) const {
         return compilationArray[CompilationIndex(mode, latin1)];
     }
 
     RegExpCompilation &compilation(CompilationMode mode, bool latin1) {
         return compilationArray[CompilationIndex(mode, latin1)];
     }
@@ -184,18 +194,19 @@ class RegExpShared
 
     JSAtom *getSource() const           { return source; }
     RegExpFlag getFlags() const         { return flags; }
     bool ignoreCase() const             { return flags & IgnoreCaseFlag; }
     bool global() const                 { return flags & GlobalFlag; }
     bool multiline() const              { return flags & MultilineFlag; }
     bool sticky() const                 { return flags & StickyFlag; }
 
-    bool isCompiled(CompilationMode mode, bool latin1) const {
-        return compilation(mode, latin1).compiled();
+    bool isCompiled(CompilationMode mode, bool latin1,
+                    ForceByteCodeEnum force = DontForceByteCode) const {
+        return compilation(mode, latin1).compiled(force);
     }
     bool isCompiled() const {
         return isCompiled(Normal, true) || isCompiled(Normal, false)
             || isCompiled(MatchOnly, true) || isCompiled(MatchOnly, false);
     }
 
     void trace(JSTracer *trc);