Bug 691797: Optimize RegExp.prototype.test with leading .*, take 2. (r=mrbkap,luke)
authorChris Leary <cdleary@mozilla.com>
Mon, 28 Nov 2011 13:35:12 -0800
changeset 82607 eacdec27e5d3f2f8b9e7473ed0c12b5e8341ccbf
parent 82606 67451553bcbb4f04dcc6516f6f916ca2a72f1016
child 82608 0dd55a7547cde7b9a964f41e94df3a18e2c05023
push id519
push userakeybl@mozilla.com
push dateWed, 01 Feb 2012 00:38:35 +0000
treeherdermozilla-beta@788ea1ef610b [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmrbkap, luke
bugs691797
milestone11.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 691797: Optimize RegExp.prototype.test with leading .*, take 2. (r=mrbkap,luke)
js/src/builtin/RegExp.cpp
js/src/jit-test/tests/basic/bug691797-regexp-1.js
js/src/jit-test/tests/basic/bug691797-regexp-2.js
js/src/jsprvtd.h
js/src/vm/RegExpObject-inl.h
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -512,18 +512,23 @@ ExecuteRegExp(JSContext *cx, Native nati
     bool ok;
     JSObject *obj = NonGenericMethodGuard(cx, args, native, &RegExpClass, &ok);
     if (!obj)
         return ok;
 
     RegExpObject *reobj = obj->asRegExp();
 
     RegExpMatcher matcher(cx);
-    if (!matcher.reset(reobj))
-        return false;
+    if (reobj->startsWithAtomizedGreedyStar()) {
+        if (!matcher.resetWithTestOptimized(reobj))
+            return false;
+    } else {
+        if (!matcher.reset(reobj))
+            return false;
+    }
 
     RegExpStatics *res = cx->regExpStatics();
 
     /* Step 2. */
     JSString *input = js_ValueToString(cx, (args.length() > 0) ? args[0] : UndefinedValue());
     if (!input)
         return false;
 
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/bug691797-regexp-1.js
@@ -0,0 +1,4 @@
+var re = /.*star.*/i;
+var str = "The Shawshank Redemption (1994)";
+for (var k = 0; k < 100; k++)
+  assertEq(false, re.test(str));
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/bug691797-regexp-2.js
@@ -0,0 +1,6 @@
+var re = /.*(a\w).*/i;
+var str = "abccccccad";
+for (var k = 0; k < 100; k++) {
+  re.test(str);
+  assertEq('ad', RegExp['$1']);
+}
--- a/js/src/jsprvtd.h
+++ b/js/src/jsprvtd.h
@@ -125,16 +125,17 @@ class RegExpMatcher;
 class RegExpObjectBuilder;
 class RegExpStatics;
 class MatchPairs;
 
 namespace detail {
 
 class RegExpPrivate;
 class RegExpPrivateCode;
+class RegExpPrivateCacheValue;
 
 } /* namespace detail */
 
 enum RegExpFlag
 {
     IgnoreCaseFlag  = 0x01,
     GlobalFlag      = 0x02,
     MultilineFlag   = 0x04,
@@ -231,17 +232,20 @@ typedef Vector<UpvarCookie, 8> UpvarCook
 
 class Breakpoint;
 class BreakpointSite;
 typedef HashMap<jsbytecode *, BreakpointSite *, DefaultHasher<jsbytecode *>, RuntimeAllocPolicy>
     BreakpointSiteMap;
 class Debugger;
 class WatchpointMap;
 
-typedef HashMap<JSAtom *, detail::RegExpPrivate *, DefaultHasher<JSAtom *>, RuntimeAllocPolicy>
+typedef HashMap<JSAtom *,
+                detail::RegExpPrivateCacheValue,
+                DefaultHasher<JSAtom *>,
+                RuntimeAllocPolicy>
     RegExpPrivateCache;
 
 typedef JSNative             Native;
 typedef JSPropertyOp         PropertyOp;
 typedef JSStrictPropertyOp   StrictPropertyOp;
 typedef JSPropertyDescriptor PropertyDescriptor;
 
 namespace analyze {
--- a/js/src/vm/RegExpObject-inl.h
+++ b/js/src/vm/RegExpObject-inl.h
@@ -84,16 +84,33 @@ HasRegExpMetaChars(const jschar *chars, 
 {
     for (size_t i = 0; i < length; ++i) {
         if (IsRegExpMetaChar(chars[i]))
             return true;
     }
     return false;
 }
 
+inline bool
+RegExpObject::startsWithAtomizedGreedyStar() const
+{
+    JSLinearString *source = getSource();
+
+    if (!source->isAtom())
+        return false;
+
+    if (source->length() < 3)
+        return false;
+
+    const jschar *chars = source->chars();
+    return chars[0] == detail::GreedyStarChars[0] &&
+           chars[1] == detail::GreedyStarChars[1] &&
+           chars[2] != '?';
+}
+
 inline size_t *
 RegExpObject::addressOfPrivateRefCount() const
 {
     JS_ASSERT(getPrivate());
     return getPrivate()->addressOfRefCount();
 }
 
 inline void
@@ -252,55 +269,59 @@ detail::RegExpPrivate::getOrCreateCache(
         return cache;
 
     js_ReportOutOfMemory(cx);
     return NULL;
 }
 
 inline bool
 detail::RegExpPrivate::cacheLookup(JSContext *cx, JSAtom *atom, RegExpFlag flags,
+                                   RegExpPrivateCacheKind targetKind,
                                    AlreadyIncRefed<RegExpPrivate> *result)
 {
     RegExpPrivateCache *cache = getOrCreateCache(cx);
     if (!cache)
         return false;
 
     if (RegExpPrivateCache::Ptr p = cache->lookup(atom)) {
-        NeedsIncRef<RegExpPrivate> cached(p->value);
-        if (cached->getFlags() == flags) {
+        RegExpPrivateCacheValue &cacheValue = p->value;
+        if (cacheValue.kind() == targetKind && cacheValue.rep()->getFlags() == flags) {
+            NeedsIncRef<RegExpPrivate> cached(cacheValue.rep());
             cached->incref(cx);
             *result = AlreadyIncRefed<RegExpPrivate>(cached.get());
             return true;
         }
     }
 
     JS_ASSERT(result->null());
     return true;
 }
 
 inline bool
-detail::RegExpPrivate::cacheInsert(JSContext *cx, JSAtom *atom, RegExpPrivate *priv)
+detail::RegExpPrivate::cacheInsert(JSContext *cx, JSAtom *atom, RegExpPrivateCacheKind kind,
+                                   RegExpPrivate *priv)
 {
     JS_ASSERT(priv);
 
     /*
      * Note: allocation performed since lookup may cause a garbage collection,
      * so we have to re-lookup the cache (and inside the cache) after the
      * allocation is performed.
      */
     RegExpPrivateCache *cache = getOrCreateCache(cx);
     if (!cache)
         return false;
 
     if (RegExpPrivateCache::AddPtr addPtr = cache->lookupForAdd(atom)) {
-        /* We clobber existing entries with the same source (but different flags). */
-        JS_ASSERT(addPtr->value->getFlags() != priv->getFlags());
-        addPtr->value = priv;
+        /* We clobber existing entries with the same source (but different flags or kind). */
+        JS_ASSERT(addPtr->value.rep()->getFlags() != priv->getFlags() ||
+                  addPtr->value.kind() != kind);
+        addPtr->value.reset(priv, kind);
     } else {
-        if (!cache->add(addPtr, atom, priv)) {
+        if (!cache->add(addPtr, atom, RegExpPrivateCacheValue(priv, kind))) {
             js_ReportOutOfMemory(cx);
             return false;
         }
     }
 
     return true;
 }
 
@@ -324,27 +345,27 @@ detail::RegExpPrivate::create(JSContext 
      * keep a refcount. The cache holds a "weak ref", where the
      * |RegExpPrivate|'s deallocation decref will first cause it to
      * remove itself from the cache.
      */
 
     JSAtom *sourceAtom = &source->asAtom();
 
     AlreadyIncRefed<RegExpPrivate> cached;
-    if (!cacheLookup(cx, sourceAtom, flags, &cached))
+    if (!cacheLookup(cx, sourceAtom, flags, RegExpPrivateCache_ExecCapable, &cached))
         return RetType(NULL);
 
     if (cached)
         return cached;
 
     RegExpPrivate *priv = RegExpPrivate::createUncached(cx, source, flags, ts);
     if (!priv)
         return RetType(NULL);
 
-    if (!cacheInsert(cx, sourceAtom, priv))
+    if (!cacheInsert(cx, sourceAtom, RegExpPrivateCache_ExecCapable, priv))
         return RetType(NULL);
 
     return RetType(priv);
 }
 
 /* This function should be deleted once bad Android platforms phase out. See bug 604774. */
 inline bool
 detail::RegExpPrivateCode::isJITRuntimeEnabled(JSContext *cx)
@@ -486,17 +507,17 @@ detail::RegExpPrivate::decref(JSContext 
 #endif
 
     if (--refCount != 0)
         return;
 
     RegExpPrivateCache *cache;
     if (source->isAtom() && (cache = cx->threadData()->getRegExpPrivateCache())) {
         RegExpPrivateCache::Ptr ptr = cache->lookup(&source->asAtom());
-        if (ptr && ptr->value == this)
+        if (ptr && ptr->value.rep() == this)
             cache->remove(ptr);
     }
 
 #ifdef DEBUG
     this->~RegExpPrivate();
     memset(this, 0xcd, sizeof(*this));
     cx->free_(this);
 #else
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -52,16 +52,44 @@ using namespace js;
 using js::detail::RegExpPrivate;
 using js::detail::RegExpPrivateCode;
 
 JS_STATIC_ASSERT(IgnoreCaseFlag == JSREG_FOLD);
 JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB);
 JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE);
 JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY);
 
+/* RegExpMatcher */
+
+bool
+RegExpMatcher::resetWithTestOptimized(RegExpObject *reobj)
+{
+    JS_ASSERT(reobj->startsWithAtomizedGreedyStar());
+
+    JSAtom *source = &reobj->getSource()->asAtom();
+    AlreadyIncRefed<RegExpPrivate> priv =
+        RegExpPrivate::createTestOptimized(cx, source, reobj->getFlags());
+    if (!priv)
+        return false;
+
+    /*
+     * Create a dummy RegExpObject to persist this RegExpPrivate until the next GC.
+     * Note that we give the ref we have to this new object.
+     */
+    RegExpObjectBuilder builder(cx);
+    RegExpObject *dummy = builder.build(priv);
+    if (!dummy) {
+        priv->decref(cx);
+        return false;
+    }
+
+    arc.reset(NeedsIncRef<RegExpPrivate>(priv.get()));
+    return true;
+}
+
 /* RegExpObjectBuilder */
 
 bool
 RegExpObjectBuilder::getOrCreate()
 {
     if (reobj_)
         return true;
 
@@ -463,33 +491,65 @@ js::ParseRegExpFlags(JSContext *cx, JSSt
             return false;
           }
         }
 #undef HANDLE_FLAG
     }
     return true;
 }
 
-/* static */ RegExpPrivate *
+RegExpPrivate *
 RegExpPrivate::createUncached(JSContext *cx, JSLinearString *source, RegExpFlag flags,
                               TokenStream *tokenStream)
 {
     RegExpPrivate *priv = cx->new_<RegExpPrivate>(source, flags);
     if (!priv)
         return NULL;
 
     if (!priv->compile(cx, tokenStream)) {
         Foreground::delete_(priv);
         return NULL;
     }
 
     return priv;
 }
 
 AlreadyIncRefed<RegExpPrivate>
+RegExpPrivate::createTestOptimized(JSContext *cx, JSAtom *cacheKey, RegExpFlag flags)
+{
+    typedef AlreadyIncRefed<RegExpPrivate> RetType;
+
+    RetType cached;
+    if (!cacheLookup(cx, cacheKey, flags, RegExpPrivateCache_TestOptimized, &cached))
+        return RetType(NULL);
+
+    if (cached)
+        return cached;
+
+    /* Strip off the greedy star characters, create a new RegExpPrivate, and cache. */
+    JS_ASSERT(cacheKey->length() > JS_ARRAY_LENGTH(GreedyStarChars));
+    JSDependentString *stripped =
+      JSDependentString::new_(cx, cacheKey, cacheKey->chars() + JS_ARRAY_LENGTH(GreedyStarChars),
+                              cacheKey->length() - JS_ARRAY_LENGTH(GreedyStarChars));
+    if (!stripped)
+        return RetType(NULL);
+
+    RegExpPrivate *priv = createUncached(cx, cacheKey, flags, NULL);
+    if (!priv)
+        return RetType(NULL);
+
+    if (!cacheInsert(cx, cacheKey, RegExpPrivateCache_TestOptimized, priv)) {
+        priv->decref(cx);
+        return RetType(NULL);
+    }
+
+    return RetType(priv);
+}
+
+AlreadyIncRefed<RegExpPrivate>
 RegExpPrivate::create(JSContext *cx, JSLinearString *str, JSString *opt, TokenStream *ts)
 {
     if (!opt)
         return create(cx, str, RegExpFlag(0), ts);
 
     RegExpFlag flags = RegExpFlag(0);
     if (!ParseRegExpFlags(cx, opt, &flags))
         return AlreadyIncRefed<RegExpPrivate>(NULL);
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -129,16 +129,18 @@ class RegExpObject : public ::JSObject
         uintN flags = 0;
         flags |= global() ? GlobalFlag : 0;
         flags |= ignoreCase() ? IgnoreCaseFlag : 0;
         flags |= multiline() ? MultilineFlag : 0;
         flags |= sticky() ? StickyFlag : 0;
         return RegExpFlag(flags);
     }
 
+    inline bool startsWithAtomizedGreedyStar() const;
+
     /* JIT only. */
 
     inline size_t *addressOfPrivateRefCount() const;
 
     /* Flags. */
 
     inline void setIgnoreCase(bool enabled);
     inline void setGlobal(bool enabled);
@@ -210,32 +212,36 @@ class RegExpObjectBuilder
     JSContext       *cx;
     RegExpObject    *reobj_;
 
     bool getOrCreate();
     bool getOrCreateClone(RegExpObject *proto);
 
     RegExpObject *build(AlreadyIncRefed<RegExpPrivate> rep);
 
+    friend class RegExpMatcher;
+
   public:
     RegExpObjectBuilder(JSContext *cx, RegExpObject *reobj = NULL)
       : cx(cx), reobj_(reobj)
     { }
 
     RegExpObject *reobj() { return reobj_; }
 
     RegExpObject *build(JSLinearString *str, RegExpFlag flags);
     RegExpObject *build(RegExpObject *other);
 
     /* Perform a VM-internal clone. */
     RegExpObject *clone(RegExpObject *other, RegExpObject *proto);
 };
 
 namespace detail {
 
+static const jschar GreedyStarChars[] = {'.', '*'};
+
 /* Abstracts away the gross |RegExpPrivate| backend details. */
 class RegExpPrivateCode
 {
 #if ENABLE_YARR_JIT
     typedef JSC::Yarr::BytecodePattern BytecodePattern;
     typedef JSC::Yarr::ErrorCode ErrorCode;
     typedef JSC::Yarr::JSGlobalData JSGlobalData;
     typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock;
@@ -301,16 +307,54 @@ class RegExpPrivateCode
     inline bool compile(JSContext *cx, JSLinearString &pattern, TokenStream *ts, uintN *parenCount,
                         RegExpFlag flags);
 
 
     inline RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, size_t start,
                                    int *output, size_t outputCount);
 };
 
+enum RegExpPrivateCacheKind
+{
+    RegExpPrivateCache_TestOptimized,
+    RegExpPrivateCache_ExecCapable
+};
+
+class RegExpPrivateCacheValue
+{
+    union {
+        RegExpPrivate   *rep_;
+        uintptr_t       bits;
+    };
+
+  public:
+    RegExpPrivateCacheValue() : rep_(NULL) {}
+
+    RegExpPrivateCacheValue(RegExpPrivate *rep, RegExpPrivateCacheKind kind) {
+        reset(rep, kind);
+    }
+
+    RegExpPrivateCacheKind kind() const {
+        return (bits & 0x1)
+                 ? RegExpPrivateCache_TestOptimized
+                 : RegExpPrivateCache_ExecCapable;
+    }
+
+    RegExpPrivate *rep() {
+        return reinterpret_cast<RegExpPrivate *>(bits & ~uintptr_t(1));
+    }
+
+    void reset(RegExpPrivate *rep, RegExpPrivateCacheKind kind) {
+        rep_ = rep;
+        if (kind == RegExpPrivateCache_TestOptimized)
+            bits |= 0x1;
+        JS_ASSERT(this->kind() == kind);
+    }
+};
+
 /*
  * The "meat" of the builtin regular expression objects: it contains the
  * mini-program that represents the source of the regular expression.
  * Excepting refcounts, this is an immutable datastructure after
  * compilation.
  *
  * Non-atomic refcounting is used, so single-thread invariants must be
  * maintained. |RegExpPrivate|s are currently shared within a single
@@ -339,26 +383,30 @@ class RegExpPrivate
     static inline void checkMatchPairs(JSString *input, int *buf, size_t matchItemCount);
 
     static RegExpPrivate *
     createUncached(JSContext *cx, JSLinearString *source, RegExpFlag flags,
                    TokenStream *tokenStream);
 
     static RegExpPrivateCache *getOrCreateCache(JSContext *cx);
     static bool cacheLookup(JSContext *cx, JSAtom *atom, RegExpFlag flags,
-                            AlreadyIncRefed<RegExpPrivate> *result);
-    static bool cacheInsert(JSContext *cx, JSAtom *atom, RegExpPrivate *priv);
+                            RegExpPrivateCacheKind kind, AlreadyIncRefed<RegExpPrivate> *result);
+    static bool cacheInsert(JSContext *cx, JSAtom *atom,
+                            RegExpPrivateCacheKind kind, RegExpPrivate *priv);
 
   public:
     static AlreadyIncRefed<RegExpPrivate>
     create(JSContext *cx, JSLinearString *source, RegExpFlag flags, TokenStream *ts);
 
     static AlreadyIncRefed<RegExpPrivate>
     create(JSContext *cx, JSLinearString *source, JSString *flags, TokenStream *ts);
 
+    static AlreadyIncRefed<RegExpPrivate>
+    createTestOptimized(JSContext *cx, JSAtom *originalSource, RegExpFlag flags);
+
     RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, size_t *lastIndex,
                             LifoAllocScope &allocScope, MatchPairs **output);
 
     /* Mutators */
 
     void incref(JSContext *cx);
     void decref(JSContext *cx);
 
@@ -417,16 +465,18 @@ class RegExpMatcher
         if (!priv)
             return false;
         arc.reset(NeedsIncRef<RegExpPrivate>(priv));
         return true;
     }
 
     inline bool reset(JSLinearString *patstr, JSString *opt);
 
+    bool resetWithTestOptimized(RegExpObject *reobj);
+
     RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, size_t *lastIndex,
                             LifoAllocScope &allocScope, MatchPairs **output) {
         JS_ASSERT(!arc.null());
         return arc.get()->execute(cx, chars, length, lastIndex, allocScope, output);
     }
 };
 
 /*