Bug 1024132 - Add one slot cache for stripping leading and trailing .* from RegExps for test() calls. r=jandem, a=lmandel
authorBrian Hackett <bhackett1024@gmail.com>
Sun, 20 Jul 2014 21:42:00 -0400
changeset 208168 2339e39f5ff4
parent 208167 f9b0de65c69d
child 208169 5a6749df6a78
push id3753
push userryanvm@gmail.com
push date2014-07-28 14:29 +0000
treeherdermozilla-beta@ecfc5bee1685 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjandem, lmandel
bugs1024132
milestone32.0
Bug 1024132 - Add one slot cache for stripping leading and trailing .* from RegExps for test() calls. r=jandem, a=lmandel
js/src/builtin/RegExp.cpp
js/src/gc/Nursery.cpp
js/src/jit-test/tests/basic/regexp-removed-dot-star.js
js/src/jsatom.cpp
js/src/jsatom.h
js/src/jsgc.cpp
js/src/jsstr.cpp
js/src/jsstr.h
js/src/vm/Runtime.h
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -700,28 +700,123 @@ regexp_test_impl(JSContext *cx, CallArgs
     ScopedMatchPairs matches(&cx->tempLifoAlloc());
     MatchConduit conduit(&matches);
 #endif
     RegExpRunStatus status = ExecuteRegExp(cx, args, conduit);
     args.rval().setBoolean(status == RegExpRunStatus_Success);
     return status != RegExpRunStatus_Error;
 }
 
+static inline bool
+StringHasDotStar(Handle<JSLinearString *> str, size_t index)
+{
+    // Return whether the portion of the string at the specified index is '.*'
+    return str->latin1OrTwoByteChar(index) == '.' && str->latin1OrTwoByteChar(index + 1) == '*';
+}
+
+static bool
+TryFillRegExpTestCache(JSContext *cx, HandleObject regexp, RegExpTestCache &cache,
+                       MutableHandleObject result)
+{
+    cache.purge();
+
+    // test() on global RegExps uses the lastIndex in a fashion that is
+    // incompatible with the cache.
+    if (regexp->as<RegExpObject>().global())
+        return true;
+
+    RootedAtom source(cx, regexp->as<RegExpObject>().getSource());
+
+    // Try to strip a leading '.*' from the RegExp, but only if it is not
+    // followed by a '?' (which will affect how the .* is parsed).
+    if (source->length() >= 3 &&
+        StringHasDotStar(source, 0) &&
+        source->latin1OrTwoByteChar(2) != '?')
+    {
+        source = AtomizeSubstring(cx, source, 2, source->length() - 2);
+        if (!source)
+            return false;
+    }
+
+    // Try to strip a trailing '.*' from the RegExp, but only if it does not
+    // have any other meta characters (to be sure we are not affecting how the
+    // RegExp will be parsed).
+    if (source->length() >= 3 &&
+        StringHasDotStar(source, source->length() - 2) &&
+        !StringHasRegExpMetaChars(source, 0, 2))
+    {
+        source = AtomizeSubstring(cx, source, 0, source->length() - 2);
+        if (!source)
+            return false;
+    }
+
+    if (source == regexp->as<RegExpObject>().getSource()) {
+        // We weren't able to remove a leading or trailing .*
+        return true;
+    }
+
+    RegExpObjectBuilder builder(cx);
+
+    result.set(builder.build(source, regexp->as<RegExpObject>().getFlags()));
+    if (!result)
+        return false;
+
+    cache.fill(&regexp->as<RegExpObject>(), &result->as<RegExpObject>());
+    return true;
+}
+
 /* Separate interface for use by IonMonkey. */
 bool
 js::regexp_test_raw(JSContext *cx, HandleObject regexp, HandleString input, bool *result)
 {
 #ifdef JS_YARR
     MatchPair match;
     MatchConduit conduit(&match);
 #else
     ScopedMatchPairs matches(&cx->tempLifoAlloc());
     MatchConduit conduit(&matches);
 #endif
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, conduit, UpdateRegExpStatics);
+
+    RegExpTestCache &cache = cx->runtime()->regExpTestCache;
+
+    RootedObject alternate(cx);
+    if (regexp == cache.key ||
+        (cache.key &&
+         regexp->as<RegExpObject>().getSource() == cache.key->getSource() &&
+         regexp->as<RegExpObject>().getFlags() == cache.key->getFlags()))
+    {
+        alternate = cache.value;
+    } else {
+        if (!TryFillRegExpTestCache(cx, regexp, cache, &alternate))
+            return false;
+    }
+
+    RegExpRunStatus status;
+    if (alternate) {
+        // The alternate RegExp is simpler and should execute faster than the
+        // original one, so use it instead.
+        status = ExecuteRegExp(cx, alternate, input, conduit, DontUpdateRegExpStatics);
+
+        if (status == RegExpRunStatus_Success) {
+            // Update the RegExpStatics to reflect the original RegExp we were
+            // trying to execute, and not the alternate one.
+            RegExpStatics *res = cx->global()->getRegExpStatics(cx);
+            if (!res)
+                return RegExpRunStatus_Error;
+
+            RegExpGuard shared(cx);
+            if (!regexp->as<RegExpObject>().getShared(cx, &shared))
+                return RegExpRunStatus_Error;
+
+            res->updateLazily(cx, &input->asLinear(), shared.re(), 0);
+        }
+    } else {
+        status = ExecuteRegExp(cx, regexp, input, conduit, UpdateRegExpStatics);
+    }
+
     *result = (status == RegExpRunStatus_Success);
     return status != RegExpRunStatus_Error;
 }
 
 bool
 js::regexp_test(JSContext *cx, unsigned argc, Value *vp)
 {
     CallArgs args = CallArgsFromVp(argc, vp);
--- a/js/src/gc/Nursery.cpp
+++ b/js/src/gc/Nursery.cpp
@@ -804,16 +804,20 @@ js::Nursery::collect(JSRuntime *rt, JS::
     TIME_START(markDebugger);
     Debugger::markAll(&trc);
     TIME_END(markDebugger);
 
     TIME_START(clearNewObjectCache);
     rt->newObjectCache.clearNurseryObjects(rt);
     TIME_END(clearNewObjectCache);
 
+    TIME_START(clearRegExpTestCache);
+    rt->regExpTestCache.purge();
+    TIME_END(clearRegExpTestCache);
+
     // Most of the work is done here. This loop iterates over objects that have
     // been moved to the major heap. If these objects have any outgoing pointers
     // to the nursery, then those nursery objects get moved as well, until no
     // objects are left to move. That is, we iterate to a fixed point.
     TIME_START(collectToFP);
     TenureCountCache tenureCounts;
     collectToFixedPoint(&trc, tenureCounts);
     TIME_END(collectToFP);
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/regexp-removed-dot-star.js
@@ -0,0 +1,49 @@
+
+// Test that removal of leading or trailing .* from RegExp test() calls
+// does not affect lastMatch or other RegExpStatics info.
+
+function first(input) {
+    var re = /.*b(cd)/;
+    for (var i = 0; i < 10; i++)
+	re.test(input);
+}
+
+first("1234\nabcdefg\n1234");
+assertEq(RegExp.lastMatch, "abcd");
+assertEq(RegExp.$1, "cd");
+assertEq(RegExp.input, "1234\nabcdefg\n1234");
+assertEq(RegExp.leftContext, "1234\n");
+assertEq(RegExp.rightContext, "efg\n1234");
+assertEq(RegExp.lastParen, "cd");
+
+
+// Test that removal of leading or trailing .* from RegExp test() calls
+// does not affect lastMatch or other RegExpStatics info.
+
+function second(input) {
+    var re = /bcd.*/;
+    for (var i = 0; i < 10; i++)
+	re.test(input);
+}
+
+second("1234\nabcdefg\n1234");
+assertEq(RegExp.lastMatch, "bcdefg");
+assertEq(RegExp.$1, "");
+assertEq(RegExp.input, "1234\nabcdefg\n1234");
+assertEq(RegExp.leftContext, "1234\na");
+assertEq(RegExp.rightContext, "\n1234");
+assertEq(RegExp.lastParen, "");
+
+function third(input) {
+    var re = /.*bcd.*/;
+    for (var i = 0; i < 10; i++)
+	re.test(input);
+}
+
+third("1234\nabcdefg\n1234");
+assertEq(RegExp.lastMatch, "abcdefg");
+assertEq(RegExp.$1, "");
+assertEq(RegExp.input, "1234\nabcdefg\n1234");
+assertEq(RegExp.leftContext, "1234\n");
+assertEq(RegExp.rightContext, "\n1234");
+assertEq(RegExp.lastParen, "");
--- a/js/src/jsatom.cpp
+++ b/js/src/jsatom.cpp
@@ -464,16 +464,29 @@ js::AtomizeChars(ExclusiveContext *cx, c
     CHECK_REQUEST(cx);
 
     if (!JSString::validateLength(cx, length))
         return nullptr;
 
     return AtomizeAndCopyChars(cx, chars, length, ib);
 }
 
+JSAtom *
+js::AtomizeSubstring(ExclusiveContext *cx, JSString *str, size_t start, size_t length,
+                     InternBehavior ib /* = DoNotInternAtom */)
+{
+    JS_ASSERT(start + length <= str->length());
+
+    JSLinearString *linear = str->ensureLinear(cx);
+    if (!linear)
+        return nullptr;
+
+    return AtomizeAndCopyChars(cx, linear->chars() + start, length, ib);
+}
+
 bool
 js::IndexToIdSlow(ExclusiveContext *cx, uint32_t index, MutableHandleId idp)
 {
     JS_ASSERT(index > JSID_INT_MAX);
 
     jschar buf[UINT32_CHAR_BUFFER_LENGTH];
     RangedPtr<jschar> end(ArrayEnd(buf), buf, ArrayEnd(buf));
     RangedPtr<jschar> start = BackfillIndexInCharBuffer(index, end);
--- a/js/src/jsatom.h
+++ b/js/src/jsatom.h
@@ -189,16 +189,20 @@ Atomize(ExclusiveContext *cx, const char
 
 extern JSAtom *
 AtomizeChars(ExclusiveContext *cx, const jschar *chars, size_t length,
              js::InternBehavior ib = js::DoNotInternAtom);
 
 extern JSAtom *
 AtomizeString(ExclusiveContext *cx, JSString *str, js::InternBehavior ib = js::DoNotInternAtom);
 
+extern JSAtom *
+AtomizeSubstring(ExclusiveContext *cx, JSString *str, size_t start, size_t length,
+                 InternBehavior ib = DoNotInternAtom);
+
 template <AllowGC allowGC>
 extern JSAtom *
 ToAtom(ExclusiveContext *cx, typename MaybeRooted<Value, allowGC>::HandleType v);
 
 enum XDRMode {
     XDR_ENCODE,
     XDR_DECODE
 };
--- a/js/src/jsgc.cpp
+++ b/js/src/jsgc.cpp
@@ -2836,16 +2836,17 @@ PurgeRuntime(JSRuntime *rt)
     rt->interpreterStack().purge(rt);
 
     rt->gsnCache.purge();
     rt->scopeCoordinateNameCache.purge();
     rt->newObjectCache.purge();
     rt->nativeIterCache.purge();
     rt->sourceDataCache.purge();
     rt->evalCache.clear();
+    rt->regExpTestCache.purge();
 
     if (!rt->hasActiveCompilations())
         rt->parseMapPool().purgeAll();
 }
 
 bool
 GCRuntime::shouldPreserveJITCode(JSCompartment *comp, int64_t currentTime)
 {
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -1976,24 +1976,26 @@ HasRegExpMetaChars(const CharT *chars, s
 {
     for (size_t i = 0; i < length; ++i) {
         if (IsRegExpMetaChar(chars[i]))
             return true;
     }
     return false;
 }
 
-static inline bool
-HasRegExpMetaChars(JSLinearString *str)
+bool
+js::StringHasRegExpMetaChars(JSLinearString *str, size_t beginOffset, size_t endOffset)
 {
+    JS_ASSERT(beginOffset + endOffset <= str->length());
+
     AutoCheckCannotGC nogc;
     if (str->hasLatin1Chars())
-        return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
-
-    return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
+        return HasRegExpMetaChars(str->latin1Chars(nogc) + beginOffset, str->length() - beginOffset - endOffset);
+
+    return HasRegExpMetaChars(str->twoByteChars(nogc) + beginOffset, str->length() - beginOffset - endOffset);
 }
 
 bool
 js::StringHasRegExpMetaChars(const jschar *chars, size_t length)
 {
     return HasRegExpMetaChars(chars, length);
 }
 
@@ -2100,17 +2102,17 @@ class MOZ_STACK_CLASS StringRegExpGuard
     {
         if (re_.initialized())
             return nullptr;
 
         if (optarg < argc)
             return nullptr;
 
         size_t patLen = fm.pat_->length();
-        if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat_)))
+        if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(fm.pat_)))
             return nullptr;
 
         /*
          * |text| could be a rope, so we want to avoid flattening it for as
          * long as possible.
          */
         if (text->isRope()) {
             if (!RopeMatch(cx, &text->asRope(), fm.pat_, &fm.match_))
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@@ -207,16 +207,21 @@ StringEqualsAscii(JSLinearString *str, c
 extern bool
 StringHasPattern(const jschar *text, uint32_t textlen,
                  const jschar *pat, uint32_t patlen);
 
 extern int
 StringFindPattern(const jschar *text, uint32_t textlen,
                   const jschar *pat, uint32_t patlen);
 
+// Whether the string contains any RegExp meta characters (., *, and so forth).
+// Searches the range [beginOffset, length - endOffset>.
+extern bool
+StringHasRegExpMetaChars(JSLinearString *str, size_t beginOffset = 0, size_t endOffset = 0);
+
 extern bool
 StringHasRegExpMetaChars(const jschar *chars, size_t length);
 
 } /* namespace js */
 
 extern size_t
 js_strlen(const jschar *s);
 
--- a/js/src/vm/Runtime.h
+++ b/js/src/vm/Runtime.h
@@ -351,16 +351,40 @@ class NewObjectCache
         js_memcpy(dst, src, gc::Arena::thingSize(kind));
 #ifdef JSGC_GENERATIONAL
         Shape::writeBarrierPost(dst->shape_, &dst->shape_);
         types::TypeObject::writeBarrierPost(dst->type_, &dst->type_);
 #endif
     }
 };
 
+class RegExpObject;
+
+// One slot cache for speeding up RegExp.test() executions, by stripping
+// unnecessary leading or trailing .* from the RegExp.
+struct RegExpTestCache
+{
+    RegExpObject *key;
+    RegExpObject *value;
+
+    RegExpTestCache()
+      : key(nullptr), value(nullptr)
+    {}
+
+    void purge() {
+        key = nullptr;
+        value = nullptr;
+    }
+
+    void fill(RegExpObject *key, RegExpObject *value) {
+        this->key = key;
+        this->value = value;
+    }
+};
+
 /*
  * A FreeOp can do one thing: free memory. For convenience, it has delete_
  * convenience methods that also call destructors.
  *
  * FreeOp is passed to finalizers and other sweep-phase hooks so that we do not
  * need to pass a JSContext to those hooks.
  */
 class FreeOp : public JSFreeOp {
@@ -1103,16 +1127,17 @@ struct JSRuntime : public JS::shadow::Ru
 
     js::GSNCache        gsnCache;
     js::ScopeCoordinateNameCache scopeCoordinateNameCache;
     js::NewObjectCache  newObjectCache;
     js::NativeIterCache nativeIterCache;
     js::SourceDataCache sourceDataCache;
     js::EvalCache       evalCache;
     js::LazyScriptCache lazyScriptCache;
+    js::RegExpTestCache regExpTestCache;
 
     js::DateTimeInfo    dateTimeInfo;
 
     // Pool of maps used during parse/emit. This may be modified by threads
     // with an ExclusiveContext and requires a lock. Active compilations
     // prevent the pool from being purged during GCs.
   private:
     js::frontend::ParseMapPool parseMapPool_;