Bug 1024132 - Add one slot cache for stripping leading and trailing .* from RegExps for test() calls, r=jandem.
authorBrian Hackett <bhackett1024@gmail.com>
Wed, 16 Jul 2014 08:34:30 -0800
changeset 216322 19b8c4576669c84aadcd24b66f02205f91aff9a3
parent 216321 6432c138d1b7c2f0f91cb707a9d8c0cd28657837
child 216323 2f62414fe13f3a47524d83e8fe3dc6e1864aab39
push id515
push userraliiev@mozilla.com
push dateMon, 06 Oct 2014 12:51:51 +0000
treeherdermozilla-release@267c7a481bef [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjandem
bugs1024132
milestone33.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1024132 - Add one slot cache for stripping leading and trailing .* from RegExps for test() calls, r=jandem.
js/src/builtin/RegExp.cpp
js/src/gc/Nursery.cpp
js/src/jit-test/tests/basic/regexp-removed-dot-star.js
js/src/jsatom.cpp
js/src/jsatom.h
js/src/jsgc.cpp
js/src/jsstr.cpp
js/src/jsstr.h
js/src/vm/Runtime.h
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -672,22 +672,117 @@ static bool
 regexp_test_impl(JSContext *cx, CallArgs args)
 {
     ScopedMatchPairs matches(&cx->tempLifoAlloc());
     RegExpRunStatus status = ExecuteRegExp(cx, args, matches);
     args.rval().setBoolean(status == RegExpRunStatus_Success);
     return status != RegExpRunStatus_Error;
 }
 
+static inline bool
+StringHasDotStar(HandleLinearString str, size_t index)
+{
+    // Return whether the portion of the string at the specified index is '.*'
+    return str->latin1OrTwoByteChar(index) == '.' && str->latin1OrTwoByteChar(index + 1) == '*';
+}
+
+static bool
+TryFillRegExpTestCache(JSContext *cx, HandleObject regexp, RegExpTestCache &cache,
+                       MutableHandleObject result)
+{
+    cache.purge();
+
+    // test() on global RegExps uses the lastIndex in a fashion that is
+    // incompatible with the cache.
+    if (regexp->as<RegExpObject>().global())
+        return true;
+
+    RootedAtom source(cx, regexp->as<RegExpObject>().getSource());
+
+    // Try to strip a leading '.*' from the RegExp, but only if it is not
+    // followed by a '?' (which will affect how the .* is parsed).
+    if (source->length() >= 3 &&
+        StringHasDotStar(source, 0) &&
+        source->latin1OrTwoByteChar(2) != '?')
+    {
+        source = AtomizeSubstring(cx, source, 2, source->length() - 2);
+        if (!source)
+            return false;
+    }
+
+    // Try to strip a trailing '.*' from the RegExp, but only if it does not
+    // have any other meta characters (to be sure we are not affecting how the
+    // RegExp will be parsed).
+    if (source->length() >= 3 &&
+        StringHasDotStar(source, source->length() - 2) &&
+        !StringHasRegExpMetaChars(source, 0, 2))
+    {
+        source = AtomizeSubstring(cx, source, 0, source->length() - 2);
+        if (!source)
+            return false;
+    }
+
+    if (source == regexp->as<RegExpObject>().getSource()) {
+        // We weren't able to remove a leading or trailing .*
+        return true;
+    }
+
+    RegExpObjectBuilder builder(cx);
+
+    result.set(builder.build(source, regexp->as<RegExpObject>().getFlags()));
+    if (!result)
+        return false;
+
+    cache.fill(&regexp->as<RegExpObject>(), &result->as<RegExpObject>());
+    return true;
+}
+
 /* Separate interface for use by IonMonkey. */
 bool
 js::regexp_test_raw(JSContext *cx, HandleObject regexp, HandleString input, bool *result)
 {
     ScopedMatchPairs matches(&cx->tempLifoAlloc());
-    RegExpRunStatus status = ExecuteRegExp(cx, regexp, input, matches, UpdateRegExpStatics);
+
+    RegExpTestCache &cache = cx->runtime()->regExpTestCache;
+
+    RootedObject alternate(cx);
+    if (regexp == cache.key ||
+        (cache.key &&
+         regexp->as<RegExpObject>().getSource() == cache.key->getSource() &&
+         regexp->as<RegExpObject>().getFlags() == cache.key->getFlags()))
+    {
+        alternate = cache.value;
+    } else {
+        if (!TryFillRegExpTestCache(cx, regexp, cache, &alternate))
+            return false;
+    }
+
+    RegExpRunStatus status;
+    if (alternate) {
+        // The alternate RegExp is simpler and should execute faster than the
+        // original one, so use it instead.
+        status = ExecuteRegExp(cx, alternate, input, matches, DontUpdateRegExpStatics);
+
+        if (status == RegExpRunStatus_Success) {
+            // Update the RegExpStatics to reflect the original RegExp we were
+            // trying to execute, and not the alternate one.
+            RegExpStatics *res = cx->global()->getRegExpStatics(cx);
+            if (!res)
+                return RegExpRunStatus_Error;
+
+            RegExpGuard shared(cx);
+            if (!regexp->as<RegExpObject>().getShared(cx, &shared))
+                return RegExpRunStatus_Error;
+
+            res->updateLazily(cx, &input->asLinear(), shared.re(), 0);
+        }
+    } else {
+        status = ExecuteRegExp(cx, regexp, input, matches, UpdateRegExpStatics);
+    }
+
     *result = (status == RegExpRunStatus_Success);
     return status != RegExpRunStatus_Error;
 }
 
 bool
 js::regexp_test(JSContext *cx, unsigned argc, Value *vp)
 {
     CallArgs args = CallArgsFromVp(argc, vp);
--- a/js/src/gc/Nursery.cpp
+++ b/js/src/gc/Nursery.cpp
@@ -823,16 +823,20 @@ js::Nursery::collect(JSRuntime *rt, JS::
     TIME_START(markDebugger);
     Debugger::markAll(&trc);
     TIME_END(markDebugger);
 
     TIME_START(clearNewObjectCache);
     rt->newObjectCache.clearNurseryObjects(rt);
     TIME_END(clearNewObjectCache);
 
+    TIME_START(clearRegExpTestCache);
+    rt->regExpTestCache.purge();
+    TIME_END(clearRegExpTestCache);
+
     // Most of the work is done here. This loop iterates over objects that have
     // been moved to the major heap. If these objects have any outgoing pointers
     // to the nursery, then those nursery objects get moved as well, until no
     // objects are left to move. That is, we iterate to a fixed point.
     TIME_START(collectToFP);
     TenureCountCache tenureCounts;
     collectToFixedPoint(&trc, tenureCounts);
     TIME_END(collectToFP);
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/regexp-removed-dot-star.js
@@ -0,0 +1,49 @@
+
+// Test that removal of leading or trailing .* from RegExp test() calls
+// does not affect lastMatch or other RegExpStatics info.
+
+function first(input) {
+    var re = /.*b(cd)/;
+    for (var i = 0; i < 10; i++)
+	re.test(input);
+}
+
+first("1234\nabcdefg\n1234");
+assertEq(RegExp.lastMatch, "abcd");
+assertEq(RegExp.$1, "cd");
+assertEq(RegExp.input, "1234\nabcdefg\n1234");
+assertEq(RegExp.leftContext, "1234\n");
+assertEq(RegExp.rightContext, "efg\n1234");
+assertEq(RegExp.lastParen, "cd");
+
+
+// Test that removal of leading or trailing .* from RegExp test() calls
+// does not affect lastMatch or other RegExpStatics info.
+
+function second(input) {
+    var re = /bcd.*/;
+    for (var i = 0; i < 10; i++)
+	re.test(input);
+}
+
+second("1234\nabcdefg\n1234");
+assertEq(RegExp.lastMatch, "bcdefg");
+assertEq(RegExp.$1, "");
+assertEq(RegExp.input, "1234\nabcdefg\n1234");
+assertEq(RegExp.leftContext, "1234\na");
+assertEq(RegExp.rightContext, "\n1234");
+assertEq(RegExp.lastParen, "");
+
+function third(input) {
+    var re = /.*bcd.*/;
+    for (var i = 0; i < 10; i++)
+	re.test(input);
+}
+
+third("1234\nabcdefg\n1234");
+assertEq(RegExp.lastMatch, "abcdefg");
+assertEq(RegExp.$1, "");
+assertEq(RegExp.input, "1234\nabcdefg\n1234");
+assertEq(RegExp.leftContext, "1234\n");
+assertEq(RegExp.rightContext, "\n1234");
+assertEq(RegExp.lastParen, "");
--- a/js/src/jsatom.cpp
+++ b/js/src/jsatom.cpp
@@ -468,16 +468,32 @@ js::AtomizeString(ExclusiveContext *cx, 
 
     JS::AutoCheckCannotGC nogc;
     return linear->hasLatin1Chars()
            ? AtomizeAndCopyChars(cx, linear->latin1Chars(nogc), linear->length(), ib)
            : AtomizeAndCopyChars(cx, linear->twoByteChars(nogc), linear->length(), ib);
 }
 
 JSAtom *
+js::AtomizeSubstring(ExclusiveContext *cx, JSString *str, size_t start, size_t length,
+                     InternBehavior ib /* = DoNotInternAtom */)
+{
+    JS_ASSERT(start + length <= str->length());
+
+    JSLinearString *linear = str->ensureLinear(cx);
+    if (!linear)
+        return nullptr;
+
+    JS::AutoCheckCannotGC nogc;
+    return linear->hasLatin1Chars()
+           ? AtomizeAndCopyChars(cx, linear->latin1Chars(nogc) + start, length, ib)
+           : AtomizeAndCopyChars(cx, linear->twoByteChars(nogc) + start, length, ib);
+}
+
+JSAtom *
 js::Atomize(ExclusiveContext *cx, const char *bytes, size_t length, InternBehavior ib)
 {
     CHECK_REQUEST(cx);
 
     if (!JSString::validateLength(cx, length))
         return nullptr;
 
     if (EnableLatin1Strings) {
--- a/js/src/jsatom.h
+++ b/js/src/jsatom.h
@@ -204,16 +204,20 @@ Atomize(ExclusiveContext *cx, const char
 template <typename CharT>
 extern JSAtom *
 AtomizeChars(ExclusiveContext *cx, const CharT *chars, size_t length,
              js::InternBehavior ib = js::DoNotInternAtom);
 
 extern JSAtom *
 AtomizeString(ExclusiveContext *cx, JSString *str, js::InternBehavior ib = js::DoNotInternAtom);
 
+extern JSAtom *
+AtomizeSubstring(ExclusiveContext *cx, JSString *str, size_t start, size_t length,
+                 InternBehavior ib = DoNotInternAtom);
+
 template <AllowGC allowGC>
 extern JSAtom *
 ToAtom(ExclusiveContext *cx, typename MaybeRooted<Value, allowGC>::HandleType v);
 
 enum XDRMode {
     XDR_ENCODE,
     XDR_DECODE
 };
--- a/js/src/jsgc.cpp
+++ b/js/src/jsgc.cpp
@@ -3040,16 +3040,17 @@ PurgeRuntime(JSRuntime *rt)
     rt->interpreterStack().purge(rt);
 
     rt->gsnCache.purge();
     rt->scopeCoordinateNameCache.purge();
     rt->newObjectCache.purge();
     rt->nativeIterCache.purge();
     rt->uncompressedSourceCache.purge();
     rt->evalCache.clear();
+    rt->regExpTestCache.purge();
 
     if (!rt->hasActiveCompilations())
         rt->parseMapPool().purgeAll();
 }
 
 bool
 GCRuntime::shouldPreserveJITCode(JSCompartment *comp, int64_t currentTime,
                                  JS::gcreason::Reason reason)
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -1986,23 +1986,25 @@ HasRegExpMetaChars(const CharT *chars, s
     for (size_t i = 0; i < length; ++i) {
         if (IsRegExpMetaChar(chars[i]))
             return true;
     }
     return false;
 }
 
 bool
-js::StringHasRegExpMetaChars(JSLinearString *str)
+js::StringHasRegExpMetaChars(JSLinearString *str, size_t beginOffset, size_t endOffset)
 {
+    JS_ASSERT(beginOffset + endOffset <= str->length());
+
     AutoCheckCannotGC nogc;
     if (str->hasLatin1Chars())
-        return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
-
-    return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
+        return HasRegExpMetaChars(str->latin1Chars(nogc) + beginOffset, str->length() - beginOffset - endOffset);
+
+    return HasRegExpMetaChars(str->twoByteChars(nogc) + beginOffset, str->length() - beginOffset - endOffset);
 }
 
 namespace {
 
 /*
  * StringRegExpGuard factors logic out of String regexp operations.
  *
  * |optarg| indicates in which argument position RegExp flags will be found, if
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@@ -257,18 +257,20 @@ StringEqualsAscii(JSLinearString *str, c
 
 /* Return true if the string contains a pattern anywhere inside it. */
 extern bool
 StringHasPattern(JSLinearString *text, const jschar *pat, uint32_t patlen);
 
 extern int
 StringFindPattern(JSLinearString *text, JSLinearString *pat, size_t start);
 
+// Whether the string contains any RegExp meta characters (., *, and so forth).
+// Searches the range [beginOffset, length - endOffset>.
 extern bool
-StringHasRegExpMetaChars(JSLinearString *str);
+StringHasRegExpMetaChars(JSLinearString *str, size_t beginOffset = 0, size_t endOffset = 0);
 
 template <typename Char1, typename Char2>
 inline bool
 EqualChars(const Char1 *s1, const Char2 *s2, size_t len);
 
 template <typename Char1>
 inline bool
 EqualChars(const Char1 *s1, const Char1 *s2, size_t len)
--- a/js/src/vm/Runtime.h
+++ b/js/src/vm/Runtime.h
@@ -345,16 +345,40 @@ class NewObjectCache
         js_memcpy(dst, src, gc::Arena::thingSize(kind));
 #ifdef JSGC_GENERATIONAL
         Shape::writeBarrierPost(dst->shape_, &dst->shape_);
         types::TypeObject::writeBarrierPost(dst->type_, &dst->type_);
 #endif
     }
 };
 
+class RegExpObject;
+
+// One slot cache for speeding up RegExp.test() executions, by stripping
+// unnecessary leading or trailing .* from the RegExp.
+struct RegExpTestCache
+{
+    RegExpObject *key;
+    RegExpObject *value;
+
+    RegExpTestCache()
+      : key(nullptr), value(nullptr)
+    {}
+
+    void purge() {
+        key = nullptr;
+        value = nullptr;
+    }
+
+    void fill(RegExpObject *key, RegExpObject *value) {
+        this->key = key;
+        this->value = value;
+    }
+};
+
 /*
  * A FreeOp can do one thing: free memory. For convenience, it has delete_
  * convenience methods that also call destructors.
  *
  * FreeOp is passed to finalizers and other sweep-phase hooks so that we do not
  * need to pass a JSContext to those hooks.
  */
 class FreeOp : public JSFreeOp {
@@ -1105,16 +1129,17 @@ struct JSRuntime : public JS::shadow::Ru
 
     js::GSNCache        gsnCache;
     js::ScopeCoordinateNameCache scopeCoordinateNameCache;
     js::NewObjectCache  newObjectCache;
     js::NativeIterCache nativeIterCache;
     js::UncompressedSourceCache uncompressedSourceCache;
     js::EvalCache       evalCache;
     js::LazyScriptCache lazyScriptCache;
+    js::RegExpTestCache regExpTestCache;
 
     js::CompressedSourceSet compressedSourceSet;
     js::DateTimeInfo    dateTimeInfo;
 
     // Pool of maps used during parse/emit. This may be modified by threads
     // with an ExclusiveContext and requires a lock. Active compilations
     // prevent the pool from being purged during GCs.
   private: