Bug 965712: Part 2 - Replace the unrolling matcher with memchr, r=luke
☠☠ backed out by 44e189f0fd67 ☠ ☠
authorHannes Verschore <hv1989@gmail.com>
Wed, 30 Apr 2014 16:42:27 +0200
changeset 181426 00edef0582f1fc689226c6bb9181ad4e43332fa6
parent 181425 7796f1b424872b11f26be040da31570ea1967b7f
child 181427 b30e471400c5d320fd1d78c75870f6f0586bff0e
push id272
push userpvanderbeken@mozilla.com
push dateMon, 05 May 2014 16:31:18 +0000
reviewersluke
bugs965712
milestone32.0a1
Bug 965712: Part 2 - Replace the unrolling matcher with memchr, r=luke
js/src/jit-test/tests/ion/bug965712.js
js/src/jsstr.cpp
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/ion/bug965712.js
@@ -0,0 +1,2 @@
+var result = "D1D1D1D1D1D1D1D1D1D1".replace(/d1/ig,1);
+assertEq(result, "1111111111");
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -1058,56 +1058,63 @@ struct ManualCmp {
                 return false;
         }
         return true;
     }
 };
 
 template <class InnerMatch>
 static int
-UnrolledMatch(const jschar *text, uint32_t textlen, const jschar *pat, uint32_t patlen)
+MemChrMatch(const jschar *text, uint32_t textlen, const jschar *pat, uint32_t patlen)
 {
-    JS_ASSERT(patlen > 0 && textlen > 0);
-    const jschar *textend = text + textlen - (patlen - 1);
-    const jschar p0 = *pat;
-    const jschar *const patNext = pat + 1;
+    /*
+     * Use the very efficient memchr to find the character that matches the
+     * first pattern character. The only caveat is that memchr uses 8 bit chars,
+     * while spidermonkey uses 16 bit chars.
+     */
+
     const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
-    uint8_t fixup;
-
-    const jschar *t = text;
-    switch ((textend - t) & 7) {
-      case 0: if (*t++ == p0) { fixup = 8; goto match; }
-      case 7: if (*t++ == p0) { fixup = 7; goto match; }
-      case 6: if (*t++ == p0) { fixup = 6; goto match; }
-      case 5: if (*t++ == p0) { fixup = 5; goto match; }
-      case 4: if (*t++ == p0) { fixup = 4; goto match; }
-      case 3: if (*t++ == p0) { fixup = 3; goto match; }
-      case 2: if (*t++ == p0) { fixup = 2; goto match; }
-      case 1: if (*t++ == p0) { fixup = 1; goto match; }
-    }
-    while (t != textend) {
-      if (t[0] == p0) { t += 1; fixup = 8; goto match; }
-      if (t[1] == p0) { t += 2; fixup = 7; goto match; }
-      if (t[2] == p0) { t += 3; fixup = 6; goto match; }
-      if (t[3] == p0) { t += 4; fixup = 5; goto match; }
-      if (t[4] == p0) { t += 5; fixup = 4; goto match; }
-      if (t[5] == p0) { t += 6; fixup = 3; goto match; }
-      if (t[6] == p0) { t += 7; fixup = 2; goto match; }
-      if (t[7] == p0) { t += 8; fixup = 1; goto match; }
-        t += 8;
-        continue;
-        do {
-            if (*t++ == p0) {
-              match:
-                if (!InnerMatch::match(patNext, t, extent))
-                    goto failed_match;
-                return t - text - 1;
-            }
-          failed_match:;
-        } while (--fixup > 0);
+
+    /* Treat input as 8 bit strings */
+    const char *text8 = (const char *)text;
+    const char *pat8 = (const char *)pat;
+
+    /*
+     * Indexing on 8 bits. So the indexes get twice as big.
+     * Currently this isn't a problem since max string size is 28bit.
+     */
+    JS_ASSERT(textlen < UINT32_MAX/2);
+
+    uint32_t i = 0;
+    uint32_t n = (textlen - patlen)*2;
+    while (i <= n) {
+        /* Find the first 8 bits of the pattern in the text. */
+        const char* pos = reinterpret_cast<const char *>(memchr(text8 + i, pat8[0], n - i + 1));
+        if (pos == nullptr)
+            return -1;
+
+        i = static_cast<uint32_t>(pos - text8);
+
+        /* Ignore when it matched the upper 8 bits of the 16bits text. */
+        if (i%2 != 0) {
+            i++;
+            continue;
+        }
+
+        /*
+         * Test the upper 8 bits of the first character of the pattern.
+         * On success test the full text using normal 16bit character compares,
+         * starting at the second 16 bit character.
+         */
+        if (pat8[1] == text8[i+1]) {
+            if (InnerMatch::match(pat + 1, text + i/2 + 1, extent))
+                return i/2;
+        }
+
+        i += 2;
     }
     return -1;
 }
 
 static MOZ_ALWAYS_INLINE int
 StringMatch(const jschar *text, uint32_t textlen,
             const jschar *pat, uint32_t patlen)
 {
@@ -1153,20 +1160,20 @@ StringMatch(const jschar *text, uint32_t
     /*
      * For big patterns with large potential overlap we want the SIMD-optimized
      * speed of memcmp. For small patterns, a simple loop is faster.
      *
      * FIXME: Linux memcmp performance is sad and the manual loop is faster.
      */
     return
 #if !defined(__linux__)
-           patlen > 128 ? UnrolledMatch<MemCmp>(text, textlen, pat, patlen)
+           patlen > 128 ? MemChrMatch<MemCmp>(text, textlen, pat, patlen)
                         :
 #endif
-                          UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
+                          MemChrMatch<ManualCmp>(text, textlen, pat, patlen);
 }
 
 static const size_t sRopeMatchThresholdRatioLog2 = 5;
 
 bool
 js::StringHasPattern(const jschar *text, uint32_t textlen,
                      const jschar *pat, uint32_t patlen)
 {
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -466,18 +466,19 @@ RegExpShared::compile(JSContext *cx, boo
         return false;
 
     return compile(cx, *fakeySource, matchOnly);
 }
 
 bool
 RegExpShared::compile(JSContext *cx, JSLinearString &pattern, bool matchOnly)
 {
-    if (!StringHasRegExpMetaChars(pattern.chars(), pattern.length())) {
+    if (!ignoreCase() && !StringHasRegExpMetaChars(pattern.chars(), pattern.length())) {
         canStringMatch = true;
+        parenCount = 0;
         return true;
     }
 
     /* Parse the pattern. */
     ErrorCode yarrError;
     YarrPattern yarrPattern(pattern, ignoreCase(), multiline(), &yarrError);
     if (yarrError) {
         reportYarrError(cx, nullptr, yarrError);
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -200,17 +200,21 @@ class RegExpShared
                             size_t *lastIndex, MatchPairs &matches);
 
     /* Run the regular expression without collecting matches, for test(). */
     RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length,
                                      size_t *lastIndex, MatchPair &match);
 
     /* Accessors */
 
-    size_t getParenCount() const        { JS_ASSERT(isCompiled()); return parenCount; }
+    size_t getParenCount() const {
+        JS_ASSERT(isCompiled() || canStringMatch);
+        return parenCount;
+    }
+
     void incRef()                       { activeUseCount++; }
     void decRef()                       { JS_ASSERT(activeUseCount > 0); activeUseCount--; }
 
     /* Accounts for the "0" (whole match) pair. */
     size_t pairCount() const            { return getParenCount() + 1; }
 
     RegExpFlag getFlags() const         { return flags; }
     bool ignoreCase() const             { return flags & IgnoreCaseFlag; }