Bug 1019585 part 1 - Make FlatMatch work with Latin1 strings. r=luke
author Jan de Mooij <jdemooij@mozilla.com>
Thu, 05 Jun 2014 12:01:58 +0200
changeset 207117 b3c9dcbc34c084b55e2516553be3b38a0159933f
parent 207116 c3d4d93e55b3b959dd1a2ba3664c4c57ca53b7b9
child 207118 f2b79650ac7b8a6352d10ef18026eaba6627dbbc
push id 494
push user raliiev@mozilla.com
push date Mon, 25 Aug 2014 18:42:16 +0000
treeherder mozilla-release@a3cc3e46b571 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers luke
bugs 1019585
milestone 32.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1019585 part 1 - Make FlatMatch work with Latin1 strings. r=luke
js/src/jit-test/tests/latin1/search.js
js/src/jsstr.cpp
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/latin1/search.js
@@ -0,0 +1,22 @@
+function testSearchFlat() {
+    var s1 = toLatin1("fooBar12345");
+    var s2 = toLatin1("Bar1");
+
+    // Latin1 + Latin1
+    assertEq(s1.search(s2), 3);
+    assertEq(s2.search(s1), -1);
+    assertEq(s1.search(s1), 0);
+
+    // Latin1 + TwoByte
+    assertEq(s1.search(s2 + "\u1200"), -1);
+    assertEq(s1.search(("12345\u1200").slice(0, -1)), 6);
+
+    // TwoByte + Latin1
+    assertEq("fooBar12345\u1200".search(s1), 0);
+    assertEq("fooBar12345\u1200".search(s2), 3);
+
+    // TwoByte + TwoByte
+    assertEq("fooBar12345\u1200".search("5\u1200"), 10);
+    assertEq("fooBar12345\u1200".search("5\u1201"), -1);
+}
+testSearchFlat();
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -1199,16 +1199,42 @@ StringMatch(const TextChar *text, uint32
 #if !defined(__linux__)
         (patLen > 128 && IsSame<TextChar, PatChar>::value)
             ? UnrolledMatch<MemCmp<TextChar, PatChar>>(text, textLen, pat, patLen)
             :
 #endif
               UnrolledMatch<ManualCmp<TextChar, PatChar>>(text, textLen, pat, patLen);
 }
 
+static int32_t
+StringMatch(JSLinearString *text, JSLinearString *pat, uint32_t start = 0)
+{
+    MOZ_ASSERT(start <= text->length());
+    uint32_t textLen = text->length() - start;
+    uint32_t patLen = pat->length();
+
+    int match;
+    AutoCheckCannotGC nogc;
+    if (text->hasLatin1Chars()) {
+        const Latin1Char *textChars = text->latin1Chars(nogc) + start;
+        if (pat->hasLatin1Chars())
+            match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
+        else
+            match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
+    } else {
+        const jschar *textChars = text->twoByteChars(nogc) + start;
+        if (pat->hasLatin1Chars())
+            match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
+        else
+            match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
+    }
+
+    return (match == -1) ? -1 : start + match;
+}
+
 static const size_t sRopeMatchThresholdRatioLog2 = 5;
 
 bool
 js::StringHasPattern(const jschar *text, uint32_t textLen,
                      const jschar *pat, uint32_t patLen)
 {
     return StringMatch(text, textLen, pat, patLen) != -1;
 }
@@ -1370,42 +1396,16 @@ RopeMatch(JSContext *cx, JSString *texts
 
         pos += len;
     }
 
     *match = -1;
     return true;
 }
 
-static int32_t
-IndexOfImpl(JSLinearString *text, JSLinearString *pat, uint32_t start)
-{
-    MOZ_ASSERT(start <= text->length());
-    uint32_t textLen = text->length() - start;
-    uint32_t patLen = pat->length();
-
-    int match;
-    AutoCheckCannotGC nogc;
-    if (text->hasLatin1Chars()) {
-        const Latin1Char *textChars = text->latin1Chars(nogc) + start;
-        if (pat->hasLatin1Chars())
-            match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
-        else
-            match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
-    } else {
-        const jschar *textChars = text->twoByteChars(nogc) + start;
-        if (pat->hasLatin1Chars())
-            match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
-        else
-            match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
-    }
-
-    return (match == -1) ? -1 : start + match;
-}
-
 /* ES6 20121026 draft 15.5.4.24. */
 static bool
 str_contains(JSContext *cx, unsigned argc, Value *vp)
 {
     CallArgs args = CallArgsFromVp(argc, vp);
 
     // Steps 1, 2, and 3
     RootedString str(cx, ThisToStringForStringProto(cx, args));
@@ -1437,17 +1437,17 @@ str_contains(JSContext *cx, unsigned arg
     // Step 9
     uint32_t start = Min(Max(pos, 0U), textLen);
 
     // Steps 10 and 11
     JSLinearString *text = str->ensureLinear(cx);
     if (!text)
         return false;
 
-    args.rval().setBoolean(IndexOfImpl(text, searchStr, start) != -1);
+    args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
     return true;
 }
 
 /* ES6 20120927 draft 15.5.4.7. */
 static bool
 str_indexOf(JSContext *cx, unsigned argc, Value *vp)
 {
     CallArgs args = CallArgsFromVp(argc, vp);
@@ -1482,17 +1482,17 @@ str_indexOf(JSContext *cx, unsigned argc
     // Step 9
     uint32_t start = Min(Max(pos, 0U), textLen);
 
     // Steps 10 and 11
     JSLinearString *text = str->ensureLinear(cx);
     if (!text)
         return false;
 
-    args.rval().setInt32(IndexOfImpl(text, searchStr, start));
+    args.rval().setInt32(StringMatch(text, searchStr, start));
     return true;
 }
 
 template <typename TextChar, typename PatChar>
 static int32_t
 LastIndexOfImpl(const TextChar *text, size_t textLen, const PatChar *pat, size_t patLen,
                 size_t start)
 {
@@ -1821,27 +1821,25 @@ str_trimRight(JSContext *cx, unsigned ar
  * Perl-inspired string functions.
  */
 
 namespace {
 
 /* Result of a successfully performed flat match. */
 class FlatMatch
 {
-    RootedAtom patstr;
-    const jschar *pat;
-    size_t       patLen;
-    int32_t      match_;
+    RootedAtom pat_;
+    int32_t match_;
 
     friend class StringRegExpGuard;
 
   public:
-    explicit FlatMatch(JSContext *cx) : patstr(cx) {}
-    JSLinearString *pattern() const { return patstr; }
-    size_t patternLength() const { return patLen; }
+    explicit FlatMatch(JSContext *cx) : pat_(cx) {}
+    JSLinearString *pattern() const { return pat_; }
+    size_t patternLength() const { return pat_->length(); }
 
     /*
      * Note: The match is -1 when the match is performed successfully,
      * but no match is found.
      */
     int32_t match() const { return match_; }
 };
 
@@ -1856,26 +1854,37 @@ IsRegExpMetaChar(jschar c)
       case '?': case '(': case ')': case '[': case ']': case '{':
       case '}': case '|':
         return true;
       default:
         return false;
     }
 }
 
+template <typename CharT>
 static inline bool
-HasRegExpMetaChars(const jschar *chars, size_t length)
+HasRegExpMetaChars(const CharT *chars, size_t length)
 {
     for (size_t i = 0; i < length; ++i) {
         if (IsRegExpMetaChar(chars[i]))
             return true;
     }
     return false;
 }
 
+static inline bool
+HasRegExpMetaChars(JSLinearString *str)
+{
+    AutoCheckCannotGC nogc;
+    if (str->hasLatin1Chars())
+        return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
+
+    return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
+}
+
 bool
 js::StringHasRegExpMetaChars(const jschar *chars, size_t length)
 {
     return HasRegExpMetaChars(chars, length);
 }
 
 namespace {
 
@@ -1926,89 +1935,85 @@ class MOZ_STACK_CLASS StringRegExpGuard
 
     /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
     bool init(JSContext *cx, CallArgs args, bool convertVoid = false)
     {
         if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx))
             return init(cx, &args[0].toObject());
 
         if (convertVoid && !args.hasDefined(0)) {
-            fm.patstr = cx->runtime()->emptyString;
+            fm.pat_ = cx->runtime()->emptyString;
             return true;
         }
 
         JSString *arg = ArgToRootedString(cx, args, 0);
         if (!arg)
             return false;
 
-        fm.patstr = AtomizeString(cx, arg);
-        if (!fm.patstr)
+        fm.pat_ = AtomizeString(cx, arg);
+        if (!fm.pat_)
             return false;
 
         return true;
     }
 
     bool init(JSContext *cx, JSObject *regexp) {
         obj_ = regexp;
 
         JS_ASSERT(ObjectClassIs(obj_, ESClass_RegExp, cx));
 
         if (!RegExpToShared(cx, obj_, &re_))
             return false;
         return true;
     }
 
     bool init(JSContext *cx, HandleString pattern) {
-        fm.patstr = AtomizeString(cx, pattern);
-        if (!fm.patstr)
+        fm.pat_ = AtomizeString(cx, pattern);
+        if (!fm.pat_)
             return false;
         return true;
     }
 
     /*
      * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
      * the pattern string, or a lengthy pattern string can thwart this process.
      *
      * |checkMetaChars| looks for regexp metachars in the pattern string.
      *
      * Return whether flat matching could be used.
      *
      * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
      * cx->isExceptionPending().
      */
     const FlatMatch *
-    tryFlatMatch(JSContext *cx, JSString *textstr, unsigned optarg, unsigned argc,
+    tryFlatMatch(JSContext *cx, JSString *text, unsigned optarg, unsigned argc,
                  bool checkMetaChars = true)
     {
         if (re_.initialized())
             return nullptr;
 
-        fm.pat = fm.patstr->chars();
-        fm.patLen = fm.patstr->length();
-
         if (optarg < argc)
             return nullptr;
 
-        if (checkMetaChars &&
-            (fm.patLen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat, fm.patLen))) {
+        size_t patLen = fm.pat_->length();
+        if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat_)))
             return nullptr;
-        }
 
         /*
-         * textstr could be a rope, so we want to avoid flattening it for as
+         * |text| could be a rope, so we want to avoid flattening it for as
          * long as possible.
          */
-        if (textstr->isRope()) {
-            if (!RopeMatch(cx, textstr, fm.pat, fm.patLen, &fm.match_))
+        if (text->isRope()) {
+            const jschar *pat = fm.pat_->chars();
+            if (!RopeMatch(cx, text, pat, patLen, &fm.match_))
                 return nullptr;
         } else {
-            const jschar *text = textstr->asLinear().chars();
-            size_t textLen = textstr->length();
-            fm.match_ = StringMatch(text, textLen, fm.pat, fm.patLen);
+            fm.match_ = StringMatch(&text->asLinear(), fm.pat_, 0);
         }
+
         return &fm;
     }
 
     /* If the pattern is not already a regular expression, make it so. */
     bool normalizeRegExp(JSContext *cx, bool flat, unsigned optarg, CallArgs args)
     {
         if (re_.initialized())
             return true;
@@ -2018,27 +2023,27 @@ class MOZ_STACK_CLASS StringRegExpGuard
         if (optarg < args.length()) {
             opt = ToString<CanGC>(cx, args[optarg]);
             if (!opt)
                 return false;
         } else {
             opt = nullptr;
         }
 
-        Rooted<JSAtom *> patstr(cx);
+        Rooted<JSAtom *> pat(cx);
         if (flat) {
-            patstr = flattenPattern(cx, fm.patstr);
-            if (!patstr)
+            pat = flattenPattern(cx, fm.pat_);
+            if (!pat)
                 return false;
         } else {
-            patstr = fm.patstr;
+            pat = fm.pat_;
         }
-        JS_ASSERT(patstr);
-
-        return cx->compartment()->regExps.get(cx, patstr, opt, &re_);
+        JS_ASSERT(pat);
+
+        return cx->compartment()->regExps.get(cx, pat, opt, &re_);
     }
 
     bool zeroLastIndex(JSContext *cx) {
         if (!regExpIsObject())
             return true;
 
         // Use a fast path for same-global RegExp objects with writable
         // lastIndex.
@@ -3230,17 +3235,17 @@ str_replace_flat_lambda(JSContext *cx, C
         return false;
 
     RootedString leftSide(cx, js_NewDependentString(cx, rdata.str, 0, fm.match()));
     if (!leftSide)
         return false;
 
     size_t matchLimit = fm.match() + fm.patternLength();
     RootedString rightSide(cx, js_NewDependentString(cx, rdata.str, matchLimit,
-                                                        rdata.str->length() - matchLimit));
+                                                     rdata.str->length() - matchLimit));
     if (!rightSide)
         return false;
 
     RopeBuilder builder(cx);
     if (!(builder.append(leftSide) &&
           builder.append(repstr) &&
           builder.append(rightSide))) {
         return false;