Bug 1019585 part 6 - Make str_replace regex case handle Latin1 strings. r=terrence
author Jan de Mooij <jdemooij@mozilla.com>
Thu, 19 Jun 2014 15:43:32 +0200
changeset 189611 f5358ae0094021b84bd67cf12977eb6e77ea7efa
parent 189610 f17c906538c9cb7490b68f03b825e72413a8f960
child 189612 1a8e50b64058bc5534c023fe6d96d5852ffdaa47
push id 26992
push user kwierso@gmail.com
push date Fri, 20 Jun 2014 01:07:53 +0000
treeherder mozilla-central@bdac18bd6c74 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers terrence
bugs 1019585
milestone 33.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1019585 part 6 - Make str_replace regex case handle Latin1 strings. r=terrence
js/src/jit-test/tests/latin1/replace.js
js/src/jsstr.cpp
js/src/vm/StringBuffer.h
--- a/js/src/jit-test/tests/latin1/replace.js
+++ b/js/src/jit-test/tests/latin1/replace.js
@@ -35,8 +35,63 @@ function testDollarReplacement() {
     // TwoByte input, pat and replacement
     assertEq(s.replace(pat, "A\u1300"), "FooA\u1300baz123");
     assertEq(s.replace(pat, "A$$\u1300"), "FooA$\u1300baz123");
     assertEq(s.replace(pat, "A$`\u1300"), "FooAFoo\u1300baz123");
     assertEq(s.replace(pat, "A$&\u1300"), "FooAbar\u1200\u1300baz123");
     assertEq(s.replace(pat, "A$'\u1300"), "FooAbaz123\u1300baz123");
 }
 testDollarReplacement();
+
+function testRegExp() {
+    var s = toLatin1("Foobar123bar234");
+    assertEq(s.replace(/bar\d\d/, "456"), "Foo4563bar234");
+
+    // Latin1 input and replacement
+    var re1 = /bar\d\d/;
+    var re2 = /bar\d\d/g;
+    assertEq(s.replace(re1, toLatin1("789")), "Foo7893bar234");
+    assertEq(s.replace(re2, toLatin1("789\u00ff")), "Foo789\u00ff3789\u00ff4");
+
+    // Latin1 input, TwoByte replacement
+    assertEq(s.replace(re1, "789\u1200"), "Foo789\u12003bar234");
+    assertEq(s.replace(re2, "789\u1200"), "Foo789\u12003789\u12004");
+
+    // TwoByte input, Latin1 replacement
+    s += "\u1200";
+    assertEq(s.replace(re1, toLatin1("7890")), "Foo78903bar234\u1200");
+    assertEq(s.replace(re2, toLatin1("7890\u00ff")), "Foo7890\u00ff37890\u00ff4\u1200");
+
+    // TwoByte input and replacement
+    assertEq(s.replace(re1, "789\u1200"), "Foo789\u12003bar234\u1200");
+    assertEq(s.replace(re2, "789\u1200"), "Foo789\u12003789\u12004\u1200");
+}
+testRegExp();
+
+function testRegExpDollar() {
+    var s = toLatin1("Foobar123bar2345");
+
+    // Latin1 input and replacement
+    var re1 = /bar\d\d/;
+    var re2 = /bar(\d\d)/g;
+    assertEq(s.replace(re1, toLatin1("--$&--")), "Foo--bar12--3bar2345");
+    assertEq(s.replace(re2, toLatin1("--$'\u00ff--")), "Foo--3bar2345\xFF--3--45\xFF--45");
+    assertEq(s.replace(re2, toLatin1("--$`--")), "Foo--Foo--3--Foobar123--45");
+
+    // Latin1 input, TwoByte replacement
+    assertEq(s.replace(re1, "\u1200$$"), "Foo\u1200$3bar2345");
+    assertEq(s.replace(re2, "\u1200$1"), "Foo\u1200123\u12002345");
+    assertEq(s.replace(re2, "\u1200$'"), "Foo\u12003bar23453\u12004545");
+
+    // TwoByte input, Latin1 replacement
+    s += "\u1200";
+    assertEq(s.replace(re1, toLatin1("**$&**")), "Foo**bar12**3bar2345\u1200");
+    assertEq(s.replace(re2, toLatin1("**$1**")), "Foo**12**3**23**45\u1200");
+    assertEq(s.replace(re2, toLatin1("**$`**")), "Foo**Foo**3**Foobar123**45\u1200");
+    assertEq(s.replace(re2, toLatin1("**$'$$**")), "Foo**3bar2345\u1200$**3**45\u1200$**45\u1200");
+
+    // TwoByte input and replacement
+    assertEq(s.replace(re1, "**$&**\ueeee"), "Foo**bar12**\ueeee3bar2345\u1200");
+    assertEq(s.replace(re2, "**$1**\ueeee"), "Foo**12**\ueeee3**23**\ueeee45\u1200");
+    assertEq(s.replace(re2, "\ueeee**$`**"), "Foo\ueeee**Foo**3\ueeee**Foobar123**45\u1200");
+    assertEq(s.replace(re2, "\ueeee**$'$$**"), "Foo\ueeee**3bar2345\u1200$**3\ueeee**45\u1200$**45\u1200");
+}
+testRegExpDollar();
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -2554,35 +2554,36 @@ DoMatchForReplaceGlobal(JSContext *cx, R
             return false;
         if (!res->matched())
             ++i;
     }
 
     return true;
 }
 
+template <typename CharT>
 static bool
-InterpretDollar(RegExpStatics *res, const jschar *dp, const jschar *ep,
+InterpretDollar(RegExpStatics *res, const CharT *bp, const CharT *dp, const CharT *ep,
                 ReplaceData &rdata, JSSubString *out, size_t *skip)
 {
     JS_ASSERT(*dp == '$');
 
     /* If there is only a dollar, bail now */
     if (dp + 1 >= ep)
         return false;
 
     /* Interpret all Perl match-induced dollar variables. */
     jschar dc = dp[1];
     if (JS7_ISDEC(dc)) {
         /* ECMA-262 Edition 3: 1-9 or 01-99 */
         unsigned num = JS7_UNDEC(dc);
         if (num > res->getMatches().parenCount())
             return false;
 
-        const jschar *cp = dp + 2;
+        const CharT *cp = dp + 2;
         if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
             unsigned tmp = 10 * num + JS7_UNDEC(dc);
             if (tmp <= res->getMatches().parenCount()) {
                 cp++;
                 num = tmp;
             }
         }
         if (num == 0)
@@ -2598,17 +2599,17 @@ InterpretDollar(RegExpStatics *res, cons
          */
         res->getParen(num, out);
         return true;
     }
 
     *skip = 2;
     switch (dc) {
       case '$':
-        out->init(rdata.repstr, dp - rdata.repstr->chars(), 1);
+        out->init(rdata.repstr, dp - bp, 1);
         return true;
       case '&':
         res->getLastMatch(out);
         return true;
       case '+':
         res->getLastParen(out);
         return true;
       case '`':
@@ -2616,16 +2617,55 @@ InterpretDollar(RegExpStatics *res, cons
         return true;
       case '\'':
         res->getRightContext(out);
         return true;
     }
     return false;
 }
 
+template <typename CharT>
+static bool
+FindReplaceLengthString(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
+{
+    JSLinearString *repstr = rdata.repstr;
+    CheckedInt<uint32_t> replen = repstr->length();
+
+    if (rdata.dollarIndex != UINT32_MAX) {
+        AutoCheckCannotGC nogc;
+        MOZ_ASSERT(rdata.dollarIndex < repstr->length());
+        const CharT *bp = repstr->chars<CharT>(nogc);
+        const CharT *dp = bp + rdata.dollarIndex;
+        const CharT *ep = bp + repstr->length();
+        do {
+            JSSubString sub;
+            size_t skip;
+            if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
+                if (sub.length > skip)
+                    replen += sub.length - skip;
+                else
+                    replen -= skip - sub.length;
+                dp += skip;
+            } else {
+                dp++;
+            }
+
+            dp = js_strchr_limit(dp, '$', ep);
+        } while (dp);
+    }
+
+    if (!replen.isValid()) {
+        js_ReportAllocationOverflow(cx);
+        return false;
+    }
+
+    *sizep = replen.value();
+    return true;
+}
+
 static bool
 FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
 {
     if (rdata.elembase) {
         /*
          * The base object is used when replace was passed a lambda which looks like
          * 'function(a) { return b[a]; }' for the base object b.  b will not change
          * in the course of the replace unless we end up making a scripted call due
@@ -2705,73 +2745,48 @@ FindReplaceLength(JSContext *cx, RegExpS
             return false;
         rdata.repstr = repstr->ensureLinear(cx);
         if (!rdata.repstr)
             return false;
         *sizep = rdata.repstr->length();
         return true;
     }
 
-    JSLinearString *repstr = rdata.repstr;
-    CheckedInt<uint32_t> replen = repstr->length();
-    if (rdata.dollarIndex != UINT32_MAX) {
-        MOZ_ASSERT(rdata.dollarIndex < repstr->length());
-        const jschar *dp = repstr->chars() + rdata.dollarIndex;
-        const jschar *ep = repstr->chars() + repstr->length();
-        do {
-            JSSubString sub;
-            size_t skip;
-            if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
-                if (sub.length > skip)
-                    replen += sub.length - skip;
-                else
-                    replen -= skip - sub.length;
-                dp += skip;
-            } else {
-                dp++;
-            }
-
-            dp = js_strchr_limit(dp, '$', ep);
-        } while (dp);
-    }
-
-    if (!replen.isValid()) {
-        js_ReportAllocationOverflow(cx);
-        return false;
-    }
-
-    *sizep = replen.value();
-    return true;
+    return rdata.repstr->hasLatin1Chars()
+           ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
+           : FindReplaceLengthString<jschar>(cx, res, rdata, sizep);
 }
 
 /*
  * Precondition: |rdata.sb| already has necessary growth space reserved (as
  * derived from FindReplaceLength), and has been inflated to TwoByte if
  * necessary.
  */
+template <typename CharT>
 static void
 DoReplace(RegExpStatics *res, ReplaceData &rdata)
 {
+    AutoCheckCannotGC nogc;
     JSLinearString *repstr = rdata.repstr;
-    const jschar *bp = repstr->chars();
-    const jschar *cp = bp;
+    const CharT *bp = repstr->chars<CharT>(nogc);
+    const CharT *cp = bp;
 
     if (rdata.dollarIndex != UINT32_MAX) {
         MOZ_ASSERT(rdata.dollarIndex < repstr->length());
-        const jschar *dp = bp + rdata.dollarIndex;
-        const jschar *ep = bp + repstr->length();
+        const CharT *dp = bp + rdata.dollarIndex;
+        const CharT *ep = bp + repstr->length();
         do {
             /* Move one of the constant portions of the replacement value. */
             size_t len = dp - cp;
             rdata.sb.infallibleAppend(cp, len);
             cp = dp;
 
             JSSubString sub;
             size_t skip;
-            if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
+            if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
                 rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
                 cp += skip;
                 dp += skip;
             } else {
                 dp++;
             }
 
             dp = js_strchr_limit(dp, '$', ep);
@@ -2814,20 +2829,22 @@ ReplaceRegExp(JSContext *cx, RegExpStati
         if (!rdata.sb.ensureTwoByteChars())
             return false;
     }
 
     if (!rdata.sb.reserve(newlen.value()))
         return false;
 
     /* Append skipped-over portion of the search value. */
-    const jschar *left = str.chars() + leftoff;
-    rdata.sb.infallibleAppend(left, leftlen);
-
-    DoReplace(res, rdata);
+    rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
+
+    if (rdata.repstr->hasLatin1Chars())
+        DoReplace<Latin1Char>(res, rdata);
+    else
+        DoReplace<jschar>(res, rdata);
     return true;
 }
 
 static bool
 BuildFlatReplacement(JSContext *cx, HandleString textstr, HandleString repstr,
                      const FlatMatch &fm, MutableHandleValue rval)
 {
     RopeBuilder builder(cx);
--- a/js/src/vm/StringBuffer.h
+++ b/js/src/vm/StringBuffer.h
@@ -250,17 +250,17 @@ StringBuffer::append(JSLinearString *str
            ? twoByteChars().append(str->latin1Chars(nogc), str->length())
            : twoByteChars().append(str->twoByteChars(nogc), str->length());
 }
 
 inline void
 StringBuffer::infallibleAppendSubstring(JSLinearString *base, size_t off, size_t len)
 {
     MOZ_ASSERT(off + len <= base->length());
-    MOZ_ASSERT(base->hasLatin1Chars() == isLatin1());
+    MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte());
 
     JS::AutoCheckCannotGC nogc;
     if (base->hasLatin1Chars())
         infallibleAppend(base->latin1Chars(nogc) + off, len);
     else
         infallibleAppend(base->twoByteChars(nogc) + off, len);
 }