Bug 1019585 part 6 - Make str_replace regex case handle Latin1 strings. r=terrence
author Jan de Mooij <jdemooij@mozilla.com>
Thu, 19 Jun 2014 15:43:32 +0200
changeset 189611 f5358ae0094021b84bd67cf12977eb6e77ea7efa
parent 189610 f17c906538c9cb7490b68f03b825e72413a8f960
child 189612 1a8e50b64058bc5534c023fe6d96d5852ffdaa47
push id 1
push user root
push date Mon, 20 Oct 2014 17:29:22 +0000
reviewers terrence
bugs 1019585
milestone 33.0a1
Bug 1019585 part 6 - Make str_replace regex case handle Latin1 strings. r=terrence
js/src/jit-test/tests/latin1/replace.js
js/src/jsstr.cpp
js/src/vm/StringBuffer.h
--- a/js/src/jit-test/tests/latin1/replace.js
+++ b/js/src/jit-test/tests/latin1/replace.js
@@ -35,8 +35,63 @@ function testDollarReplacement() {
     // TwoByte input, pat and replacement
     assertEq(s.replace(pat, "A\u1300"), "FooA\u1300baz123");
     assertEq(s.replace(pat, "A$$\u1300"), "FooA$\u1300baz123");
     assertEq(s.replace(pat, "A$`\u1300"), "FooAFoo\u1300baz123");
     assertEq(s.replace(pat, "A$&\u1300"), "FooAbar\u1200\u1300baz123");
     assertEq(s.replace(pat, "A$'\u1300"), "FooAbaz123\u1300baz123");
 }
 testDollarReplacement();
+
+function testRegExp() {
+    var s = toLatin1("Foobar123bar234");
+    assertEq(s.replace(/bar\d\d/, "456"), "Foo4563bar234");
+
+    // Latin1 input and replacement
+    var re1 = /bar\d\d/;
+    var re2 = /bar\d\d/g;
+    assertEq(s.replace(re1, toLatin1("789")), "Foo7893bar234");
+    assertEq(s.replace(re2, toLatin1("789\u00ff")), "Foo789\u00ff3789\u00ff4");
+
+    // Latin1 input, TwoByte replacement
+    assertEq(s.replace(re1, "789\u1200"), "Foo789\u12003bar234");
+    assertEq(s.replace(re2, "789\u1200"), "Foo789\u12003789\u12004");
+
+    // TwoByte input, Latin1 replacement
+    s += "\u1200";
+    assertEq(s.replace(re1, toLatin1("7890")), "Foo78903bar234\u1200");
+    assertEq(s.replace(re2, toLatin1("7890\u00ff")), "Foo7890\u00ff37890\u00ff4\u1200");
+
+    // TwoByte input and replacement
+    assertEq(s.replace(re1, "789\u1200"), "Foo789\u12003bar234\u1200");
+    assertEq(s.replace(re2, "789\u1200"), "Foo789\u12003789\u12004\u1200");
+}
+testRegExp();
+
+function testRegExpDollar() {
+    var s = toLatin1("Foobar123bar2345");
+
+    // Latin1 input and replacement
+    var re1 = /bar\d\d/;
+    var re2 = /bar(\d\d)/g;
+    assertEq(s.replace(re1, toLatin1("--$&--")), "Foo--bar12--3bar2345");
+    assertEq(s.replace(re2, toLatin1("--$'\u00ff--")), "Foo--3bar2345\xFF--3--45\xFF--45");
+    assertEq(s.replace(re2, toLatin1("--$`--")), "Foo--Foo--3--Foobar123--45");
+
+    // Latin1 input, TwoByte replacement
+    assertEq(s.replace(re1, "\u1200$$"), "Foo\u1200$3bar2345");
+    assertEq(s.replace(re2, "\u1200$1"), "Foo\u1200123\u12002345");
+    assertEq(s.replace(re2, "\u1200$'"), "Foo\u12003bar23453\u12004545");
+
+    // TwoByte input, Latin1 replacement
+    s += "\u1200";
+    assertEq(s.replace(re1, toLatin1("**$&**")), "Foo**bar12**3bar2345\u1200");
+    assertEq(s.replace(re2, toLatin1("**$1**")), "Foo**12**3**23**45\u1200");
+    assertEq(s.replace(re2, toLatin1("**$`**")), "Foo**Foo**3**Foobar123**45\u1200");
+    assertEq(s.replace(re2, toLatin1("**$'$$**")), "Foo**3bar2345\u1200$**3**45\u1200$**45\u1200");
+
+    // TwoByte input and replacement
+    assertEq(s.replace(re1, "**$&**\ueeee"), "Foo**bar12**\ueeee3bar2345\u1200");
+    assertEq(s.replace(re2, "**$1**\ueeee"), "Foo**12**\ueeee3**23**\ueeee45\u1200");
+    assertEq(s.replace(re2, "\ueeee**$`**"), "Foo\ueeee**Foo**3\ueeee**Foobar123**45\u1200");
+    assertEq(s.replace(re2, "\ueeee**$'$$**"), "Foo\ueeee**3bar2345\u1200$**3\ueeee**45\u1200$**45\u1200");
+}
+testRegExpDollar();
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -2554,35 +2554,36 @@ DoMatchForReplaceGlobal(JSContext *cx, R
             return false;
         if (!res->matched())
             ++i;
     }
 
     return true;
 }
 
+template <typename CharT>
 static bool
-InterpretDollar(RegExpStatics *res, const jschar *dp, const jschar *ep,
+InterpretDollar(RegExpStatics *res, const CharT *bp, const CharT *dp, const CharT *ep,
                 ReplaceData &rdata, JSSubString *out, size_t *skip)
 {
     JS_ASSERT(*dp == '$');
 
     /* If there is only a dollar, bail now */
     if (dp + 1 >= ep)
         return false;
 
     /* Interpret all Perl match-induced dollar variables. */
     jschar dc = dp[1];
     if (JS7_ISDEC(dc)) {
         /* ECMA-262 Edition 3: 1-9 or 01-99 */
         unsigned num = JS7_UNDEC(dc);
         if (num > res->getMatches().parenCount())
             return false;
 
-        const jschar *cp = dp + 2;
+        const CharT *cp = dp + 2;
         if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
             unsigned tmp = 10 * num + JS7_UNDEC(dc);
             if (tmp <= res->getMatches().parenCount()) {
                 cp++;
                 num = tmp;
             }
         }
         if (num == 0)
@@ -2598,17 +2599,17 @@ InterpretDollar(RegExpStatics *res, cons
          */
         res->getParen(num, out);
         return true;
     }
 
     *skip = 2;
     switch (dc) {
       case '$':
-        out->init(rdata.repstr, dp - rdata.repstr->chars(), 1);
+        out->init(rdata.repstr, dp - bp, 1);
         return true;
       case '&':
         res->getLastMatch(out);
         return true;
       case '+':
         res->getLastParen(out);
         return true;
       case '`':
@@ -2616,16 +2617,55 @@ InterpretDollar(RegExpStatics *res, cons
         return true;
       case '\'':
         res->getRightContext(out);
         return true;
     }
     return false;
 }
 
+template <typename CharT>
+static bool
+FindReplaceLengthString(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
+{
+    JSLinearString *repstr = rdata.repstr;
+    CheckedInt<uint32_t> replen = repstr->length();
+
+    if (rdata.dollarIndex != UINT32_MAX) {
+        AutoCheckCannotGC nogc;
+        MOZ_ASSERT(rdata.dollarIndex < repstr->length());
+        const CharT *bp = repstr->chars<CharT>(nogc);
+        const CharT *dp = bp + rdata.dollarIndex;
+        const CharT *ep = bp + repstr->length();
+        do {
+            JSSubString sub;
+            size_t skip;
+            if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
+                if (sub.length > skip)
+                    replen += sub.length - skip;
+                else
+                    replen -= skip - sub.length;
+                dp += skip;
+            } else {
+                dp++;
+            }
+
+            dp = js_strchr_limit(dp, '$', ep);
+        } while (dp);
+    }
+
+    if (!replen.isValid()) {
+        js_ReportAllocationOverflow(cx);
+        return false;
+    }
+
+    *sizep = replen.value();
+    return true;
+}
+
 static bool
 FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
 {
     if (rdata.elembase) {
         /*
          * The base object is used when replace was passed a lambda which looks like
          * 'function(a) { return b[a]; }' for the base object b.  b will not change
          * in the course of the replace unless we end up making a scripted call due
@@ -2705,73 +2745,48 @@ FindReplaceLength(JSContext *cx, RegExpS
             return false;
         rdata.repstr = repstr->ensureLinear(cx);
         if (!rdata.repstr)
             return false;
         *sizep = rdata.repstr->length();
         return true;
     }
 
-    JSLinearString *repstr = rdata.repstr;
-    CheckedInt<uint32_t> replen = repstr->length();
-    if (rdata.dollarIndex != UINT32_MAX) {
-        MOZ_ASSERT(rdata.dollarIndex < repstr->length());
-        const jschar *dp = repstr->chars() + rdata.dollarIndex;
-        const jschar *ep = repstr->chars() + repstr->length();
-        do {
-            JSSubString sub;
-            size_t skip;
-            if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
-                if (sub.length > skip)
-                    replen += sub.length - skip;
-                else
-                    replen -= skip - sub.length;
-                dp += skip;
-            } else {
-                dp++;
-            }
-
-            dp = js_strchr_limit(dp, '$', ep);
-        } while (dp);
-    }
-
-    if (!replen.isValid()) {
-        js_ReportAllocationOverflow(cx);
-        return false;
-    }
-
-    *sizep = replen.value();
-    return true;
+    return rdata.repstr->hasLatin1Chars()
+           ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
+           : FindReplaceLengthString<jschar>(cx, res, rdata, sizep);
 }
 
 /*
  * Precondition: |rdata.sb| already has necessary growth space reserved (as
  * derived from FindReplaceLength), and has been inflated to TwoByte if
  * necessary.
  */
+template <typename CharT>
 static void
 DoReplace(RegExpStatics *res, ReplaceData &rdata)
 {
+    AutoCheckCannotGC nogc;
     JSLinearString *repstr = rdata.repstr;
-    const jschar *bp = repstr->chars();
-    const jschar *cp = bp;
+    const CharT *bp = repstr->chars<CharT>(nogc);
+    const CharT *cp = bp;
 
     if (rdata.dollarIndex != UINT32_MAX) {
         MOZ_ASSERT(rdata.dollarIndex < repstr->length());
-        const jschar *dp = bp + rdata.dollarIndex;
-        const jschar *ep = bp + repstr->length();
+        const CharT *dp = bp + rdata.dollarIndex;
+        const CharT *ep = bp + repstr->length();
         do {
             /* Move one of the constant portions of the replacement value. */
             size_t len = dp - cp;
             rdata.sb.infallibleAppend(cp, len);
             cp = dp;
 
             JSSubString sub;
             size_t skip;
-            if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
+            if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
                 rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
                 cp += skip;
                 dp += skip;
             } else {
                 dp++;
             }
 
             dp = js_strchr_limit(dp, '$', ep);
@@ -2814,20 +2829,22 @@ ReplaceRegExp(JSContext *cx, RegExpStati
         if (!rdata.sb.ensureTwoByteChars())
             return false;
     }
 
     if (!rdata.sb.reserve(newlen.value()))
         return false;
 
     /* Append skipped-over portion of the search value. */
-    const jschar *left = str.chars() + leftoff;
-    rdata.sb.infallibleAppend(left, leftlen);
-
-    DoReplace(res, rdata);
+    rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
+
+    if (rdata.repstr->hasLatin1Chars())
+        DoReplace<Latin1Char>(res, rdata);
+    else
+        DoReplace<jschar>(res, rdata);
     return true;
 }
 
 static bool
 BuildFlatReplacement(JSContext *cx, HandleString textstr, HandleString repstr,
                      const FlatMatch &fm, MutableHandleValue rval)
 {
     RopeBuilder builder(cx);
--- a/js/src/vm/StringBuffer.h
+++ b/js/src/vm/StringBuffer.h
@@ -250,17 +250,17 @@ StringBuffer::append(JSLinearString *str
            ? twoByteChars().append(str->latin1Chars(nogc), str->length())
            : twoByteChars().append(str->twoByteChars(nogc), str->length());
 }
 
 inline void
 StringBuffer::infallibleAppendSubstring(JSLinearString *base, size_t off, size_t len)
 {
     MOZ_ASSERT(off + len <= base->length());
-    MOZ_ASSERT(base->hasLatin1Chars() == isLatin1());
+    MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte());
 
     JS::AutoCheckCannotGC nogc;
     if (base->hasLatin1Chars())
         infallibleAppend(base->latin1Chars(nogc) + off, len);
     else
         infallibleAppend(base->twoByteChars(nogc) + off, len);
 }