Bug 1019585 part 6 - Make str_replace regex case handle Latin1 strings. r=terrence
--- a/js/src/jit-test/tests/latin1/replace.js
+++ b/js/src/jit-test/tests/latin1/replace.js
@@ -35,8 +35,63 @@ function testDollarReplacement() {
// TwoByte input, pat and replacement
assertEq(s.replace(pat, "A\u1300"), "FooA\u1300baz123");
assertEq(s.replace(pat, "A$$\u1300"), "FooA$\u1300baz123");
assertEq(s.replace(pat, "A$`\u1300"), "FooAFoo\u1300baz123");
assertEq(s.replace(pat, "A$&\u1300"), "FooAbar\u1200\u1300baz123");
assertEq(s.replace(pat, "A$'\u1300"), "FooAbaz123\u1300baz123");
}
testDollarReplacement();
+
+function testRegExp() { // Checks String.prototype.replace with RegExp patterns over each mix of Latin1/TwoByte input and replacement.
+ var s = toLatin1("Foobar123bar234"); // toLatin1 is defined outside this patch; presumably it yields a Latin1-stored copy - confirm
+ assertEq(s.replace(/bar\d\d/, "456"), "Foo4563bar234"); // non-global literal pattern: only the first "bar12" is replaced
+
+ // Latin1 input and replacement. re1 has no flags (first match only); re2 is global (every match).
+ var re1 = /bar\d\d/;
+ var re2 = /bar\d\d/g;
+ assertEq(s.replace(re1, toLatin1("789")), "Foo7893bar234");
+ assertEq(s.replace(re2, toLatin1("789\u00ff")), "Foo789\u00ff3789\u00ff4"); // U+00FF is the highest code point that still fits in Latin1
+
+ // Latin1 input, TwoByte replacement (the replacement contains U+1200, which does not fit in Latin1)
+ assertEq(s.replace(re1, "789\u1200"), "Foo789\u12003bar234");
+ assertEq(s.replace(re2, "789\u1200"), "Foo789\u12003789\u12004");
+
+ // TwoByte input, Latin1 replacement
+ s += "\u1200"; // appending U+1200 means s can no longer be represented as Latin1
+ assertEq(s.replace(re1, toLatin1("7890")), "Foo78903bar234\u1200");
+ assertEq(s.replace(re2, toLatin1("7890\u00ff")), "Foo7890\u00ff37890\u00ff4\u1200");
+
+ // TwoByte input and replacement
+ assertEq(s.replace(re1, "789\u1200"), "Foo789\u12003bar234\u1200");
+ assertEq(s.replace(re2, "789\u1200"), "Foo789\u12003789\u12004\u1200");
+}
+testRegExp();
+
+function testRegExpDollar() { // Checks '$' replacement patterns ($&, $', $`, $$, $1) with RegExp patterns over each Latin1/TwoByte mix.
+ var s = toLatin1("Foobar123bar2345"); // toLatin1 is defined outside this patch; presumably it yields a Latin1-stored copy - confirm
+
+ // Latin1 input and replacement. re1 replaces the first match only; re2 is global and captures the two digits as group 1.
+ var re1 = /bar\d\d/;
+ var re2 = /bar(\d\d)/g;
+ assertEq(s.replace(re1, toLatin1("--$&--")), "Foo--bar12--3bar2345"); // $& expands to the matched text
+ assertEq(s.replace(re2, toLatin1("--$'\u00ff--")), "Foo--3bar2345\xFF--3--45\xFF--45"); // $' expands to the text after the match; \xFF and \u00ff are the same character
+ assertEq(s.replace(re2, toLatin1("--$`--")), "Foo--Foo--3--Foobar123--45"); // $` expands to the text before the match
+
+ // Latin1 input, TwoByte replacement (the replacement contains U+1200, which does not fit in Latin1)
+ assertEq(s.replace(re1, "\u1200$$"), "Foo\u1200$3bar2345"); // $$ expands to a single literal '$'
+ assertEq(s.replace(re2, "\u1200$1"), "Foo\u1200123\u12002345"); // $1 expands to capture group 1 ("12", then "23")
+ assertEq(s.replace(re2, "\u1200$'"), "Foo\u12003bar23453\u12004545");
+
+ // TwoByte input, Latin1 replacement
+ s += "\u1200"; // appending U+1200 means s can no longer be represented as Latin1
+ assertEq(s.replace(re1, toLatin1("**$&**")), "Foo**bar12**3bar2345\u1200");
+ assertEq(s.replace(re2, toLatin1("**$1**")), "Foo**12**3**23**45\u1200");
+ assertEq(s.replace(re2, toLatin1("**$`**")), "Foo**Foo**3**Foobar123**45\u1200");
+ assertEq(s.replace(re2, toLatin1("**$'$$**")), "Foo**3bar2345\u1200$**3**45\u1200$**45\u1200"); // here $' pulls the TwoByte tail of the input into the output
+
+ // TwoByte input and replacement
+ assertEq(s.replace(re1, "**$&**\ueeee"), "Foo**bar12**\ueeee3bar2345\u1200");
+ assertEq(s.replace(re2, "**$1**\ueeee"), "Foo**12**\ueeee3**23**\ueeee45\u1200");
+ assertEq(s.replace(re2, "\ueeee**$`**"), "Foo\ueeee**Foo**3\ueeee**Foobar123**45\u1200");
+ assertEq(s.replace(re2, "\ueeee**$'$$**"), "Foo\ueeee**3bar2345\u1200$**3\ueeee**45\u1200$**45\u1200");
+}
+testRegExpDollar();
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -2554,35 +2554,36 @@ DoMatchForReplaceGlobal(JSContext *cx, R
return false;
if (!res->matched())
++i;
}
return true;
}
+template <typename CharT>
static bool
-InterpretDollar(RegExpStatics *res, const jschar *dp, const jschar *ep,
+InterpretDollar(RegExpStatics *res, const CharT *bp, const CharT *dp, const CharT *ep,
ReplaceData &rdata, JSSubString *out, size_t *skip)
{
JS_ASSERT(*dp == '$');
/* If there is only a dollar, bail now */
if (dp + 1 >= ep)
return false;
/* Interpret all Perl match-induced dollar variables. */
jschar dc = dp[1];
if (JS7_ISDEC(dc)) {
/* ECMA-262 Edition 3: 1-9 or 01-99 */
unsigned num = JS7_UNDEC(dc);
if (num > res->getMatches().parenCount())
return false;
- const jschar *cp = dp + 2;
+ const CharT *cp = dp + 2;
if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
unsigned tmp = 10 * num + JS7_UNDEC(dc);
if (tmp <= res->getMatches().parenCount()) {
cp++;
num = tmp;
}
}
if (num == 0)
@@ -2598,17 +2599,17 @@ InterpretDollar(RegExpStatics *res, cons
*/
res->getParen(num, out);
return true;
}
*skip = 2;
switch (dc) {
case '$':
- out->init(rdata.repstr, dp - rdata.repstr->chars(), 1);
+ out->init(rdata.repstr, dp - bp, 1);
return true;
case '&':
res->getLastMatch(out);
return true;
case '+':
res->getLastParen(out);
return true;
case '`':
@@ -2616,16 +2617,55 @@ InterpretDollar(RegExpStatics *res, cons
return true;
case '\'':
res->getRightContext(out);
return true;
}
return false;
}
+template <typename CharT> /* CharT is the character type of rdata.repstr; the caller instantiates Latin1Char or jschar. */
+static bool
+FindReplaceLengthString(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep) /* Stores in *sizep the length of rdata.repstr once its '$' sequences are expanded against |res|; returns false after reporting an allocation overflow. */
+{
+ JSLinearString *repstr = rdata.repstr;
+ CheckedInt<uint32_t> replen = repstr->length(); /* start from the raw replacement length; adjusted per '$' sequence below */
+
+ if (rdata.dollarIndex != UINT32_MAX) { /* UINT32_MAX looks like the "no '$' in repstr" sentinel - confirm where dollarIndex is set */
+ AutoCheckCannotGC nogc; /* token required by chars<CharT>(); presumably asserts no GC while the raw pointers are live - confirm */
+ MOZ_ASSERT(rdata.dollarIndex < repstr->length());
+ const CharT *bp = repstr->chars<CharT>(nogc); /* bp: start of the replacement characters */
+ const CharT *dp = bp + rdata.dollarIndex; /* dp: the '$' currently being examined */
+ const CharT *ep = bp + repstr->length(); /* ep: one past the last replacement character */
+ do {
+ JSSubString sub;
+ size_t skip;
+ if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) { /* on success |sub| is the expansion and |skip| the number of pattern chars it replaces */
+ if (sub.length > skip) /* branching on the sign keeps each difference below non-negative */
+ replen += sub.length - skip;
+ else
+ replen -= skip - sub.length;
+ dp += skip; /* step past the whole '$' sequence */
+ } else {
+ dp++; /* not a recognized sequence: the '$' stays literal, so only step past it */
+ }
+
+ dp = js_strchr_limit(dp, '$', ep); /* next '$' before ep; a null result ends the loop */
+ } while (dp);
+ }
+
+ if (!replen.isValid()) { /* the checked uint32_t arithmetic above went out of range */
+ js_ReportAllocationOverflow(cx);
+ return false;
+ }
+
+ *sizep = replen.value();
+ return true;
+}
+
static bool
FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
{
if (rdata.elembase) {
/*
* The base object is used when replace was passed a lambda which looks like
* 'function(a) { return b[a]; }' for the base object b. b will not change
* in the course of the replace unless we end up making a scripted call due
@@ -2705,73 +2745,48 @@ FindReplaceLength(JSContext *cx, RegExpS
return false;
rdata.repstr = repstr->ensureLinear(cx);
if (!rdata.repstr)
return false;
*sizep = rdata.repstr->length();
return true;
}
- JSLinearString *repstr = rdata.repstr;
- CheckedInt<uint32_t> replen = repstr->length();
- if (rdata.dollarIndex != UINT32_MAX) {
- MOZ_ASSERT(rdata.dollarIndex < repstr->length());
- const jschar *dp = repstr->chars() + rdata.dollarIndex;
- const jschar *ep = repstr->chars() + repstr->length();
- do {
- JSSubString sub;
- size_t skip;
- if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
- if (sub.length > skip)
- replen += sub.length - skip;
- else
- replen -= skip - sub.length;
- dp += skip;
- } else {
- dp++;
- }
-
- dp = js_strchr_limit(dp, '$', ep);
- } while (dp);
- }
-
- if (!replen.isValid()) {
- js_ReportAllocationOverflow(cx);
- return false;
- }
-
- *sizep = replen.value();
- return true;
+ return rdata.repstr->hasLatin1Chars()
+ ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
+ : FindReplaceLengthString<jschar>(cx, res, rdata, sizep);
}
/*
* Precondition: |rdata.sb| already has necessary growth space reserved (as
* derived from FindReplaceLength), and has been inflated to TwoByte if
* necessary.
*/
+template <typename CharT>
static void
DoReplace(RegExpStatics *res, ReplaceData &rdata)
{
+ AutoCheckCannotGC nogc;
JSLinearString *repstr = rdata.repstr;
- const jschar *bp = repstr->chars();
- const jschar *cp = bp;
+ const CharT *bp = repstr->chars<CharT>(nogc);
+ const CharT *cp = bp;
if (rdata.dollarIndex != UINT32_MAX) {
MOZ_ASSERT(rdata.dollarIndex < repstr->length());
- const jschar *dp = bp + rdata.dollarIndex;
- const jschar *ep = bp + repstr->length();
+ const CharT *dp = bp + rdata.dollarIndex;
+ const CharT *ep = bp + repstr->length();
do {
/* Move one of the constant portions of the replacement value. */
size_t len = dp - cp;
rdata.sb.infallibleAppend(cp, len);
cp = dp;
JSSubString sub;
size_t skip;
- if (InterpretDollar(res, dp, ep, rdata, &sub, &skip)) {
+ if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
cp += skip;
dp += skip;
} else {
dp++;
}
dp = js_strchr_limit(dp, '$', ep);
@@ -2814,20 +2829,22 @@ ReplaceRegExp(JSContext *cx, RegExpStati
if (!rdata.sb.ensureTwoByteChars())
return false;
}
if (!rdata.sb.reserve(newlen.value()))
return false;
/* Append skipped-over portion of the search value. */
- const jschar *left = str.chars() + leftoff;
- rdata.sb.infallibleAppend(left, leftlen);
-
- DoReplace(res, rdata);
+ rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
+
+ if (rdata.repstr->hasLatin1Chars())
+ DoReplace<Latin1Char>(res, rdata);
+ else
+ DoReplace<jschar>(res, rdata);
return true;
}
static bool
BuildFlatReplacement(JSContext *cx, HandleString textstr, HandleString repstr,
const FlatMatch &fm, MutableHandleValue rval)
{
RopeBuilder builder(cx);
--- a/js/src/vm/StringBuffer.h
+++ b/js/src/vm/StringBuffer.h
@@ -250,17 +250,17 @@ StringBuffer::append(JSLinearString *str
? twoByteChars().append(str->latin1Chars(nogc), str->length())
: twoByteChars().append(str->twoByteChars(nogc), str->length());
}
inline void
StringBuffer::infallibleAppendSubstring(JSLinearString *base, size_t off, size_t len)
{
MOZ_ASSERT(off + len <= base->length());
- MOZ_ASSERT(base->hasLatin1Chars() == isLatin1());
+ MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte()); /* A TwoByte base requires a TwoByte buffer; nothing is asserted for a Latin1 base, so it may be appended to either kind. */
JS::AutoCheckCannotGC nogc;
if (base->hasLatin1Chars())
infallibleAppend(base->latin1Chars(nogc) + off, len);
else
infallibleAppend(base->twoByteChars(nogc) + off, len);
}