Bug 1130860 - Implement all of EscapeRegExpPattern instead of just escaping forward slashes. r=till
authorTooru Fujisawa <arai_a@mac.com>
Tue, 10 Feb 2015 02:04:43 +0900
changeset 228182 18240dad751f9e2302acacf5eb6a06f006ca0453
parent 228181 7b89d8a612eb83385c4f8331a565c7d769bb1c44
child 228183 ac3e4d79297c71c1378855bfa362940acb602147
push id28258
push userryanvm@gmail.com
push dateMon, 09 Feb 2015 21:30:07 +0000
treeherdermozilla-central@2cb22c058add [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerstill
bugs1130860
milestone38.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1130860 - Implement all of EscapeRegExpPattern instead of just escaping forward slashes. r=till
js/src/builtin/RegExp.cpp
js/src/tests/ecma_6/RegExp/escape.js
js/src/tests/ecma_6/RegExp/source.js
js/src/tests/js1_5/Regress/regress-248444.js
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -134,64 +134,16 @@ js::ExecuteRegExpLegacy(JSContext *cx, R
         /* Forbid an array, as an optimization. */
         rval.setBoolean(true);
         return true;
     }
 
     return CreateRegExpMatchResult(cx, input, matches, rval);
 }
 
-/* Note: returns the original if no escaping need be performed. */
-template <typename CharT>
-static bool
-EscapeNakedForwardSlashes(StringBuffer &sb, const CharT *oldChars, size_t oldLen)
-{
-    for (const CharT *it = oldChars; it < oldChars + oldLen; ++it) {
-        if (*it == '/' && (it == oldChars || it[-1] != '\\')) {
-            /* There's a forward slash that needs escaping. */
-            if (sb.empty()) {
-                /* This is the first one we've seen, copy everything up to this point. */
-                if (mozilla::IsSame<CharT, char16_t>::value && !sb.ensureTwoByteChars())
-                    return false;
-
-                if (!sb.reserve(oldLen + 1))
-                    return false;
-
-                sb.infallibleAppend(oldChars, size_t(it - oldChars));
-            }
-            if (!sb.append('\\'))
-                return false;
-        }
-
-        if (!sb.empty() && !sb.append(*it))
-            return false;
-    }
-
-    return true;
-}
-
-static JSAtom *
-EscapeNakedForwardSlashes(JSContext *cx, JSAtom *unescaped)
-{
-    /* We may never need to use |sb|. Start using it lazily. */
-    StringBuffer sb(cx);
-
-    if (unescaped->hasLatin1Chars()) {
-        JS::AutoCheckCannotGC nogc;
-        if (!EscapeNakedForwardSlashes(sb, unescaped->latin1Chars(nogc), unescaped->length()))
-            return nullptr;
-    } else {
-        JS::AutoCheckCannotGC nogc;
-        if (!EscapeNakedForwardSlashes(sb, unescaped->twoByteChars(nogc), unescaped->length()))
-            return nullptr;
-    }
-
-    return sb.empty() ? unescaped : sb.finishAtom();
-}
-
 /*
  * Compile a new |RegExpShared| for the |RegExpObject|.
  *
  * Per ECMAv5 15.10.4.1, we act on combinations of (pattern, flags) as
  * arguments:
  *
  *  RegExp, undefined => flags := pattern.flags
  *  RegExp, _ => throw TypeError
@@ -270,30 +222,26 @@ CompileRegExpObject(JSContext *cx, RegEx
         RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
         if (!flagStr)
             return false;
         args[1].setString(flagStr);
         if (!ParseRegExpFlags(cx, flagStr, &flags))
             return false;
     }
 
-    RootedAtom escapedSourceStr(cx, EscapeNakedForwardSlashes(cx, source));
-    if (!escapedSourceStr)
-        return false;
-
     CompileOptions options(cx);
     frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
 
-    if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), escapedSourceStr))
+    if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), source))
         return false;
 
     RegExpStatics *res = cx->global()->getRegExpStatics(cx);
     if (!res)
         return false;
-    RegExpObject *reobj = builder.build(escapedSourceStr, RegExpFlag(flags | res->getFlags()));
+    RegExpObject *reobj = builder.build(source, RegExpFlag(flags | res->getFlags()));
     if (!reobj)
         return false;
 
     args.rval().setObject(*reobj);
     return true;
 }
 
 MOZ_ALWAYS_INLINE bool
@@ -490,21 +438,21 @@ regexp_source_impl(JSContext *cx, CallAr
     Rooted<RegExpObject*> reObj(cx, &args.thisv().toObject().as<RegExpObject>());
 
     /* Step 5. */
     RootedAtom src(cx, reObj->getSource());
     if (!src)
         return false;
 
     /* Step 7. */
-    if (src->length() == 0)
-        args.rval().setString(cx->names().emptyRegExp);
-    else
-        args.rval().setString(src);
+    RootedString str(cx, EscapeRegExpPattern(cx, src));
+    if (!str)
+        return false;
 
+    args.rval().setString(str);
     return true;
 }
 
 static bool
 regexp_source(JSContext *cx, unsigned argc, JS::Value *vp)
 {
     /* Steps 1-4. */
     CallArgs args = CallArgsFromVp(argc, vp);
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/escape.js
@@ -0,0 +1,70 @@
+var BUGNUMBER = 1130860;
+var summary = 'Slash and LineTerminator should be escaped correctly.';
+
+print(BUGNUMBER + ": " + summary);
+
+function test(re, source) {
+  assertEq(re.source, source);
+  assertEq(eval("/" + re.source + "/").source, source);
+  assertEq(re.toString(), "/" + source + "/");
+}
+
+test(/\\n/,           "\\\\n");
+test(/\\\n/,          "\\\\\\n");
+test(/\\\\n/,         "\\\\\\\\n");
+test(RegExp("\\n"),   "\\n");
+test(RegExp("\\\n"),  "\\n");
+test(RegExp("\\\\n"), "\\\\n");
+
+test(/\\r/,           "\\\\r");
+test(/\\\r/,          "\\\\\\r");
+test(/\\\\r/,         "\\\\\\\\r");
+test(RegExp("\\r"),   "\\r");
+test(RegExp("\\\r"),  "\\r");
+test(RegExp("\\\\r"), "\\\\r");
+
+test(/\\u2028/,           "\\\\u2028");
+test(/\\\u2028/,          "\\\\\\u2028");
+test(/\\\\u2028/,         "\\\\\\\\u2028");
+test(RegExp("\\u2028"),   "\\u2028");
+test(RegExp("\\\u2028"),  "\\u2028");
+test(RegExp("\\\\u2028"), "\\\\u2028");
+
+test(/\\u2029/,           "\\\\u2029");
+test(/\\\u2029/,          "\\\\\\u2029");
+test(/\\\\u2029/,         "\\\\\\\\u2029");
+test(RegExp("\\u2029"),   "\\u2029");
+test(RegExp("\\\u2029"),  "\\u2029");
+test(RegExp("\\\\u2029"), "\\\\u2029");
+
+test(/\//,            "\\/");
+test(/\\\//,          "\\\\\\/");
+test(RegExp("/"),     "\\/");
+test(RegExp("\/"),    "\\/");
+test(RegExp("\\/"),   "\\/");
+test(RegExp("\\\/"),  "\\/");
+test(RegExp("\\\\/"), "\\\\\\/");
+
+test(/[/]/,             "[/]");
+test(/[\/]/,            "[\\/]");
+test(/[\\/]/,           "[\\\\/]");
+test(/[\\\/]/,          "[\\\\\\/]");
+test(RegExp("[/]"),     "[/]");
+test(RegExp("[\/]"),    "[/]");
+test(RegExp("[\\/]"),   "[\\/]");
+test(RegExp("[\\\/]"),  "[\\/]");
+test(RegExp("[\\\\/]"), "[\\\\/]");
+
+test(RegExp("\[/\]"),   "[/]");
+test(RegExp("\[\\/\]"), "[\\/]");
+
+test(/\[\/\]/,              "\\[\\/\\]");
+test(/\[\\\/\]/,            "\\[\\\\\\/\\]");
+test(RegExp("\\[/\\]"),     "\\[\\/\\]");
+test(RegExp("\\[\/\\]"),    "\\[\\/\\]");
+test(RegExp("\\[\\/\\]"),   "\\[\\/\\]");
+test(RegExp("\\[\\\/\\]"),  "\\[\\/\\]");
+test(RegExp("\\[\\\\/\\]"), "\\[\\\\\\/\\]");
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
--- a/js/src/tests/ecma_6/RegExp/source.js
+++ b/js/src/tests/ecma_6/RegExp/source.js
@@ -2,19 +2,23 @@ var BUGNUMBER = 1120169;
 var summary = "Implement RegExp.prototype.source";
 
 print(BUGNUMBER + ": " + summary);
 
 assertEq(RegExp.prototype.source, "(?:)");
 assertEq(/foo/.source, "foo");
 assertEq(/foo/iymg.source, "foo");
 assertEq(/\//.source, "\\/");
+assertEq(/\n\r/.source, "\\n\\r");
+assertEq(/\u2028\u2029/.source, "\\u2028\\u2029");
 assertEq(RegExp("").source, "(?:)");
 assertEq(RegExp("", "mygi").source, "(?:)");
 assertEq(RegExp("/").source, "\\/");
+assertEq(RegExp("\n\r").source, "\\n\\r");
+assertEq(RegExp("\u2028\u2029").source, "\\u2028\\u2029");
 
 assertThrowsInstanceOf(() => genericSource(), TypeError);
 assertThrowsInstanceOf(() => genericSource(1), TypeError);
 assertThrowsInstanceOf(() => genericSource(""), TypeError);
 assertThrowsInstanceOf(() => genericSource({}), TypeError);
 assertThrowsInstanceOf(() => genericSource(new Proxy(/foo/, {get(){ return true; }})), TypeError);
 
 function genericSource(obj) {
--- a/js/src/tests/js1_5/Regress/regress-248444.js
+++ b/js/src/tests/js1_5/Regress/regress-248444.js
@@ -24,20 +24,8 @@ status = summary + ' ' + inSection(2);
 re = RegExp("[^\\\/]+$");
 actual = re.toString();
 reportCompare(expect, actual, status);
 
 status = summary + ' ' + inSection(3);
 re = RegExp("[^\\/]+$");
 actual = re.toString();
 reportCompare(expect, actual, status);
-
-status = summary + ' ' + inSection(4);
-re = RegExp("[^\/]+$");
-actual = re.toString();
-reportCompare(expect, actual, status);
-
-status = summary + ' ' + inSection(5);
-re = RegExp("[^/]+$");
-actual = re.toString();
-reportCompare(expect, actual, status);
-
-
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -363,32 +363,189 @@ RegExpObject::init(ExclusiveContext *cx,
     self->setSource(source);
     self->setGlobal(flags & GlobalFlag);
     self->setIgnoreCase(flags & IgnoreCaseFlag);
     self->setMultiline(flags & MultilineFlag);
     self->setSticky(flags & StickyFlag);
     return true;
 }
 
+static MOZ_ALWAYS_INLINE bool
+IsLineTerminator(const JS::Latin1Char c)
+{
+    return c == '\n' || c == '\r';
+}
+
+static MOZ_ALWAYS_INLINE bool
+IsLineTerminator(const char16_t c)
+{
+    return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
+}
+
+static MOZ_ALWAYS_INLINE bool
+AppendEscapedLineTerminator(StringBuffer &sb, const JS::Latin1Char c)
+{
+    switch (c) {
+      case '\n':
+        if (!sb.append('n'))
+            return false;
+        break;
+      case '\r':
+        if (!sb.append('r'))
+            return false;
+        break;
+      default:
+        MOZ_CRASH("Bad LineTerminator");
+    }
+    return true;
+}
+
+static MOZ_ALWAYS_INLINE bool
+AppendEscapedLineTerminator(StringBuffer &sb, const char16_t c)
+{
+    switch (c) {
+      case '\n':
+        if (!sb.append('n'))
+            return false;
+        break;
+      case '\r':
+        if (!sb.append('r'))
+            return false;
+        break;
+      case 0x2028:
+        if (!sb.append("u2028"))
+            return false;
+        break;
+      case 0x2029:
+        if (!sb.append("u2029"))
+            return false;
+        break;
+      default:
+        MOZ_CRASH("Bad LineTerminator");
+    }
+    return true;
+}
+
+static template <typename CharT>
+MOZ_ALWAYS_INLINE bool
+SetupBuffer(StringBuffer &sb, const CharT *oldChars, size_t oldLen, const CharT *it)
+{
+    if (mozilla::IsSame<CharT, char16_t>::value && !sb.ensureTwoByteChars())
+        return false;
+
+    if (!sb.reserve(oldLen + 1))
+        return false;
+
+    sb.infallibleAppend(oldChars, size_t(it - oldChars));
+    return true;
+}
+
+// Note: returns the original if no escaping need be performed.
+template <typename CharT>
+static bool
+EscapeRegExpPattern(StringBuffer &sb, const CharT *oldChars, size_t oldLen)
+{
+    bool inBrackets = false;
+    bool previousCharacterWasBackslash = false;
+
+    for (const CharT *it = oldChars; it < oldChars + oldLen; ++it) {
+        CharT ch = *it;
+        if (!previousCharacterWasBackslash) {
+            if (inBrackets) {
+                if (ch == ']')
+                    inBrackets = false;
+            } else if (ch == '/') {
+                // There's a forward slash that needs escaping.
+                if (sb.empty()) {
+                    // This is the first char we've seen that needs escaping,
+                    // copy everything up to this point.
+                    if (!SetupBuffer(sb, oldChars, oldLen, it))
+                        return false;
+                }
+                if (!sb.append('\\'))
+                    return false;
+            } else if (ch == '[') {
+                inBrackets = true;
+            }
+        }
+
+        if (IsLineTerminator(ch)) {
+            // There's LineTerminator that needs escaping.
+            if (sb.empty()) {
+                // This is the first char we've seen that needs escaping,
+                // copy everything up to this point.
+                if (!SetupBuffer(sb, oldChars, oldLen, it))
+                    return false;
+            }
+            if (!previousCharacterWasBackslash) {
+                if (!sb.append('\\'))
+                    return false;
+            }
+            if (!AppendEscapedLineTerminator(sb, ch))
+                return false;
+        } else if (!sb.empty()) {
+            if (!sb.append(ch))
+                return false;
+        }
+
+        if (previousCharacterWasBackslash)
+            previousCharacterWasBackslash = false;
+        else if (ch == '\\')
+            previousCharacterWasBackslash = true;
+    }
+
+    return true;
+}
+
+// ES6 draft rev32 21.2.3.2.4.
+JSAtom *
+js::EscapeRegExpPattern(JSContext *cx, HandleAtom src)
+{
+    // Step 2.
+    if (src->length() == 0)
+        return cx->names().emptyRegExp;
+
+    // We may never need to use |sb|. Start using it lazily.
+    StringBuffer sb(cx);
+
+    if (src->hasLatin1Chars()) {
+        JS::AutoCheckCannotGC nogc;
+        if (!::EscapeRegExpPattern(sb, src->latin1Chars(nogc), src->length()))
+            return nullptr;
+    } else {
+        JS::AutoCheckCannotGC nogc;
+        if (!::EscapeRegExpPattern(sb, src->twoByteChars(nogc), src->length()))
+            return nullptr;
+    }
+
+    // Step 3.
+    return sb.empty() ? src : sb.finishAtom();
+}
+
+// ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
 JSFlatString *
 RegExpObject::toString(JSContext *cx) const
 {
-    JSAtom *src = getSource();
+    // Steps 3-4.
+    RootedAtom src(cx, getSource());
+    if (!src)
+        return nullptr;
+    RootedAtom escapedSrc(cx, EscapeRegExpPattern(cx, src));
+
+    // Step 7.
     StringBuffer sb(cx);
-    if (size_t len = src->length()) {
-        if (!sb.reserve(len + 2))
-            return nullptr;
-        sb.infallibleAppend('/');
-        if (!sb.append(src))
-            return nullptr;
-        sb.infallibleAppend('/');
-    } else {
-        if (!sb.append("/(?:)/"))
-            return nullptr;
-    }
+    size_t len = escapedSrc->length();
+    if (!sb.reserve(len + 2))
+        return nullptr;
+    sb.infallibleAppend('/');
+    if (!sb.append(escapedSrc))
+        return nullptr;
+    sb.infallibleAppend('/');
+
+    // Steps 5-7.
     if (global() && !sb.append('g'))
         return nullptr;
     if (ignoreCase() && !sb.append('i'))
         return nullptr;
     if (multiline() && !sb.append('m'))
         return nullptr;
     if (sticky() && !sb.append('y'))
         return nullptr;
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -495,11 +495,14 @@ RegExpToShared(JSContext *cx, HandleObje
 
 template<XDRMode mode>
 bool
 XDRScriptRegExpObject(XDRState<mode> *xdr, MutableHandle<RegExpObject*> objp);
 
 extern JSObject *
 CloneScriptRegExpObject(JSContext *cx, RegExpObject &re);
 
+JSAtom *
+EscapeRegExpPattern(JSContext *cx, HandleAtom src);
+
 } /* namespace js */
 
 #endif /* vm_RegExpObject_h */