Bug 1130860 - Implement all of EscapeRegExpPattern instead of just escaping forward slashes. r=till
authorTooru Fujisawa <arai_a@mac.com>
Tue, 10 Feb 2015 02:04:43 +0900
changeset 241801 18240dad751f9e2302acacf5eb6a06f006ca0453
parent 241800 7b89d8a612eb83385c4f8331a565c7d769bb1c44
child 241802 ac3e4d79297c71c1378855bfa362940acb602147
push id619
push usercliu@mozilla.com
push dateMon, 09 Feb 2015 21:57:21 +0000
reviewerstill
bugs1130860
milestone38.0a1
Bug 1130860 - Implement all of EscapeRegExpPattern instead of just escaping forward slashes. r=till
js/src/builtin/RegExp.cpp
js/src/tests/ecma_6/RegExp/escape.js
js/src/tests/ecma_6/RegExp/source.js
js/src/tests/js1_5/Regress/regress-248444.js
js/src/vm/RegExpObject.cpp
js/src/vm/RegExpObject.h
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -134,64 +134,16 @@ js::ExecuteRegExpLegacy(JSContext *cx, R
         /* Forbid an array, as an optimization. */
         rval.setBoolean(true);
         return true;
     }
 
     return CreateRegExpMatchResult(cx, input, matches, rval);
 }
 
-/* Note: returns the original if no escaping need be performed. */
-template <typename CharT>
-static bool
-EscapeNakedForwardSlashes(StringBuffer &sb, const CharT *oldChars, size_t oldLen)
-{
-    for (const CharT *it = oldChars; it < oldChars + oldLen; ++it) {
-        if (*it == '/' && (it == oldChars || it[-1] != '\\')) {
-            /* There's a forward slash that needs escaping. */
-            if (sb.empty()) {
-                /* This is the first one we've seen, copy everything up to this point. */
-                if (mozilla::IsSame<CharT, char16_t>::value && !sb.ensureTwoByteChars())
-                    return false;
-
-                if (!sb.reserve(oldLen + 1))
-                    return false;
-
-                sb.infallibleAppend(oldChars, size_t(it - oldChars));
-            }
-            if (!sb.append('\\'))
-                return false;
-        }
-
-        if (!sb.empty() && !sb.append(*it))
-            return false;
-    }
-
-    return true;
-}
-
-static JSAtom *
-EscapeNakedForwardSlashes(JSContext *cx, JSAtom *unescaped)
-{
-    /* We may never need to use |sb|. Start using it lazily. */
-    StringBuffer sb(cx);
-
-    if (unescaped->hasLatin1Chars()) {
-        JS::AutoCheckCannotGC nogc;
-        if (!EscapeNakedForwardSlashes(sb, unescaped->latin1Chars(nogc), unescaped->length()))
-            return nullptr;
-    } else {
-        JS::AutoCheckCannotGC nogc;
-        if (!EscapeNakedForwardSlashes(sb, unescaped->twoByteChars(nogc), unescaped->length()))
-            return nullptr;
-    }
-
-    return sb.empty() ? unescaped : sb.finishAtom();
-}
-
 /*
  * Compile a new |RegExpShared| for the |RegExpObject|.
  *
  * Per ECMAv5 15.10.4.1, we act on combinations of (pattern, flags) as
  * arguments:
  *
  *  RegExp, undefined => flags := pattern.flags
  *  RegExp, _ => throw TypeError
@@ -270,30 +222,26 @@ CompileRegExpObject(JSContext *cx, RegEx
         RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
         if (!flagStr)
             return false;
         args[1].setString(flagStr);
         if (!ParseRegExpFlags(cx, flagStr, &flags))
             return false;
     }
 
-    RootedAtom escapedSourceStr(cx, EscapeNakedForwardSlashes(cx, source));
-    if (!escapedSourceStr)
-        return false;
-
     CompileOptions options(cx);
     frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
 
-    if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), escapedSourceStr))
+    if (!irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), source))
         return false;
 
     RegExpStatics *res = cx->global()->getRegExpStatics(cx);
     if (!res)
         return false;
-    RegExpObject *reobj = builder.build(escapedSourceStr, RegExpFlag(flags | res->getFlags()));
+    RegExpObject *reobj = builder.build(source, RegExpFlag(flags | res->getFlags()));
     if (!reobj)
         return false;
 
     args.rval().setObject(*reobj);
     return true;
 }
 
 MOZ_ALWAYS_INLINE bool
@@ -490,21 +438,21 @@ regexp_source_impl(JSContext *cx, CallAr
     Rooted<RegExpObject*> reObj(cx, &args.thisv().toObject().as<RegExpObject>());
 
     /* Step 5. */
     RootedAtom src(cx, reObj->getSource());
     if (!src)
         return false;
 
     /* Step 7. */
-    if (src->length() == 0)
-        args.rval().setString(cx->names().emptyRegExp);
-    else
-        args.rval().setString(src);
+    RootedString str(cx, EscapeRegExpPattern(cx, src));
+    if (!str)
+        return false;
 
+    args.rval().setString(str);
     return true;
 }
 
 static bool
 regexp_source(JSContext *cx, unsigned argc, JS::Value *vp)
 {
     /* Steps 1-4. */
     CallArgs args = CallArgsFromVp(argc, vp);
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/escape.js
@@ -0,0 +1,70 @@
+var BUGNUMBER = 1130860;
+var summary = 'Slash and LineTerminator should be escaped correctly.';
+
+print(BUGNUMBER + ": " + summary);
+
+function test(re, source) {
+  assertEq(re.source, source);
+  assertEq(eval("/" + re.source + "/").source, source);
+  assertEq(re.toString(), "/" + source + "/");
+}
+
+test(/\\n/,           "\\\\n");
+test(/\\\n/,          "\\\\\\n");
+test(/\\\\n/,         "\\\\\\\\n");
+test(RegExp("\\n"),   "\\n");
+test(RegExp("\\\n"),  "\\n");
+test(RegExp("\\\\n"), "\\\\n");
+
+test(/\\r/,           "\\\\r");
+test(/\\\r/,          "\\\\\\r");
+test(/\\\\r/,         "\\\\\\\\r");
+test(RegExp("\\r"),   "\\r");
+test(RegExp("\\\r"),  "\\r");
+test(RegExp("\\\\r"), "\\\\r");
+
+test(/\\u2028/,           "\\\\u2028");
+test(/\\\u2028/,          "\\\\\\u2028");
+test(/\\\\u2028/,         "\\\\\\\\u2028");
+test(RegExp("\\u2028"),   "\\u2028");
+test(RegExp("\\\u2028"),  "\\u2028");
+test(RegExp("\\\\u2028"), "\\\\u2028");
+
+test(/\\u2029/,           "\\\\u2029");
+test(/\\\u2029/,          "\\\\\\u2029");
+test(/\\\\u2029/,         "\\\\\\\\u2029");
+test(RegExp("\\u2029"),   "\\u2029");
+test(RegExp("\\\u2029"),  "\\u2029");
+test(RegExp("\\\\u2029"), "\\\\u2029");
+
+test(/\//,            "\\/");
+test(/\\\//,          "\\\\\\/");
+test(RegExp("/"),     "\\/");
+test(RegExp("\/"),    "\\/");
+test(RegExp("\\/"),   "\\/");
+test(RegExp("\\\/"),  "\\/");
+test(RegExp("\\\\/"), "\\\\\\/");
+
+test(/[/]/,             "[/]");
+test(/[\/]/,            "[\\/]");
+test(/[\\/]/,           "[\\\\/]");
+test(/[\\\/]/,          "[\\\\\\/]");
+test(RegExp("[/]"),     "[/]");
+test(RegExp("[\/]"),    "[/]");
+test(RegExp("[\\/]"),   "[\\/]");
+test(RegExp("[\\\/]"),  "[\\/]");
+test(RegExp("[\\\\/]"), "[\\\\/]");
+
+test(RegExp("\[/\]"),   "[/]");
+test(RegExp("\[\\/\]"), "[\\/]");
+
+test(/\[\/\]/,              "\\[\\/\\]");
+test(/\[\\\/\]/,            "\\[\\\\\\/\\]");
+test(RegExp("\\[/\\]"),     "\\[\\/\\]");
+test(RegExp("\\[\/\\]"),    "\\[\\/\\]");
+test(RegExp("\\[\\/\\]"),   "\\[\\/\\]");
+test(RegExp("\\[\\\/\\]"),  "\\[\\/\\]");
+test(RegExp("\\[\\\\/\\]"), "\\[\\\\\\/\\]");
+
+if (typeof reportCompare === "function")
+  reportCompare(true, true);
--- a/js/src/tests/ecma_6/RegExp/source.js
+++ b/js/src/tests/ecma_6/RegExp/source.js
@@ -2,19 +2,23 @@ var BUGNUMBER = 1120169;
 var summary = "Implement RegExp.prototype.source";
 
 print(BUGNUMBER + ": " + summary);
 
 assertEq(RegExp.prototype.source, "(?:)");
 assertEq(/foo/.source, "foo");
 assertEq(/foo/iymg.source, "foo");
 assertEq(/\//.source, "\\/");
+assertEq(/\n\r/.source, "\\n\\r");
+assertEq(/\u2028\u2029/.source, "\\u2028\\u2029");
 assertEq(RegExp("").source, "(?:)");
 assertEq(RegExp("", "mygi").source, "(?:)");
 assertEq(RegExp("/").source, "\\/");
+assertEq(RegExp("\n\r").source, "\\n\\r");
+assertEq(RegExp("\u2028\u2029").source, "\\u2028\\u2029");
 
 assertThrowsInstanceOf(() => genericSource(), TypeError);
 assertThrowsInstanceOf(() => genericSource(1), TypeError);
 assertThrowsInstanceOf(() => genericSource(""), TypeError);
 assertThrowsInstanceOf(() => genericSource({}), TypeError);
 assertThrowsInstanceOf(() => genericSource(new Proxy(/foo/, {get(){ return true; }})), TypeError);
 
 function genericSource(obj) {
--- a/js/src/tests/js1_5/Regress/regress-248444.js
+++ b/js/src/tests/js1_5/Regress/regress-248444.js
@@ -24,20 +24,8 @@ status = summary + ' ' + inSection(2);
 re = RegExp("[^\\\/]+$");
 actual = re.toString();
 reportCompare(expect, actual, status);
 
 status = summary + ' ' + inSection(3);
 re = RegExp("[^\\/]+$");
 actual = re.toString();
 reportCompare(expect, actual, status);
-
-status = summary + ' ' + inSection(4);
-re = RegExp("[^\/]+$");
-actual = re.toString();
-reportCompare(expect, actual, status);
-
-status = summary + ' ' + inSection(5);
-re = RegExp("[^/]+$");
-actual = re.toString();
-reportCompare(expect, actual, status);
-
-
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -363,32 +363,189 @@ RegExpObject::init(ExclusiveContext *cx,
     self->setSource(source);
     self->setGlobal(flags & GlobalFlag);
     self->setIgnoreCase(flags & IgnoreCaseFlag);
     self->setMultiline(flags & MultilineFlag);
     self->setSticky(flags & StickyFlag);
     return true;
 }
 
+static MOZ_ALWAYS_INLINE bool
+IsLineTerminator(const JS::Latin1Char c)
+{
+    return c == '\n' || c == '\r';
+}
+
+static MOZ_ALWAYS_INLINE bool
+IsLineTerminator(const char16_t c)
+{
+    return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
+}
+
+static MOZ_ALWAYS_INLINE bool
+AppendEscapedLineTerminator(StringBuffer &sb, const JS::Latin1Char c)
+{
+    switch (c) {
+      case '\n':
+        if (!sb.append('n'))
+            return false;
+        break;
+      case '\r':
+        if (!sb.append('r'))
+            return false;
+        break;
+      default:
+        MOZ_CRASH("Bad LineTerminator");
+    }
+    return true;
+}
+
+static MOZ_ALWAYS_INLINE bool
+AppendEscapedLineTerminator(StringBuffer &sb, const char16_t c)
+{
+    switch (c) {
+      case '\n':
+        if (!sb.append('n'))
+            return false;
+        break;
+      case '\r':
+        if (!sb.append('r'))
+            return false;
+        break;
+      case 0x2028:
+        if (!sb.append("u2028"))
+            return false;
+        break;
+      case 0x2029:
+        if (!sb.append("u2029"))
+            return false;
+        break;
+      default:
+        MOZ_CRASH("Bad LineTerminator");
+    }
+    return true;
+}
+
+static template <typename CharT>
+MOZ_ALWAYS_INLINE bool
+SetupBuffer(StringBuffer &sb, const CharT *oldChars, size_t oldLen, const CharT *it)
+{
+    if (mozilla::IsSame<CharT, char16_t>::value && !sb.ensureTwoByteChars())
+        return false;
+
+    if (!sb.reserve(oldLen + 1))
+        return false;
+
+    sb.infallibleAppend(oldChars, size_t(it - oldChars));
+    return true;
+}
+
+// Note: returns the original if no escaping need be performed.
+template <typename CharT>
+static bool
+EscapeRegExpPattern(StringBuffer &sb, const CharT *oldChars, size_t oldLen)
+{
+    bool inBrackets = false;
+    bool previousCharacterWasBackslash = false;
+
+    for (const CharT *it = oldChars; it < oldChars + oldLen; ++it) {
+        CharT ch = *it;
+        if (!previousCharacterWasBackslash) {
+            if (inBrackets) {
+                if (ch == ']')
+                    inBrackets = false;
+            } else if (ch == '/') {
+                // There's a forward slash that needs escaping.
+                if (sb.empty()) {
+                    // This is the first char we've seen that needs escaping,
+                    // copy everything up to this point.
+                    if (!SetupBuffer(sb, oldChars, oldLen, it))
+                        return false;
+                }
+                if (!sb.append('\\'))
+                    return false;
+            } else if (ch == '[') {
+                inBrackets = true;
+            }
+        }
+
+        if (IsLineTerminator(ch)) {
+            // There's LineTerminator that needs escaping.
+            if (sb.empty()) {
+                // This is the first char we've seen that needs escaping,
+                // copy everything up to this point.
+                if (!SetupBuffer(sb, oldChars, oldLen, it))
+                    return false;
+            }
+            if (!previousCharacterWasBackslash) {
+                if (!sb.append('\\'))
+                    return false;
+            }
+            if (!AppendEscapedLineTerminator(sb, ch))
+                return false;
+        } else if (!sb.empty()) {
+            if (!sb.append(ch))
+                return false;
+        }
+
+        if (previousCharacterWasBackslash)
+            previousCharacterWasBackslash = false;
+        else if (ch == '\\')
+            previousCharacterWasBackslash = true;
+    }
+
+    return true;
+}
+
+// ES6 draft rev32 21.2.3.2.4.
+JSAtom *
+js::EscapeRegExpPattern(JSContext *cx, HandleAtom src)
+{
+    // Step 2.
+    if (src->length() == 0)
+        return cx->names().emptyRegExp;
+
+    // We may never need to use |sb|. Start using it lazily.
+    StringBuffer sb(cx);
+
+    if (src->hasLatin1Chars()) {
+        JS::AutoCheckCannotGC nogc;
+        if (!::EscapeRegExpPattern(sb, src->latin1Chars(nogc), src->length()))
+            return nullptr;
+    } else {
+        JS::AutoCheckCannotGC nogc;
+        if (!::EscapeRegExpPattern(sb, src->twoByteChars(nogc), src->length()))
+            return nullptr;
+    }
+
+    // Step 3.
+    return sb.empty() ? src : sb.finishAtom();
+}
+
+// ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
 JSFlatString *
 RegExpObject::toString(JSContext *cx) const
 {
-    JSAtom *src = getSource();
+    // Steps 3-4.
+    RootedAtom src(cx, getSource());
+    if (!src)
+        return nullptr;
+    RootedAtom escapedSrc(cx, EscapeRegExpPattern(cx, src));
+
+    // Step 7.
     StringBuffer sb(cx);
-    if (size_t len = src->length()) {
-        if (!sb.reserve(len + 2))
-            return nullptr;
-        sb.infallibleAppend('/');
-        if (!sb.append(src))
-            return nullptr;
-        sb.infallibleAppend('/');
-    } else {
-        if (!sb.append("/(?:)/"))
-            return nullptr;
-    }
+    size_t len = escapedSrc->length();
+    if (!sb.reserve(len + 2))
+        return nullptr;
+    sb.infallibleAppend('/');
+    if (!sb.append(escapedSrc))
+        return nullptr;
+    sb.infallibleAppend('/');
+
+    // Steps 5-7.
     if (global() && !sb.append('g'))
         return nullptr;
     if (ignoreCase() && !sb.append('i'))
         return nullptr;
     if (multiline() && !sb.append('m'))
         return nullptr;
     if (sticky() && !sb.append('y'))
         return nullptr;
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -495,11 +495,14 @@ RegExpToShared(JSContext *cx, HandleObje
 
 template<XDRMode mode>
 bool
 XDRScriptRegExpObject(XDRState<mode> *xdr, MutableHandle<RegExpObject*> objp);
 
 extern JSObject *
 CloneScriptRegExpObject(JSContext *cx, RegExpObject &re);
 
+JSAtom *
+EscapeRegExpPattern(JSContext *cx, HandleAtom src);
+
 } /* namespace js */
 
 #endif /* vm_RegExpObject_h */