Bug 1135377 - Part 2: Parse RegExp unicode character in non-CharacterClass. r=till, f=anba
author Tooru Fujisawa <arai_a@mac.com>
Fri, 07 Aug 2015 08:11:07 +0900
changeset 277057 5a944e733ada08f08dd095531c9c715f64683a0d
parent 277056 9295eeb878f5fc4570025cb4cf38d4be1e364e8d
child 277058 4e05611fe3dd8f91320ed1d123bfc2032d11eabe
push id 16724
push user cbook@mozilla.com
push date Mon, 21 Dec 2015 11:00:52 +0000
treeherder fx-team@3f3f0361567c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers till
bugs 1135377
milestone 46.0a1
Bug 1135377 - Part 2: Parse RegExp unicode character in non-CharacterClass. r=till, f=anba
js/src/irregexp/RegExpAST.h
js/src/irregexp/RegExpEngine.cpp
js/src/irregexp/RegExpEngine.h
js/src/irregexp/RegExpParser.cpp
js/src/irregexp/RegExpParser.h
js/src/js.msg
js/src/tests/ecma_6/RegExp/unicode-braced.js
js/src/tests/ecma_6/RegExp/unicode-lead-trail.js
js/src/tests/ecma_6/RegExp/unicode-raw.js
js/src/vm/Unicode.h
js/src/vm/Xdr.h
--- a/js/src/irregexp/RegExpAST.h
+++ b/js/src/irregexp/RegExpAST.h
@@ -133,17 +133,18 @@ class RegExpAlternative : public RegExpT
 class RegExpAssertion : public RegExpTree {
  public:
   enum AssertionType {
     START_OF_LINE,
     START_OF_INPUT,
     END_OF_LINE,
     END_OF_INPUT,
     BOUNDARY,
-    NON_BOUNDARY
+    NON_BOUNDARY,
+    NOT_AFTER_LEAD_SURROGATE
   };
   explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
   virtual void* Accept(RegExpVisitor* visitor, void* data);
   virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                              RegExpNode* on_success);
   virtual RegExpAssertion* AsAssertion();
   virtual bool IsAssertion();
   virtual bool IsAnchoredAtStart();
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -2051,16 +2051,18 @@ RegExpAssertion::ToNode(RegExpCompiler* 
 
         // Add the two alternatives to the ChoiceNode.
         GuardedAlternative eol_alternative(end_of_line);
         result->AddAlternative(eol_alternative);
         GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
         result->AddAlternative(end_alternative);
         return result;
       }
+      case NOT_AFTER_LEAD_SURROGATE:
+        return AssertionNode::NotAfterLeadSurrogate(on_success);
       default:
         MOZ_CRASH("Bad assertion type");
     }
     return on_success;
 }
 
 RegExpNode*
 RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
@@ -2838,16 +2840,41 @@ EmitHat(RegExpCompiler* compiler, RegExp
             assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
         assembler->CheckCharacter('\n', &ok);
         assembler->CheckNotCharacter('\r', new_trace.backtrack());
     }
     assembler->Bind(&ok);
     on_success->Emit(compiler, &new_trace);
 }
 
+// Assert that the previous character is not a lead surrogate (i.e. we are not mid-pair).
+static void
+EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
+{
+    RegExpMacroAssembler* assembler = compiler->macro_assembler();
+
+    // We will be loading the previous character into the current character
+    // register.
+    Trace new_trace(*trace);
+    new_trace.InvalidateCurrentCharacter();
+
+    jit::Label ok;
+    if (new_trace.cp_offset() == 0)
+        assembler->CheckAtStart(&ok);
+
+    // We already checked that we are not at the start of input so it must be
+    // OK to load the previous character.
+    assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
+    assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
+                                     new_trace.backtrack());
+
+    assembler->Bind(&ok);
+    on_success->Emit(compiler, &new_trace);
+}
+
 // Check for [0-9A-Z_a-z].
 static void
 EmitWordCheck(RegExpMacroAssembler* assembler,
               jit::Label* word, jit::Label* non_word, bool fall_through_on_word)
 {
     if (assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
                                               fall_through_on_word ? non_word : word))
     {
@@ -2991,16 +3018,19 @@ AssertionNode::Emit(RegExpCompiler* comp
       case AFTER_NEWLINE:
         EmitHat(compiler, on_success(), trace);
         return;
       case AT_BOUNDARY:
       case AT_NON_BOUNDARY: {
         EmitBoundaryCheck(compiler, trace);
         return;
       }
+      case NOT_AFTER_LEAD_SURROGATE:
+        EmitNotAfterLeadSurrogate(compiler, on_success(), trace);
+        return;
     }
     on_success()->Emit(compiler, trace);
 }
 
 static bool
 DeterminedAlready(QuickCheckDetails* quick_check, int offset)
 {
     if (quick_check == nullptr)
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -783,17 +783,18 @@ class TextNode : public SeqRegExpNode
 class AssertionNode : public SeqRegExpNode
 {
   public:
     enum AssertionType {
         AT_END,
         AT_START,
         AT_BOUNDARY,
         AT_NON_BOUNDARY,
-        AFTER_NEWLINE
+        AFTER_NEWLINE,
+        NOT_AFTER_LEAD_SURROGATE
     };
     AssertionNode(AssertionType t, RegExpNode* on_success)
       : SeqRegExpNode(on_success), assertion_type_(t)
     {}
 
     static AssertionNode* AtEnd(RegExpNode* on_success) {
         return on_success->alloc()->newInfallible<AssertionNode>(AT_END, on_success);
     }
@@ -804,16 +805,20 @@ class AssertionNode : public SeqRegExpNo
         return on_success->alloc()->newInfallible<AssertionNode>(AT_BOUNDARY, on_success);
     }
     static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
         return on_success->alloc()->newInfallible<AssertionNode>(AT_NON_BOUNDARY, on_success);
     }
     static AssertionNode* AfterNewline(RegExpNode* on_success) {
         return on_success->alloc()->newInfallible<AssertionNode>(AFTER_NEWLINE, on_success);
     }
+    static AssertionNode* NotAfterLeadSurrogate(RegExpNode* on_success) {
+        return on_success->alloc()->newInfallible<AssertionNode>(NOT_AFTER_LEAD_SURROGATE,
+                                                                 on_success);
+    }
     virtual void Accept(NodeVisitor* visitor);
     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                       RegExpCompiler* compiler,
                                       int filled_in,
                                       bool not_at_start);
     virtual bool FillInBMInfo(int offset,
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -297,16 +297,118 @@ RegExpParser<CharT>::ParseHexEscape(int 
         if (i == length - 1) {
             done = true;
         }
     }
     *value = val;
     return true;
 }
 
+template <typename CharT>
+bool
+RegExpParser<CharT>::ParseBracedHexEscape(size_t* value)
+{
+    MOZ_ASSERT(current() == '{');
+    Advance();
+
+    bool first = true;
+    uint32_t code = 0;
+    while (true) {
+        widechar c = current();
+        if (c == kEndMarker) {
+            ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
+            return false;
+        }
+        if (c == '}') {
+            if (first) {
+                ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
+                return false;
+            }
+            Advance();
+            break;
+        }
+
+        int d = HexValue(c);
+        if (d < 0) {
+            ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
+            return false;
+        }
+        code = (code << 4) | d;
+        if (code > unicode::NonBMPMax) {
+            ReportError(JSMSG_UNICODE_OVERFLOW);
+            return false;
+        }
+        Advance();
+        first = false;
+    }
+
+    *value = code;
+    return true;
+}
+
+template <typename CharT>
+bool
+RegExpParser<CharT>::ParseTrailSurrogate(size_t* value)
+{
+    if (current() != '\\')
+        return false;
+
+    const CharT* start = position();
+    Advance();
+    if (current() != 'u') {
+        Reset(start);
+        return false;
+    }
+    Advance();
+    if (!ParseHexEscape(4, value)) {
+        Reset(start);
+        return false;
+    }
+    if (!unicode::IsTrailSurrogate(*value)) {
+        Reset(start);
+        return false;
+    }
+    return true;
+}
+
+template <typename CharT>
+bool
+RegExpParser<CharT>::ParseRawSurrogatePair(char16_t* lead, char16_t* trail)
+{
+    widechar c1 = current();
+    if (!unicode::IsLeadSurrogate(c1))
+        return false;
+
+    const CharT* start = position();
+    Advance();
+    widechar c2 = current();
+    if (!unicode::IsTrailSurrogate(c2)) {
+        Reset(start);
+        return false;
+    }
+    Advance();
+    *lead = c1;
+    *trail = c2;
+    return true;
+}
+
+static inline RegExpTree*
+RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
+{
+    CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+    ranges->append(CharacterRange::Range(from, to));
+    return alloc->newInfallible<RegExpCharacterClass>(ranges, false);
+}
+
+static inline RegExpTree*
+NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
+{
+    return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
+}
+
 #ifdef DEBUG
 // Currently only used in an assert.kASSERT.
 static bool
 IsSpecialClassEscape(widechar c)
 {
   switch (c) {
     case 'd': case 'D':
     case 's': case 'S':
@@ -670,16 +772,45 @@ template <typename CharT>
 RegExpTree*
 RegExpParser<CharT>::ParsePattern()
 {
     RegExpTree* result = ParseDisjunction();
     MOZ_ASSERT_IF(result, !has_more());
     return result;
 }
 
+static inline RegExpTree*
+SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail)
+{
+    RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+    builder->AddCharacter(lead);
+    builder->AddCharacter(trail);
+    return builder->ToRegExp();
+}
+
+static inline RegExpTree*
+LeadSurrogateAtom(LifoAlloc* alloc, char16_t value)
+{
+    RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+    builder->AddCharacter(value);
+    builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
+                                       unicode::TrailSurrogateMax));
+    return builder->ToRegExp();
+}
+
+static inline RegExpTree*
+TrailSurrogateAtom(LifoAlloc* alloc, char16_t value)
+{
+    RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+    builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
+        RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
+    builder->AddCharacter(value);
+    return builder->ToRegExp();
+}
+
 // Disjunction ::
 //   Alternative
 //   Alternative | Disjunction
 // Alternative ::
 //   [empty]
 //   Term Alternative
 // Term ::
 //   Assertion
@@ -924,16 +1055,48 @@ RegExpParser<CharT>::ParseDisjunction()
                 } else {
                     builder->AddCharacter('x');
                 }
                 break;
               }
               case 'u': {
                 Advance(2);
                 size_t value;
+                if (unicode_) {
+                    if (current() == '{') {
+                        if (!ParseBracedHexEscape(&value))
+                            return nullptr;
+                        if (unicode::IsLeadSurrogate(value)) {
+                            builder->AddAtom(LeadSurrogateAtom(alloc, value));
+                        } else if (unicode::IsTrailSurrogate(value)) {
+                            builder->AddAtom(TrailSurrogateAtom(alloc, value));
+                        } else if (value >= unicode::NonBMPMin) {
+                            size_t lead, trail;
+                            unicode::UTF16Encode(value, &lead, &trail);
+                            builder->AddAtom(SurrogatePairAtom(alloc, lead, trail));
+                        } else {
+                            builder->AddCharacter(value);
+                        }
+                    } else if (ParseHexEscape(4, &value)) {
+                        if (unicode::IsLeadSurrogate(value)) {
+                            size_t trail;
+                            if (ParseTrailSurrogate(&trail))
+                                builder->AddAtom(SurrogatePairAtom(alloc, value, trail));
+                            else
+                                builder->AddAtom(LeadSurrogateAtom(alloc, value));
+                        } else if (unicode::IsTrailSurrogate(value)) {
+                            builder->AddAtom(TrailSurrogateAtom(alloc, value));
+                        } else {
+                            builder->AddCharacter(value);
+                        }
+                    } else {
+                        return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
+                    }
+                    break;
+                }
                 if (ParseHexEscape(4, &value)) {
                     builder->AddCharacter(value);
                 } else {
                     builder->AddCharacter('u');
                 }
                 break;
               }
               default:
@@ -945,16 +1108,32 @@ RegExpParser<CharT>::ParseDisjunction()
             break;
           case '{': {
             int dummy;
             if (ParseIntervalQuantifier(&dummy, &dummy))
                 return ReportError(JSMSG_NOTHING_TO_REPEAT);
             // fallthrough
           }
           default:
+            if (unicode_) {
+                char16_t lead, trail;
+                if (ParseRawSurrogatePair(&lead, &trail)) {
+                    builder->AddAtom(SurrogatePairAtom(alloc, lead, trail));
+                } else {
+                    widechar c = current();
+                    if (unicode::IsLeadSurrogate(c))
+                        builder->AddAtom(LeadSurrogateAtom(alloc, c));
+                    else if (unicode::IsTrailSurrogate(c))
+                        builder->AddAtom(TrailSurrogateAtom(alloc, c));
+                    else
+                        builder->AddCharacter(c);
+                    Advance();
+                }
+                break;
+            }
             builder->AddCharacter(current());
             Advance();
             break;
         }  // end switch(current())
 
         int min;
         int max;
         switch (current()) {
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -188,16 +188,20 @@ class RegExpParser
     // Parses and returns a single escaped character.  The character
     // must not be 'b' or 'B' since they are usually handled specially.
     widechar ParseClassCharacterEscape();
 
     // Checks whether the following is a length-digit hexadecimal number,
     // and sets the value if it is.
     bool ParseHexEscape(int length, size_t* value);
 
+    bool ParseBracedHexEscape(size_t* value);
+    bool ParseTrailSurrogate(size_t* value);
+    bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
+
     size_t ParseOctalLiteral();
 
     // Tries to parse the input as a back reference.  If successful it
     // stores the result in the output parameter and returns true.  If
     // it fails it will push back the characters read so the same characters
     // can be reparsed.
     bool ParseBackReferenceIndex(int* index_out);
 
--- a/js/src/js.msg
+++ b/js/src/js.msg
@@ -442,21 +442,24 @@ MSG_DEF(JSMSG_INVALID_OPTION_VALUE,    2
 MSG_DEF(JSMSG_INVALID_TIME_ZONE,       1, JSEXN_RANGEERR, "invalid time zone in DateTimeFormat(): {0}")
 MSG_DEF(JSMSG_UNDEFINED_CURRENCY,      0, JSEXN_TYPEERR, "undefined currency in NumberFormat() with currency style")
 
 // RegExp
 MSG_DEF(JSMSG_BAD_CLASS_RANGE,         0, JSEXN_SYNTAXERR, "invalid range in character class")
 MSG_DEF(JSMSG_DEPRECATED_REGEXP_MULTILINE, 0, JSEXN_SYNTAXERR, "RegExp.multiline is deprecated. Use m flag instead")
 MSG_DEF(JSMSG_ESCAPE_AT_END_OF_REGEXP, 0, JSEXN_SYNTAXERR, "\\ at end of pattern")
 MSG_DEF(JSMSG_INVALID_GROUP,           0, JSEXN_SYNTAXERR, "invalid regexp group")
+MSG_DEF(JSMSG_INVALID_IDENTITY_ESCAPE, 0, JSEXN_SYNTAXERR, "invalid identity escape in regular expression")
+MSG_DEF(JSMSG_INVALID_UNICODE_ESCAPE,  0, JSEXN_SYNTAXERR, "invalid unicode escape in regular expression")
 MSG_DEF(JSMSG_MISSING_PAREN,           0, JSEXN_SYNTAXERR, "unterminated parenthetical")
 MSG_DEF(JSMSG_NEWREGEXP_FLAGGED,       0, JSEXN_TYPEERR, "can't supply flags when constructing one RegExp from another")
 MSG_DEF(JSMSG_NOTHING_TO_REPEAT,       0, JSEXN_SYNTAXERR, "nothing to repeat")
 MSG_DEF(JSMSG_NUMBERS_OUT_OF_ORDER,    0, JSEXN_SYNTAXERR, "numbers out of order in {} quantifier.")
 MSG_DEF(JSMSG_TOO_MANY_PARENS,         0, JSEXN_INTERNALERR, "too many parentheses in regular expression")
+MSG_DEF(JSMSG_UNICODE_OVERFLOW,        0, JSEXN_SYNTAXERR, "unicode codepoint should not be greater than 0x10FFFF in regular expression")
 MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN,   0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")
 MSG_DEF(JSMSG_UNTERM_CLASS,            0, JSEXN_SYNTAXERR, "unterminated character class")
 
 // Self-hosting
 MSG_DEF(JSMSG_DEFAULT_LOCALE_ERROR,    0, JSEXN_ERR, "internal error getting the default locale")
 MSG_DEF(JSMSG_NO_SUCH_SELF_HOSTED_PROP,1, JSEXN_ERR, "No such property on self-hosted object: {0}")
 
 // Typed object / SIMD
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/unicode-braced.js
@@ -0,0 +1,166 @@
+var BUGNUMBER = 1135377;
+var summary = "Implement RegExp unicode flag -- braced pattern in RegExpUnicodeEscapeSequence.";
+
+print(BUGNUMBER + ": " + summary);
+
+// ==== standalone ====
+
+assertEqArray(/\u{41}/u.exec("ABC"),
+              ["A"]);
+assertEqArray(/\u{41}/.exec("ABC" + "u".repeat(41)),
+              ["u".repeat(41)]);
+
+assertEqArray(/\u{4A}/u.exec("JKL"),
+              ["J"]);
+assertEqArray(/\u{4A}/.exec("JKLu{4A}"),
+              ["u{4A}"]);
+
+assertEqArray(/\u{1F438}/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\u{1F438}/.exec("u{1F438}"),
+              ["u{1F438}"]);
+
+assertEqArray(/\u{0}/u.exec("\u{0}"),
+              ["\u{0}"]);
+assertEqArray(/\u{10FFFF}/u.exec("\u{10FFFF}"),
+              ["\u{10FFFF}"]);
+assertEqArray(/\u{10ffff}/u.exec("\u{10FFFF}"),
+              ["\u{10FFFF}"]);
+
+// leading 0
+assertEqArray(/\u{0000000000000000000000}/u.exec("\u{0}"),
+              ["\u{0}"]);
+assertEqArray(/\u{000000000000000010FFFF}/u.exec("\u{10FFFF}"),
+              ["\u{10FFFF}"]);
+
+// RegExp constructor
+assertEqArray(new RegExp("\\u{0}", "u").exec("\u{0}"),
+              ["\u{0}"]);
+assertEqArray(new RegExp("\\u{41}", "u").exec("ABC"),
+              ["A"]);
+assertEqArray(new RegExp("\\u{1F438}", "u").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(new RegExp("\\u{10FFFF}", "u").exec("\u{10FFFF}"),
+              ["\u{10FFFF}"]);
+
+assertEqArray(new RegExp("\\u{0000000000000000}", "u").exec("\u{0}"),
+              ["\u{0}"]);
+
+assertEqArray(eval(`/\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}/u`).exec("\u{1234}"),
+              ["\u{1234}"]);
+assertEqArray(new RegExp(`\\u{${"0".repeat(Math.pow(2, 24)) + "1234"}}`, "u").exec("\u{1234}"),
+              ["\u{1234}"]);
+
+// ==== ? ====
+
+assertEqArray(/\u{1F438}?/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\u{1F438}?/u.exec(""),
+              [""]);
+
+// lead-only target
+assertEqArray(/\u{1F438}?/u.exec("\uD83D"),
+              [""]);
+
+// RegExp constructor
+assertEqArray(new RegExp("\\u{1F438}?", "u").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(new RegExp("\\u{1F438}?", "u").exec(""),
+              [""]);
+assertEqArray(new RegExp("\\u{1F438}?", "u").exec("\uD83D"),
+              [""]);
+
+// ==== + ====
+
+assertEqArray(/\u{1F438}+/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\u{1F438}+/u.exec("\u{1F438}\u{1F438}"),
+              ["\u{1F438}\u{1F438}"]);
+assertEq(/\u{1F438}+/u.exec(""),
+         null);
+
+// lead-only target
+assertEq(/\u{1F438}+/u.exec("\uD83D"),
+         null);
+assertEqArray(/\u{1F438}+/u.exec("\uD83D\uDC38\uDC38"),
+              ["\uD83D\uDC38"]);
+
+// ==== * ====
+
+assertEqArray(/\u{1F438}*/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\u{1F438}*/u.exec("\u{1F438}\u{1F438}"),
+              ["\u{1F438}\u{1F438}"]);
+assertEqArray(/\u{1F438}*/u.exec(""),
+              [""]);
+
+// lead-only target
+assertEqArray(/\u{1F438}*/u.exec("\uD83D"),
+              [""]);
+assertEqArray(/\u{1F438}*/u.exec("\uD83D\uDC38\uDC38"),
+              ["\uD83D\uDC38"]);
+
+// ==== lead-only ====
+
+// match only non-surrogate pair
+assertEqArray(/\u{D83D}/u.exec("\uD83D\uDBFF"),
+              ["\uD83D"]);
+assertEq(/\u{D83D}/u.exec("\uD83D\uDC00"),
+         null);
+assertEq(/\u{D83D}/u.exec("\uD83D\uDFFF"),
+         null);
+assertEqArray(/\u{D83D}/u.exec("\uD83D\uE000"),
+              ["\uD83D"]);
+
+// match before non-trail char
+assertEqArray(/\u{D83D}/u.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\u{D83D}/u.exec("\uD83DA"),
+              ["\uD83D"]);
+
+// ==== trail-only ====
+
+// match only non-surrogate pair
+assertEqArray(/\u{DC38}/u.exec("\uD7FF\uDC38"),
+              ["\uDC38"]);
+assertEq(/\u{DC38}/u.exec("\uD800\uDC38"),
+         null);
+assertEq(/\u{DC38}/u.exec("\uDBFF\uDC38"),
+         null);
+assertEqArray(/\u{DC38}/u.exec("\uDC00\uDC38"),
+              ["\uDC38"]);
+
+// match after non-lead char
+assertEqArray(/\u{DC38}/u.exec("\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\u{DC38}/u.exec("A\uDC38"),
+              ["\uDC38"]);
+
+// ==== wrong patterns ====
+
+assertThrowsInstanceOf(() => eval(`/\\u{-1}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{0.0}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{G}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{{/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{110000}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{00110000}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{100000000000000000000000000000}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{   FFFF}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{FFFF   }/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{FF   FF}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{F F F F}/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u{100000001}/u`), SyntaxError);
+
+// surrogate pair with braced
+assertEq(/\u{D83D}\u{DC38}+/u.exec("\uD83D\uDC38\uDC38"),
+         null);
+assertEq(/\uD83D\u{DC38}+/u.exec("\uD83D\uDC38\uDC38"),
+         null);
+assertEq(/\u{D83D}\uDC38+/u.exec("\uD83D\uDC38\uDC38"),
+         null);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/unicode-lead-trail.js
@@ -0,0 +1,218 @@
+var BUGNUMBER = 1135377;
+var summary = "Implement RegExp unicode flag -- lead and trail patterns in RegExpUnicodeEscapeSequence.";
+
+print(BUGNUMBER + ": " + summary);
+
+// ==== standalone ====
+
+assertEqArray(/\uD83D\uDC38/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// no unicode flag
+assertEqArray(/\uD83D\uDC38/.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// RegExp constructor
+assertEqArray(new RegExp("\\uD83D\\uDC38", "u").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// RegExp constructor, no unicode flag
+assertEqArray(new RegExp("\\uD83D\\uDC38", "").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// ==== ? ====
+
+assertEqArray(/\uD83D\uDC38?/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38?/u.exec(""),
+              [""]);
+
+// lead-only target
+assertEqArray(/\uD83D\uDC38?/u.exec("\uD83D"),
+              [""]);
+
+// no unicode flag
+assertEqArray(/\uD83D\uDC38?/.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(/\uD83D\uDC38?/.exec(""),
+         null);
+
+assertEqArray(/\uD83D\uDC38?/.exec("\uD83D"),
+              ["\uD83D"]);
+
+// RegExp constructor
+assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec(""),
+              [""]);
+
+assertEqArray(new RegExp("\\uD83D\\uDC38?", "u").exec("\uD83D"),
+              [""]);
+
+// RegExp constructor, no unicode flag
+assertEqArray(new RegExp("\\uD83D\\uDC38?", "").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(new RegExp("\\uD83D\\uDC38?", "").exec(""),
+         null);
+
+assertEqArray(new RegExp("\\uD83D\\uDC38?", "").exec("\uD83D"),
+              ["\uD83D"]);
+
+// ==== + ====
+
+assertEqArray(/\uD83D\uDC38+/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38+/u.exec("\u{1F438}\u{1F438}"),
+              ["\u{1F438}\u{1F438}"]);
+assertEq(/\uD83D\uDC38+/u.exec(""),
+         null);
+
+// lead-only target
+assertEq(/\uD83D\uDC38+/u.exec("\uD83D"),
+         null);
+assertEqArray(/\uD83D\uDC38+/u.exec("\uD83D\uDC38\uDC38"),
+              ["\uD83D\uDC38"]);
+
+// no unicode flag
+assertEqArray(/\uD83D\uDC38+/.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38+/.exec("\u{1F438}\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(/\uD83D\uDC38+/.exec("\uD83D"),
+         null);
+assertEqArray(/\uD83D\uDC38+/.exec("\uD83D\uDC38\uDC38"),
+              ["\uD83D\uDC38\uDC38"]);
+assertEq(/\uD83D\uDC38+/.exec(""),
+         null);
+
+// ==== * ====
+
+assertEqArray(/\uD83D\uDC38*/u.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38*/u.exec("\u{1F438}\u{1F438}"),
+              ["\u{1F438}\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38*/u.exec(""),
+              [""]);
+
+// lead-only target
+assertEqArray(/\uD83D\uDC38*/u.exec("\uD83D"),
+              [""]);
+assertEqArray(/\uD83D\uDC38*/u.exec("\uD83D\uDC38\uDC38"),
+              ["\uD83D\uDC38"]);
+
+// no unicode flag
+assertEqArray(/\uD83D\uDC38*/.exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38*/.exec("\u{1F438}\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(/\uD83D\uDC38*/.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D\uDC38*/.exec("\uD83D\uDC38\uDC38"),
+              ["\uD83D\uDC38\uDC38"]);
+assertEq(/\uD83D\uDC38*/.exec(""),
+         null);
+
+// ==== lead-only ====
+
+// match only non-surrogate pair
+assertEqArray(/\uD83D/u.exec("\uD83D\uDBFF"),
+              ["\uD83D"]);
+assertEq(/\uD83D/u.exec("\uD83D\uDC00"),
+         null);
+assertEq(/\uD83D/u.exec("\uD83D\uDFFF"),
+         null);
+assertEqArray(/\uD83D/u.exec("\uD83D\uE000"),
+              ["\uD83D"]);
+
+// match before non-trail char
+assertEqArray(/\uD83D/u.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D/u.exec("\uD83DA"),
+              ["\uD83D"]);
+
+// no unicode flag
+assertEqArray(/\uD83D/.exec("\uD83D\uDBFF"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D/.exec("\uD83D\uDC00"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D/.exec("\uD83D\uDFFF"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D/.exec("\uD83D\uE000"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D/.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D/.exec("\uD83DA"),
+              ["\uD83D"]);
+
+// ==== trail-only ====
+
+// match only non-surrogate pair
+assertEqArray(/\uDC38/u.exec("\uD7FF\uDC38"),
+              ["\uDC38"]);
+assertEq(/\uDC38/u.exec("\uD800\uDC38"),
+         null);
+assertEq(/\uDC38/u.exec("\uDBFF\uDC38"),
+         null);
+assertEqArray(/\uDC38/u.exec("\uDC00\uDC38"),
+              ["\uDC38"]);
+
+// match after non-lead char
+assertEqArray(/\uDC38/u.exec("\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\uDC38/u.exec("A\uDC38"),
+              ["\uDC38"]);
+
+// no unicode flag
+assertEqArray(/\uDC38/.exec("\uD7FF\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\uDC38/.exec("\uD800\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\uDC38/.exec("\uDBFF\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\uDC38/.exec("\uDC00\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\uDC38/.exec("\uDC38"),
+              ["\uDC38"]);
+assertEqArray(/\uDC38/.exec("A\uDC38"),
+              ["\uDC38"]);
+
+// ==== invalid trail ====
+
+assertEqArray(/\uD83D\u3042*/u.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D\u3042*/u.exec("\uD83D\u3042"),
+              ["\uD83D\u3042"]);
+assertEqArray(/\uD83D\u3042*/u.exec("\uD83D\u3042\u3042"),
+              ["\uD83D\u3042\u3042"]);
+
+assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D\u3042"),
+              ["\uD83D\u3042"]);
+assertEqArray(/\uD83D\u{3042}*/u.exec("\uD83D\u3042\u3042"),
+              ["\uD83D\u3042\u3042"]);
+
+assertEqArray(/\uD83DA*/u.exec("\uD83D"),
+              ["\uD83D"]);
+assertEqArray(/\uD83DA*/u.exec("\uD83DA"),
+              ["\uD83DA"]);
+assertEqArray(/\uD83DA*/u.exec("\uD83DAA"),
+              ["\uD83DAA"]);
+
+// ==== wrong patterns ====
+
+assertThrowsInstanceOf(() => eval(`/\\u/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u0/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u00/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u000/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u000G/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\u0.00/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\uD83D\\u/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\uD83D\\u0/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\uD83D\\u00/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\uD83D\\u000/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\uD83D\\u000G/u`), SyntaxError);
+assertThrowsInstanceOf(() => eval(`/\\uD83D\\u0.00/u`), SyntaxError);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/RegExp/unicode-raw.js
@@ -0,0 +1,139 @@
+var BUGNUMBER = 1135377;
+var summary = "Implement RegExp unicode flag -- raw unicode.";
+
+print(BUGNUMBER + ": " + summary);
+
+// ==== standalone ====
+// Template literal: \uXXXX becomes a raw character in the pattern, \\uXXXX stays an escape.
+assertEqArray(eval(`/\uD83D\uDC38/u`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// no unicode flag
+assertEqArray(eval(`/\uD83D\uDC38/`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// escaped (lead): with /u, an escaped lead and a raw trail do not form a pair
+assertEq(eval(`/\\uD83D\uDC38/u`).exec("\u{1F438}"),
+         null);
+assertEq(eval(`/\\u{D83D}\uDC38/u`).exec("\u{1F438}"),
+         null);
+
+// escaped (trail): with /u, a raw lead and an escaped trail do not form a pair
+assertEq(eval(`/\uD83D\\uDC38/u`).exec("\u{1F438}"),
+         null);
+assertEq(eval(`/\uD83D\\u{DC38}/u`).exec("\u{1F438}"),
+         null);
+
+// escaped (lead), no unicode flag
+assertEqArray(eval(`/\\uD83D\uDC38/`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// escaped (trail), no unicode flag
+assertEqArray(eval(`/\uD83D\\uDC38/`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// ==== RegExp constructor ====
+// Same cases as above, with the pattern passed as a string instead of a regexp literal.
+assertEqArray(new RegExp("\uD83D\uDC38", "u").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// no unicode flag
+assertEqArray(new RegExp("\uD83D\uDC38", "").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// escaped (lead)
+assertEq(new RegExp("\\uD83D\uDC38", "u").exec("\u{1F438}"),
+         null);
+assertEq(new RegExp("\\u{D83D}\uDC38", "u").exec("\u{1F438}"),
+         null);
+
+// escaped (trail)
+assertEq(new RegExp("\uD83D\\uDC38", "u").exec("\u{1F438}"),
+         null);
+assertEq(new RegExp("\uD83D\\u{DC38}", "u").exec("\u{1F438}"),
+         null);
+
+// escaped (lead), no unicode flag
+assertEqArray(new RegExp("\\uD83D\uDC38", "").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// escaped (trail), no unicode flag
+assertEqArray(new RegExp("\uD83D\\uDC38", "").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+
+// ==== ? ====
+// For a raw pair, /u makes the quantifier apply to the whole pair; otherwise to the trail only.
+assertEqArray(eval(`/\uD83D\uDC38?/u`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(eval(`/\uD83D\uDC38?/u`).exec(""),
+              [""]);
+
+assertEqArray(eval(`/\uD83D\uDC38?/u`).exec("\uD83D"),
+              [""]);
+
+// no unicode flag
+assertEqArray(eval(`/\uD83D\uDC38?/`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(eval(`/\uD83D\uDC38?/`).exec(""),
+         null);
+
+assertEqArray(eval(`/\uD83D\uDC38?/`).exec("\uD83D"),
+              ["\uD83D"]);
+
+// escaped (lead)
+assertEq(eval(`/\\uD83D\uDC38?/u`).exec("\u{1F438}"),
+         null);
+assertEq(eval(`/\\uD83D\uDC38?/u`).exec(""),
+         null);
+
+assertEqArray(eval(`/\\uD83D\uDC38?/u`).exec("\uD83D"),
+              ["\uD83D"]);
+
+// escaped (trail)
+assertEq(eval(`/\uD83D\\uDC38?/u`).exec("\u{1F438}"),
+         null);
+assertEq(eval(`/\uD83D\\uDC38?/u`).exec(""),
+         null);
+
+assertEqArray(eval(`/\uD83D\\uDC38?/u`).exec("\uD83D"),
+              ["\uD83D"]);
+
+// escaped (lead), no unicode flag
+assertEqArray(eval(`/\\uD83D\uDC38?/`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(eval(`/\\uD83D\uDC38?/`).exec(""),
+         null);
+
+assertEqArray(eval(`/\\uD83D\uDC38?/`).exec("\uD83D"),
+              ["\uD83D"]);
+
+// escaped (trail), no unicode flag
+assertEqArray(eval(`/\uD83D\\uDC38?/`).exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(eval(`/\uD83D\\uDC38?/`).exec(""),
+         null);
+
+assertEqArray(eval(`/\uD83D\\uDC38?/`).exec("\uD83D"),
+              ["\uD83D"]);
+
+// ==== RegExp constructor, ? ====
+
+assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec(""),
+              [""]);
+
+assertEqArray(new RegExp("\uD83D\uDC38?", "u").exec("\uD83D"),
+              [""]);
+
+// no unicode flag
+assertEqArray(new RegExp("\uD83D\uDC38?", "").exec("\u{1F438}"),
+              ["\u{1F438}"]);
+assertEq(new RegExp("\uD83D\uDC38?", "").exec(""),
+         null);
+
+assertEqArray(new RegExp("\uD83D\uDC38?", "").exec("\uD83D"),
+              ["\uD83D"]);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
--- a/js/src/vm/Unicode.h
+++ b/js/src/vm/Unicode.h
@@ -229,12 +229,70 @@ CanUpperCase(char16_t ch)
 inline bool
 CanLowerCase(char16_t ch)
 {
     if (ch < 128)
         return ch >= 'A' && ch <= 'Z';
     return CharInfo(ch).lowerCase != 0;
 }
 
+/*
+ * Boundary values for UTF-16 surrogate code units and for code points
+ * outside the Basic Multilingual Plane, used by the helpers below.
+ */
+const size_t LeadSurrogateMin = 0xD800;
+const size_t LeadSurrogateMax = 0xDBFF;
+const size_t TrailSurrogateMin = 0xDC00;
+const size_t TrailSurrogateMax = 0xDFFF;
+const size_t UTF16Max = 0xFFFF;
+const size_t NonBMPMin = 0x10000;
+const size_t NonBMPMax = 0x10FFFF;
+
+/* Return true if |value| lies in [LeadSurrogateMin, LeadSurrogateMax]. */
+inline bool
+IsLeadSurrogate(size_t value)
+{
+    return LeadSurrogateMin <= value && value <= LeadSurrogateMax;
+}
+
+/* Return true if |value| lies in [TrailSurrogateMin, TrailSurrogateMax]. */
+inline bool
+IsTrailSurrogate(size_t value)
+{
+    return TrailSurrogateMin <= value && value <= TrailSurrogateMax;
+}
+
+/*
+ * Split the non-BMP code point |cp| into a surrogate pair, storing the two
+ * code units in |*lead| and |*trail|.  |cp| must be within
+ * [NonBMPMin, NonBMPMax].
+ */
+inline void
+UTF16Encode(size_t cp, size_t* lead, size_t* trail)
+{
+    MOZ_ASSERT(cp >= NonBMPMin && cp <= NonBMPMax);
+
+    // The upper bits of the offset select the lead surrogate and the low
+    // ten bits select the trail surrogate.
+    const size_t offset = cp - NonBMPMin;
+    *lead = LeadSurrogateMin + (offset >> 10);
+    *trail = TrailSurrogateMin + (offset & 0x3FF);
+}
+
+/*
+ * Combine a surrogate pair into the code point it encodes; the inverse of
+ * UTF16Encode.  |lead| and |trail| must be lead and trail surrogates.
+ */
+inline size_t
+UTF16Decode(size_t lead, size_t trail)
+{
+    MOZ_ASSERT(IsLeadSurrogate(lead));
+    MOZ_ASSERT(IsTrailSurrogate(trail));
+
+    const size_t high = lead - LeadSurrogateMin;
+    const size_t low = trail - TrailSurrogateMin;
+    return NonBMPMin + (high << 10) + low;
+}
+
 } /* namespace unicode */
 } /* namespace js */
 
 #endif /* vm_Unicode_h */
--- a/js/src/vm/Xdr.h
+++ b/js/src/vm/Xdr.h
@@ -24,21 +24,21 @@ namespace js {
  * versions.  If deserialization fails, the data should be invalidated if
  * possible.
  *
  * When you change this, run make_opcode_doc.py and copy the new output into
  * this wiki page:
  *
  *  https://developer.mozilla.org/en-US/docs/SpiderMonkey/Internals/Bytecode
  */
-static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 334;
+static const uint32_t XDR_BYTECODE_VERSION_SUBTRAHEND = 335;
 static const uint32_t XDR_BYTECODE_VERSION =
     uint32_t(0xb973c0de - XDR_BYTECODE_VERSION_SUBTRAHEND);
 
-static_assert(JSErr_Limit == 421,
+static_assert(JSErr_Limit == 424,
               "GREETINGS, POTENTIAL SUBTRAHEND INCREMENTER! If you added or "
               "removed MSG_DEFs from js.msg, you should increment "
               "XDR_BYTECODE_VERSION_SUBTRAHEND and update this assertion's "
               "expected JSErr_Limit value.");
 
 class XDRBuffer {
   public:
     explicit XDRBuffer(JSContext* cx)