Bug 1314037 - Part 1: Use uniform types for UTF-16 code units and code points. r=arai
☠☠ backed out by 0cb96d5b5a57 ☠ ☠
author: André Bargull <andre.bargull@gmail.com>
Mon, 31 Oct 2016 07:15:11 -0700
changeset 320530 c9b3a12523634393dd72332a75005d221b5f42bb
parent 320529 691b2d09dd096065630e18a89a91d18d8ada0264
child 320531 0223902c23532c4f51e57ffeea85fbed8b9c8cb9
push id: 83404
push user: cbook@mozilla.com
push date: Wed, 02 Nov 2016 11:29:54 +0000
treeherder: mozilla-inbound@cdb6fdbec002 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: arai
bugs: 1314037
milestone: 52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1314037 - Part 1: Use uniform types for UTF-16 code units and code points. r=arai
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
js/src/irregexp/RegExpParser.cpp
js/src/irregexp/RegExpParser.h
js/src/jsstr.cpp
js/src/vm/Unicode.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -23,20 +23,20 @@
 #include "jsnum.h"
 
 #include "frontend/BytecodeCompiler.h"
 #include "js/CharacterEncoding.h"
 #include "js/UniquePtr.h"
 #include "vm/HelperThreads.h"
 #include "vm/Keywords.h"
 #include "vm/StringBuffer.h"
+#include "vm/Unicode.h"
 
 using namespace js;
 using namespace js::frontend;
-using namespace js::unicode;
 
 using mozilla::Maybe;
 using mozilla::PodAssign;
 using mozilla::PodCopy;
 using mozilla::PodZero;
 
 struct KeywordInfo {
     const char* chars;         // C string with keyword text
@@ -101,22 +101,22 @@ FindKeyword(JSLinearString* str)
 
 template <typename CharT>
 static bool
 IsIdentifier(const CharT* chars, size_t length)
 {
     if (length == 0)
         return false;
 
-    if (!IsIdentifierStart(*chars))
+    if (!unicode::IsIdentifierStart(char16_t(*chars)))
         return false;
 
     const CharT* end = chars + length;
     while (++chars != end) {
-        if (!IsIdentifierPart(*chars))
+        if (!unicode::IsIdentifierPart(char16_t(*chars)))
             return false;
     }
 
     return true;
 }
 
 bool
 frontend::IsIdentifier(JSLinearString* str)
@@ -752,51 +752,51 @@ TokenStream::reportAsmJSError(uint32_t o
     unsigned flags = options().throwOnAsmJSValidationFailureOption
                      ? JSREPORT_ERROR
                      : JSREPORT_WARNING;
     reportCompileErrorNumberVA(offset, flags, errorNumber, args);
     va_end(args);
 }
 
 // We have encountered a '\': check for a Unicode escape sequence after it.
-// Return 'true' and the character code value (by value) if we found a
+// Return 'true' and the character code point (by value) if we found a
 // Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
 // advance along the buffer.
 bool
-TokenStream::peekUnicodeEscape(int* result)
+TokenStream::peekUnicodeEscape(uint32_t* codePoint)
 {
     char16_t cp[5];
 
     if (peekChars(5, cp) && cp[0] == 'u' &&
         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
     {
-        *result = (((((JS7_UNHEX(cp[1]) << 4)
+        *codePoint = (((((JS7_UNHEX(cp[1]) << 4)
                 + JS7_UNHEX(cp[2])) << 4)
               + JS7_UNHEX(cp[3])) << 4)
             + JS7_UNHEX(cp[4]);
         return true;
     }
     return false;
 }
 
 bool
-TokenStream::matchUnicodeEscapeIdStart(int32_t* cp)
+TokenStream::matchUnicodeEscapeIdStart(uint32_t* codePoint)
 {
-    if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
+    if (peekUnicodeEscape(codePoint) && unicode::IsIdentifierStart(*codePoint)) {
         skipChars(5);
         return true;
     }
     return false;
 }
 
 bool
-TokenStream::matchUnicodeEscapeIdent(int32_t* cp)
+TokenStream::matchUnicodeEscapeIdent(uint32_t* codePoint)
 {
-    if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
+    if (peekUnicodeEscape(codePoint) && unicode::IsIdentifierPart(*codePoint)) {
         skipChars(5);
         return true;
     }
     return false;
 }
 
 // Helper function which returns true if the first length(q) characters in p are
 // the same as the characters in q.
@@ -841,17 +841,17 @@ TokenStream::getDirective(bool isMultili
     if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
         if (shouldWarnDeprecated &&
             !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
             return false;
 
         skipChars(directiveLength);
         tokenbuf.clear();
 
-        while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
+        while ((c = peekChar()) && c != EOF && !unicode::IsSpaceOrBOM2(c)) {
             getChar();
             // Debugging directives can occur in both single- and multi-line
             // comments. If we're currently inside a multi-line comment, we also
             // need to recognize multi-line comment terminators.
             if (isMultiline && c == '*' && peekChar() == '/') {
                 ungetChar('*');
                 break;
             }
@@ -936,24 +936,25 @@ IsTokenSane(Token* tp)
 
     return true;
 }
 #endif
 
 bool
 TokenStream::putIdentInTokenbuf(const char16_t* identStart)
 {
-    int32_t c, qc;
+    int32_t c;
+    uint32_t qc;
     const char16_t* tmp = userbuf.addressOfNextRawChar();
     userbuf.setAddressOfNextRawChar(identStart);
 
     tokenbuf.clear();
     for (;;) {
         c = getCharIgnoreEOL();
-        if (!IsIdentifierPart(c)) {
+        if (!unicode::IsIdentifierPart(char16_t(c))) {
             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                 break;
             c = qc;
         }
         if (!tokenbuf.append(c)) {
             userbuf.setAddressOfNextRawChar(tmp);
             return false;
         }
@@ -1059,17 +1060,18 @@ static const uint8_t firstCharKinds[] = 
 #undef _______
 
 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
               "Elements of firstCharKinds[] are too small");
 
 bool
 TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
 {
-    int c, qc;
+    int c;
+    uint32_t qc;
     Token* tp;
     FirstCharKind c1kind;
     const char16_t* numStart;
     bool hasExp;
     DecimalPoint decimalPoint;
     const char16_t* identStart;
     bool hadUnicodeEscape;
 
@@ -1090,34 +1092,34 @@ TokenStream::getTokenInternal(TokenKind*
     }
 
     c = userbuf.getRawChar();
     MOZ_ASSERT(c != EOF);
 
     // Chars not in the range 0..127 are rare.  Getting them out of the way
     // early allows subsequent checking to be faster.
     if (MOZ_UNLIKELY(c >= 128)) {
-        if (IsSpaceOrBOM2(c)) {
+        if (unicode::IsSpaceOrBOM2(c)) {
             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                 updateLineInfoForEOL();
                 updateFlagsForEOL();
             }
 
             goto retry;
         }
 
         tp = newToken(-1);
 
         static_assert('$' < 128,
                       "IdentifierStart contains '$', but as !IsLetter('$'), "
                       "ensure that '$' is never handled here");
         static_assert('_' < 128,
                       "IdentifierStart contains '_', but as !IsLetter('_'), "
                       "ensure that '_' is never handled here");
-        if (IsLetter(c)) {
+        if (unicode::IsLetter(c)) {
             identStart = userbuf.addressOfNextRawChar() - 1;
             hadUnicodeEscape = false;
             goto identifier;
         }
 
         goto badchar;
     }
 
@@ -1163,17 +1165,17 @@ TokenStream::getTokenInternal(TokenKind*
         identStart = userbuf.addressOfNextRawChar() - 1;
         hadUnicodeEscape = false;
 
       identifier:
         for (;;) {
             c = getCharIgnoreEOL();
             if (c == EOF)
                 break;
-            if (!IsIdentifierPart(c)) {
+            if (!unicode::IsIdentifierPart(char16_t(c))) {
                 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                     break;
                 hadUnicodeEscape = true;
             }
         }
         ungetCharIgnoreEOL(c);
 
         // Identifiers containing no Unicode escapes can be processed directly
@@ -1257,17 +1259,17 @@ TokenStream::getTokenInternal(TokenKind*
                 goto error;
             }
             do {
                 c = getCharIgnoreEOL();
             } while (JS7_ISDEC(c));
         }
         ungetCharIgnoreEOL(c);
 
-        if (c != EOF && IsIdentifierStart(c)) {
+        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
             reportError(JSMSG_IDSTART_AFTER_NUMBER);
             goto error;
         }
 
         // Unlike identifiers and strings, numbers cannot contain escaped
         // chars, so we don't need to use tokenbuf.  Instead we can just
         // convert the char16_t characters in userbuf to the numeric value.
         double dval;
@@ -1364,17 +1366,17 @@ TokenStream::getTokenInternal(TokenKind*
             }
         } else {
             // '0' not followed by 'x', 'X' or a digit;  scan as a decimal number.
             numStart = userbuf.addressOfNextRawChar() - 1;
             goto decimal;
         }
         ungetCharIgnoreEOL(c);
 
-        if (c != EOF && IsIdentifierStart(c)) {
+        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
             reportError(JSMSG_IDSTART_AFTER_NUMBER);
             goto error;
         }
 
         double dval;
         const char16_t* dummy;
         if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
             goto error;
@@ -1669,17 +1671,17 @@ TokenStream::getBracedUnicode(uint32_t* 
                 return false;
             break;
         }
 
         if (!JS7_ISHEX(c))
             return false;
 
         code = (code << 4) | JS7_UNHEX(c);
-        if (code > 0x10FFFF)
+        if (code > unicode::NonBMPMax)
             return false;
         first = false;
     }
 
     *cp = code;
     return true;
 }
 
@@ -1722,23 +1724,23 @@ TokenStream::getStringOrTemplateToken(in
               case 'u': {
                 if (peekChar() == '{') {
                     uint32_t code;
                     if (!getBracedUnicode(&code)) {
                         reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
                         return false;
                     }
 
-                    MOZ_ASSERT(code <= 0x10FFFF);
-                    if (code < 0x10000) {
+                    MOZ_ASSERT(code <= unicode::NonBMPMax);
+                    if (code < unicode::NonBMPMin) {
                         c = code;
                     } else {
-                        if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800))
+                        if (!tokenbuf.append(unicode::LeadSurrogate(code)))
                             return false;
-                        c = ((code - 0x10000) % 1024) + 0xDC00;
+                        c = unicode::TrailSurrogate(code);
                     }
                     break;
                 }
 
                 char16_t cp[4];
                 if (peekChars(4, cp) &&
                     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]))
                 {
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -943,19 +943,19 @@ class MOZ_STACK_CLASS TokenStream
     MOZ_MUST_USE bool getBracedUnicode(uint32_t* code);
     MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp);
 
     int32_t getChar();
     int32_t getCharIgnoreEOL();
     void ungetChar(int32_t c);
     void ungetCharIgnoreEOL(int32_t c);
     Token* newToken(ptrdiff_t adjust);
-    bool peekUnicodeEscape(int32_t* c);
-    bool matchUnicodeEscapeIdStart(int32_t* c);
-    bool matchUnicodeEscapeIdent(int32_t* c);
+    bool peekUnicodeEscape(uint32_t* codePoint);
+    bool matchUnicodeEscapeIdStart(uint32_t* codePoint);
+    bool matchUnicodeEscapeIdent(uint32_t* codePoint);
     bool peekChars(int n, char16_t* cp);
 
     MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
     MOZ_MUST_USE bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
                                    const char* directive, int directiveLength,
                                    const char* errorMsgPragma,
                                    UniquePtr<char16_t[], JS::FreePolicy>* destination);
     MOZ_MUST_USE bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -271,17 +271,17 @@ HexValue(uint32_t c)
     c -= '0';
     if (static_cast<unsigned>(c) <= 9) return c;
     c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
     if (static_cast<unsigned>(c) <= 5) return c + 10;
     return -1;
 }
 
 template <typename CharT>
-size_t
+widechar
 RegExpParser<CharT>::ParseOctalLiteral()
 {
     MOZ_ASSERT('0' <= current() && current() <= '7');
     // For compatibility with some other browsers (not all), we parse
     // up to three octal digits with a value below 256.
     widechar value = current() - '0';
     Advance();
     if ('0' <= current() && current() <= '7') {
@@ -292,17 +292,17 @@ RegExpParser<CharT>::ParseOctalLiteral()
             Advance();
         }
     }
     return value;
 }
 
 template <typename CharT>
 bool
-RegExpParser<CharT>::ParseHexEscape(int length, size_t* value)
+RegExpParser<CharT>::ParseHexEscape(int length, widechar* value)
 {
     const CharT* start = position();
     uint32_t val = 0;
     bool done = false;
     for (int i = 0; !done; i++) {
         widechar c = current();
         int d = HexValue(c);
         if (d < 0) {
@@ -316,17 +316,17 @@ RegExpParser<CharT>::ParseHexEscape(int 
         }
     }
     *value = val;
     return true;
 }
 
 template <typename CharT>
 bool
-RegExpParser<CharT>::ParseBracedHexEscape(size_t* value)
+RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
 {
     MOZ_ASSERT(current() == '{');
     Advance();
 
     bool first = true;
     uint32_t code = 0;
     while (true) {
         widechar c = current();
@@ -358,17 +358,17 @@ RegExpParser<CharT>::ParseBracedHexEscap
     }
 
     *value = code;
     return true;
 }
 
 template <typename CharT>
 bool
-RegExpParser<CharT>::ParseTrailSurrogate(size_t* value)
+RegExpParser<CharT>::ParseTrailSurrogate(widechar* value)
 {
     if (current() != '\\')
         return false;
 
     const CharT* start = position();
     Advance();
     if (current() != 'u') {
         Reset(start);
@@ -536,43 +536,43 @@ RegExpParser<CharT>::ParseClassCharacter
         // For compatibility, outside of unicode mode, we interpret a decimal
         // escape that isn't a back reference (and therefore either \0 or not
         // valid according to the specification) as a 1..3 digit octal
         // character code.
         *code = ParseOctalLiteral();
         return true;
       case 'x': {
         Advance();
-        size_t value;
+        widechar value;
         if (ParseHexEscape(2, &value)) {
             *code = value;
             return true;
         }
         if (unicode_) {
             ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
             return false;
         }
         // If \x is not followed by a two-digit hexadecimal, treat it
         // as an identity escape in non-unicode mode.
         *code = 'x';
         return true;
       }
       case 'u': {
         Advance();
-        size_t value;
+        widechar value;
         if (unicode_) {
             if (current() == '{') {
                 if (!ParseBracedHexEscape(&value))
                     return false;
                 *code = value;
                 return true;
             }
             if (ParseHexEscape(4, &value)) {
                 if (unicode::IsLeadSurrogate(value)) {
-                    size_t trail;
+                    widechar trail;
                     if (ParseTrailSurrogate(&trail)) {
                         *code = unicode::UTF16Decode(value, trail);
                         return true;
                     }
                 }
                 *code = value;
                 return true;
             }
@@ -777,20 +777,20 @@ NegateUnicodeRanges(LifoAlloc* alloc, In
     //   SWAP(result_ranges, tmp_ranges)
     // The last SWAP is just for simplicity of the loop.
     for (size_t i = 0; i < (*ranges)->length(); i++) {
         result_ranges->clear();
 
         const RangeType& range = (**ranges)[i];
         for (size_t j = 0; j < tmp_ranges->length(); j++) {
             const RangeType& tmpRange = (*tmp_ranges)[j];
-            size_t from1 = tmpRange.from();
-            size_t to1 = tmpRange.to();
-            size_t from2 = range.from();
-            size_t to2 = range.to();
+            auto from1 = tmpRange.from();
+            auto to1 = tmpRange.to();
+            auto from2 = range.from();
+            auto to2 = range.to();
 
             if (from1 < from2) {
                 if (to1 < from2) {
                     result_ranges->append(tmpRange);
                 } else if (to1 <= to2) {
                     result_ranges->append(RangeType::Range(from1, from2 - 1));
                 } else {
                     result_ranges->append(RangeType::Range(from1, from2 - 1));
@@ -921,18 +921,18 @@ UnicodeRangesAtom(LifoAlloc* alloc,
 
     for (size_t i = 0; i < wide_ranges->length(); i++) {
         if (added)
             builder->NewAlternative();
 
         const WideCharRange& range = (*wide_ranges)[i];
         widechar from = range.from();
         widechar to = range.to();
-        size_t from_lead, from_trail;
-        size_t to_lead, to_trail;
+        char16_t from_lead, from_trail;
+        char16_t to_lead, to_trail;
 
         unicode::UTF16Encode(from, &from_lead, &from_trail);
         if (from == to) {
             builder->AddCharacter(from_lead);
             builder->AddCharacter(from_trail);
         } else {
             unicode::UTF16Encode(to, &to_lead, &to_trail);
             if (from_lead == to_lead) {
@@ -1631,17 +1631,17 @@ RegExpParser<CharT>::ParseDisjunction()
                     Advance(2);
                     if (IsDecimalDigit(current()))
                         return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
                     builder->AddCharacter(0);
                     break;
                 }
 
                 Advance();
-                size_t octal = ParseOctalLiteral();
+                widechar octal = ParseOctalLiteral();
                 builder->AddCharacter(octal);
                 break;
               }
                 // ControlEscape :: one of
                 //   f n r t v
               case 'f':
                 Advance(2);
                 builder->AddCharacter('\f');
@@ -1679,48 +1679,48 @@ RegExpParser<CharT>::ParseDisjunction()
                 } else {
                     Advance(2);
                     builder->AddCharacter(controlLetter & 0x1f);
                 }
                 break;
               }
               case 'x': {
                 Advance(2);
-                size_t value;
+                widechar value;
                 if (ParseHexEscape(2, &value)) {
                     builder->AddCharacter(value);
                 } else {
                     if (unicode_)
                         return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
                     builder->AddCharacter('x');
                 }
                 break;
               }
               case 'u': {
                 Advance(2);
-                size_t value;
+                widechar value;
                 if (unicode_) {
                     if (current() == '{') {
                         if (!ParseBracedHexEscape(&value))
                             return nullptr;
                         if (unicode::IsLeadSurrogate(value)) {
                             builder->AddAtom(LeadSurrogateAtom(alloc, value));
                         } else if (unicode::IsTrailSurrogate(value)) {
                             builder->AddAtom(TrailSurrogateAtom(alloc, value));
                         } else if (value >= unicode::NonBMPMin) {
-                            size_t lead, trail;
+                            char16_t lead, trail;
                             unicode::UTF16Encode(value, &lead, &trail);
                             builder->AddAtom(SurrogatePairAtom(alloc, lead, trail,
                                                                ignore_case_));
                         } else {
                             builder->AddCharacter(value);
                         }
                     } else if (ParseHexEscape(4, &value)) {
                         if (unicode::IsLeadSurrogate(value)) {
-                            size_t trail;
+                            widechar trail;
                             if (ParseTrailSurrogate(&trail)) {
                                 builder->AddAtom(SurrogatePairAtom(alloc, value, trail,
                                                                    ignore_case_));
                             } else {
                                 builder->AddAtom(LeadSurrogateAtom(alloc, value));
                             }
                         } else if (unicode::IsTrailSurrogate(value)) {
                             builder->AddAtom(TrailSurrogateAtom(alloc, value));
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -191,23 +191,23 @@ class RegExpParser
     // Tries to parse the input as a single escaped character.  If successful
     // it stores the result in the output parameter and returns true.
     // Otherwise it throws an error and returns false.  The character must not
     // be 'b' or 'B' since they are usually handled specially.
     bool ParseClassCharacterEscape(widechar* code);
 
     // Checks whether the following is a length-digit hexadecimal number,
     // and sets the value if it is.
-    bool ParseHexEscape(int length, size_t* value);
+    bool ParseHexEscape(int length, widechar* value);
 
-    bool ParseBracedHexEscape(size_t* value);
-    bool ParseTrailSurrogate(size_t* value);
+    bool ParseBracedHexEscape(widechar* value);
+    bool ParseTrailSurrogate(widechar* value);
     bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
 
-    size_t ParseOctalLiteral();
+    widechar ParseOctalLiteral();
 
     // Tries to parse the input as a back reference.  If successful it
     // stores the result in the output parameter and returns true.  If
     // it fails it will push back the characters read so the same characters
     // can be reparsed.
     bool ParseBackReferenceIndex(int* index_out);
 
     bool ParseClassAtom(char16_t* char_class, widechar *value);
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -41,25 +41,25 @@
 #endif
 #include "vm/GlobalObject.h"
 #include "vm/Interpreter.h"
 #include "vm/Opcodes.h"
 #include "vm/Printer.h"
 #include "vm/RegExpObject.h"
 #include "vm/RegExpStatics.h"
 #include "vm/StringBuffer.h"
+#include "vm/Unicode.h"
 
 #include "vm/Interpreter-inl.h"
 #include "vm/String-inl.h"
 #include "vm/StringObject-inl.h"
 #include "vm/TypeInference-inl.h"
 
 using namespace js;
 using namespace js::gc;
-using namespace js::unicode;
 
 using JS::Symbol;
 using JS::SymbolCode;
 using JS::ToInt32;
 using JS::ToUint32;
 
 using mozilla::AssertedCast;
 using mozilla::CheckedInt;
@@ -2757,55 +2757,26 @@ js::str_fromCharCode_one_arg(JSContext* 
     uint16_t ucode;
 
     if (!ToUint16(cx, code, &ucode))
         return false;
 
     return CodeUnitToString(cx, ucode, rval);
 }
 
-static inline bool
-IsSupplementary(uint32_t codePoint)
-{
-    return codePoint > 0xFFFF;
-}
-
-static inline char16_t
-LeadSurrogate(uint32_t codePoint)
-{
-    return char16_t((codePoint >> 10) + 0xD7C0);
-}
-
-static inline char16_t
-TrailSurrogate(uint32_t codePoint)
-{
-    return char16_t((codePoint & 0x3FF) | 0xDC00);
-}
-
-static inline void
-UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
-{
-    if (!IsSupplementary(codePoint)) {
-        elements[(*index)++] = char16_t(codePoint);
-    } else {
-        elements[(*index)++] = LeadSurrogate(codePoint);
-        elements[(*index)++] = TrailSurrogate(codePoint);
-    }
-}
-
 static MOZ_ALWAYS_INLINE bool
 ToCodePoint(JSContext* cx, HandleValue code, uint32_t* codePoint)
 {
     // String.fromCodePoint, Steps 5.a-b.
     double nextCP;
     if (!ToNumber(cx, code, &nextCP))
         return false;
 
     // String.fromCodePoint, Steps 5.c-d.
-    if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > 0x10FFFF) {
+    if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > unicode::NonBMPMax) {
         ToCStringBuf cbuf;
         if (char* numStr = NumberToCString(cx, &cbuf, nextCP))
             JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_NOT_A_CODEPOINT, numStr);
         return false;
     }
 
     *codePoint = uint32_t(nextCP);
     return true;
@@ -2817,20 +2788,20 @@ js::str_fromCodePoint_one_arg(JSContext*
     // Steps 1-4 (omitted).
 
     // Steps 5.a-d.
     uint32_t codePoint;
     if (!ToCodePoint(cx, code, &codePoint))
         return false;
 
     // Steps 5.e, 6.
-    if (!IsSupplementary(codePoint))
+    if (!unicode::IsSupplementary(codePoint))
         return CodeUnitToString(cx, uint16_t(codePoint), rval);
 
-    char16_t chars[] = { LeadSurrogate(codePoint), TrailSurrogate(codePoint) };
+    char16_t chars[] = { unicode::LeadSurrogate(codePoint), unicode::TrailSurrogate(codePoint) };
     JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
     if (!str)
         return false;
 
     rval.setString(str);
     return true;
 }
 
@@ -2848,17 +2819,17 @@ str_fromCodePoint_few_args(JSContext* cx
     unsigned length = 0;
     for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
         // Steps 5.a-d.
         uint32_t codePoint;
         if (!ToCodePoint(cx, args[nextIndex], &codePoint))
             return false;
 
         // Step 5.e.
-        UTF16Encode(codePoint, elements, &length);
+        unicode::UTF16Encode(codePoint, elements, &length);
     }
 
     // Step 6.
     JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
     if (!str)
         return false;
 
     args.rval().setString(str);
@@ -2899,17 +2870,17 @@ js::str_fromCodePoint(JSContext* cx, uns
         // Steps 5.a-d.
         uint32_t codePoint;
         if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
             js_free(elements);
             return false;
         }
 
         // Step 5.e.
-        UTF16Encode(codePoint, elements, &length);
+        unicode::UTF16Encode(codePoint, elements, &length);
     }
     elements[length] = 0;
 
     // Step 6.
     JSString* str = NewString<CanGC>(cx, elements, length);
     if (!str) {
         js_free(elements);
         return false;
@@ -3608,32 +3579,32 @@ Encode(StringBuffer& sb, const CharT* ch
     hexBuf[3] = 0;
 
     for (size_t k = 0; k < length; k++) {
         char16_t c = chars[k];
         if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
             if (!sb.append(c))
                 return Encode_Failure;
         } else {
-            if (c >= 0xDC00 && c <= 0xDFFF)
+            if (unicode::IsTrailSurrogate(c))
                 return Encode_BadUri;
 
             uint32_t v;
-            if (c < 0xD800 || c > 0xDBFF) {
+            if (!unicode::IsLeadSurrogate(c)) {
                 v = c;
             } else {
                 k++;
                 if (k == length)
                     return Encode_BadUri;
 
                 char16_t c2 = chars[k];
-                if (c2 < 0xDC00 || c2 > 0xDFFF)
+                if (!unicode::IsTrailSurrogate(c2))
                     return Encode_BadUri;
 
-                v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
+                v = unicode::UTF16Decode(c, c2);
             }
             uint8_t utf8buf[4];
             size_t L = OneUcs4ToUtf8Char(utf8buf, v);
             for (size_t j = 0; j < L; j++) {
                 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
                 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
                 if (!sb.append(hexBuf, 3))
                     return Encode_Failure;
@@ -3723,25 +3694,24 @@ Decode(StringBuffer& sb, const CharT* ch
                     B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
                     if ((B & 0xC0) != 0x80)
                         return Decode_BadUri;
 
                     k += 2;
                     octets[j] = char(B);
                 }
                 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
-                if (v >= 0x10000) {
-                    v -= 0x10000;
-                    if (v > 0xFFFFF)
+                if (v >= unicode::NonBMPMin) {
+                    if (v > unicode::NonBMPMax)
                         return Decode_BadUri;
 
-                    c = char16_t((v & 0x3FF) + 0xDC00);
-                    char16_t H = char16_t((v >> 10) + 0xD800);
+                    char16_t H = unicode::LeadSurrogate(v);
                     if (!sb.append(H))
                         return Decode_Failure;
+                    c = unicode::TrailSurrogate(v);
                 } else {
                     c = char16_t(v);
                 }
             }
             if (c < 128 && reservedSet && reservedSet[c]) {
                 if (!sb.append(chars + start, k - start + 1))
                     return Decode_Failure;
             } else {
@@ -3835,17 +3805,17 @@ str_encodeURI_Component(JSContext* cx, u
 
 /*
  * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
  * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
  */
 uint32_t
 js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
 {
-    MOZ_ASSERT(ucs4Char <= 0x10FFFF);
+    MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax);
 
     if (ucs4Char < 0x80) {
         utf8Buffer[0] = uint8_t(ucs4Char);
         return 1;
     }
 
     uint32_t a = ucs4Char >> 11;
     uint32_t utf8Length = 2;
--- a/js/src/vm/Unicode.h
+++ b/js/src/vm/Unicode.h
@@ -63,16 +63,25 @@ struct CharFlag {
         LETTER = 1 << 1,
         IDENTIFIER_PART = 1 << 2,
     };
 };
 
 const char16_t BYTE_ORDER_MARK2 = 0xFFFE;
 const char16_t NO_BREAK_SPACE  = 0x00A0;
 
+const char16_t LeadSurrogateMin = 0xD800;
+const char16_t LeadSurrogateMax = 0xDBFF;
+const char16_t TrailSurrogateMin = 0xDC00;
+const char16_t TrailSurrogateMax = 0xDFFF;
+
+const uint32_t UTF16Max = 0xFFFF;
+const uint32_t NonBMPMin = 0x10000;
+const uint32_t NonBMPMax = 0x10FFFF;
+
 class CharacterInfo {
     /*
      * upperCase and lowerCase normally store the delta between two
      * letters. For example the lower case alpha (a) has the char code
      * 97, and the upper case alpha (A) has 65. So for "a" we would
      * store -32 in upperCase (97 + (-32) = 65) and 0 in lowerCase,
      * because this char is already in lower case.
      * Well, not -32 exactly, but (2**16 - 32) to induce
@@ -131,27 +140,41 @@ IsIdentifierStart(char16_t ch)
 
     if (ch < 128)
         return js_isidstart[ch];
 
     return CharInfo(ch).isLetter();
 }
 
 inline bool
+IsIdentifierStart(uint32_t codePoint)
+{
+    // TODO: Supplemental code points not yet supported (bug 1197230).
+    return codePoint <= UTF16Max && IsIdentifierStart(char16_t(codePoint));
+}
+
+inline bool
 IsIdentifierPart(char16_t ch)
 {
     /* Matches ES5 7.6 IdentifierPart. */
 
     if (ch < 128)
         return js_isident[ch];
 
     return CharInfo(ch).isIdentifierPart();
 }
 
 inline bool
+IsIdentifierPart(uint32_t codePoint)
+{
+    // TODO: Supplemental code points not yet supported (bug 1197230).
+    return codePoint <= UTF16Max && IsIdentifierPart(char16_t(codePoint));
+}
+
+inline bool
 IsLetter(char16_t ch)
 {
     return CharInfo(ch).isLetter();
 }
 
 inline bool
 IsSpace(char16_t ch)
 {
@@ -393,50 +416,75 @@ ReverseFoldCase2(char16_t ch)
 
 inline char16_t
 ReverseFoldCase3(char16_t ch)
 {
     const FoldingInfo& info = CaseFoldInfo(ch);
     return uint16_t(ch) + info.reverse3;
 }
 
-const size_t LeadSurrogateMin = 0xD800;
-const size_t LeadSurrogateMax = 0xDBFF;
-const size_t TrailSurrogateMin = 0xDC00;
-const size_t TrailSurrogateMax = 0xDFFF;
-const size_t UTF16Max = 0xFFFF;
-const size_t NonBMPMin = 0x10000;
-const size_t NonBMPMax = 0x10FFFF;
+inline bool
+IsSupplementary(uint32_t codePoint)
+{
+    return codePoint >= NonBMPMin && codePoint <= NonBMPMax;
+}
 
 inline bool
-IsLeadSurrogate(size_t value)
+IsLeadSurrogate(uint32_t codePoint)
 {
-    return value >= LeadSurrogateMin && value <= LeadSurrogateMax;
+    return codePoint >= LeadSurrogateMin && codePoint <= LeadSurrogateMax;
 }
 
 inline bool
-IsTrailSurrogate(size_t value)
+IsTrailSurrogate(uint32_t codePoint)
+{
+    return codePoint >= TrailSurrogateMin && codePoint <= TrailSurrogateMax;
+}
+
+inline char16_t
+LeadSurrogate(uint32_t codePoint)
 {
-    return value >= TrailSurrogateMin && value <= TrailSurrogateMax;
+    MOZ_ASSERT(IsSupplementary(codePoint));
+
+    return char16_t((codePoint >> 10) + (LeadSurrogateMin - (NonBMPMin >> 10)));
+}
+
+inline char16_t
+TrailSurrogate(uint32_t codePoint)
+{
+    MOZ_ASSERT(IsSupplementary(codePoint));
+
+    return char16_t((codePoint & 0x3FF) | TrailSurrogateMin);
 }
 
 inline void
-UTF16Encode(size_t cp, size_t* lead, size_t* trail)
+UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail)
 {
-    MOZ_ASSERT(cp >= NonBMPMin && cp <= NonBMPMax);
+    MOZ_ASSERT(IsSupplementary(codePoint));
 
-    *lead = (cp - NonBMPMin) / 1024 + LeadSurrogateMin;
-    *trail = ((cp - NonBMPMin) % 1024) + TrailSurrogateMin;
+    *lead = LeadSurrogate(codePoint);
+    *trail = TrailSurrogate(codePoint);
 }
 
-inline size_t
-UTF16Decode(size_t lead, size_t trail)
+static inline void
+UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
+{
+    if (!IsSupplementary(codePoint)) {
+        elements[(*index)++] = char16_t(codePoint);
+    } else {
+        elements[(*index)++] = LeadSurrogate(codePoint);
+        elements[(*index)++] = TrailSurrogate(codePoint);
+    }
+}
+
+inline uint32_t
+UTF16Decode(char16_t lead, char16_t trail)
 {
     MOZ_ASSERT(IsLeadSurrogate(lead));
     MOZ_ASSERT(IsTrailSurrogate(trail));
 
-    return (lead - LeadSurrogateMin) * 1024 + (trail - TrailSurrogateMin) + NonBMPMin;
+    return (lead << 10) + trail + (NonBMPMin - (LeadSurrogateMin << 10) - TrailSurrogateMin);
 }
 
 } /* namespace unicode */
 } /* namespace js */
 
 #endif /* vm_Unicode_h */