Bug 1314037 - Part 1: Use uniform types for UTF-16 code units and code points. r=arai
author André Bargull <andre.bargull@gmail.com>
Mon, 31 Oct 2016 07:15:11 -0700
changeset 351331 80bedfc21ee648de2878922b51e6d503d5716899
parent 351330 2f53c8537285c4b0a1c71be4f4f0a38f85930eaa
child 351332 900466e640ca1bb85d1f01a20ee42645e03d2c7f
push id 6795
push user jlund@mozilla.com
push date Mon, 23 Jan 2017 14:19:46 +0000
treeherder mozilla-esr52@76101b503191 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers arai
bugs 1314037
milestone 52.0a1
Bug 1314037 - Part 1: Use uniform types for UTF-16 code units and code points. r=arai
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
js/src/irregexp/RegExpParser.cpp
js/src/irregexp/RegExpParser.h
js/src/jsstr.cpp
js/src/vm/Unicode.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -23,20 +23,20 @@
 #include "jsnum.h"
 
 #include "frontend/BytecodeCompiler.h"
 #include "js/CharacterEncoding.h"
 #include "js/UniquePtr.h"
 #include "vm/HelperThreads.h"
 #include "vm/Keywords.h"
 #include "vm/StringBuffer.h"
+#include "vm/Unicode.h"
 
 using namespace js;
 using namespace js::frontend;
-using namespace js::unicode;
 
 using mozilla::Maybe;
 using mozilla::PodAssign;
 using mozilla::PodCopy;
 using mozilla::PodZero;
 
 struct KeywordInfo {
     const char* chars;         // C string with keyword text
@@ -101,22 +101,22 @@ FindKeyword(JSLinearString* str)
 
 template <typename CharT>
 static bool
 IsIdentifier(const CharT* chars, size_t length)
 {
     if (length == 0)
         return false;
 
-    if (!IsIdentifierStart(*chars))
+    if (!unicode::IsIdentifierStart(char16_t(*chars)))
         return false;
 
     const CharT* end = chars + length;
     while (++chars != end) {
-        if (!IsIdentifierPart(*chars))
+        if (!unicode::IsIdentifierPart(char16_t(*chars)))
             return false;
     }
 
     return true;
 }
 
 bool
 frontend::IsIdentifier(JSLinearString* str)
@@ -752,51 +752,51 @@ TokenStream::reportAsmJSError(uint32_t o
     unsigned flags = options().throwOnAsmJSValidationFailureOption
                      ? JSREPORT_ERROR
                      : JSREPORT_WARNING;
     reportCompileErrorNumberVA(offset, flags, errorNumber, args);
     va_end(args);
 }
 
 // We have encountered a '\': check for a Unicode escape sequence after it.
-// Return 'true' and the character code value (by value) if we found a
+// Return 'true' and the character code point (by value) if we found a
 // Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
 // advance along the buffer.
 bool
-TokenStream::peekUnicodeEscape(int* result)
+TokenStream::peekUnicodeEscape(uint32_t* codePoint)
 {
     char16_t cp[5];
 
     if (peekChars(5, cp) && cp[0] == 'u' &&
         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
     {
-        *result = (((((JS7_UNHEX(cp[1]) << 4)
+        *codePoint = (((((JS7_UNHEX(cp[1]) << 4)
                 + JS7_UNHEX(cp[2])) << 4)
               + JS7_UNHEX(cp[3])) << 4)
             + JS7_UNHEX(cp[4]);
         return true;
     }
     return false;
 }
 
 bool
-TokenStream::matchUnicodeEscapeIdStart(int32_t* cp)
+TokenStream::matchUnicodeEscapeIdStart(uint32_t* codePoint)
 {
-    if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
+    if (peekUnicodeEscape(codePoint) && unicode::IsIdentifierStart(*codePoint)) {
         skipChars(5);
         return true;
     }
     return false;
 }
 
 bool
-TokenStream::matchUnicodeEscapeIdent(int32_t* cp)
+TokenStream::matchUnicodeEscapeIdent(uint32_t* codePoint)
 {
-    if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
+    if (peekUnicodeEscape(codePoint) && unicode::IsIdentifierPart(*codePoint)) {
         skipChars(5);
         return true;
     }
     return false;
 }
 
 // Helper function which returns true if the first length(q) characters in p are
 // the same as the characters in q.
@@ -841,17 +841,17 @@ TokenStream::getDirective(bool isMultili
     if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
         if (shouldWarnDeprecated &&
             !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
             return false;
 
         skipChars(directiveLength);
         tokenbuf.clear();
 
-        while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
+        while ((c = peekChar()) && c != EOF && !unicode::IsSpaceOrBOM2(c)) {
             getChar();
             // Debugging directives can occur in both single- and multi-line
             // comments. If we're currently inside a multi-line comment, we also
             // need to recognize multi-line comment terminators.
             if (isMultiline && c == '*' && peekChar() == '/') {
                 ungetChar('*');
                 break;
             }
@@ -936,24 +936,25 @@ IsTokenSane(Token* tp)
 
     return true;
 }
 #endif
 
 bool
 TokenStream::putIdentInTokenbuf(const char16_t* identStart)
 {
-    int32_t c, qc;
+    int32_t c;
+    uint32_t qc;
     const char16_t* tmp = userbuf.addressOfNextRawChar();
     userbuf.setAddressOfNextRawChar(identStart);
 
     tokenbuf.clear();
     for (;;) {
         c = getCharIgnoreEOL();
-        if (!IsIdentifierPart(c)) {
+        if (!unicode::IsIdentifierPart(char16_t(c))) {
             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                 break;
             c = qc;
         }
         if (!tokenbuf.append(c)) {
             userbuf.setAddressOfNextRawChar(tmp);
             return false;
         }
@@ -1059,17 +1060,18 @@ static const uint8_t firstCharKinds[] = 
 #undef _______
 
 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
               "Elements of firstCharKinds[] are too small");
 
 bool
 TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
 {
-    int c, qc;
+    int c;
+    uint32_t qc;
     Token* tp;
     FirstCharKind c1kind;
     const char16_t* numStart;
     bool hasExp;
     DecimalPoint decimalPoint;
     const char16_t* identStart;
     bool hadUnicodeEscape;
 
@@ -1090,34 +1092,34 @@ TokenStream::getTokenInternal(TokenKind*
     }
 
     c = userbuf.getRawChar();
     MOZ_ASSERT(c != EOF);
 
     // Chars not in the range 0..127 are rare.  Getting them out of the way
     // early allows subsequent checking to be faster.
     if (MOZ_UNLIKELY(c >= 128)) {
-        if (IsSpaceOrBOM2(c)) {
+        if (unicode::IsSpaceOrBOM2(c)) {
             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                 updateLineInfoForEOL();
                 updateFlagsForEOL();
             }
 
             goto retry;
         }
 
         tp = newToken(-1);
 
         static_assert('$' < 128,
                       "IdentifierStart contains '$', but as !IsLetter('$'), "
                       "ensure that '$' is never handled here");
         static_assert('_' < 128,
                       "IdentifierStart contains '_', but as !IsLetter('_'), "
                       "ensure that '_' is never handled here");
-        if (IsLetter(c)) {
+        if (unicode::IsLetter(c)) {
             identStart = userbuf.addressOfNextRawChar() - 1;
             hadUnicodeEscape = false;
             goto identifier;
         }
 
         goto badchar;
     }
 
@@ -1163,17 +1165,17 @@ TokenStream::getTokenInternal(TokenKind*
         identStart = userbuf.addressOfNextRawChar() - 1;
         hadUnicodeEscape = false;
 
       identifier:
         for (;;) {
             c = getCharIgnoreEOL();
             if (c == EOF)
                 break;
-            if (!IsIdentifierPart(c)) {
+            if (!unicode::IsIdentifierPart(char16_t(c))) {
                 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                     break;
                 hadUnicodeEscape = true;
             }
         }
         ungetCharIgnoreEOL(c);
 
         // Identifiers containing no Unicode escapes can be processed directly
@@ -1257,17 +1259,17 @@ TokenStream::getTokenInternal(TokenKind*
                 goto error;
             }
             do {
                 c = getCharIgnoreEOL();
             } while (JS7_ISDEC(c));
         }
         ungetCharIgnoreEOL(c);
 
-        if (c != EOF && IsIdentifierStart(c)) {
+        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
             reportError(JSMSG_IDSTART_AFTER_NUMBER);
             goto error;
         }
 
         // Unlike identifiers and strings, numbers cannot contain escaped
         // chars, so we don't need to use tokenbuf.  Instead we can just
         // convert the char16_t characters in userbuf to the numeric value.
         double dval;
@@ -1364,17 +1366,17 @@ TokenStream::getTokenInternal(TokenKind*
             }
         } else {
             // '0' not followed by 'x', 'X' or a digit;  scan as a decimal number.
             numStart = userbuf.addressOfNextRawChar() - 1;
             goto decimal;
         }
         ungetCharIgnoreEOL(c);
 
-        if (c != EOF && IsIdentifierStart(c)) {
+        if (c != EOF && unicode::IsIdentifierStart(char16_t(c))) {
             reportError(JSMSG_IDSTART_AFTER_NUMBER);
             goto error;
         }
 
         double dval;
         const char16_t* dummy;
         if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
             goto error;
@@ -1669,17 +1671,17 @@ TokenStream::getBracedUnicode(uint32_t* 
                 return false;
             break;
         }
 
         if (!JS7_ISHEX(c))
             return false;
 
         code = (code << 4) | JS7_UNHEX(c);
-        if (code > 0x10FFFF)
+        if (code > unicode::NonBMPMax)
             return false;
         first = false;
     }
 
     *cp = code;
     return true;
 }
 
@@ -1722,23 +1724,23 @@ TokenStream::getStringOrTemplateToken(in
               case 'u': {
                 if (peekChar() == '{') {
                     uint32_t code;
                     if (!getBracedUnicode(&code)) {
                         reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
                         return false;
                     }
 
-                    MOZ_ASSERT(code <= 0x10FFFF);
-                    if (code < 0x10000) {
+                    MOZ_ASSERT(code <= unicode::NonBMPMax);
+                    if (code < unicode::NonBMPMin) {
                         c = code;
                     } else {
-                        if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800))
+                        if (!tokenbuf.append(unicode::LeadSurrogate(code)))
                             return false;
-                        c = ((code - 0x10000) % 1024) + 0xDC00;
+                        c = unicode::TrailSurrogate(code);
                     }
                     break;
                 }
 
                 char16_t cp[4];
                 if (peekChars(4, cp) &&
                     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3]))
                 {
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -943,19 +943,19 @@ class MOZ_STACK_CLASS TokenStream
     MOZ_MUST_USE bool getBracedUnicode(uint32_t* code);
     MOZ_MUST_USE bool getStringOrTemplateToken(int untilChar, Token** tp);
 
     int32_t getChar();
     int32_t getCharIgnoreEOL();
     void ungetChar(int32_t c);
     void ungetCharIgnoreEOL(int32_t c);
     Token* newToken(ptrdiff_t adjust);
-    bool peekUnicodeEscape(int32_t* c);
-    bool matchUnicodeEscapeIdStart(int32_t* c);
-    bool matchUnicodeEscapeIdent(int32_t* c);
+    bool peekUnicodeEscape(uint32_t* codePoint);
+    bool matchUnicodeEscapeIdStart(uint32_t* codePoint);
+    bool matchUnicodeEscapeIdent(uint32_t* codePoint);
     bool peekChars(int n, char16_t* cp);
 
     MOZ_MUST_USE bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
     MOZ_MUST_USE bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
                                    const char* directive, int directiveLength,
                                    const char* errorMsgPragma,
                                    UniquePtr<char16_t[], JS::FreePolicy>* destination);
     MOZ_MUST_USE bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -271,17 +271,17 @@ HexValue(uint32_t c)
     c -= '0';
     if (static_cast<unsigned>(c) <= 9) return c;
     c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
     if (static_cast<unsigned>(c) <= 5) return c + 10;
     return -1;
 }
 
 template <typename CharT>
-size_t
+widechar
 RegExpParser<CharT>::ParseOctalLiteral()
 {
     MOZ_ASSERT('0' <= current() && current() <= '7');
     // For compatibility with some other browsers (not all), we parse
     // up to three octal digits with a value below 256.
     widechar value = current() - '0';
     Advance();
     if ('0' <= current() && current() <= '7') {
@@ -292,17 +292,17 @@ RegExpParser<CharT>::ParseOctalLiteral()
             Advance();
         }
     }
     return value;
 }
 
 template <typename CharT>
 bool
-RegExpParser<CharT>::ParseHexEscape(int length, size_t* value)
+RegExpParser<CharT>::ParseHexEscape(int length, widechar* value)
 {
     const CharT* start = position();
     uint32_t val = 0;
     bool done = false;
     for (int i = 0; !done; i++) {
         widechar c = current();
         int d = HexValue(c);
         if (d < 0) {
@@ -316,17 +316,17 @@ RegExpParser<CharT>::ParseHexEscape(int 
         }
     }
     *value = val;
     return true;
 }
 
 template <typename CharT>
 bool
-RegExpParser<CharT>::ParseBracedHexEscape(size_t* value)
+RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
 {
     MOZ_ASSERT(current() == '{');
     Advance();
 
     bool first = true;
     uint32_t code = 0;
     while (true) {
         widechar c = current();
@@ -358,17 +358,17 @@ RegExpParser<CharT>::ParseBracedHexEscap
     }
 
     *value = code;
     return true;
 }
 
 template <typename CharT>
 bool
-RegExpParser<CharT>::ParseTrailSurrogate(size_t* value)
+RegExpParser<CharT>::ParseTrailSurrogate(widechar* value)
 {
     if (current() != '\\')
         return false;
 
     const CharT* start = position();
     Advance();
     if (current() != 'u') {
         Reset(start);
@@ -536,43 +536,43 @@ RegExpParser<CharT>::ParseClassCharacter
         // For compatibility, outside of unicode mode, we interpret a decimal
         // escape that isn't a back reference (and therefore either \0 or not
         // valid according to the specification) as a 1..3 digit octal
         // character code.
         *code = ParseOctalLiteral();
         return true;
       case 'x': {
         Advance();
-        size_t value;
+        widechar value;
         if (ParseHexEscape(2, &value)) {
             *code = value;
             return true;
         }
         if (unicode_) {
             ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
             return false;
         }
         // If \x is not followed by a two-digit hexadecimal, treat it
         // as an identity escape in non-unicode mode.
         *code = 'x';
         return true;
       }
       case 'u': {
         Advance();
-        size_t value;
+        widechar value;
         if (unicode_) {
             if (current() == '{') {
                 if (!ParseBracedHexEscape(&value))
                     return false;
                 *code = value;
                 return true;
             }
             if (ParseHexEscape(4, &value)) {
                 if (unicode::IsLeadSurrogate(value)) {
-                    size_t trail;
+                    widechar trail;
                     if (ParseTrailSurrogate(&trail)) {
                         *code = unicode::UTF16Decode(value, trail);
                         return true;
                     }
                 }
                 *code = value;
                 return true;
             }
@@ -777,20 +777,20 @@ NegateUnicodeRanges(LifoAlloc* alloc, In
     //   SWAP(result_ranges, tmp_ranges)
     // The last SWAP is just for simplicity of the loop.
     for (size_t i = 0; i < (*ranges)->length(); i++) {
         result_ranges->clear();
 
         const RangeType& range = (**ranges)[i];
         for (size_t j = 0; j < tmp_ranges->length(); j++) {
             const RangeType& tmpRange = (*tmp_ranges)[j];
-            size_t from1 = tmpRange.from();
-            size_t to1 = tmpRange.to();
-            size_t from2 = range.from();
-            size_t to2 = range.to();
+            auto from1 = tmpRange.from();
+            auto to1 = tmpRange.to();
+            auto from2 = range.from();
+            auto to2 = range.to();
 
             if (from1 < from2) {
                 if (to1 < from2) {
                     result_ranges->append(tmpRange);
                 } else if (to1 <= to2) {
                     result_ranges->append(RangeType::Range(from1, from2 - 1));
                 } else {
                     result_ranges->append(RangeType::Range(from1, from2 - 1));
@@ -921,18 +921,18 @@ UnicodeRangesAtom(LifoAlloc* alloc,
 
     for (size_t i = 0; i < wide_ranges->length(); i++) {
         if (added)
             builder->NewAlternative();
 
         const WideCharRange& range = (*wide_ranges)[i];
         widechar from = range.from();
         widechar to = range.to();
-        size_t from_lead, from_trail;
-        size_t to_lead, to_trail;
+        char16_t from_lead, from_trail;
+        char16_t to_lead, to_trail;
 
         unicode::UTF16Encode(from, &from_lead, &from_trail);
         if (from == to) {
             builder->AddCharacter(from_lead);
             builder->AddCharacter(from_trail);
         } else {
             unicode::UTF16Encode(to, &to_lead, &to_trail);
             if (from_lead == to_lead) {
@@ -1631,17 +1631,17 @@ RegExpParser<CharT>::ParseDisjunction()
                     Advance(2);
                     if (IsDecimalDigit(current()))
                         return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
                     builder->AddCharacter(0);
                     break;
                 }
 
                 Advance();
-                size_t octal = ParseOctalLiteral();
+                widechar octal = ParseOctalLiteral();
                 builder->AddCharacter(octal);
                 break;
               }
                 // ControlEscape :: one of
                 //   f n r t v
               case 'f':
                 Advance(2);
                 builder->AddCharacter('\f');
@@ -1679,48 +1679,48 @@ RegExpParser<CharT>::ParseDisjunction()
                 } else {
                     Advance(2);
                     builder->AddCharacter(controlLetter & 0x1f);
                 }
                 break;
               }
               case 'x': {
                 Advance(2);
-                size_t value;
+                widechar value;
                 if (ParseHexEscape(2, &value)) {
                     builder->AddCharacter(value);
                 } else {
                     if (unicode_)
                         return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
                     builder->AddCharacter('x');
                 }
                 break;
               }
               case 'u': {
                 Advance(2);
-                size_t value;
+                widechar value;
                 if (unicode_) {
                     if (current() == '{') {
                         if (!ParseBracedHexEscape(&value))
                             return nullptr;
                         if (unicode::IsLeadSurrogate(value)) {
                             builder->AddAtom(LeadSurrogateAtom(alloc, value));
                         } else if (unicode::IsTrailSurrogate(value)) {
                             builder->AddAtom(TrailSurrogateAtom(alloc, value));
                         } else if (value >= unicode::NonBMPMin) {
-                            size_t lead, trail;
+                            char16_t lead, trail;
                             unicode::UTF16Encode(value, &lead, &trail);
                             builder->AddAtom(SurrogatePairAtom(alloc, lead, trail,
                                                                ignore_case_));
                         } else {
                             builder->AddCharacter(value);
                         }
                     } else if (ParseHexEscape(4, &value)) {
                         if (unicode::IsLeadSurrogate(value)) {
-                            size_t trail;
+                            widechar trail;
                             if (ParseTrailSurrogate(&trail)) {
                                 builder->AddAtom(SurrogatePairAtom(alloc, value, trail,
                                                                    ignore_case_));
                             } else {
                                 builder->AddAtom(LeadSurrogateAtom(alloc, value));
                             }
                         } else if (unicode::IsTrailSurrogate(value)) {
                             builder->AddAtom(TrailSurrogateAtom(alloc, value));
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -191,23 +191,23 @@ class RegExpParser
     // Tries to parse the input as a single escaped character.  If successful
     // it stores the result in the output parameter and returns true.
     // Otherwise it throws an error and returns false.  The character must not
     // be 'b' or 'B' since they are usually handled specially.
     bool ParseClassCharacterEscape(widechar* code);
 
     // Checks whether the following is a length-digit hexadecimal number,
     // and sets the value if it is.
-    bool ParseHexEscape(int length, size_t* value);
+    bool ParseHexEscape(int length, widechar* value);
 
-    bool ParseBracedHexEscape(size_t* value);
-    bool ParseTrailSurrogate(size_t* value);
+    bool ParseBracedHexEscape(widechar* value);
+    bool ParseTrailSurrogate(widechar* value);
     bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
 
-    size_t ParseOctalLiteral();
+    widechar ParseOctalLiteral();
 
     // Tries to parse the input as a back reference.  If successful it
     // stores the result in the output parameter and returns true.  If
     // it fails it will push back the characters read so the same characters
     // can be reparsed.
     bool ParseBackReferenceIndex(int* index_out);
 
     bool ParseClassAtom(char16_t* char_class, widechar *value);
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -41,25 +41,25 @@
 #endif
 #include "vm/GlobalObject.h"
 #include "vm/Interpreter.h"
 #include "vm/Opcodes.h"
 #include "vm/Printer.h"
 #include "vm/RegExpObject.h"
 #include "vm/RegExpStatics.h"
 #include "vm/StringBuffer.h"
+#include "vm/Unicode.h"
 
 #include "vm/Interpreter-inl.h"
 #include "vm/String-inl.h"
 #include "vm/StringObject-inl.h"
 #include "vm/TypeInference-inl.h"
 
 using namespace js;
 using namespace js::gc;
-using namespace js::unicode;
 
 using JS::Symbol;
 using JS::SymbolCode;
 using JS::ToInt32;
 using JS::ToUint32;
 
 using mozilla::AssertedCast;
 using mozilla::CheckedInt;
@@ -2757,55 +2757,26 @@ js::str_fromCharCode_one_arg(JSContext* 
     uint16_t ucode;
 
     if (!ToUint16(cx, code, &ucode))
         return false;
 
     return CodeUnitToString(cx, ucode, rval);
 }
 
-static inline bool
-IsSupplementary(uint32_t codePoint)
-{
-    return codePoint > 0xFFFF;
-}
-
-static inline char16_t
-LeadSurrogate(uint32_t codePoint)
-{
-    return char16_t((codePoint >> 10) + 0xD7C0);
-}
-
-static inline char16_t
-TrailSurrogate(uint32_t codePoint)
-{
-    return char16_t((codePoint & 0x3FF) | 0xDC00);
-}
-
-static inline void
-UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
-{
-    if (!IsSupplementary(codePoint)) {
-        elements[(*index)++] = char16_t(codePoint);
-    } else {
-        elements[(*index)++] = LeadSurrogate(codePoint);
-        elements[(*index)++] = TrailSurrogate(codePoint);
-    }
-}
-
 static MOZ_ALWAYS_INLINE bool
 ToCodePoint(JSContext* cx, HandleValue code, uint32_t* codePoint)
 {
     // String.fromCodePoint, Steps 5.a-b.
     double nextCP;
     if (!ToNumber(cx, code, &nextCP))
         return false;
 
     // String.fromCodePoint, Steps 5.c-d.
-    if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > 0x10FFFF) {
+    if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 || nextCP > unicode::NonBMPMax) {
         ToCStringBuf cbuf;
         if (char* numStr = NumberToCString(cx, &cbuf, nextCP))
             JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_NOT_A_CODEPOINT, numStr);
         return false;
     }
 
     *codePoint = uint32_t(nextCP);
     return true;
@@ -2817,20 +2788,20 @@ js::str_fromCodePoint_one_arg(JSContext*
     // Steps 1-4 (omitted).
 
     // Steps 5.a-d.
     uint32_t codePoint;
     if (!ToCodePoint(cx, code, &codePoint))
         return false;
 
     // Steps 5.e, 6.
-    if (!IsSupplementary(codePoint))
+    if (!unicode::IsSupplementary(codePoint))
         return CodeUnitToString(cx, uint16_t(codePoint), rval);
 
-    char16_t chars[] = { LeadSurrogate(codePoint), TrailSurrogate(codePoint) };
+    char16_t chars[] = { unicode::LeadSurrogate(codePoint), unicode::TrailSurrogate(codePoint) };
     JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
     if (!str)
         return false;
 
     rval.setString(str);
     return true;
 }
 
@@ -2848,17 +2819,17 @@ str_fromCodePoint_few_args(JSContext* cx
     unsigned length = 0;
     for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
         // Steps 5.a-d.
         uint32_t codePoint;
         if (!ToCodePoint(cx, args[nextIndex], &codePoint))
             return false;
 
         // Step 5.e.
-        UTF16Encode(codePoint, elements, &length);
+        unicode::UTF16Encode(codePoint, elements, &length);
     }
 
     // Step 6.
     JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
     if (!str)
         return false;
 
     args.rval().setString(str);
@@ -2899,17 +2870,17 @@ js::str_fromCodePoint(JSContext* cx, uns
         // Steps 5.a-d.
         uint32_t codePoint;
         if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
             js_free(elements);
             return false;
         }
 
         // Step 5.e.
-        UTF16Encode(codePoint, elements, &length);
+        unicode::UTF16Encode(codePoint, elements, &length);
     }
     elements[length] = 0;
 
     // Step 6.
     JSString* str = NewString<CanGC>(cx, elements, length);
     if (!str) {
         js_free(elements);
         return false;
@@ -3608,32 +3579,32 @@ Encode(StringBuffer& sb, const CharT* ch
     hexBuf[3] = 0;
 
     for (size_t k = 0; k < length; k++) {
         char16_t c = chars[k];
         if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
             if (!sb.append(c))
                 return Encode_Failure;
         } else {
-            if (c >= 0xDC00 && c <= 0xDFFF)
+            if (unicode::IsTrailSurrogate(c))
                 return Encode_BadUri;
 
             uint32_t v;
-            if (c < 0xD800 || c > 0xDBFF) {
+            if (!unicode::IsLeadSurrogate(c)) {
                 v = c;
             } else {
                 k++;
                 if (k == length)
                     return Encode_BadUri;
 
                 char16_t c2 = chars[k];
-                if (c2 < 0xDC00 || c2 > 0xDFFF)
+                if (!unicode::IsTrailSurrogate(c2))
                     return Encode_BadUri;
 
-                v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
+                v = unicode::UTF16Decode(c, c2);
             }
             uint8_t utf8buf[4];
             size_t L = OneUcs4ToUtf8Char(utf8buf, v);
             for (size_t j = 0; j < L; j++) {
                 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
                 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
                 if (!sb.append(hexBuf, 3))
                     return Encode_Failure;
@@ -3723,25 +3694,24 @@ Decode(StringBuffer& sb, const CharT* ch
                     B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
                     if ((B & 0xC0) != 0x80)
                         return Decode_BadUri;
 
                     k += 2;
                     octets[j] = char(B);
                 }
                 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
-                if (v >= 0x10000) {
-                    v -= 0x10000;
-                    if (v > 0xFFFFF)
+                if (v >= unicode::NonBMPMin) {
+                    if (v > unicode::NonBMPMax)
                         return Decode_BadUri;
 
-                    c = char16_t((v & 0x3FF) + 0xDC00);
-                    char16_t H = char16_t((v >> 10) + 0xD800);
+                    char16_t H = unicode::LeadSurrogate(v);
                     if (!sb.append(H))
                         return Decode_Failure;
+                    c = unicode::TrailSurrogate(v);
                 } else {
                     c = char16_t(v);
                 }
             }
             if (c < 128 && reservedSet && reservedSet[c]) {
                 if (!sb.append(chars + start, k - start + 1))
                     return Decode_Failure;
             } else {
@@ -3835,17 +3805,17 @@ str_encodeURI_Component(JSContext* cx, u
 
 /*
  * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
  * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
  */
 uint32_t
 js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
 {
-    MOZ_ASSERT(ucs4Char <= 0x10FFFF);
+    MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax);
 
     if (ucs4Char < 0x80) {
         utf8Buffer[0] = uint8_t(ucs4Char);
         return 1;
     }
 
     uint32_t a = ucs4Char >> 11;
     uint32_t utf8Length = 2;
--- a/js/src/vm/Unicode.h
+++ b/js/src/vm/Unicode.h
@@ -63,16 +63,25 @@ struct CharFlag {
         LETTER = 1 << 1,
         IDENTIFIER_PART = 1 << 2,
     };
 };
 
 const char16_t BYTE_ORDER_MARK2 = 0xFFFE;
 const char16_t NO_BREAK_SPACE  = 0x00A0;
 
+const char16_t LeadSurrogateMin = 0xD800;
+const char16_t LeadSurrogateMax = 0xDBFF;
+const char16_t TrailSurrogateMin = 0xDC00;
+const char16_t TrailSurrogateMax = 0xDFFF;
+
+const uint32_t UTF16Max = 0xFFFF;
+const uint32_t NonBMPMin = 0x10000;
+const uint32_t NonBMPMax = 0x10FFFF;
+
 class CharacterInfo {
     /*
      * upperCase and lowerCase normally store the delta between two
      * letters. For example the lower case alpha (a) has the char code
      * 97, and the upper case alpha (A) has 65. So for "a" we would
      * store -32 in upperCase (97 + (-32) = 65) and 0 in lowerCase,
      * because this char is already in lower case.
      * Well, not -32 exactly, but (2**16 - 32) to induce
@@ -131,27 +140,41 @@ IsIdentifierStart(char16_t ch)
 
     if (ch < 128)
         return js_isidstart[ch];
 
     return CharInfo(ch).isLetter();
 }
 
 inline bool
+IsIdentifierStart(uint32_t codePoint)
+{
+    // TODO: Supplemental code points not yet supported (bug 1197230).
+    return codePoint <= UTF16Max && IsIdentifierStart(char16_t(codePoint));
+}
+
+inline bool
 IsIdentifierPart(char16_t ch)
 {
     /* Matches ES5 7.6 IdentifierPart. */
 
     if (ch < 128)
         return js_isident[ch];
 
     return CharInfo(ch).isIdentifierPart();
 }
 
 inline bool
+IsIdentifierPart(uint32_t codePoint)
+{
+    // TODO: Supplemental code points not yet supported (bug 1197230).
+    return codePoint <= UTF16Max && IsIdentifierPart(char16_t(codePoint));
+}
+
+inline bool
 IsLetter(char16_t ch)
 {
     return CharInfo(ch).isLetter();
 }
 
 inline bool
 IsSpace(char16_t ch)
 {
@@ -393,50 +416,75 @@ ReverseFoldCase2(char16_t ch)
 
 inline char16_t
 ReverseFoldCase3(char16_t ch)
 {
     const FoldingInfo& info = CaseFoldInfo(ch);
     return uint16_t(ch) + info.reverse3;
 }
 
-const size_t LeadSurrogateMin = 0xD800;
-const size_t LeadSurrogateMax = 0xDBFF;
-const size_t TrailSurrogateMin = 0xDC00;
-const size_t TrailSurrogateMax = 0xDFFF;
-const size_t UTF16Max = 0xFFFF;
-const size_t NonBMPMin = 0x10000;
-const size_t NonBMPMax = 0x10FFFF;
+inline bool
+IsSupplementary(uint32_t codePoint)
+{
+    return codePoint >= NonBMPMin && codePoint <= NonBMPMax;
+}
 
 inline bool
-IsLeadSurrogate(size_t value)
+IsLeadSurrogate(uint32_t codePoint)
 {
-    return value >= LeadSurrogateMin && value <= LeadSurrogateMax;
+    return codePoint >= LeadSurrogateMin && codePoint <= LeadSurrogateMax;
 }
 
 inline bool
-IsTrailSurrogate(size_t value)
+IsTrailSurrogate(uint32_t codePoint)
+{
+    return codePoint >= TrailSurrogateMin && codePoint <= TrailSurrogateMax;
+}
+
+inline char16_t
+LeadSurrogate(uint32_t codePoint)
 {
-    return value >= TrailSurrogateMin && value <= TrailSurrogateMax;
+    MOZ_ASSERT(IsSupplementary(codePoint));
+
+    return char16_t((codePoint >> 10) + (LeadSurrogateMin - (NonBMPMin >> 10)));
+}
+
+inline char16_t
+TrailSurrogate(uint32_t codePoint)
+{
+    MOZ_ASSERT(IsSupplementary(codePoint));
+
+    return char16_t((codePoint & 0x3FF) | TrailSurrogateMin);
 }
 
 inline void
-UTF16Encode(size_t cp, size_t* lead, size_t* trail)
+UTF16Encode(uint32_t codePoint, char16_t* lead, char16_t* trail)
 {
-    MOZ_ASSERT(cp >= NonBMPMin && cp <= NonBMPMax);
+    MOZ_ASSERT(IsSupplementary(codePoint));
 
-    *lead = (cp - NonBMPMin) / 1024 + LeadSurrogateMin;
-    *trail = ((cp - NonBMPMin) % 1024) + TrailSurrogateMin;
+    *lead = LeadSurrogate(codePoint);
+    *trail = TrailSurrogate(codePoint);
 }
 
-inline size_t
-UTF16Decode(size_t lead, size_t trail)
+static inline void
+UTF16Encode(uint32_t codePoint, char16_t* elements, unsigned* index)
+{
+    if (!IsSupplementary(codePoint)) {
+        elements[(*index)++] = char16_t(codePoint);
+    } else {
+        elements[(*index)++] = LeadSurrogate(codePoint);
+        elements[(*index)++] = TrailSurrogate(codePoint);
+    }
+}
+
+inline uint32_t
+UTF16Decode(char16_t lead, char16_t trail)
 {
     MOZ_ASSERT(IsLeadSurrogate(lead));
     MOZ_ASSERT(IsTrailSurrogate(trail));
 
-    return (lead - LeadSurrogateMin) * 1024 + (trail - TrailSurrogateMin) + NonBMPMin;
+    return (lead << 10) + trail + (NonBMPMin - (LeadSurrogateMin << 10) - TrailSurrogateMin);
 }
 
 } /* namespace unicode */
 } /* namespace js */
 
 #endif /* vm_Unicode_h */