Bug 1467336 - Rename getCharIgnoreEOL to getCodeUnit to better indicate that's all it does. r=arai
author: Jeff Walden <jwalden@mit.edu>
Thu, 07 Jun 2018 02:15:41 -0700
changeset 478646 b6821b80af877083d6a39b879ffd37709d9a635f
parent 478645 de25c66a84b0619bb7c9f1d10ef565fff4b59df8
child 478647 e90575cf96ce99f89e8b195a58853dee01d2e79d
push id: 1757
push user: ffxbld-merge
push date: Fri, 24 Aug 2018 17:02:43 +0000
treeherder: mozilla-release@736023aebdb1 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: arai
bugs: 1467336
milestone: 62.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1467336 - Rename getCharIgnoreEOL to getCodeUnit to better indicate that's all it does. r=arai
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -525,24 +525,28 @@ TokenStreamChars<char16_t, AnyCharsAcces
 
     if (!updateLineInfoForEOL())
         return false;
 
     *cp = '\n';
     return true;
 }
 
-// This gets the next char. It does nothing special with EOL sequences, not
-// even updating the line counters.  It can be used safely if (a) the
-// resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
-// it's an EOL, and (b) the line-related state (lineno, linebase) is not used
-// before it's ungotten.
+// This gets the next code unit -- the next numeric sub-unit of source text,
+// possibly smaller than a full code point.  It is simple and stupid, and it
+// doesn't understand EOL, update line counters, or anything like that.  If you
+// use it to consume an EOL sequence, line counters *will not* be correct for
+// subsequent code.
+//
+// Only use this if (a) the resulting code unit is guaranteed to be ungotten
+// (by ungetCharIgnoreEOL()) if it's an EOL, and (b) the line-related state
+// (lineno, linebase) is not used before it's ungotten.
 template<typename CharT, class AnyCharsAccess>
 int32_t
-GeneralTokenStreamChars<CharT, AnyCharsAccess>::getCharIgnoreEOL()
+GeneralTokenStreamChars<CharT, AnyCharsAccess>::getCodeUnit()
 {
     if (MOZ_LIKELY(sourceUnits.hasRawChars()))
         return sourceUnits.getCodeUnit();
 
     anyCharsAccess().flags.isEOF = true;
     return EOF;
 }
 
@@ -601,17 +605,17 @@ TokenStreamChars<char16_t, AnyCharsAcces
 // consumed: use skipChars(n) to do so after checking that the consumed
 // characters had appropriate values.
 template<typename CharT, class AnyCharsAccess>
 bool
 TokenStreamSpecific<CharT, AnyCharsAccess>::peekChars(int n, CharT* cp)
 {
     int i;
     for (i = 0; i < n; i++) {
-        int32_t c = getCharIgnoreEOL();
+        int32_t c = getCodeUnit();
         if (c == EOF)
             break;
 
         cp[i] = char16_t(c);
     }
 
     for (int j = i - 1; j >= 0; j--)
         ungetCharIgnoreEOL(cp[j]);
@@ -998,25 +1002,25 @@ TokenStreamSpecific<CharT, AnyCharsAcces
 // We have encountered a '\': check for a Unicode escape sequence after it.
 // Return the length of the escape sequence and the character code point (by
 // value) if we found a Unicode escape sequence.  Otherwise, return 0.  In both
 // cases, do not advance along the buffer.
 template<typename CharT, class AnyCharsAccess>
 uint32_t
 TokenStreamSpecific<CharT, AnyCharsAccess>::peekUnicodeEscape(uint32_t* codePoint)
 {
-    int32_t c = getCharIgnoreEOL();
+    int32_t c = getCodeUnit();
     if (c != 'u') {
         ungetCharIgnoreEOL(c);
         return 0;
     }
 
     CharT cp[3];
     uint32_t length;
-    c = getCharIgnoreEOL();
+    c = getCodeUnit();
     if (JS7_ISHEX(c) && peekChars(3, cp) &&
         JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
     {
         *codePoint = (JS7_UNHEX(c) << 12) |
                      (JS7_UNHEX(cp[0]) << 8) |
                      (JS7_UNHEX(cp[1]) << 4) |
                      JS7_UNHEX(cp[2]);
         length = 5;
@@ -1031,32 +1035,32 @@ TokenStreamSpecific<CharT, AnyCharsAcces
     return length;
 }
 
 template<typename CharT, class AnyCharsAccess>
 uint32_t
 TokenStreamSpecific<CharT, AnyCharsAccess>::peekExtendedUnicodeEscape(uint32_t* codePoint)
 {
     // The opening brace character was already read.
-    int32_t c = getCharIgnoreEOL();
+    int32_t c = getCodeUnit();
 
     // Skip leading zeros.
     uint32_t leadingZeros = 0;
     while (c == '0') {
         leadingZeros++;
-        c = getCharIgnoreEOL();
+        c = getCodeUnit();
     }
 
     CharT cp[6];
     size_t i = 0;
     uint32_t code = 0;
     while (JS7_ISHEX(c) && i < 6) {
         cp[i++] = c;
         code = code << 4 | JS7_UNHEX(c);
-        c = getCharIgnoreEOL();
+        c = getCodeUnit();
     }
 
     uint32_t length;
     if (c == '}' && (leadingZeros > 0 || i > 0) && code <= unicode::NonBMPMax) {
         *codePoint = code;
         length = leadingZeros + i + 3;
     } else {
         length = 0;
@@ -1308,17 +1312,17 @@ template<class AnyCharsAccess>
 void
 TokenStreamChars<char16_t, AnyCharsAccess>::matchMultiUnitCodePointSlow(char16_t lead,
                                                                         uint32_t* codePoint)
 {
     MOZ_ASSERT(unicode::IsLeadSurrogate(lead),
                "matchMultiUnitCodepoint should have ensured |lead| is a lead "
                "surrogate");
 
-    int32_t maybeTrail = getCharIgnoreEOL();
+    int32_t maybeTrail = getCodeUnit();
     if (MOZ_LIKELY(unicode::IsTrailSurrogate(maybeTrail))) {
         *codePoint = unicode::UTF16Decode(lead, maybeTrail);
     } else {
         ungetCharIgnoreEOL(maybeTrail);
         *codePoint = 0;
     }
 }
 
@@ -1331,17 +1335,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
 
     auto restoreNextRawCharAddress =
         MakeScopeExit([this, originalAddress]() {
             this->sourceUnits.setAddressOfNextCodeUnit(originalAddress);
         });
 
     tokenbuf.clear();
     for (;;) {
-        int32_t c = getCharIgnoreEOL();
+        int32_t c = getCodeUnit();
 
         uint32_t codePoint;
         if (!matchMultiUnitCodePoint(c, &codePoint))
             return false;
         if (codePoint) {
             if (!unicode::IsIdentifierPart(codePoint))
                 break;
         } else {
@@ -1370,17 +1374,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
     // Run the bad-token code for every path out of this function except the
     // two success-cases.
     auto noteBadToken = MakeScopeExit([this]() {
         this->badToken();
     });
 
     int c;
     while (true) {
-        c = getCharIgnoreEOL();
+        c = getCodeUnit();
         if (c == EOF)
             break;
 
         uint32_t codePoint;
         if (!matchMultiUnitCodePoint(c, &codePoint))
             return false;
         if (codePoint) {
             if (!unicode::IsIdentifierPart(codePoint))
@@ -1514,17 +1518,17 @@ static_assert(LastCharKind < (1 << (size
               "Elements of firstCharKinds[] are too small");
 
 template<typename CharT, class AnyCharsAccess>
 void
 GeneralTokenStreamChars<CharT, AnyCharsAccess>::consumeRestOfSingleLineComment()
 {
     int32_t c;
     do {
-        c = getCharIgnoreEOL();
+        c = getCodeUnit();
     } while (c != EOF && !SourceUnits::isRawEOLChar(c));
 
     ungetCharIgnoreEOL(c);
 }
 
 template<typename CharT, class AnyCharsAccess>
 MOZ_MUST_USE bool
 TokenStreamSpecific<CharT, AnyCharsAccess>::decimalNumber(int c, TokenStart start,
@@ -1534,17 +1538,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
     // Run the bad-token code for every path out of this function except the
     // one success-case.
     auto noteBadToken = MakeScopeExit([this]() {
         this->badToken();
     });
 
     // Consume integral component digits.
     while (IsAsciiDigit(c))
-        c = getCharIgnoreEOL();
+        c = getCodeUnit();
 
     // Numbers contain no escapes, so we can read directly from |sourceUnits|.
     double dval;
     DecimalPoint decimalPoint = NoDecimal;
     if (c != '.' && c != 'e' && c != 'E') {
         ungetCharIgnoreEOL(c);
 
         // Most numbers are pure decimal integers without fractional component
@@ -1554,36 +1558,36 @@ TokenStreamSpecific<CharT, AnyCharsAcces
         {
             return false;
         }
     } else {
         // Consume any decimal dot and fractional component.
         if (c == '.') {
             decimalPoint = HasDecimal;
             do {
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
             } while (IsAsciiDigit(c));
         }
 
         // Consume any exponential notation.
         if (c == 'e' || c == 'E') {
-            c = getCharIgnoreEOL();
+            c = getCodeUnit();
             if (c == '+' || c == '-')
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
 
             // Exponential notation must contain at least one digit.
             if (!IsAsciiDigit(c)) {
                 ungetCharIgnoreEOL(c);
                 error(JSMSG_MISSING_EXPONENT);
                 return false;
             }
 
             // Consume exponential digits.
             do {
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
             } while (IsAsciiDigit(c));
         }
 
         ungetCharIgnoreEOL(c);
 
         const CharT* dummy;
         if (!js_strtod(anyCharsAccess().cx, numStart, sourceUnits.addressOfNextCodeUnit(), &dummy,
                        &dval))
@@ -1774,59 +1778,59 @@ TokenStreamSpecific<CharT, AnyCharsAcces
         // number starting with '0' that contains '8' or '9' and is treated as
         // decimal) number.
         //
         if (c1kind == ZeroDigit) {
             TokenStart start(sourceUnits, -1);
 
             int radix;
             const CharT* numStart;
-            c = getCharIgnoreEOL();
+            c = getCodeUnit();
             if (c == 'x' || c == 'X') {
                 radix = 16;
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
                 if (!JS7_ISHEX(c)) {
                     ungetCharIgnoreEOL(c);
                     reportError(JSMSG_MISSING_HEXDIGITS);
                     return badToken();
                 }
 
                 // one past the '0x'
                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
 
                 while (JS7_ISHEX(c))
-                    c = getCharIgnoreEOL();
+                    c = getCodeUnit();
             } else if (c == 'b' || c == 'B') {
                 radix = 2;
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
                 if (c != '0' && c != '1') {
                     ungetCharIgnoreEOL(c);
                     reportError(JSMSG_MISSING_BINARY_DIGITS);
                     return badToken();
                 }
 
                 // one past the '0b'
                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
 
                 while (c == '0' || c == '1')
-                    c = getCharIgnoreEOL();
+                    c = getCodeUnit();
             } else if (c == 'o' || c == 'O') {
                 radix = 8;
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
                 if (c < '0' || c > '7') {
                     ungetCharIgnoreEOL(c);
                     reportError(JSMSG_MISSING_OCTAL_DIGITS);
                     return badToken();
                 }
 
                 // one past the '0o'
                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
 
                 while ('0' <= c && c <= '7')
-                    c = getCharIgnoreEOL();
+                    c = getCodeUnit();
             } else if (IsAsciiDigit(c)) {
                 radix = 8;
                 // one past the '0'
                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
 
                 do {
                     // Octal integer literals are not permitted in strict mode
                     // code.
@@ -1840,17 +1844,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
                     if (c >= '8') {
                         if (!warning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09"))
                             return badToken();
 
                         // Use the decimal scanner for the rest of the number.
                         return decimalNumber(c, start, numStart, modifier, ttp);
                     }
 
-                    c = getCharIgnoreEOL();
+                    c = getCodeUnit();
                 } while (IsAsciiDigit(c));
             } else {
                 // '0' not followed by [XxBbOo0-9];  scan as a decimal number.
                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
 
                 return decimalNumber(c, start, numStart, modifier, ttp);
             }
             ungetCharIgnoreEOL(c);
@@ -1905,17 +1909,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
         //
         TokenStart start(sourceUnits, -1);
         TokenKind simpleKind;
 #ifdef DEBUG
         simpleKind = TokenKind::Limit; // sentinel value for code after switch
 #endif
         switch (c) {
           case '.':
-            c = getCharIgnoreEOL();
+            c = getCodeUnit();
             if (IsAsciiDigit(c)) {
                 return decimalNumber('.', start, sourceUnits.addressOfNextCodeUnit() - 2, modifier,
                                      ttp);
             }
 
             if (c == '.') {
                 if (matchChar('.')) {
                     simpleKind = TokenKind::TripleDot;
@@ -2025,17 +2029,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
                 simpleKind = matchChar('=') ? TokenKind::PowAssign : TokenKind::Pow;
             else
                 simpleKind = matchChar('=') ? TokenKind::MulAssign : TokenKind::Mul;
             break;
 
           case '/':
             // Look for a single-line comment.
             if (matchChar('/')) {
-                c = getCharIgnoreEOL();
+                c = getCodeUnit();
                 if (c == '@' || c == '#') {
                     bool shouldWarn = c == '@';
                     if (!getDirectives(false, shouldWarn))
                         return false;
                 } else {
                     ungetCharIgnoreEOL(c);
                 }
 
@@ -2105,17 +2109,17 @@ TokenStreamSpecific<CharT, AnyCharsAcces
 
                     if (!tokenbuf.append(c))
                         return badToken();
                 } while (true);
 
                 RegExpFlag reflags = NoFlags;
                 while (true) {
                     RegExpFlag flag;
-                    c = getCharIgnoreEOL();
+                    c = getCodeUnit();
                     if (c == 'g')
                         flag = GlobalFlag;
                     else if (c == 'i')
                         flag = IgnoreCaseFlag;
                     else if (c == 'm')
                         flag = MultilineFlag;
                     else if (c == 'y')
                         flag = StickyFlag;
@@ -2202,18 +2206,18 @@ TokenStreamSpecific<CharT, AnyCharsAcces
     // Run the bad-token code for every path out of this function except the
     // one success-case.
     auto noteBadToken = MakeScopeExit([this]() {
         this->badToken();
     });
 
     // We need to detect any of these chars:  " or ', \n (or its
     // equivalents), \\, EOF.  Because we detect EOL sequences here and
-    // put them back immediately, we can use getCharIgnoreEOL().
-    while ((c = getCharIgnoreEOL()) != untilChar) {
+    // put them back immediately, we can use getCodeUnit().
+    while ((c = getCodeUnit()) != untilChar) {
         if (c == EOF) {
             ungetCharIgnoreEOL(c);
             const char delimiters[] = { untilChar, untilChar, '\0' };
             error(JSMSG_EOF_BEFORE_END_OF_LITERAL, delimiters);
             return false;
         }
 
         if (c == '\\') {
@@ -2240,24 +2244,24 @@ TokenStreamSpecific<CharT, AnyCharsAcces
 
               case '\n':
                 // ES5 7.8.4: an escaped line terminator represents
                 // no character.
                 continue;
 
               // Unicode character specification.
               case 'u': {
-                int32_t c2 = getCharIgnoreEOL();
+                int32_t c2 = getCodeUnit();
                 if (c2 == '{') {
                     uint32_t start = sourceUnits.offset() - 3;
                     uint32_t code = 0;
                     bool first = true;
                     bool valid = true;
                     do {
-                        int32_t c = getCharIgnoreEOL();
+                        int32_t c = getCodeUnit();
                         if (c == EOF) {
                             if (parsingTemplate) {
                                 TokenStreamAnyChars& anyChars = anyCharsAccess();
                                 anyChars.setInvalidTemplateEscape(start,
                                                                   InvalidEscapeType::Unicode);
                                 valid = false;
                                 break;
                             }
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -1183,17 +1183,17 @@ class GeneralTokenStreamChars
                         TokenStreamShared::Modifier modifier, TokenKind* out)
     {
         Token* token = newToken(TokenKind::RegExp, start, modifier, out);
         token->setRegExpFlags(reflags);
     }
 
     MOZ_COLD bool badToken();
 
-    int32_t getCharIgnoreEOL();
+    int32_t getCodeUnit();
 
     void ungetCharIgnoreEOL(int32_t c) {
         MOZ_ASSERT_IF(c == EOF, anyCharsAccess().flags.isEOF);
 
         CharsSharedBase::ungetCharIgnoreEOL(c);
     }
 
     void ungetChar(int32_t c);
@@ -1223,17 +1223,17 @@ class TokenStreamChars<char16_t, AnyChar
     using GeneralCharsBase::asSpecific;
 
     using typename GeneralCharsBase::TokenStreamSpecific;
 
     void matchMultiUnitCodePointSlow(char16_t lead, uint32_t* codePoint);
 
   protected:
     using GeneralCharsBase::anyCharsAccess;
-    using GeneralCharsBase::getCharIgnoreEOL;
+    using GeneralCharsBase::getCodeUnit;
     using GeneralCharsBase::sourceUnits;
     using CharsSharedBase::ungetCharIgnoreEOL;
     using GeneralCharsBase::updateLineInfoForEOL;
 
     using GeneralCharsBase::GeneralCharsBase;
 
     // |c| must be the code unit just gotten.  If it and the subsequent code
     // unit form a valid surrogate pair, get the second code unit, set
@@ -1345,17 +1345,17 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
   private:
     using CharsSharedBase::appendCodePointToTokenbuf;
     using CharsSharedBase::atomizeChars;
     using GeneralCharsBase::badToken;
     using GeneralCharsBase::consumeRestOfSingleLineComment;
     using CharsSharedBase::copyTokenbufTo;
     using CharsSharedBase::fillWithTemplateStringContents;
     using CharsBase::getChar;
-    using GeneralCharsBase::getCharIgnoreEOL;
+    using GeneralCharsBase::getCodeUnit;
     using CharsBase::matchMultiUnitCodePoint;
     using GeneralCharsBase::newAtomToken;
     using GeneralCharsBase::newNameToken;
     using GeneralCharsBase::newNumberToken;
     using GeneralCharsBase::newRegExpToken;
     using GeneralCharsBase::newSimpleToken;
     using CharsSharedBase::sourceUnits;
     using CharsSharedBase::tokenbuf;
@@ -1503,39 +1503,39 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
      * token.
      *
      * |c| must be one of these values:
      *
      *   1. The first decimal digit in the integral part of a decimal number
      *      not starting with '0' or '.', e.g. '1' for "17", '3' for "3.14", or
      *      '8' for "8.675309e6".
      *
-     *   In this case, the next |getCharIgnoreEOL()| must return the code unit
-     *   after |c| in the overall number.
+     *   In this case, the next |getCodeUnit()| must return the code unit after
+     *   |c| in the overall number.
      *
      *   2. The '.' in a "."/"0."-prefixed decimal number or the 'e'/'E' in a
      *      "0e"/"0E"-prefixed decimal number, e.g. ".17", "0.42", or "0.1e3".
      *
-     *   In this case, the next |getCharIgnoreEOL()| must return the code unit
+     *   In this case, the next |getCodeUnit()| must return the code unit
      *   *after* the first decimal digit *after* the '.'.  So the next code
      *   unit would be '7' in ".17", '2' in "0.42", 'e' in "0.4e+8", or '/' in
      *   "0.5/2" (three separate tokens).
      *
      *   3. The code unit after the '0' where "0" is the entire number token.
      *
-     *   In this case, the next |getCharIgnoreEOL()| returns the code unit
-     *   after |c|.
+     *   In this case, the next |getCodeUnit()| returns the code unit after
+     *   |c|.
      *
      *   4. (Non-strict mode code only)  The first '8' or '9' in a "noctal"
      *      number that begins with a '0' but contains a non-octal digit in its
      *      integer part so is interpreted as decimal, e.g. '9' in "09.28" or
      *      '8' in "0386" or '9' in "09+7" (three separate tokens).
      *
-     *   In this case, the next |getCharIgnoreEOL()| returns the code unit
-     *   after |c|: '.', '6', or '+' in the examples above.
+     *   In this case, the next |getCodeUnit()| returns the code unit after
+     *   |c|: '.', '6', or '+' in the examples above.
      *
      * This interface is super-hairy and horribly stateful.  Unfortunately, its
      * hair merely reflects the intricacy of ECMAScript numeric literal syntax.
      * And incredibly, it *improves* on the goto-based horror that predated it.
      */
     MOZ_MUST_USE bool decimalNumber(int c, TokenStart start, const CharT* numStart,
                                     Modifier modifier, TokenKind* out);
 
@@ -1731,31 +1731,31 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
         MOZ_ALWAYS_TRUE(getChar(&c));
         MOZ_ASSERT(c == expect);
     }
 
     void consumeKnownCharIgnoreEOL(int32_t expect) {
 #ifdef DEBUG
         auto c =
 #endif
-            getCharIgnoreEOL();
+            getCodeUnit();
         MOZ_ASSERT(c == expect);
     }
 
     MOZ_MUST_USE bool peekChar(int32_t* c) {
         if (!getChar(c))
             return false;
         ungetChar(*c);
         return true;
     }
 
     void skipChars(uint32_t n) {
         while (n-- > 0) {
             MOZ_ASSERT(sourceUnits.hasRawChars());
-            mozilla::DebugOnly<int32_t> c = getCharIgnoreEOL();
+            mozilla::DebugOnly<int32_t> c = getCodeUnit();
             MOZ_ASSERT(!SourceUnits::isRawEOLChar(c));
         }
     }
 };
 
 // It's preferable to define this in TokenStream.cpp, but its template-ness
 // means we'd then have to *instantiate* this constructor for all possible
 // (CharT, AnyCharsAccess) pairs -- and that gets super-messy as AnyCharsAccess