author | Jeff Walden <jwalden@mit.edu> |
Mon, 09 Jul 2018 14:38:16 -0700 | |
changeset 427435 | 3a4e6ae59b597084afaed1d84d1674ee556406d5 |
parent 427434 | 44b64b5a44fcfdaf086bd32b7c8038efac5bf652 |
child 427436 | 80b3a14e84c23a7215243376d1a3143a985aee8a |
push id | 34304 |
push user | toros@mozilla.com |
push date | Fri, 20 Jul 2018 09:57:23 +0000 |
treeherder | mozilla-central@4f12d77b4f9b [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | arai |
bugs | 1476866 |
milestone | 63.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
js/src/frontend/TokenStream.cpp | file | annotate | diff | comparison | revisions | |
js/src/frontend/TokenStream.h | file | annotate | diff | comparison | revisions |
--- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -1324,30 +1324,25 @@ TokenStreamSpecific<CharT, AnyCharsAcces return false; continue; } if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) break; } else { - int32_t cp; - if (!getNonAsciiCodePoint(unit, &cp)) + // |restoreNextRawCharAddress| undoes all gets, and this function + // doesn't update line/column info. + char32_t cp; + if (!getNonAsciiCodePointDontNormalize(unit, &cp)) return false; - codePoint = AssertedCast<uint32_t>(cp); - - if (!unicode::IsIdentifierPart(codePoint)) { - if (MOZ_UNLIKELY(codePoint == '\n')) { - // |restoreNextRawCharAddress| will undo all gets, but we - // have to revert a line/column update manually. - anyCharsAccess().undoInternalUpdateLineInfoForEOL(); - } + codePoint = cp; + if (!unicode::IsIdentifierPart(codePoint)) break; - } } if (!appendCodePointToCharBuffer(codePoint)) return false; } while (true); return true; }
--- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -1313,30 +1313,53 @@ class SpecializedTokenStreamCharsBase; template<> class SpecializedTokenStreamCharsBase<char16_t> : public TokenStreamCharsBase<char16_t> { using CharsBase = TokenStreamCharsBase<char16_t>; protected: + using TokenStreamCharsShared::isAsciiCodePoint; // Deliberately don't |using| |sourceUnits| because of bug 1472569. :-( using typename CharsBase::SourceUnits; protected: // These APIs are only usable by UTF-16-specific code. /** * Consume the rest of a single-line comment (but not the EOL/EOF that * terminates it) -- infallibly because no 16-bit code unit sequence in a * comment is an error. */ void infallibleConsumeRestOfSingleLineComment(); + /** + * Given |lead| already consumed, consume and return the code point encoded + * starting from it. Infallible because lone surrogates in JS encode a + * "code point" of the same value. + */ + char32_t infallibleGetNonAsciiCodePointDontNormalize(char16_t lead) { + MOZ_ASSERT(!isAsciiCodePoint(lead)); + MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == lead); + + // Handle single-unit code points and lone trailing surrogates. + if (MOZ_LIKELY(!unicode::IsLeadSurrogate(lead)) || + // Or handle lead surrogates not paired with trailing surrogates. + MOZ_UNLIKELY(this->sourceUnits.atEnd() || + !unicode::IsTrailSurrogate(this->sourceUnits.peekCodeUnit()))) + { + return lead; + } + + // Otherwise it's a multi-unit code point. + return unicode::UTF16Decode(lead, this->sourceUnits.getCodeUnit()); + } + protected: // These APIs are in both SpecializedTokenStreamCharsBase specializations // and so are usable in subclasses no matter what CharT is. using CharsBase::CharsBase; }; template<> @@ -1520,26 +1543,39 @@ class TokenStreamChars<char16_t, AnyChar using GeneralCharsBase::asSpecific; using typename GeneralCharsBase::TokenStreamSpecific; protected: using GeneralCharsBase::anyCharsAccess; using GeneralCharsBase::getCodeUnit; using SpecializedCharsBase::infallibleConsumeRestOfSingleLineComment; + using SpecializedCharsBase::infallibleGetNonAsciiCodePointDontNormalize; using TokenStreamCharsShared::isAsciiCodePoint; // Deliberately don't |using| |sourceUnits| because of bug 1472569. :-( using GeneralCharsBase::ungetCodeUnit; using GeneralCharsBase::updateLineInfoForEOL; using typename GeneralCharsBase::SourceUnits; protected: using GeneralCharsBase::GeneralCharsBase; + /** + * Given the non-ASCII |lead| code unit just consumed, consume and return a + * complete non-ASCII code point. Line/column updates are not performed, + * and line breaks are returned as-is without normalization. + */ + MOZ_MUST_USE bool getNonAsciiCodePointDontNormalize(char16_t lead, char32_t* codePoint) { + // There are no encoding errors in 16-bit JS, so implement this so that + // the compiler knows it, too. + *codePoint = infallibleGetNonAsciiCodePointDontNormalize(lead); + return true; + } + // Try to get the next code point, normalizing '\r', '\r\n', '\n', and the // Unicode line/paragraph separators into '\n'. Also updates internal // line-counter state. Return true on success and store the code point in // |*c|. Return false and leave |*c| undefined on failure. MOZ_MUST_USE bool getCodePoint(int32_t* cp); /** * Given a just-consumed ASCII code unit/point |lead|, consume a full code @@ -1721,16 +1757,17 @@ class MOZ_STACK_CLASS TokenStreamSpecifi using SpecializedChars::consumeRestOfSingleLineComment; using TokenStreamCharsShared::copyCharBufferTo; using TokenStreamCharsShared::drainCharBufferIntoAtom; using CharsBase::fillCharBufferWithTemplateStringContents; using SpecializedChars::getCodePoint; using GeneralCharsBase::getCodeUnit; using SpecializedChars::getFullAsciiCodePoint; using SpecializedChars::getNonAsciiCodePoint; + using SpecializedChars::getNonAsciiCodePointDontNormalize; using TokenStreamCharsShared::isAsciiCodePoint; using CharsBase::matchCodeUnit; using GeneralCharsBase::matchUnicodeEscapeIdent; using GeneralCharsBase::matchUnicodeEscapeIdStart; using GeneralCharsBase::newAtomToken; using GeneralCharsBase::newNameToken; using GeneralCharsBase::newNumberToken; using GeneralCharsBase::newRegExpToken;