author | Jeff Walden <jwalden@mit.edu> |
Mon, 09 Jul 2018 16:22:50 -0700 | |
changeset 427436 | 80b3a14e84c23a7215243376d1a3143a985aee8a |
parent 427435 | 3a4e6ae59b597084afaed1d84d1674ee556406d5 |
child 427437 | 0ed3f8f103c013b9c360360c6337956175b59ef5 |
push id | 34304 |
push user | toros@mozilla.com |
push date | Fri, 20 Jul 2018 09:57:23 +0000 |
treeherder | mozilla-central@4f12d77b4f9b [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | arai |
bugs | 1476866 |
milestone | 63.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
js/src/frontend/TokenStream.cpp | file | annotate | diff | comparison | revisions | |
js/src/frontend/TokenStream.h | file | annotate | diff | comparison | revisions |
--- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -596,31 +596,16 @@ TokenStreamChars<char16_t, AnyCharsAcces unicode::UTF16Encode(codePoint, units, &numUnits); MOZ_ASSERT(numUnits == 1 || numUnits == 2); while (numUnits-- > 0) ungetCodeUnit(units[numUnits]); } -template<class AnyCharsAccess> -void -TokenStreamChars<char16_t, AnyCharsAccess>::ungetLineTerminator() -{ - this->sourceUnits.ungetCodeUnit(); - - char16_t last = this->sourceUnits.peekCodeUnit(); - MOZ_ASSERT(SourceUnits::isRawEOLChar(last)); - - if (last == '\n') - this->sourceUnits.ungetOptionalCRBeforeLF(); - - anyCharsAccess().undoInternalUpdateLineInfoForEOL(); -} - template<typename CharT> size_t SourceUnits<CharT>::findEOLMax(size_t start, size_t max) { const CharT* p = codeUnitPtrAt(start); size_t n = 0; while (true) { @@ -1618,23 +1603,26 @@ template<typename CharT, class AnyCharsA MOZ_MUST_USE bool TokenStreamSpecific<CharT, AnyCharsAccess>::regexpLiteral(TokenStart start, TokenKind* out) { MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == '/'); this->charBuffer.clear(); auto ProcessNonAsciiCodePoint = [this](int32_t lead) { MOZ_ASSERT(lead != EOF); - - int32_t codePoint; - if (!this->getNonAsciiCodePoint(lead, &codePoint)) + MOZ_ASSERT(!this->isAsciiCodePoint(lead)); + + char32_t codePoint; + if (!this->getNonAsciiCodePointDontNormalize(lead, &codePoint)) return false; - if (codePoint == '\n') { - this->ungetLineTerminator(); + if (MOZ_UNLIKELY(codePoint == unicode::LINE_SEPARATOR || + codePoint == unicode::PARA_SEPARATOR)) + { + this->sourceUnits.ungetLineOrParagraphSeparator(); this->reportError(JSMSG_UNTERMINATED_REGEXP); return false; } return this->appendCodePointToCharBuffer(codePoint); }; auto ReportUnterminatedRegExp = [this](CharT unit) {
--- a/js/src/frontend/TokenStream.h +++ b/js/src/frontend/TokenStream.h @@ -1092,16 +1092,19 @@ class SourceUnits MOZ_ASSERT(*ptr == CharT('\n'), "function should only be called when a '\\n' was just " "ungotten, and any '\\r' preceding it must also be " "ungotten"); if (*(ptr - 1) == CharT('\r')) ptr--; } + /** Unget U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR. */ + inline void ungetLineOrParagraphSeparator(); + void ungetCodeUnit() { MOZ_ASSERT(!atStart(), "can't unget if currently at start"); MOZ_ASSERT(ptr); // make sure it hasn't been poisoned ptr--; } const CharT* addressOfNextCodeUnit(bool allowPoisoned = false) const { MOZ_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned @@ -1141,16 +1144,43 @@ class SourceUnits /** Limit for quick bounds check. */ const CharT* limit_; /** Next char to get. */ const CharT* ptr; }; +template<> +inline void +SourceUnits<char16_t>::ungetLineOrParagraphSeparator() +{ +#ifdef DEBUG + char16_t prev = previousCodeUnit(); +#endif + MOZ_ASSERT(prev == unicode::LINE_SEPARATOR || prev == unicode::PARA_SEPARATOR); + + ungetCodeUnit(); +} + +template<> +inline void +SourceUnits<mozilla::Utf8Unit>::ungetLineOrParagraphSeparator() +{ + unskipCodeUnits(3); + + MOZ_ASSERT(ptr[0].toUint8() == 0xE2); + MOZ_ASSERT(ptr[1].toUint8() == 0x80); + +#ifdef DEBUG + uint8_t last = ptr[2].toUint8(); +#endif + MOZ_ASSERT(last == 0xA8 || last == 0xA9); +} + class TokenStreamCharsShared { // Using char16_t (not CharT) is a simplifying decision that hopefully // eliminates the need for a UTF-8 regular expression parser and makes // |copyCharBufferTo| markedly simpler. using CharBuffer = Vector<char16_t, 32>; protected: @@ -1653,23 +1683,16 @@ class TokenStreamChars<char16_t, AnyChar "should not be ungetting un-normalized code points"); ungetCodePointIgnoreEOL(codePoint); if (codePoint == '\n') anyCharsAccess().undoInternalUpdateLineInfoForEOL(); } /** - * Unget a just-gotten LineTerminator sequence: '\r', '\n', '\r\n', or - * a Unicode line/paragraph separator, also undoing line/column information - * changes reflecting that LineTerminator. - */ - void ungetLineTerminator(); - - /** * Consume code points til EOL/EOF following the start of a single-line * comment, without consuming the EOL/EOF. */ MOZ_MUST_USE bool consumeRestOfSingleLineComment() { // This operation is infallible for UTF-16 -- and this implementation // approach lets the compiler boil away call-side fallibility handling. infallibleConsumeRestOfSingleLineComment(); return true;