Bug 1476866 - Remove ungetLineTerminator, used only to unget Unicode separators, and replace it with a SourceUnits::ungetLineOrParagraphSeparator. r=arai
author Jeff Walden <jwalden@mit.edu>
Mon, 09 Jul 2018 16:22:50 -0700
changeset 427436 80b3a14e84c23a7215243376d1a3143a985aee8a
parent 427435 3a4e6ae59b597084afaed1d84d1674ee556406d5
child 427437 0ed3f8f103c013b9c360360c6337956175b59ef5
push id 34304
push user toros@mozilla.com
push date Fri, 20 Jul 2018 09:57:23 +0000
treeherder mozilla-central@4f12d77b4f9b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers arai
bugs 1476866
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1476866 - Remove ungetLineTerminator, used only to unget Unicode separators, and replace it with a SourceUnits::ungetLineOrParagraphSeparator. r=arai
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -596,31 +596,16 @@ TokenStreamChars<char16_t, AnyCharsAcces
     unicode::UTF16Encode(codePoint, units, &numUnits);
 
     MOZ_ASSERT(numUnits == 1 || numUnits == 2);
 
     while (numUnits-- > 0)
         ungetCodeUnit(units[numUnits]);
 }
 
-template<class AnyCharsAccess>
-void
-TokenStreamChars<char16_t, AnyCharsAccess>::ungetLineTerminator()
-{
-    this->sourceUnits.ungetCodeUnit();
-
-    char16_t last = this->sourceUnits.peekCodeUnit();
-    MOZ_ASSERT(SourceUnits::isRawEOLChar(last));
-
-    if (last == '\n')
-        this->sourceUnits.ungetOptionalCRBeforeLF();
-
-    anyCharsAccess().undoInternalUpdateLineInfoForEOL();
-}
-
 template<typename CharT>
 size_t
 SourceUnits<CharT>::findEOLMax(size_t start, size_t max)
 {
     const CharT* p = codeUnitPtrAt(start);
 
     size_t n = 0;
     while (true) {
@@ -1618,23 +1603,26 @@ template<typename CharT, class AnyCharsA
 MOZ_MUST_USE bool
 TokenStreamSpecific<CharT, AnyCharsAccess>::regexpLiteral(TokenStart start, TokenKind* out)
 {
     MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == '/');
     this->charBuffer.clear();
 
     auto ProcessNonAsciiCodePoint = [this](int32_t lead) {
         MOZ_ASSERT(lead != EOF);
-
-        int32_t codePoint;
-        if (!this->getNonAsciiCodePoint(lead, &codePoint))
+        MOZ_ASSERT(!this->isAsciiCodePoint(lead));
+
+        char32_t codePoint;
+        if (!this->getNonAsciiCodePointDontNormalize(lead, &codePoint))
             return false;
 
-        if (codePoint == '\n') {
-            this->ungetLineTerminator();
+        if (MOZ_UNLIKELY(codePoint == unicode::LINE_SEPARATOR ||
+                         codePoint == unicode::PARA_SEPARATOR))
+        {
+            this->sourceUnits.ungetLineOrParagraphSeparator();
             this->reportError(JSMSG_UNTERMINATED_REGEXP);
             return false;
         }
 
         return this->appendCodePointToCharBuffer(codePoint);
     };
 
     auto ReportUnterminatedRegExp = [this](CharT unit) {
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -1092,16 +1092,19 @@ class SourceUnits
         MOZ_ASSERT(*ptr == CharT('\n'),
                    "function should only be called when a '\\n' was just "
                    "ungotten, and any '\\r' preceding it must also be "
                    "ungotten");
         if (*(ptr - 1) == CharT('\r'))
             ptr--;
     }
 
+    /** Unget U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR. */
+    inline void ungetLineOrParagraphSeparator();
+
     void ungetCodeUnit() {
         MOZ_ASSERT(!atStart(), "can't unget if currently at start");
         MOZ_ASSERT(ptr);     // make sure it hasn't been poisoned
         ptr--;
     }
 
     const CharT* addressOfNextCodeUnit(bool allowPoisoned = false) const {
         MOZ_ASSERT_IF(!allowPoisoned, ptr);     // make sure it hasn't been poisoned
@@ -1141,16 +1144,43 @@ class SourceUnits
 
     /** Limit for quick bounds check. */
     const CharT* limit_;
 
     /** Next char to get. */
     const CharT* ptr;
 };
 
+template<>
+inline void
+SourceUnits<char16_t>::ungetLineOrParagraphSeparator()
+{
+#ifdef DEBUG
+    char16_t prev = previousCodeUnit();
+#endif
+    MOZ_ASSERT(prev == unicode::LINE_SEPARATOR || prev == unicode::PARA_SEPARATOR);
+
+    ungetCodeUnit();
+}
+
+template<>
+inline void
+SourceUnits<mozilla::Utf8Unit>::ungetLineOrParagraphSeparator()
+{
+    unskipCodeUnits(3);
+
+    MOZ_ASSERT(ptr[0].toUint8() == 0xE2);
+    MOZ_ASSERT(ptr[1].toUint8() == 0x80);
+
+#ifdef DEBUG
+    uint8_t last = ptr[2].toUint8();
+#endif
+    MOZ_ASSERT(last == 0xA8 || last == 0xA9);
+}
+
 class TokenStreamCharsShared
 {
     // Using char16_t (not CharT) is a simplifying decision that hopefully
     // eliminates the need for a UTF-8 regular expression parser and makes
     // |copyCharBufferTo| markedly simpler.
     using CharBuffer = Vector<char16_t, 32>;
 
   protected:
@@ -1653,23 +1683,16 @@ class TokenStreamChars<char16_t, AnyChar
                    "should not be ungetting un-normalized code points");
 
         ungetCodePointIgnoreEOL(codePoint);
         if (codePoint == '\n')
             anyCharsAccess().undoInternalUpdateLineInfoForEOL();
     }
 
     /**
-     * Unget a just-gotten LineTerminator sequence: '\r', '\n', '\r\n', or
-     * a Unicode line/paragraph separator, also undoing line/column information
-     * changes reflecting that LineTerminator.
-     */
-    void ungetLineTerminator();
-
-    /**
      * Consume code points til EOL/EOF following the start of a single-line
      * comment, without consuming the EOL/EOF.
      */
     MOZ_MUST_USE bool consumeRestOfSingleLineComment() {
         // This operation is infallible for UTF-16 -- and this implementation
         // approach lets the compiler boil away call-side fallibility handling.
         infallibleConsumeRestOfSingleLineComment();
         return true;