Bug 1434429 - Move TokenStreamSpecific::ungetChar into a new GeneralTokenStreamChars<CharT, AnyCharsAccess> inserted between TokenStreamCharsBase<CharT> and TokenStreamChars<CharT, AnyCharsAccess> in the token stream inheritance hierarchy. r=arai
☠☠ backed out by 63fe40a76a25 ☠ ☠
author: Jeff Walden <jwalden@mit.edu>
Thu, 18 Jan 2018 11:34:27 -0800
changeset 401944 112eaf00632ceff1eaf9a6958e654061ebe4d588
parent 401943 024f70aeba7055c8b34f8b45590ef1df2ec58b40
child 401945 45c9102825ab7d7155a7940cd36334383a067c47
push id: 99460
push user: jwalden@mit.edu
push date: Wed, 31 Jan 2018 23:54:36 +0000
treeherder: mozilla-inbound@95e07a79f4b2 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: arai
bugs: 1434429
milestone: 60.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1434429 - Move TokenStreamSpecific::ungetChar into a new GeneralTokenStreamChars<CharT, AnyCharsAccess> inserted between TokenStreamCharsBase<CharT> and TokenStreamChars<CharT, AnyCharsAccess> in the token stream inheritance hierarchy. r=arai
js/src/frontend/Parser.h
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
--- a/js/src/frontend/Parser.h
+++ b/js/src/frontend/Parser.h
@@ -623,20 +623,20 @@ PerHandlerParser<FullParseHandler>::clea
 
 enum class ExpressionClosure { Allowed, Forbidden };
 
 template<class Parser>
 class ParserAnyCharsAccess
 {
   public:
     using TokenStreamSpecific = typename Parser::TokenStream;
-    using TokenStreamChars = typename TokenStreamSpecific::CharsBase;
+    using GeneralTokenStreamChars = typename TokenStreamSpecific::GeneralCharsBase;
 
-    static inline TokenStreamAnyChars& anyChars(TokenStreamChars* ts);
-    static inline const TokenStreamAnyChars& anyChars(const TokenStreamChars* ts);
+    static inline TokenStreamAnyChars& anyChars(GeneralTokenStreamChars* ts);
+    static inline const TokenStreamAnyChars& anyChars(const GeneralTokenStreamChars* ts);
 };
 
 // Specify a value for an ES6 grammar parametrization.  We have no enum for
 // [Return] because its behavior is exactly equivalent to checking whether
 // we're in a function box -- easier and simpler than passing an extra
 // parameter everywhere.
 enum YieldHandling { YieldIsName, YieldIsKeyword };
 enum InHandling { InAllowed, InProhibited };
@@ -1513,69 +1513,65 @@ class Parser<FullParseHandler, CharT> fi
     bool checkLocalExportName(PropertyName* ident, uint32_t offset) {
         return checkLabelOrIdentifierReference(ident, offset, YieldIsName);
     }
 
     bool asmJS(Node list);
 };
 
 template<class Parser>
-/* static */ inline TokenStreamAnyChars&
-ParserAnyCharsAccess<Parser>::anyChars(TokenStreamChars* ts)
+/* static */ inline const TokenStreamAnyChars&
+ParserAnyCharsAccess<Parser>::anyChars(const GeneralTokenStreamChars* ts)
 {
     // The structure we're walking through looks like this:
     //
     //   struct ParserBase
     //   {
     //       ...;
     //       TokenStreamAnyChars anyChars;
     //       ...;
     //   };
-    //   struct Parser : ParserBase
+    //   struct Parser : <class that ultimately inherits from ParserBase>
     //   {
     //       ...;
     //       TokenStreamSpecific tokenStream;
     //       ...;
     //   };
     //
-    // We're passed a TokenStreamChars* corresponding to a base class of
-    // Parser::tokenStream.  We cast that pointer to a TokenStreamSpecific*,
+    // We're passed a GeneralTokenStreamChars* (this being a base class of
+    // Parser::tokenStream).  We cast that pointer to a TokenStreamSpecific*,
     // then translate that to the enclosing Parser*, then return the |anyChars|
     // member within.
 
-    auto* tss = static_cast<TokenStreamSpecific*>(ts);
-
-    auto tssAddr = reinterpret_cast<uintptr_t>(tss);
-
-    using ActualTokenStreamType = decltype(static_cast<Parser*>(nullptr)->tokenStream);
-    static_assert(mozilla::IsSame<ActualTokenStreamType, TokenStreamSpecific>::value,
-                                  "Parser::tokenStream must have type TokenStreamSpecific");
-
-    uintptr_t parserAddr = tssAddr - offsetof(Parser, tokenStream);
-
-    return reinterpret_cast<Parser*>(parserAddr)->anyChars;
-}
-
-template<class Parser>
-/* static */ inline const TokenStreamAnyChars&
-ParserAnyCharsAccess<Parser>::anyChars(const typename Parser::TokenStream::CharsBase* ts)
-{
+    static_assert(mozilla::IsBaseOf<GeneralTokenStreamChars,
+                                    TokenStreamSpecific>::value,
+                  "the static_cast<> below assumes a base-class relationship");
     const auto* tss = static_cast<const TokenStreamSpecific*>(ts);
 
     auto tssAddr = reinterpret_cast<uintptr_t>(tss);
 
     using ActualTokenStreamType = decltype(static_cast<Parser*>(nullptr)->tokenStream);
     static_assert(mozilla::IsSame<ActualTokenStreamType, TokenStreamSpecific>::value,
                                   "Parser::tokenStream must have type TokenStreamSpecific");
 
     uintptr_t parserAddr = tssAddr - offsetof(Parser, tokenStream);
 
     return reinterpret_cast<const Parser*>(parserAddr)->anyChars;
 }
 
+template<class Parser>
+/* static */ inline TokenStreamAnyChars&
+ParserAnyCharsAccess<Parser>::anyChars(GeneralTokenStreamChars* ts)
+{
+    const TokenStreamAnyChars& anyCharsConst =
+        anyChars(const_cast<const GeneralTokenStreamChars*>(ts));
+
+    return const_cast<TokenStreamAnyChars&>(anyCharsConst);
+}
+
 template <class ParseHandler, typename CharT>
 class MOZ_STACK_CLASS AutoAwaitIsKeyword
 {
     using GeneralParser = frontend::GeneralParser<ParseHandler, CharT>;
 
   private:
     GeneralParser* parser_;
     AwaitHandling oldAwaitHandling_;
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -562,17 +562,17 @@ TokenStreamAnyChars::undoGetChar()
     MOZ_ASSERT(prevLinebase != size_t(-1)); // we should never get more than one EOL
     linebase = prevLinebase;
     prevLinebase = size_t(-1);
     lineno--;
 }
 
 template<typename CharT, class AnyCharsAccess>
 void
-TokenStreamSpecific<CharT, AnyCharsAccess>::ungetChar(int32_t c)
+GeneralTokenStreamChars<CharT, AnyCharsAccess>::ungetChar(int32_t c)
 {
     if (c == EOF)
         return;
 
     MOZ_ASSERT(!userbuf.atStart());
     userbuf.ungetRawChar();
     if (c == '\n') {
 #ifdef DEBUG
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -68,40 +68,72 @@
  * identifier: |a\u0062c| versus |abc|, for example).
  *
  * Additionally, some functions operating on this data are defined the same way
  * no matter what character type you have -- the offset being |offset - start|
  * no matter whether those two variables are single- or double-byte pointers.
  *
  * All such functionality lives in TokenStreamCharsBase<CharT>.
  *
- * == TokenStreamChars<CharT, AnyCharsAccess> → TokenStreamCharsBase<CharT> ==
+ * == GeneralTokenStreamChars<CharT, AnyCharsAccess> →
+ *    TokenStreamCharsBase<CharT> ==
+ *
+ * Some functionality operates differently on different character types, just
+ * as for TokenStreamCharsBase, but additionally requires access to character-
+ * type-agnostic information in TokenStreamAnyChars.  For example, getting the
+ * next character performs different steps for different character types and
+ * must access TokenStreamAnyChars to update line break information.
  *
- * Some functionality operates at a very low level upon character-type-specific
- * data, but in distinct ways.  For example, "is this character the start of a
- * multi-character codepoint?"  Consider how such functionality would work on
- * various encodings (hypothetically -- we haven't fully implemented any
- * particular single-byte encoding support yet):
+ * Such functionality, if it can be defined using the same algorithm for all
+ * character types, lives in GeneralTokenStreamChars<CharT, AnyCharsAccess>.
+ * The AnyCharsAccess parameter provides a way for a GeneralTokenStreamChars
+ * instance to access its corresponding TokenStreamAnyChars, without inheriting
+ * from it.
  *
- *   * For two-byte text, the character must pass |unicode::IsLeadSurrogate|.
- *   * For single-byte Latin-1 text, there are no multi-character codepoints.
- *   * For single-byte UTF-8 text, the answer depends on how many high bits of
- *     the character are set.
+ * GeneralTokenStreamChars<CharT, AnyCharsAccess> is just functionality, no
+ * actual member data.
  *
  * Such functionality all lives in TokenStreamChars<CharT, AnyCharsAccess>, a
  * declared-but-not-defined template class whose specializations have a common
  * public interface (plus whatever private helper functions are desirable).
  *
- * Why the AnyCharsAccess parameter?  Some functionality along these lines
- * really wants TokenStreamSpecific, below, e.g. to report an error.  Providing
- * this parameter allows TokenStreamChars functions to statically cast to this
- * presumed superclass to access its functionality.
+ * == TokenStreamChars<CharT, AnyCharsAccess> →
+ *    GeneralTokenStreamChars<CharT, AnyCharsAccess> ==
+ *
+ * Some functionality is like that in GeneralTokenStreamChars, *but* it's
+ * defined entirely differently for different character types.
+ *
+ * For example, consider "match a multi-code unit code point" (hypothetically:
+ * we've only implemented two-byte tokenizing right now):
  *
- * TokenStreamChars<CharT, AnyCharsAccess> is just functionality, no actual
- * member data.
+ *   * For two-byte text, there must be two code units to get, the leading code
+ *     unit must be a UTF-16 lead surrogate, and the trailing code unit must be
+ *     a UTF-16 trailing surrogate.  (If any of these fail to hold, the next
+ *     code unit alone encodes a code point: there's no multi-unit match.)
+ *   * For single-byte Latin-1 text, there are no multi-code unit code points.
+ *   * For single-byte UTF-8 text, the first code unit must have N > 1 of its
+ *     highest bits set (and the next unset), and |N - 1| successive code units
+ *     must have their high bit set and next-highest bit unset, *and*
+ *     concatenating all unconstrained bits together must not produce a code
+ *     point value that could have been encoded in fewer code units.
+ *
+ * This functionality can't be implemented as member functions in
+ * GeneralTokenStreamChars because we'd need to *partially specialize* those
+ * functions -- hold CharT constant while letting AnyCharsAccess vary.  But
+ * C++ forbids function template partial specialization like this: either you
+ * fix *all* parameters or you fix none of them.
+ *
+ * Fortunately, C++ *does* allow *class* template partial specialization.  So
+ * TokenStreamChars is a template class with one specialization per CharT.
+ * Functions can be defined differently in the different specializations,
+ * because AnyCharsAccess as the only template parameter on member functions
+ * *can* vary.
+ *
+ * All TokenStreamChars<CharT, AnyCharsAccess> specializations, one per CharT,
+ * are just functionality, no actual member data.
  *
  * == TokenStreamSpecific<CharT, AnyCharsAccess> →
  *    TokenStreamChars<CharT, AnyCharsAccess>, TokenStreamShared ==
  *
  * TokenStreamSpecific is operations that are parametrized on character type
  * but implement the *general* idea of tokenizing, without being intrinsically
  * tied to character type.  Notably, this includes all operations that can
  * report warnings or errors at particular offsets, because we include a line
@@ -467,16 +499,17 @@ class TokenStreamSpecific;
 class TokenStreamAnyChars
   : public TokenStreamShared,
     public ErrorReporter
 {
   public:
     TokenStreamAnyChars(JSContext* cx, const ReadOnlyCompileOptions& options,
                         StrictModeGetter* smg);
 
+    template<typename CharT, class AnyCharsAccess> friend class GeneralTokenStreamChars;
     template<typename CharT, class AnyCharsAccess> friend class TokenStreamSpecific;
 
     // Accessors.
     const Token& currentToken() const { return tokens[cursor]; }
     bool isCurrentTokenType(TokenKind type) const {
         return currentToken().type == type;
     }
 
@@ -954,46 +987,69 @@ class TokenStreamCharsBase
 
 template<>
 /* static */ MOZ_ALWAYS_INLINE JSAtom*
 TokenStreamCharsBase<char16_t>::atomizeChars(JSContext* cx, const char16_t* chars, size_t length)
 {
     return AtomizeChars(cx, chars, length);
 }
 
-template<typename CharT, class AnyCharsAccess> class TokenStreamChars;
+template<typename CharT, class AnyCharsAccess>
+class GeneralTokenStreamChars
+  : public TokenStreamCharsBase<CharT>
+{
+    using CharsSharedBase = TokenStreamCharsBase<CharT>;
+
+    using typename CharsSharedBase::TokenBuf;
+
+    using CharsSharedBase::userbuf;
 
-template<class AnyCharsAccess>
-class TokenStreamChars<char16_t, AnyCharsAccess>
-  : public TokenStreamCharsBase<char16_t>
-{
-    using Self = TokenStreamChars<char16_t, AnyCharsAccess>;
-    using CharsBase = TokenStreamCharsBase<char16_t>;
+  public:
+    using CharsSharedBase::CharsSharedBase;
 
-    using TokenStreamSpecific = frontend::TokenStreamSpecific<char16_t, AnyCharsAccess>;
+    TokenStreamAnyChars& anyCharsAccess() {
+        return AnyCharsAccess::anyChars(this);
+    }
+
+    const TokenStreamAnyChars& anyCharsAccess() const {
+        return AnyCharsAccess::anyChars(this);
+    }
+
+    using TokenStreamSpecific = frontend::TokenStreamSpecific<CharT, AnyCharsAccess>;
 
     TokenStreamSpecific* asSpecific() {
-        static_assert(mozilla::IsBaseOf<Self, TokenStreamSpecific>::value,
+        static_assert(mozilla::IsBaseOf<GeneralTokenStreamChars, TokenStreamSpecific>::value,
                       "static_cast below presumes an inheritance relationship");
 
         return static_cast<TokenStreamSpecific*>(this);
     }
 
+    void ungetChar(int32_t c);
+};
+
+template<typename CharT, class AnyCharsAccess> class TokenStreamChars;
+
+template<class AnyCharsAccess>
+class TokenStreamChars<char16_t, AnyCharsAccess>
+  : public GeneralTokenStreamChars<char16_t, AnyCharsAccess>
+{
+    using Self = TokenStreamChars<char16_t, AnyCharsAccess>;
+    using GeneralCharsBase = GeneralTokenStreamChars<char16_t, AnyCharsAccess>;
+    using CharsSharedBase = TokenStreamCharsBase<char16_t>;
+
     bool matchTrailForLeadSurrogate(char16_t lead, uint32_t* codePoint);
 
   public:
-    using CharsBase::CharsBase;
+    using typename GeneralCharsBase::TokenStreamSpecific;
 
-    TokenStreamAnyChars& anyChars() {
-        return AnyCharsAccess::anyChars(this);
-    }
+    using GeneralCharsBase::asSpecific;
+    using GeneralCharsBase::anyCharsAccess;
 
-    const TokenStreamAnyChars& anyChars() const {
-        return AnyCharsAccess::anyChars(this);
-    }
+  public:
+    using GeneralCharsBase::GeneralCharsBase;
 
     MOZ_ALWAYS_INLINE bool isMultiUnitCodepoint(char16_t c, uint32_t* codepoint) {
         if (MOZ_LIKELY(!unicode::IsLeadSurrogate(c)))
             return false;
 
         return matchTrailForLeadSurrogate(c, codepoint);
     }
 };
@@ -1041,16 +1097,17 @@ class TokenStreamChars<char16_t, AnyChar
 //
 template<typename CharT, class AnyCharsAccess>
 class MOZ_STACK_CLASS TokenStreamSpecific
   : public TokenStreamChars<CharT, AnyCharsAccess>,
     public TokenStreamShared
 {
   public:
     using CharsBase = TokenStreamChars<CharT, AnyCharsAccess>;
+    using GeneralCharsBase = GeneralTokenStreamChars<CharT, AnyCharsAccess>;
     using CharsSharedBase = TokenStreamCharsBase<CharT>;
 
     // Anything inherited through a base class whose type depends upon this
     // class's template parameters can only be accessed through a dependent
     // name: prefixed with |this|, by explicit qualification, and so on.  (This
     // is so that references to inherited fields are statically distinguishable
     // from references to names outside of the class.)  This is tedious and
     // onerous.
@@ -1058,42 +1115,36 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
     // As an alternative, we directly add every one of these functions to this
     // class, using explicit qualification to address the dependent-name
     // problem.  |this| or other qualification is no longer necessary -- at
     // cost of this ever-changing laundry list of |using|s.  So it goes.
   public:
     using typename CharsSharedBase::Position;
 
   public:
+    using GeneralCharsBase::anyCharsAccess;
     using CharsSharedBase::getTokenbuf;
 
   private:
     using typename CharsSharedBase::CharBuffer;
     using typename CharsSharedBase::TokenBuf;
 
   private:
     using CharsSharedBase::appendMultiUnitCodepointToTokenbuf;
     using CharsSharedBase::atomizeChars;
     using CharsSharedBase::copyTokenbufTo;
     using CharsBase::isMultiUnitCodepoint;
     using CharsSharedBase::tokenbuf;
+    using GeneralCharsBase::ungetChar;
     using CharsSharedBase::userbuf;
 
   public:
     TokenStreamSpecific(JSContext* cx, const ReadOnlyCompileOptions& options,
                         const CharT* base, size_t length);
 
-    TokenStreamAnyChars& anyCharsAccess() {
-        return CharsBase::anyChars();
-    }
-
-    const TokenStreamAnyChars& anyCharsAccess() const {
-        return CharsBase::anyChars();
-    }
-
     // If there is an invalid escape in a template, report it and return false,
     // otherwise return true.
     bool checkForInvalidTemplateEscapeError() {
         if (anyCharsAccess().invalidTemplateEscapeType == InvalidEscapeType::None)
             return true;
 
         reportInvalidEscapeError(anyCharsAccess().invalidTemplateEscapeOffset,
                                  anyCharsAccess().invalidTemplateEscapeType);
@@ -1357,17 +1408,16 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
 
     // Try to get the next character, normalizing '\r', '\r\n', and '\n' into
     // '\n'.  Also updates internal line-counter state.  Return true on success
     // and store the character in |*c|.  Return false and leave |*c| undefined
     // on failure.
     MOZ_MUST_USE bool getChar(int32_t* cp);
     int32_t getCharIgnoreEOL();
 
-    void ungetChar(int32_t c);
     void ungetCharIgnoreEOL(int32_t c);
     Token* newToken(ptrdiff_t adjust);
     uint32_t peekUnicodeEscape(uint32_t* codePoint);
     uint32_t peekExtendedUnicodeEscape(uint32_t* codePoint);
     uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
     bool matchUnicodeEscapeIdent(uint32_t* codePoint);
     bool peekChars(int n, CharT* cp);