Bug 914072 - Have nsCSSScanner store any implied characters at EOF needed for correct serialization. r=dbaron
author Cameron McCormack <cam@mcc.id.au>
Sat, 07 Dec 2013 12:25:07 +1100
changeset 175014 957d85b31ff325d765c2b5bc09d85c201072362e
parent 175013 a55439a67e05cb3cb7076ac4cee6039102607c93
child 175015 d58ca9a622c04d83f16077d8cfc5ab69e8b868bd
push id 445
push user ffxbld
push date Mon, 10 Mar 2014 22:05:19 +0000
treeherder mozilla-release@dc38b741b04e [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dbaron
bugs 914072
milestone 28.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 914072 - Have nsCSSScanner store any implied characters at EOF needed for correct serialization. r=dbaron
layout/style/nsCSSScanner.cpp
layout/style/nsCSSScanner.h
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@@ -345,16 +345,17 @@ nsCSSScanner::nsCSSScanner(const nsAStri
   , mOffset(0)
   , mCount(aBuffer.Length())
   , mLineNumber(aLineNumber)
   , mLineOffset(0)
   , mTokenLineNumber(aLineNumber)
   , mTokenLineOffset(0)
   , mTokenOffset(0)
   , mRecordStartOffset(0)
+  , mEOFCharacters(eEOFCharacters_None)
   , mReporter(nullptr)
   , mSVGMode(false)
   , mRecording(false)
   , mSeenBadToken(false)
 {
   MOZ_COUNT_CTOR(nsCSSScanner);
 }
 
@@ -507,23 +508,32 @@ void
 nsCSSScanner::SkipComment()
 {
   MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
   Advance(2);
   for (;;) {
     int32_t ch = Peek();
     if (ch < 0) {
       mReporter->ReportUnexpectedEOF("PECommentEOF");
+      SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash); // needs "*/"
       return;
     }
-    if (ch == '*' && Peek(1) == '/') {
-      Advance(2);
-      return;
-    }
-    if (IsVertSpace(ch)) {
+    if (ch == '*') { // possible start of the closing "*/"
+      Advance();
+      ch = Peek();
+      if (ch < 0) {
+        mReporter->ReportUnexpectedEOF("PECommentEOF");
+        SetEOFCharacters(eEOFCharacters_Slash); // '*' present; only '/' missing
+        return;
+      }
+      if (ch == '/') {
+        Advance();
+        return;
+      }
+    } else if (IsVertSpace(ch)) { // (after a lone '*', the next pass re-peeks ch)
       AdvanceLine();
     } else {
       Advance();
     }
   }
 }
 
 /**
@@ -538,18 +548,21 @@ nsCSSScanner::GatherEscape(nsString& aOu
 {
   MOZ_ASSERT(Peek() == '\\', "should not have been called");
   int32_t ch = Peek(1);
   if (ch < 0) {
     // If we are in a string (or a url() containing a string), we want to drop
     // the backslash on the floor.  Otherwise, we want to treat it as a U+FFFD
     // character.
     Advance();
-    if (!aInString) {
+    if (aInString) {
+      SetEOFCharacters(eEOFCharacters_DropBackslash);
+    } else {
       aOutput.Append(UCS2_REPLACEMENT_CHAR);
+      SetEOFCharacters(eEOFCharacters_ReplacementChar);
     }
     return true;
   }
   if (IsVertSpace(ch)) {
     if (aInString) {
       // In strings (and in url() containing a string), escaped
       // newlines are completely removed, to allow splitting over
       // multiple lines.
@@ -674,16 +687,18 @@ nsCSSScanner::GatherText(uint8_t aClass,
  * both of which begin indistinguishably from an identifier.  It can
  * produce a Symbol token when an apparent identifier actually led
  * into an invalid escape sequence.
  */
 bool
 nsCSSScanner::ScanIdent(nsCSSToken& aToken)
 {
   if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
+    MOZ_ASSERT(Peek() == '\\',
+               "unexpected IsIdentStart character that did not begin an ident");
     aToken.mSymbol = Peek();
     Advance();
     return true;
   }
 
   if (MOZ_LIKELY(Peek() != '(')) {
     aToken.mType = eCSSToken_Ident;
     return true;
@@ -906,16 +921,18 @@ nsCSSScanner::ScanString(nsCSSToken& aTo
   aToken.mSymbol = PRUnichar(aStop); // Remember how it's quoted.
   Advance();
 
   for (;;) {
     GatherText(IS_STRING, aToken.mIdent);
 
     int32_t ch = Peek();
     if (ch == -1) {
+      AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
+                                      eEOFCharacters_SingleQuote);
       break; // EOF ends a string token with no error.
     }
     if (ch == aStop) {
       Advance();
       break;
     }
     // Both " and ' are excluded from IS_STRING.
     if (ch == '"' || ch == '\'') {
@@ -1011,16 +1028,89 @@ nsCSSScanner::ScanURange(nsCSSToken& aRe
 
   aResult.mInteger = low;
   aResult.mInteger2 = high;
   aResult.mIntegerValid = valid;
   aResult.mType = eCSSToken_URange;
   return true;
 }
 
+#ifdef DEBUG
+/* static */ void
+nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
+{ // Asserts that c is exactly one of the flag combinations listed below.
+  MOZ_ASSERT(c == eEOFCharacters_None ||
+             c == eEOFCharacters_ReplacementChar || // \<EOF> outside a string
+             c == eEOFCharacters_Slash ||           // comment cut off after '*'
+             c == (eEOFCharacters_Asterisk |
+                   eEOFCharacters_Slash) ||         // comment cut off elsewhere
+             c == eEOFCharacters_DoubleQuote ||     // unterminated string
+             c == eEOFCharacters_SingleQuote ||     // unterminated string
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_DoubleQuote) ||   // string ending in \<EOF>
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_SingleQuote) ||   // string ending in \<EOF>
+             c == eEOFCharacters_CloseParen ||      // URL missing its ')'
+             c == (eEOFCharacters_ReplacementChar | // the remaining entries pair
+                   eEOFCharacters_CloseParen) ||    // an earlier case with the
+             c == (eEOFCharacters_DoubleQuote |     // missing ')' of a URL
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_SingleQuote |
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_DoubleQuote |
+                   eEOFCharacters_CloseParen) ||
+             c == (eEOFCharacters_DropBackslash |
+                   eEOFCharacters_SingleQuote |
+                   eEOFCharacters_CloseParen),
+             "invalid EOFCharacters value");
+}
+#endif
+
+void
+nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
+{ // Overwrites the stored flags with aEOFCharacters.
+  mEOFCharacters = static_cast<EOFCharacters>(aEOFCharacters);
+}
+
+void
+nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
+{ // ORs aEOFCharacters into the stored flags, keeping those already set.
+  mEOFCharacters = static_cast<EOFCharacters>(mEOFCharacters | aEOFCharacters);
+}
+
+static const PRUnichar kImpliedEOFCharacters[] = {
+  UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
+}; // one entry per append bit, lowest bit first; the 0 terminates the list
+
+/* static */ void
+nsCSSScanner::AdjustTokenStreamForEOFCharacters(EOFCharacters aEOFCharacters,
+                                                nsAString& aResult)
+{ // Edits aResult in place: drops/appends the characters aEOFCharacters names.
+  uint32_t c = aEOFCharacters;
+
+  // First, handle eEOFCharacters_DropBackslash: remove the trailing backslash.
+  if (c & eEOFCharacters_DropBackslash) {
+    MOZ_ASSERT(!aResult.IsEmpty() && aResult[aResult.Length() - 1] == '\\');
+    aResult.SetLength(aResult.IsEmpty() ? 0 : aResult.Length() - 1); // no wrap
+  }
+
+  c >>= 1; // discard the DropBackslash bit so bit 0 lines up with entry 0
+
+  // All of the remaining EOFCharacters bits represent appended characters,
+  // and the bits are in the order that they need appending.
+  for (const PRUnichar* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) {
+    if (c & 1) {
+      aResult.Append(*p);
+    }
+  }
+
+  MOZ_ASSERT(c == 0, "too many bits in aEOFCharacters");
+}
+
 /**
  * Consume the part of an URL token after the initial 'url('.  Caller
  * is assumed to have consumed 'url(' already.  Will always produce
  * either an URL or a Bad_URL token.
  *
  * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
  * the special lexical rules for URL tokens in a nonstandard context.
  */
@@ -1053,16 +1143,19 @@ nsCSSScanner::NextURL(nsCSSToken& aToken
   }
 
   // Consume trailing whitespace and then look for a close parenthesis.
   SkipWhitespace();
   ch = Peek();
   if (MOZ_LIKELY(ch < 0 || ch == ')')) {
     Advance();
     aToken.mType = eCSSToken_URL;
+    if (ch < 0) {
+      AddEOFCharacters(eEOFCharacters_CloseParen);
+    }
   } else {
     mSeenBadToken = true;
     aToken.mType = eCSSToken_Bad_URL;
   }
   return true;
 }
 
 /**
--- a/layout/style/nsCSSScanner.h
+++ b/layout/style/nsCSSScanner.h
@@ -189,16 +189,56 @@ class nsCSSScanner {
 
   // Abandons recording of the input stream.
   void StopRecording();
 
   // Stops recording of the input stream and appends the recorded
   // input to aBuffer.
   void StopRecording(nsString& aBuffer);
 
+  enum EOFCharacters { // bit flags; append bits are in the order of appending
+    eEOFCharacters_None =                    0x0000,
+
+    // \<EOF> inside a string: remove the trailing backslash
+    eEOFCharacters_DropBackslash =           0x0001,
+
+    // \<EOF> outside a string: append U+FFFD
+    eEOFCharacters_ReplacementChar =         0x0002,
+
+    // unterminated comment: append '*' and/or '/'
+    eEOFCharacters_Asterisk =                0x0004,
+    eEOFCharacters_Slash =                   0x0008,
+
+    // unterminated double-quoted string: append '"'
+    eEOFCharacters_DoubleQuote =             0x0010,
+
+    // unterminated single-quoted string: append '\''
+    eEOFCharacters_SingleQuote =             0x0020,
+
+    // unterminated URL: append ')'
+    eEOFCharacters_CloseParen =              0x0040,
+  };
+
+  // Appends characters to, or drops characters from, the specified string
+  // (text taken from the input stream) so that its last token does not rely
+  // on special EOF handling behavior.
+  static void AdjustTokenStreamForEOFCharacters(EOFCharacters aEOFCharacters,
+                                                nsAString& aString);
+
+  EOFCharacters GetEOFCharacters() const { // returns the stored EOF flags
+#ifdef DEBUG
+    AssertEOFCharactersValid(mEOFCharacters); // debug builds validate first
+#endif
+    return mEOFCharacters;
+  }
+
+#ifdef DEBUG
+  static void AssertEOFCharactersValid(uint32_t c);
+#endif
+
 protected:
   int32_t Peek(uint32_t n = 0);
   void Advance(uint32_t n = 1);
   void AdvanceLine();
 
   void SkipWhitespace();
   void SkipComment();
 
@@ -207,28 +247,32 @@ protected:
 
   bool ScanIdent(nsCSSToken& aResult);
   bool ScanAtKeyword(nsCSSToken& aResult);
   bool ScanHash(nsCSSToken& aResult);
   bool ScanNumber(nsCSSToken& aResult);
   bool ScanString(nsCSSToken& aResult);
   bool ScanURange(nsCSSToken& aResult);
 
+  void SetEOFCharacters(uint32_t aEOFCharacters);
+  void AddEOFCharacters(uint32_t aEOFCharacters);
+
   const PRUnichar *mBuffer;
   uint32_t mOffset;
   uint32_t mCount;
 
   uint32_t mLineNumber;
   uint32_t mLineOffset;
 
   uint32_t mTokenLineNumber;
   uint32_t mTokenLineOffset;
   uint32_t mTokenOffset;
 
   uint32_t mRecordStartOffset;
+  EOFCharacters mEOFCharacters;
 
   mozilla::css::ErrorReporter *mReporter;
 
   // True if we are in SVG mode; false in "normal" CSS
   bool mSVGMode;
   bool mRecording;
   bool mSeenBadToken;
 };