Bug 543151, part A1: Preliminary cleanups to the scanner/parser interface and the organization of nsCSSScanner.cpp. r=heycam
author: Zack Weinberg <zackw@panix.com>
Sat, 16 Feb 2013 18:27:53 -0500
changeset 122153 a94f2ab8ddf94486b724578854d02161c83ade23
parent 122152 81fac90f0e9fa739de50aa9a76258b38e0c0c8b5
child 122154 68c9f2677339f5a238a08af93c80f9afd2cf0f77
push id: 24320
push user: ryanvm@gmail.com
push date: Sun, 17 Feb 2013 12:06:45 +0000
treeherder: mozilla-central@5e137a87e84f [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: heycam
bugs: 543151
milestone: 21.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 543151, part A1: Preliminary cleanups to the scanner/parser interface and the organization of nsCSSScanner.cpp. r=heycam
layout/style/nsCSSParser.cpp
layout/style/nsCSSScanner.cpp
layout/style/nsCSSScanner.h
--- a/layout/style/nsCSSParser.cpp
+++ b/layout/style/nsCSSParser.cpp
@@ -335,25 +335,16 @@ protected:
   void ReleaseScanner(void);
   bool IsSVGMode() const {
     return mScanner->IsSVGMode();
   }
 
   bool GetToken(bool aSkipWS);
   void UngetToken();
 
-  // get the part in paretheses of the url() function, which is really a
-  // part of a token in the CSS grammar, but we're using a combination
-  // of the parser and the scanner to do it to handle the backtracking
-  // required by the error handling of the tokenization (since if we
-  // fail to scan the full token, we should fall back to tokenizing as
-  // FUNCTION ... ')').
-  // Note that this function WILL WRITE TO aURL IN SOME FAILURE CASES.
-  bool GetURLInParens(nsString& aURL);
-
   bool ExpectSymbol(PRUnichar aSymbol, bool aSkipWS);
   bool ExpectEndProperty();
   bool CheckEndProperty();
   nsSubstring* NextIdent();
 
   // returns true when the stop symbol is found, and false for EOF
   bool SkipUntil(PRUnichar aStopSymbol);
   void SkipUntilOneOf(const PRUnichar* aStopSymbolChars);
@@ -1432,54 +1423,23 @@ CSSParserImpl::EvaluateSupportsCondition
   return parsedOK && conditionMet;
 }
 
 //----------------------------------------------------------------------
 
 bool
 CSSParserImpl::GetToken(bool aSkipWS)
 {
-  for (;;) {
-    if (!mHavePushBack) {
-      if (!mScanner->Next(mToken)) {
-        break;
-      }
-    }
+  if (mHavePushBack) {
     mHavePushBack = false;
-    if (aSkipWS && (eCSSToken_WhiteSpace == mToken.mType)) {
-      continue;
-    }
-    return true;
-  }
-  return false;
-}
-
-bool
-CSSParserImpl::GetURLInParens(nsString& aURL)
-{
-  NS_ASSERTION(!mHavePushBack, "mustn't have pushback at this point");
-  if (! mScanner->NextURL(mToken)) {
-    // EOF
-    return false;
-  }
-
-  aURL = mToken.mIdent;
-
-  if (eCSSToken_URL != mToken.mType) {
-    // In the failure case (which gives a token of type
-    // eCSSToken_Bad_URL), we do not have to match parentheses *inside*
-    // the Bad_URL token, since this is now an invalid URL token.  But
-    // we do need to match the closing parenthesis to match the 'url('.
-    NS_ABORT_IF_FALSE(mToken.mType == eCSSToken_Bad_URL,
-                      "unexpected token type");
-    SkipUntil(')');
-    return false;
-  }
-
-  return true;
+    if (!aSkipWS || mToken.mType != eCSSToken_Whitespace) {
+      return true;
+    }
+  }
+  return mScanner->Next(mToken, aSkipWS);
 }
 
 void
 CSSParserImpl::UngetToken()
 {
   NS_PRECONDITION(!mHavePushBack, "double pushback");
   mHavePushBack = true;
 }
@@ -2202,27 +2162,28 @@ CSSParserImpl::ParseMozDocumentRule(Rule
       }
     } else {
       if (mToken.mIdent.LowerCaseEqualsLiteral("url-prefix")) {
         cur->func = css::DocumentRule::eURLPrefix;
       } else if (mToken.mIdent.LowerCaseEqualsLiteral("domain")) {
         cur->func = css::DocumentRule::eDomain;
       }
 
-      nsAutoString url;
-      if (!GetURLInParens(url)) {
+      NS_ASSERTION(!mHavePushBack, "mustn't have pushback at this point");
+      if (!mScanner->NextURL(mToken) || mToken.mType != eCSSToken_URL) {
         REPORT_UNEXPECTED_TOKEN(PEMozDocRuleNotURI);
+        SkipUntil(')');
         delete urls;
         return false;
       }
 
       // We could try to make the URL (as long as it's not domain())
       // canonical and absolute with NS_NewURI and GetSpec, but I'm
       // inclined to think we shouldn't.
-      CopyUTF16toUTF8(url, cur->url);
+      CopyUTF16toUTF8(mToken.mIdent, cur->url);
     }
   } while (ExpectSymbol(',', true));
 
   nsRefPtr<css::DocumentRule> rule = new css::DocumentRule();
   rule->SetURLs(urls);
 
   return ParseGroupRule(rule, aAppendFunc, aData);
 }
@@ -3034,17 +2995,17 @@ CSSParserImpl::ParseSelectorGroup(nsCSSS
     }
 
     // Look for a combinator.
     if (!GetToken(false)) {
       break; // EOF ok here
     }
 
     combinator = PRUnichar(0);
-    if (mToken.mType == eCSSToken_WhiteSpace) {
+    if (mToken.mType == eCSSToken_Whitespace) {
       if (!GetToken(true)) {
         break; // EOF ok here
       }
       combinator = PRUnichar(' ');
     }
 
     if (mToken.mType != eCSSToken_Symbol) {
       UngetToken(); // not a combinator
@@ -4116,17 +4077,17 @@ CSSParserImpl::ParseColor(nsCSSValue& aV
     REPORT_UNEXPECTED_EOF(PEColorEOF);
     return false;
   }
 
   nsCSSToken* tk = &mToken;
   nscolor rgba;
   switch (tk->mType) {
     case eCSSToken_ID:
-    case eCSSToken_Ref:
+    case eCSSToken_Hash:
       // #xxyyzz
       if (NS_HexToRGB(tk->mIdent, &rgba)) {
         aValue.SetColorValue(rgba);
         return true;
       }
       break;
 
     case eCSSToken_Ident:
@@ -5026,17 +4987,17 @@ CSSParserImpl::ParseVariant(nsCSSValue& 
   if ((aVariantMask & VARIANT_ELEMENT) != 0 &&
       eCSSToken_Function == tk->mType &&
       tk->mIdent.LowerCaseEqualsLiteral("-moz-element")) {
     return ParseElement(aValue);
   }
   if ((aVariantMask & VARIANT_COLOR) != 0) {
     if (mHashlessColorQuirk || // NONSTANDARD: Nav interprets 'xxyyzz' values even without '#' prefix
         (eCSSToken_ID == tk->mType) ||
-        (eCSSToken_Ref == tk->mType) ||
+        (eCSSToken_Hash == tk->mType) ||
         (eCSSToken_Ident == tk->mType) ||
         ((eCSSToken_Function == tk->mType) &&
          (tk->mIdent.LowerCaseEqualsLiteral("rgb") ||
           tk->mIdent.LowerCaseEqualsLiteral("hsl") ||
           tk->mIdent.LowerCaseEqualsLiteral("-moz-rgba") ||
           tk->mIdent.LowerCaseEqualsLiteral("-moz-hsla") ||
           tk->mIdent.LowerCaseEqualsLiteral("rgba") ||
           tk->mIdent.LowerCaseEqualsLiteral("hsla"))))
@@ -5743,17 +5704,17 @@ CSSParserImpl::IsLegacyGradientLine(cons
   case eCSSToken_Function:
     if (aId.LowerCaseEqualsLiteral("calc") ||
         aId.LowerCaseEqualsLiteral("-moz-calc")) {
       haveGradientLine = true;
       break;
     }
     // fall through
   case eCSSToken_ID:
-  case eCSSToken_Ref:
+  case eCSSToken_Hash:
     // this is a color
     break;
 
   case eCSSToken_Ident: {
     // This is only a gradient line if it's a box position keyword.
     nsCSSKeyword kw = nsCSSKeywords::LookupKeyword(aId);
     int32_t junk;
     if (kw != eCSSKeyword_UNKNOWN &&
@@ -8016,17 +7977,17 @@ CSSParserImpl::ParseCalcTerm(nsCSSValue&
 
 // This function consumes all consecutive whitespace and returns whether
 // there was any.
 bool
 CSSParserImpl::RequireWhitespace()
 {
   if (!GetToken(false))
     return false;
-  if (mToken.mType != eCSSToken_WhiteSpace) {
+  if (mToken.mType != eCSSToken_Whitespace) {
     UngetToken();
     return false;
   }
   // Skip any additional whitespace tokens.
   if (GetToken(true)) {
     UngetToken();
   }
   return true;
@@ -8422,17 +8383,17 @@ CSSParserImpl::ParseOneFamily(nsAString&
     aFamily.Append(tk->mIdent);
     for (;;) {
       if (!GetToken(false))
         break;
 
       if (eCSSToken_Ident == tk->mType) {
         aOneKeyword = false;
         aFamily.Append(tk->mIdent);
-      } else if (eCSSToken_WhiteSpace == tk->mType) {
+      } else if (eCSSToken_Whitespace == tk->mType) {
         // Lookahead one token and drop whitespace if we are ending the
         // font name.
         if (!GetToken(true))
           break;
 
         UngetToken();
         if (eCSSToken_Ident == tk->mType)
           aFamily.Append(PRUnichar(' '));
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@@ -116,36 +116,31 @@ HexDigitValue(int32_t ch)
   } else {
     // Note: c&7 just keeps the low three bits which causes
     // upper and lower case alphabetics to both yield their
     // "relative to 10" value for computing the hex value.
     return (ch & 0x7) + 9;
   }
 }
 
-nsCSSToken::nsCSSToken()
-{
-  mType = eCSSToken_Symbol;
-}
-
 void
 nsCSSToken::AppendToString(nsString& aBuffer) const
 {
   switch (mType) {
     case eCSSToken_Ident:
       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
       break;
 
     case eCSSToken_AtKeyword:
       aBuffer.Append('@');
       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
       break;
 
     case eCSSToken_ID:
-    case eCSSToken_Ref:
+    case eCSSToken_Hash:
       aBuffer.Append('#');
       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
       break;
 
     case eCSSToken_Function:
       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
       aBuffer.Append('(');
       break;
@@ -195,17 +190,17 @@ nsCSSToken::AppendToString(nsString& aBu
     case eCSSToken_String:
       nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
       break;
 
     case eCSSToken_Symbol:
       aBuffer.Append(mSymbol);
       break;
 
-    case eCSSToken_WhiteSpace:
+    case eCSSToken_Whitespace:
       aBuffer.Append(' ');
       break;
 
     case eCSSToken_HTMLComment:
     case eCSSToken_URange:
       aBuffer.Append(mIdent);
       break;
 
@@ -227,17 +222,17 @@ nsCSSToken::AppendToString(nsString& aBu
 
     default:
       NS_ERROR("invalid token type");
       break;
   }
 }
 
 nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
-  : mReadPointer(aBuffer.BeginReading())
+  : mBuffer(aBuffer.BeginReading())
   , mOffset(0)
   , mCount(aBuffer.Length())
   , mPushback(mLocalPushback)
   , mPushbackCount(0)
   , mPushbackSize(ArrayLength(mLocalPushback))
   , mLineNumber(aLineNumber)
   , mLineOffset(0)
   , mTokenLineNumber(aLineNumber)
@@ -254,32 +249,69 @@ nsCSSScanner::nsCSSScanner(const nsAStri
 nsCSSScanner::~nsCSSScanner()
 {
   MOZ_COUNT_DTOR(nsCSSScanner);
   if (mLocalPushback != mPushback) {
     delete [] mPushback;
   }
 }
 
+void
+nsCSSScanner::StartRecording()
+{
+  NS_ASSERTION(!mRecording, "already started recording");
+  mRecording = true;
+  mRecordStartOffset = mOffset - mPushbackCount;
+}
+
+void
+nsCSSScanner::StopRecording()
+{
+  NS_ASSERTION(mRecording, "haven't started recording");
+  mRecording = false;
+}
+
+void
+nsCSSScanner::StopRecording(nsString& aBuffer)
+{
+  NS_ASSERTION(mRecording, "haven't started recording");
+  mRecording = false;
+  aBuffer.Append(mBuffer + mRecordStartOffset,
+                 mOffset - mPushbackCount - mRecordStartOffset);
+}
+
+nsDependentSubstring
+nsCSSScanner::GetCurrentLine() const
+{
+  uint32_t end = mTokenOffset;
+  while (end < mCount &&
+         mBuffer[end] != '\n' && mBuffer[end] != '\r' &&
+         mBuffer[end] != '\f') {
+    end++;
+  }
+  return nsDependentSubstring(mBuffer + mTokenLineOffset,
+                              mBuffer + end);
+}
+
 // Returns -1 on error or eof
 int32_t
 nsCSSScanner::Read()
 {
   int32_t rv;
   if (0 < mPushbackCount) {
     rv = int32_t(mPushback[--mPushbackCount]);
   } else {
     if (mOffset == mCount) {
       return -1;
     }
-    rv = int32_t(mReadPointer[mOffset++]);
+    rv = int32_t(mBuffer[mOffset++]);
     // There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
     // To simplify dealing with newlines, they are all normalized to "\n" here
     if (rv == '\r') {
-      if (mOffset < mCount && mReadPointer[mOffset] == '\n') {
+      if (mOffset < mCount && mBuffer[mOffset] == '\n') {
         mOffset++;
       }
       rv = '\n';
     } else if (rv == '\f') {
       rv = '\n';
     }
     if (rv == '\n') {
       // 0 is a magical line number meaning that we don't know (i.e., script)
@@ -318,53 +350,16 @@ nsCSSScanner::Pushback(PRUnichar aChar)
     if (mPushback != mLocalPushback) {
       delete [] mPushback;
     }
     mPushback = newPushback;
   }
   mPushback[mPushbackCount++] = aChar;
 }
 
-void
-nsCSSScanner::StartRecording()
-{
-  NS_ASSERTION(!mRecording, "already started recording");
-  mRecording = true;
-  mRecordStartOffset = mOffset - mPushbackCount;
-}
-
-void
-nsCSSScanner::StopRecording()
-{
-  NS_ASSERTION(mRecording, "haven't started recording");
-  mRecording = false;
-}
-
-void
-nsCSSScanner::StopRecording(nsString& aBuffer)
-{
-  NS_ASSERTION(mRecording, "haven't started recording");
-  mRecording = false;
-  aBuffer.Append(mReadPointer + mRecordStartOffset,
-                 mOffset - mPushbackCount - mRecordStartOffset);
-}
-
-nsDependentSubstring
-nsCSSScanner::GetCurrentLine() const
-{
-  uint32_t end = mTokenOffset;
-  while (end < mCount &&
-         mReadPointer[end] != '\n' && mReadPointer[end] != '\r' &&
-         mReadPointer[end] != '\f') {
-    end++;
-  }
-  return nsDependentSubstring(mReadPointer + mTokenLineOffset,
-                              mReadPointer + end);
-}
-
 bool
 nsCSSScanner::LookAhead(PRUnichar aChar)
 {
   int32_t ch = Read();
   if (ch < 0) {
     return false;
   }
   if (ch == aChar) {
@@ -384,257 +379,52 @@ nsCSSScanner::LookAheadOrEOF(PRUnichar a
   if (ch == aChar) {
     return true;
   }
   Pushback(ch);
   return false;
 }
 
 void
-nsCSSScanner::EatWhiteSpace()
+nsCSSScanner::SkipWhitespace()
 {
   for (;;) {
     int32_t ch = Read();
     if (ch < 0) {
       break;
     }
     if ((ch != ' ') && (ch != '\n') && (ch != '\t')) {
       Pushback(ch);
       break;
     }
   }
 }
 
-bool
-nsCSSScanner::Next(nsCSSToken& aToken)
+void
+nsCSSScanner::SkipComment()
 {
-  for (;;) { // Infinite loop so we can restart after comments.
-    mTokenOffset = mOffset;
-    mTokenLineOffset = mLineOffset;
-    mTokenLineNumber = mLineNumber;
-
+  for (;;) {
     int32_t ch = Read();
-    if (ch < 0) {
-      return false;
-    }
-
-    // UNICODE-RANGE
-    if ((ch == 'u' || ch == 'U') && Peek() == '+')
-      return ParseURange(ch, aToken);
-
-    // IDENT
-    if (StartsIdent(ch, Peek()))
-      return ParseIdent(ch, aToken);
-
-    // AT_KEYWORD
-    if (ch == '@') {
-      return ParseAtKeyword(aToken);
-    }
-
-    // NUMBER or DIM
-    if ((ch == '.') || (ch == '+') || (ch == '-')) {
-      int32_t nextChar = Peek();
-      if (IsDigit(nextChar)) {
-        return ParseNumber(ch, aToken);
-      }
-      else if (('.' == nextChar) && ('.' != ch)) {
-        nextChar = Read();
-        int32_t followingChar = Peek();
-        Pushback(nextChar);
-        if (IsDigit(followingChar))
-          return ParseNumber(ch, aToken);
-      }
-    }
-    if (IsDigit(ch)) {
-      return ParseNumber(ch, aToken);
-    }
-
-    // ID
-    if (ch == '#') {
-      return ParseRef(ch, aToken);
-    }
-
-    // STRING
-    if ((ch == '"') || (ch == '\'')) {
-      return ParseString(ch, aToken);
-    }
-
-    // WS
-    if (IsWhitespace(ch)) {
-      aToken.mType = eCSSToken_WhiteSpace;
-      aToken.mIdent.Assign(PRUnichar(ch));
-      EatWhiteSpace();
-      return true;
-    }
-    if (ch == '/' && !IsSVGMode()) {
-      int32_t nextChar = Peek();
-      if (nextChar == '*') {
-        Read();
-        // FIXME: Editor wants comments to be preserved (bug 60290).
-        if (!SkipCComment()) {
-          return false;
-        }
-        continue; // start again at the beginning
-      }
-    }
-    if (ch == '<') {  // consume HTML comment tags
-      if (LookAhead('!')) {
-        if (LookAhead('-')) {
-          if (LookAhead('-')) {
-            aToken.mType = eCSSToken_HTMLComment;
-            aToken.mIdent.AssignLiteral("<!--");
-            return true;
-          }
-          Pushback('-');
-        }
-        Pushback('!');
-      }
-    }
-    if (ch == '-') {  // check for HTML comment end
-      if (LookAhead('-')) {
-        if (LookAhead('>')) {
-          aToken.mType = eCSSToken_HTMLComment;
-          aToken.mIdent.AssignLiteral("-->");
-          return true;
-        }
-        Pushback('-');
+    if (ch < 0) break;
+    if (ch == '*') {
+      if (LookAhead('/')) {
+        return;
       }
     }
-
-    // INCLUDES ("~=") and DASHMATCH ("|=")
-    if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
-        ( ch == '$' ) || ( ch == '*' )) {
-      int32_t nextChar = Read();
-      if ( nextChar == '=' ) {
-        if (ch == '~') {
-          aToken.mType = eCSSToken_Includes;
-        }
-        else if (ch == '|') {
-          aToken.mType = eCSSToken_Dashmatch;
-        }
-        else if (ch == '^') {
-          aToken.mType = eCSSToken_Beginsmatch;
-        }
-        else if (ch == '$') {
-          aToken.mType = eCSSToken_Endsmatch;
-        }
-        else if (ch == '*') {
-          aToken.mType = eCSSToken_Containsmatch;
-        }
-        return true;
-      } else if (nextChar >= 0) {
-        Pushback(nextChar);
-      }
-    }
-    aToken.mType = eCSSToken_Symbol;
-    aToken.mSymbol = ch;
-    return true;
-  }
-}
-
-bool
-nsCSSScanner::NextURL(nsCSSToken& aToken)
-{
-  EatWhiteSpace();
-
-  int32_t ch = Read();
-  if (ch < 0) {
-    return false;
-  }
-
-  // STRING
-  if ((ch == '"') || (ch == '\'')) {
-#ifdef DEBUG
-    bool ok =
-#endif
-      ParseString(ch, aToken);
-    NS_ABORT_IF_FALSE(ok, "ParseString should never fail, "
-                          "since there's always something read");
-
-    NS_ABORT_IF_FALSE(aToken.mType == eCSSToken_String ||
-                      aToken.mType == eCSSToken_Bad_String,
-                      "unexpected token type");
-    if (MOZ_LIKELY(aToken.mType == eCSSToken_String)) {
-      EatWhiteSpace();
-      if (LookAheadOrEOF(')')) {
-        aToken.mType = eCSSToken_URL;
-      } else {
-        aToken.mType = eCSSToken_Bad_URL;
-      }
-    } else {
-      aToken.mType = eCSSToken_Bad_URL;
-    }
-    return true;
-  }
-
-  // Process a url lexical token. A CSS1 url token can contain
-  // characters beyond identifier characters (e.g. '/', ':', etc.)
-  // Because of this the normal rules for tokenizing the input don't
-  // apply very well. To simplify the parser and relax some of the
-  // requirements on the scanner we parse url's here. If we find a
-  // malformed URL then we emit a token of type "Bad_URL" so that
-  // the CSS1 parser can ignore the invalid input.  The parser must
-  // treat a Bad_URL token like a Function token, and process
-  // tokens until a matching parenthesis.
-
-  aToken.mType = eCSSToken_Bad_URL;
-  aToken.mSymbol = PRUnichar(0);
-  nsString& ident = aToken.mIdent;
-  ident.SetLength(0);
-
-  // start of a non-quoted url (which may be empty)
-  bool ok = true;
-  for (;;) {
-    if (IsURLChar(ch)) {
-      // A regular url character.
-      ident.Append(PRUnichar(ch));
-    } else if (ch == ')') {
-      // All done
-      break;
-    } else if (IsWhitespace(ch)) {
-      // Whitespace is allowed at the end of the URL
-      EatWhiteSpace();
-      // Consume the close paren if we have it; if not we're an invalid URL.
-      ok = LookAheadOrEOF(')');
-      break;
-    } else if (ch == '\\') {
-      if (!ParseAndAppendEscape(ident, false)) {
-        ok = false;
-        Pushback(ch);
-        break;
-      }
-    } else {
-      // This is an invalid URL spec
-      ok = false;
-      Pushback(ch); // push it back so the parser can match tokens and
-                    // then closing parenthesis
-      break;
-    }
-
-    ch = Read();
-    if (ch < 0) {
-      break;
-    }
   }
 
-  // If the result of the above scanning is ok then change the token
-  // type to a useful one.
-  if (ok) {
-    aToken.mType = eCSSToken_URL;
-  }
-  return true;
+  mReporter->ReportUnexpectedEOF("PECommentEOF");
 }
 
-
 /**
  * Returns whether an escape was succesfully parsed; if it was not,
  * the backslash needs to be its own symbol token.
  */
 bool
-nsCSSScanner::ParseAndAppendEscape(nsString& aOutput, bool aInString)
+nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
 {
   int32_t ch = Read();
   if (ch < 0) {
     return false;
   }
   if (IsHexDigit(ch)) {
     int32_t rv = 0;
     int i;
@@ -712,87 +502,59 @@ nsCSSScanner::ParseAndAppendEscape(nsStr
  * Returns failure when the character sequence does not form an ident at
  * all, in which case the caller is responsible for pushing back or
  * otherwise handling aChar.  (This occurs only when aChar is '\'.)
  */
 bool
 nsCSSScanner::GatherIdent(int32_t aChar, nsString& aIdent)
 {
   if (aChar == '\\') {
-    if (!ParseAndAppendEscape(aIdent, false)) {
+    if (!GatherEscape(aIdent, false)) {
       return false;
     }
   } else {
     MOZ_ASSERT(aChar > 0);
     aIdent.Append(aChar);
   }
   for (;;) {
     // If nothing in pushback, first try to get as much as possible in one go
     if (!mPushbackCount && mOffset < mCount) {
       // See how much we can consume and append in one go
       uint32_t n = mOffset;
       // Count number of Ident characters that can be processed
-      while (n < mCount && IsIdent(mReadPointer[n])) {
+      while (n < mCount && IsIdent(mBuffer[n])) {
         ++n;
       }
       // Add to the token what we have so far
       if (n > mOffset) {
-        aIdent.Append(&mReadPointer[mOffset], n - mOffset);
+        aIdent.Append(&mBuffer[mOffset], n - mOffset);
         mOffset = n;
       }
     }
 
     aChar = Read();
     if (aChar < 0) break;
     if (aChar == '\\') {
-      if (!ParseAndAppendEscape(aIdent, false)) {
+      if (!GatherEscape(aIdent, false)) {
         Pushback(aChar);
         break;
       }
     } else if (IsIdent(aChar)) {
       aIdent.Append(PRUnichar(aChar));
     } else {
       Pushback(aChar);
       break;
     }
   }
   MOZ_ASSERT(aIdent.Length() > 0);
   return true;
 }
 
 bool
-nsCSSScanner::ParseRef(int32_t aChar, nsCSSToken& aToken)
-{
-  // Fall back for when we don't have name characters following:
-  aToken.mType = eCSSToken_Symbol;
-  aToken.mSymbol = aChar;
-
-  int32_t ch = Read();
-  if (ch < 0) {
-    return true;
-  }
-  if (IsIdent(ch) || ch == '\\') {
-    // First char after the '#' is a valid ident char (or an escape),
-    // so it makes sense to keep going
-    nsCSSTokenType type =
-      StartsIdent(ch, Peek()) ? eCSSToken_ID : eCSSToken_Ref;
-    aToken.mIdent.SetLength(0);
-    if (GatherIdent(ch, aToken.mIdent)) {
-      aToken.mType = type;
-      return true;
-    }
-  }
-
-  // No ident chars after the '#'.  Just unread |ch| and get out of here.
-  Pushback(ch);
-  return true;
-}
-
-bool
-nsCSSScanner::ParseIdent(int32_t aChar, nsCSSToken& aToken)
+nsCSSScanner::ScanIdent(int32_t aChar, nsCSSToken& aToken)
 {
   nsString& ident = aToken.mIdent;
   ident.SetLength(0);
   if (!GatherIdent(aChar, ident)) {
     aToken.mType = eCSSToken_Symbol;
     aToken.mSymbol = aChar;
     return true;
   }
@@ -809,17 +571,17 @@ nsCSSScanner::ParseIdent(int32_t aChar, 
     }
   }
 
   aToken.mType = tokenType;
   return true;
 }
 
 bool
-nsCSSScanner::ParseAtKeyword(nsCSSToken& aToken)
+nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
 {
   int32_t ch = Read();
   if (StartsIdent(ch, Peek())) {
     aToken.mIdent.SetLength(0);
     aToken.mType = eCSSToken_AtKeyword;
     if (GatherIdent(ch, aToken.mIdent)) {
       return true;
     }
@@ -828,17 +590,45 @@ nsCSSScanner::ParseAtKeyword(nsCSSToken&
     Pushback(ch);
   }
   aToken.mType = eCSSToken_Symbol;
   aToken.mSymbol = PRUnichar('@');
   return true;
 }
 
 bool
-nsCSSScanner::ParseNumber(int32_t c, nsCSSToken& aToken)
+nsCSSScanner::ScanHash(int32_t aChar, nsCSSToken& aToken)
+{
+  // Fall back for when we don't have name characters following:
+  aToken.mType = eCSSToken_Symbol;
+  aToken.mSymbol = aChar;
+
+  int32_t ch = Read();
+  if (ch < 0) {
+    return true;
+  }
+  if (IsIdent(ch) || ch == '\\') {
+    // First char after the '#' is a valid ident char (or an escape),
+    // so it makes sense to keep going
+    nsCSSTokenType type =
+      StartsIdent(ch, Peek()) ? eCSSToken_ID : eCSSToken_Hash;
+    aToken.mIdent.SetLength(0);
+    if (GatherIdent(ch, aToken.mIdent)) {
+      aToken.mType = type;
+      return true;
+    }
+  }
+
+  // No ident chars after the '#'.  Just unread |ch| and get out of here.
+  Pushback(ch);
+  return true;
+}
+
+bool
+nsCSSScanner::ScanNumber(int32_t c, nsCSSToken& aToken)
 {
   NS_PRECONDITION(c == '.' || c == '+' || c == '-' || IsDigit(c),
                   "Why did we get called?");
   aToken.mHasSign = (c == '+' || c == '-');
 
   // Our sign.
   int32_t sign = c == '-' ? -1 : 1;
   // Absolute value of the integer part of the mantissa.  This is a double so
@@ -862,29 +652,29 @@ nsCSSScanner::ParseNumber(int32_t c, nsC
   if (aToken.mHasSign) {
     NS_ASSERTION(c != '.', "How did that happen?");
     c = Read();
   }
 
   bool gotDot = (c == '.');
 
   if (!gotDot) {
-    // Parse the integer part of the mantisssa
+    // Scan the integer part of the mantisssa
     NS_ASSERTION(IsDigit(c), "Why did we get called?");
     do {
       intPart = 10*intPart + DecimalDigitValue(c);
       c = Read();
       // The IsDigit check will do the right thing even if Read() returns < 0
     } while (IsDigit(c));
 
     gotDot = (c == '.') && IsDigit(Peek());
   }
 
   if (gotDot) {
-    // Parse the fractional part of the mantissa.
+    // Scan the fractional part of the mantissa.
     c = Read();
     NS_ASSERTION(IsDigit(c), "How did we get here?");
     // Power of ten by which we need to divide our next digit
     float divisor = 10;
     do {
       fracPart += DecimalDigitValue(c) / divisor;
       divisor *= 10;
       c = Read();
@@ -962,71 +752,54 @@ nsCSSScanner::ParseNumber(int32_t c, nsC
     }
   }
   aToken.mNumber = value;
   aToken.mType = type;
   return true;
 }
 
 bool
-nsCSSScanner::SkipCComment()
-{
-  for (;;) {
-    int32_t ch = Read();
-    if (ch < 0) break;
-    if (ch == '*') {
-      if (LookAhead('/')) {
-        return true;
-      }
-    }
-  }
-
-  mReporter->ReportUnexpectedEOF("PECommentEOF");
-  return false;
-}
-
-bool
-nsCSSScanner::ParseString(int32_t aStop, nsCSSToken& aToken)
+nsCSSScanner::ScanString(int32_t aStop, nsCSSToken& aToken)
 {
   aToken.mIdent.SetLength(0);
   aToken.mType = eCSSToken_String;
   aToken.mSymbol = PRUnichar(aStop); // remember how it's quoted
   for (;;) {
     // If nothing in pushback, first try to get as much as possible in one go
     if (!mPushbackCount && mOffset < mCount) {
       // See how much we can consume and append in one go
       uint32_t n = mOffset;
       // Count number of characters that can be processed
       for (;n < mCount; ++n) {
-        PRUnichar nextChar = mReadPointer[n];
+        PRUnichar nextChar = mBuffer[n];
         if ((nextChar == aStop) || (nextChar == '\\') ||
             (nextChar == '\n') || (nextChar == '\r') || (nextChar == '\f')) {
           break;
         }
       }
       // Add to the token what we have so far
       if (n > mOffset) {
-        aToken.mIdent.Append(&mReadPointer[mOffset], n - mOffset);
+        aToken.mIdent.Append(&mBuffer[mOffset], n - mOffset);
         mOffset = n;
       }
     }
     int32_t ch = Read();
     if (ch < 0 || ch == aStop) {
       break;
     }
     if (ch == '\n') {
       aToken.mType = eCSSToken_Bad_String;
       mReporter->ReportUnexpected("SEUnterminatedString", aToken);
       break;
     }
     if (ch == '\\') {
-      if (!ParseAndAppendEscape(aToken.mIdent, true)) {
+      if (!GatherEscape(aToken.mIdent, true)) {
         aToken.mType = eCSSToken_Bad_String;
         Pushback(ch);
-        // For strings, the only case where ParseAndAppendEscape will
+        // For strings, the only case where GatherEscape will
         // return false is when there's a backslash to start an escape
         // immediately followed by end-of-stream.  In that case, the
         // correct tokenization is badstring *followed* by a DELIM for
         // the backslash, but as far as the author is concerned, it
         // works pretty much the same as an unterminated string, so we
         // use the same error message.
         mReporter->ReportUnexpected("SEUnterminatedString", aToken);
         break;
@@ -1047,34 +820,34 @@ nsCSSScanner::ParseString(int32_t aStop,
 //     u+[0-9a-f]{x}              1 <= x <= 6
 //     u+[0-9a-f]{x}\?{y}         1 <= x+y <= 6
 //     u+[0-9a-f]{x}-[0-9a-f]{y}  1 <= x <= 6, 1 <= y <= 6
 //
 // All unicode-range tokens have their text recorded in mIdent; valid ones
 // are also decoded into mInteger and mInteger2, and mIntegerValid is set.
 
 bool
-nsCSSScanner::ParseURange(int32_t aChar, nsCSSToken& aResult)
+nsCSSScanner::ScanURange(int32_t aChar, nsCSSToken& aResult)
 {
   int32_t intro2 = Read();
   int32_t ch = Peek();
 
   // We should only ever be called if these things are true.
   NS_ASSERTION(aChar == 'u' || aChar == 'U',
                "unicode-range called with improper introducer (U)");
   NS_ASSERTION(intro2 == '+',
                "unicode-range called with improper introducer (+)");
 
   // If the character immediately after the '+' is not a hex digit or
   // '?', this is not really a unicode-range token; push everything
   // back and scan the U as an ident.
   if (!IsHexDigit(ch) && ch != '?') {
     Pushback(intro2);
     Pushback(aChar);
-    return ParseIdent(aChar, aResult);
+    return ScanIdent(aChar, aResult);
   }
 
   aResult.mIdent.Truncate();
   aResult.mIdent.Append(aChar);
   aResult.mIdent.Append(intro2);
 
   bool valid = true;
   bool haveQues = false;
@@ -1124,8 +897,228 @@ nsCSSScanner::ParseURange(int32_t aChar,
   Pushback(ch);
 
   aResult.mInteger = low;
   aResult.mInteger2 = high;
   aResult.mIntegerValid = valid;
   aResult.mType = eCSSToken_URange;
   return true;
 }
+
+bool
+nsCSSScanner::NextURL(nsCSSToken& aToken) // body of url(...), after the '('
+{
+  SkipWhitespace(); // ignore whitespace before the URL body
+
+  int32_t ch = Read();
+  if (ch < 0) { // end of input: no token is produced
+    return false;
+  }
+
+  // STRING: a quoted URL body, scanned by ScanString.
+  if ((ch == '"') || (ch == '\'')) {
+#ifdef DEBUG
+    bool ok = // exists only in DEBUG builds, for the assertion below
+#endif
+      ScanString(ch, aToken);
+    NS_ABORT_IF_FALSE(ok, "ScanString should never fail, "
+                          "since there's always something read");
+
+    NS_ABORT_IF_FALSE(aToken.mType == eCSSToken_String ||
+                      aToken.mType == eCSSToken_Bad_String,
+                      "unexpected token type");
+    if (MOZ_LIKELY(aToken.mType == eCSSToken_String)) {
+      SkipWhitespace(); // whitespace may follow the string
+      if (LookAheadOrEOF(')')) { // ')' or end of input closes the URL
+        aToken.mType = eCSSToken_URL;
+      } else {
+        aToken.mType = eCSSToken_Bad_URL;
+      }
+    } else {
+      aToken.mType = eCSSToken_Bad_URL; // Bad_String body => Bad_URL
+    }
+    return true;
+  }
+
+  // Unquoted URL.  A url token can contain characters beyond
+  // identifier characters (e.g. '/', ':', etc.), so the normal
+  // rules for tokenizing the input don't apply very well.  To
+  // simplify the parser and relax some of the requirements on the
+  // scanner, URLs are scanned here.  If we find a malformed URL
+  // then we emit a token of type Bad_URL so that the parser can
+  // ignore the invalid input.  The parser must treat a Bad_URL
+  // token like a Function token, and process tokens until a
+  // matching parenthesis.
+
+  aToken.mType = eCSSToken_Bad_URL; // pessimistic; set to URL below if ok
+  aToken.mSymbol = PRUnichar(0); // zero delimiter: unquoted URL
+  nsString& ident = aToken.mIdent;
+  ident.SetLength(0);
+
+  // start of a non-quoted url (which may be empty)
+  bool ok = true;
+  for (;;) {
+    if (IsURLChar(ch)) {
+      // A regular url character.
+      ident.Append(PRUnichar(ch));
+    } else if (ch == ')') {
+      // All done
+      break;
+    } else if (IsWhitespace(ch)) {
+      // Whitespace is allowed at the end of the URL
+      SkipWhitespace();
+      // Consume the close paren if we have it; if not we're an invalid URL.
+      ok = LookAheadOrEOF(')');
+      break;
+    } else if (ch == '\\') {
+      if (!GatherEscape(ident, false)) { // aInString = false
+        ok = false;
+        Pushback(ch); // leave the backslash for the parser
+        break;
+      }
+    } else {
+      // This is an invalid URL spec
+      ok = false;
+      Pushback(ch); // push it back so the parser can match tokens and
+                    // then closing parenthesis
+      break;
+    }
+
+    ch = Read();
+    if (ch < 0) { // end of input ends the URL; ok keeps its value
+      break;
+    }
+  }
+
+  // If the result of the above scanning is ok then change the token
+  // type to a useful one.
+  if (ok) {
+    aToken.mType = eCSSToken_URL;
+  }
+  return true;
+}
+
+bool
+nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS) // returns false at EOF
+{
+  for (;;) { // Infinite loop so we can restart after comments.
+    mTokenOffset = mOffset; // record where this token starts
+    mTokenLineOffset = mLineOffset;
+    mTokenLineNumber = mLineNumber;
+
+    int32_t ch = Read();
+    if (ch < 0) {
+      return false;
+    }
+
+    // UNICODE-RANGE
+    if ((ch == 'u' || ch == 'U') && Peek() == '+')
+      return ScanURange(ch, aToken);
+
+    // IDENT
+    if (StartsIdent(ch, Peek()))
+      return ScanIdent(ch, aToken);
+
+    // AT_KEYWORD
+    if (ch == '@') {
+      return ScanAtKeyword(aToken);
+    }
+
+    // NUMBER or DIM
+    if ((ch == '.') || (ch == '+') || (ch == '-')) {
+      int32_t nextChar = Peek();
+      if (IsDigit(nextChar)) {
+        return ScanNumber(ch, aToken);
+      }
+      else if (('.' == nextChar) && ('.' != ch)) { // sign followed by '.'
+        nextChar = Read();
+        int32_t followingChar = Peek(); // look two characters ahead
+        Pushback(nextChar);
+        if (IsDigit(followingChar))
+          return ScanNumber(ch, aToken);
+      }
+    }
+    if (IsDigit(ch)) {
+      return ScanNumber(ch, aToken);
+    }
+
+    // ID
+    if (ch == '#') {
+      return ScanHash(ch, aToken);
+    }
+
+    // STRING
+    if ((ch == '"') || (ch == '\'')) {
+      return ScanString(ch, aToken);
+    }
+
+    // WS
+    if (IsWhitespace(ch)) {
+      SkipWhitespace();
+      if (!aSkipWS) {
+        aToken.mType = eCSSToken_Whitespace;
+        return true;
+      }
+      continue; // start again at the beginning
+    }
+    if (ch == '/' && !IsSVGMode()) { // COMMENT; not recognized in SVG mode
+      int32_t nextChar = Peek();
+      if (nextChar == '*') {
+        Read();
+        // FIXME: Editor wants comments to be preserved (bug 60290).
+        SkipComment();
+        continue; // start again at the beginning
+      }
+    }
+    if (ch == '<') {  // consume HTML comment start "<!--"
+      if (LookAhead('!')) {
+        if (LookAhead('-')) {
+          if (LookAhead('-')) {
+            aToken.mType = eCSSToken_HTMLComment;
+            aToken.mIdent.AssignLiteral("<!--");
+            return true;
+          }
+          Pushback('-'); // not "<!--": restore what LookAhead consumed
+        }
+        Pushback('!');
+      }
+    }
+    if (ch == '-') {  // check for HTML comment end "-->"
+      if (LookAhead('-')) {
+        if (LookAhead('>')) {
+          aToken.mType = eCSSToken_HTMLComment;
+          aToken.mIdent.AssignLiteral("-->");
+          return true;
+        }
+        Pushback('-'); // not "-->": restore what LookAhead consumed
+      }
+    }
+
+    // Match operators: "~=", "|=", "^=", "$=" and "*="
+    if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
+        ( ch == '$' ) || ( ch == '*' )) {
+      int32_t nextChar = Read();
+      if ( nextChar == '=' ) {
+        if (ch == '~') {
+          aToken.mType = eCSSToken_Includes;
+        }
+        else if (ch == '|') {
+          aToken.mType = eCSSToken_Dashmatch;
+        }
+        else if (ch == '^') {
+          aToken.mType = eCSSToken_Beginsmatch;
+        }
+        else if (ch == '$') {
+          aToken.mType = eCSSToken_Endsmatch;
+        }
+        else if (ch == '*') {
+          aToken.mType = eCSSToken_Containsmatch;
+        }
+        return true;
+      } else if (nextChar >= 0) { // nothing to push back at EOF
+        Pushback(nextChar);
+      }
+    }
+    aToken.mType = eCSSToken_Symbol; // anything else: one-character symbol
+    aToken.mSymbol = ch;
+    return true;
+  }
+}
--- a/layout/style/nsCSSScanner.h
+++ b/layout/style/nsCSSScanner.h
@@ -11,91 +11,123 @@
 #include "nsString.h"
 
 namespace mozilla {
 namespace css {
 class ErrorReporter;
 }
 }
 
-// Token types
+// Token types; in close but not perfect correspondence to the token
+// categorization in section 4.1.1 of CSS2.1.  (The deviations are all
+// the fault of css3-selectors, which has requirements that can only be
+// met by changing the generic tokenization.)  The comment on each line
+// illustrates the lexical form of the corresponding token.
+
 enum nsCSSTokenType {
-  // A css identifier (e.g. foo)
-  eCSSToken_Ident,          // mIdent
-
-  // A css at keyword (e.g. @foo)
-  eCSSToken_AtKeyword,      // mIdent
+  // White space of any kind.  No value fields are used.  Note that
+  // comments do *not* count as white space; comments separate tokens
+  // but are not themselves tokens.
+  eCSSToken_Whitespace,     //
 
-  // A css number without a percentage or dimension; with percentage;
-  // without percentage but with a dimension
-  eCSSToken_Number,         // mNumber
-  eCSSToken_Percentage,     // mNumber
-  eCSSToken_Dimension,      // mNumber + mIdent
+  // Identifier-like tokens.  mIdent is the text of the identifier.
+  // The difference between ID and Hash is: if the text after the #
+  // would have been a valid Ident if the # hadn't been there, the
+  // scanner produces an ID token.  Otherwise it produces a Hash token.
+  // (This distinction is required by css3-selectors.)
+  eCSSToken_Ident,          // word
+  eCSSToken_Function,       // word(
+  eCSSToken_AtKeyword,      // @word
+  eCSSToken_ID,             // #word
+  eCSSToken_Hash,           // #0word
 
-  // A css string (e.g. "foo" or 'foo')
-  eCSSToken_String,         // mSymbol + mIdent + mSymbol
-
-  // Whitespace (e.g. " " or "/* abc */")
-  eCSSToken_WhiteSpace,     // mIdent
-
-  // A css symbol (e.g. ':', ';', '+', etc.)
-  eCSSToken_Symbol,         // mSymbol
+  // Numeric tokens.  mNumber is the floating-point value of the
+  // number, and mHasSign indicates whether there was an explicit sign
+  // (+ or -) in front of the number.  If mIntegerValid is true, the
+  // number had the lexical form of an integer, and mInteger is its
+  // integer value.  Lexically integer values outside the range of a
+  // 32-bit signed number are clamped to the maximum values; mNumber
+  // will indicate a 'truer' value in that case.  Percentage tokens
+  // are always considered not to be integers, even if their numeric
+  // value is integral (100% => mNumber = 1.0).  For Dimension
+  // tokens, mIdent holds the text of the unit.
+  eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
+  eCSSToken_Dimension,      // 24px 8.5in
+  eCSSToken_Percentage,     // 85% 1280.4%
 
-  // A css1 id (e.g. #foo3)
-  eCSSToken_ID,             // mIdent
-  // Just like eCSSToken_ID, except the part following the '#' is not
-  // a valid CSS identifier (eg. starts with a digit, is the empty
-  // string, etc).
-  eCSSToken_Ref,            // mIdent
+  // String-like tokens.  In all cases, mIdent holds the text
+  // belonging to the string, and mSymbol holds the delimiter
+  // character, which may be ', ", or zero (only for unquoted URLs).
+  // Bad_String and Bad_URL tokens are emitted for malformed input,
+  // e.g. when the closing delimiter or parenthesis was missing.
+  eCSSToken_String,         // 'foo bar' "foo bar"
+  eCSSToken_Bad_String,     // 'foo bar
+  eCSSToken_URL,            // url(foobar) url("foo bar")
+  eCSSToken_Bad_URL,        // url(foo
 
-  eCSSToken_Function,       // mIdent
-
-  eCSSToken_URL,            // mIdent + mSymbol
-  eCSSToken_Bad_URL,        // mIdent + mSymbol
-
-  eCSSToken_HTMLComment,    // "<!--" or "-->"
+  // Any one-character symbol.  mSymbol holds the character.
+  eCSSToken_Symbol,         // . ; { } ! *
 
-  eCSSToken_Includes,       // "~="
-  eCSSToken_Dashmatch,      // "|="
-  eCSSToken_Beginsmatch,    // "^="
-  eCSSToken_Endsmatch,      // "$="
-  eCSSToken_Containsmatch,  // "*="
+  // Match operators.  These are single tokens rather than pairs of
+  // Symbol tokens because css3-selectors forbids the presence of
+  // comments between the two characters.  No value fields are used;
+  // the token type indicates which operator.
+  eCSSToken_Includes,       // ~=
+  eCSSToken_Dashmatch,      // |=
+  eCSSToken_Beginsmatch,    // ^=
+  eCSSToken_Endsmatch,      // $=
+  eCSSToken_Containsmatch,  // *=
 
-  eCSSToken_URange,         // Low in mInteger, high in mInteger2;
-                            // mIntegerValid is true if the token is a
-                            // valid range; mIdent preserves the textual
-                            // form of the token for error reporting
+  // Unicode-range token: currently used only in @font-face.
+  // The lexical rule for this token includes several forms that are
+  // semantically invalid.  Therefore, mIdent always holds the
+  // complete original text of the token (so we can print it
+  // accurately in diagnostics), and mIntegerValid is true iff the
+  // token is semantically valid.  In that case, mInteger holds the
+  // lowest value included in the range, and mInteger2 holds the
+  // highest value included in the range.
+  eCSSToken_URange,         // U+007e U+01?? U+2000-206F
 
-  // An unterminated string, which is always an error.
-  eCSSToken_Bad_String      // mSymbol + mIdent
+  // HTML comment delimiters, ignored as a unit when they appear at
+  // the top level of a style sheet, for compatibility with websites
+  // that were written to also work in pre-CSS browsers.  This token
+  // type subsumes the css2.1 CDO and CDC tokens, which are always
+  // treated the same by the parser.  mIdent holds the text of the
+  // token, for diagnostics.
+  eCSSToken_HTMLComment,    // <!-- -->
 };
 
+// A single token returned from the scanner.  mType is always
+// meaningful; comments above describe which other fields are
+// meaningful for which token types.
 struct nsCSSToken {
   nsAutoString    mIdent NS_OKONHEAP;
   float           mNumber;
   int32_t         mInteger;
   int32_t         mInteger2;
   nsCSSTokenType  mType;
   PRUnichar       mSymbol;
-  bool            mIntegerValid; // for number, dimension, urange
-  bool            mHasSign; // for number, percentage, and dimension
+  bool            mIntegerValid;
+  bool            mHasSign;
 
-  nsCSSToken();
+  nsCSSToken() // all value fields zeroed; type defaults to Whitespace
+    : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
+      mSymbol('\0'), mIntegerValid(false), mHasSign(false)
+  {}
 
-  bool IsSymbol(PRUnichar aSymbol) {
-    return bool((eCSSToken_Symbol == mType) && (mSymbol == aSymbol));
+  bool IsSymbol(PRUnichar aSymbol) const { // true iff a Symbol token holding aSymbol
+    return mType == eCSSToken_Symbol && mSymbol == aSymbol;
   }
 
   void AppendToString(nsString& aBuffer) const;
 };
 
-// CSS Scanner API. Used to tokenize an input stream using the CSS
-// forward compatible tokenization rules. This implementation is
-// private to this package and is only used internally by the css
-// parser.
+// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
+// compatible tokenization rules.  Used internally by nsCSSParser;
+// not available for use by other code.
 class nsCSSScanner {
   public:
   // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
   // when the line number is unknown.
   nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
   ~nsCSSScanner();
 
   void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
@@ -117,27 +149,33 @@ class nsCSSScanner {
   // the most recently processed token.
   uint32_t GetColumnNumber() const
   { return mTokenOffset - mTokenLineOffset; }
 
   // Get the text of the line containing the first character of
   // the most recently processed token.
   nsDependentSubstring GetCurrentLine() const;
 
-  // Get the next token. Return false on EOF. aTokenResult
-  // is filled in with the data for the token.
-  bool Next(nsCSSToken& aTokenResult);
+  // Get the next token.  Return false on EOF.  aTokenResult is filled
+  // in with the data for the token.  If aSkipWS is true, skip over
+  // eCSSToken_Whitespace tokens rather than returning them.
+  bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
 
-  // Get the next token that may be a string or unquoted URL
+  // Get the body of a URL token (everything after the 'url(').
+  // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
+  // which, for historical reasons, must make additional function
+  // tokens behave like url().  Please do not add new uses to the
+  // parser.
   bool NextURL(nsCSSToken& aTokenResult);
 
-  // It's really ugly that we have to expose this, but it's the easiest
-  // way to do :nth-child() parsing sanely.  (In particular, in
-  // :nth-child(2n-1), "2n-1" is a dimension, and we need to push the
-  // "-1" back so we can read it again as a number.)
+  // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
+  // because "2n-1" is a single DIMENSION token, and "n-1" is a single
+  // IDENT token, but the :nth() selector syntax wants to interpret
+  // them the same as "2n -1" and "n -1" respectively.  Please do not
+  // add new uses to the parser.
   void Pushback(PRUnichar aChar);
 
   // Starts recording the input stream from the current position.
   void StartRecording();
 
   // Abandons recording of the input stream.
   void StopRecording();
 
@@ -145,30 +183,31 @@ class nsCSSScanner {
   // input to aBuffer.
   void StopRecording(nsString& aBuffer);
 
 protected:
   int32_t Read();
   int32_t Peek();
   bool LookAhead(PRUnichar aChar);
   bool LookAheadOrEOF(PRUnichar aChar); // expect either aChar or EOF
-  void EatWhiteSpace();
 
-  bool ParseAndAppendEscape(nsString& aOutput, bool aInString);
-  bool ParseIdent(int32_t aChar, nsCSSToken& aResult);
-  bool ParseAtKeyword(nsCSSToken& aResult);
-  bool ParseNumber(int32_t aChar, nsCSSToken& aResult);
-  bool ParseRef(int32_t aChar, nsCSSToken& aResult);
-  bool ParseString(int32_t aChar, nsCSSToken& aResult);
-  bool ParseURange(int32_t aChar, nsCSSToken& aResult);
-  bool SkipCComment();
+  void SkipWhitespace();
+  void SkipComment();
 
+  bool GatherEscape(nsString& aOutput, bool aInString);
   bool GatherIdent(int32_t aChar, nsString& aIdent);
 
-  const PRUnichar *mReadPointer;
+  bool ScanIdent(int32_t aChar, nsCSSToken& aResult);
+  bool ScanAtKeyword(nsCSSToken& aResult);
+  bool ScanHash(int32_t aChar, nsCSSToken& aResult);
+  bool ScanNumber(int32_t aChar, nsCSSToken& aResult);
+  bool ScanString(int32_t aChar, nsCSSToken& aResult);
+  bool ScanURange(int32_t aChar, nsCSSToken& aResult);
+
+  const PRUnichar *mBuffer;
   uint32_t mOffset;
   uint32_t mCount;
 
   PRUnichar* mPushback;
   int32_t mPushbackCount;
   int32_t mPushbackSize;
   PRUnichar mLocalPushback[4];