Bug 1315368. Back out changeset bf190e326bfd (bug 790997) because what it implements doesn't actually follow the CSS syntax editor's draft and breaks some sites in the process. r=dbaron,a=ritu FIREFOX_50_0_BUILD2 FIREFOX_50_0_RELEASE
authorBoris Zbarsky <bzbarsky@mit.edu>
Fri, 04 Nov 2016 22:06:25 -0400
changeset 445892 dc617d65c9f0cdbbe4351cc1e5c288b05f25f8f7
parent 445891 8e32fc90a6aadb62ab464ef56a53da8a5d66091a
child 445893 57518a2fb7794c7a2804c02c36783c8bc7bf206b
push id37654
push usermozilla@kaply.com
push dateWed, 30 Nov 2016 15:39:34 +0000
reviewersdbaron, ritu
bugs1315368, 790997
milestone50.0
Bug 1315368. Back out changeset bf190e326bfd (bug 790997) because what it implements doesn't actually follow the CSS syntax editor's draft and breaks some sites in the process. r=dbaron,a=ritu
devtools/shared/css-lexer.js
devtools/shared/tests/unit/test_csslexer.js
dom/webidl/CSSLexer.webidl
layout/reftests/css-parsing/invalid-url-handling.xhtml
layout/style/nsCSSScanner.cpp
layout/style/nsCSSScanner.h
layout/style/test/test_csslexer.js
layout/style/test/test_parser_diagnostics_unprintables.html
--- a/devtools/shared/css-lexer.js
+++ b/devtools/shared/css-lexer.js
@@ -1068,19 +1068,16 @@ Scanner.prototype = {
     aToken.mIdent.length = 0;
 
     let ch = this.Peek();
     // Do we have a string?
     if (ch == QUOTATION_MARK || ch == APOSTROPHE) {
       this.ScanString(aToken);
       if (aToken.mType == eCSSToken_Bad_String) {
         aToken.mType = eCSSToken_Bad_URL;
-        // Flag us as having been a Bad_String.
-        aToken.mInteger2 = 1;
-        this.ConsumeBadURLRemnants(aToken);
         return;
       }
     } else {
       // Otherwise, this is the start of a non-quoted url (which may be empty).
       aToken.mSymbol = 0;
       this.GatherText(IS_URL_CHAR, aToken.mIdent);
     }
 
@@ -1091,54 +1088,19 @@ Scanner.prototype = {
     if (ch < 0 || ch == RIGHT_PARENTHESIS) {
       this.Advance();
       aToken.mType = eCSSToken_URL;
       if (ch < 0) {
         this.AddEOFCharacters(eEOFCharacters_CloseParen);
       }
     } else {
       aToken.mType = eCSSToken_Bad_URL;
-      if (aToken.mSymbol != 0) {
-        // Flag us as having been a String, not a Bad_String.
-        aToken.mInteger2 = 0;
-      }
-      this.ConsumeBadURLRemnants(aToken);
     }
   },
 
-  ConsumeBadURLRemnants: function (aToken) {
-    aToken.mInteger = aToken.mIdent.length;
-    let ch = this.Peek();
-    do {
-      if (ch < 0) {
-        this.AddEOFCharacters(eEOFCharacters_CloseParen);
-        break;
-      }
-
-      if (ch == REVERSE_SOLIDUS && this.GatherEscape(aToken.mIdent, false)) {
-        // Nothing else needs to be done here for the moment; we've consumed the
-        // backslash and following escape.
-      } else {
-        // We always want to consume this character.
-        if (IsVertSpace(ch)) {
-          this.AdvanceLine();
-        } else {
-          this.Advance();
-        }
-        if (ch == 0) {
-          aToken.mIdent.push(UCS2_REPLACEMENT_CHAR);
-        } else {
-          aToken.mIdent.push(ch);
-        }
-      }
-
-      ch = this.Peek();
-    } while (ch != RIGHT_PARENTHESIS);
-  },
-
   /**
    * Primary scanner entry point.  Consume one token and fill in
    * |aToken| accordingly.  Will skip over any number of comments first,
    * and will also skip over rather than return whitespace and comment
    * tokens, depending on the value of |aSkip|.
    *
    * Returns true if it successfully consumed a token, false if EOF has
    * been reached.  Will always advance the current read position by at
--- a/devtools/shared/tests/unit/test_csslexer.js
+++ b/devtools/shared/tests/unit/test_csslexer.js
@@ -123,19 +123,18 @@ var LEX_TESTS = [
   ["23px", ["dimension:px"]],
   ["23%", ["percentage"]],
   ["url(http://example.com)", ["url:http://example.com"]],
   ["url('http://example.com')", ["url:http://example.com"]],
   ["url(  'http://example.com'  )",
              ["url:http://example.com"]],
   // In CSS Level 3, this is an ordinary URL, not a BAD_URL.
   ["url(http://example.com", ["url:http://example.com"]],
-  // We end up losing the whitespace before the '@' because it's skipped by the
-  // lexer before we discover we have a BAD_URL token.
-  ["url(http://example.com @", ["bad_url:http://example.com@"]],
+  // See bug 1153981 to understand why this gets a SYMBOL token.
+  ["url(http://example.com @", ["bad_url:http://example.com", "symbol:@"]],
   ["quo\\ting", ["ident:quoting"]],
   ["'bad string\n", ["bad_string:bad string", "whitespace"]],
   ["~=", ["includes"]],
   ["|=", ["dashmatch"]],
   ["^=", ["beginsmatch"]],
   ["$=", ["endsmatch"]],
   ["*=", ["containsmatch"]],
 
--- a/dom/webidl/CSSLexer.webidl
+++ b/dom/webidl/CSSLexer.webidl
@@ -31,20 +31,18 @@ enum CSSTokenType {
   // A string.
   "string",
   // A "bad string".  This can only be returned when a string is
   // unterminated at EOF.  (However, currently the lexer returns
   // ordinary STRING tokens in this situation.)
   "bad_string",
   // A URL.  |text| holds the URL.
   "url",
-  // A "bad URL".  This is a URL that either contains a bad_string or contains
-  // garbage after the string or unquoted URL test.  |text| holds the URL and
-  // potentially whatever garbage came after it, up to but not including the
-  // following ')'.
+  // A "bad URL".  This is a URL that is unterminated at EOF.  |text|
+  // holds the URL.
   "bad_url",
   // A "symbol" is any one-character symbol.  This corresponds to the
   // DELIM token in the CSS specification.
   "symbol",
   // The "~=" token.
   "includes",
   // The "|=" token.
   "dashmatch",
--- a/layout/reftests/css-parsing/invalid-url-handling.xhtml
+++ b/layout/reftests/css-parsing/invalid-url-handling.xhtml
@@ -17,29 +17,27 @@
   </style>
   <style type="text/css">
   /* not a URI token, but handled according to rules for parsing errors */
   #foo { background: url(foo"bar
   ) }
   #two { background-color: green; }
   </style>
   <style type="text/css">
-  /* not a URI token, the invalid URI token consumes everything up to the ')'. */
+  /* not a URI token; the unterminated string ends at end of line, so
+     the brace never matches */
+  #three { background-color: green; }
+  #foo { background: url(foo"bar) }
   #three { background-color: red; }
-  #foo { background: url(foo"bar) }
-  #three { background-color: green; }
   </style>
   <style type="text/css">
-  #four { background-color: green; }
-  /* not a URI token; the invalid URI token consumes everything up to the ')'
-     and then there is some garbage that prevents the next rule from being
-     parsed. */
+  /* not a URI token; the unterminated string ends at end of line */
   #foo { background: url(foo"bar) }
   ) }
-  #four { background-color: red; }
+  #four { background-color: green; }
   </style>
   <style type="text/css">
   /* not a URI token; the unterminated string ends at end of line, so
      the brace never matches */
   #five { background-color: green; }
   #foo { background: url("bar) }
   #five { background-color: red; }
   </style>
@@ -65,30 +63,28 @@
   /* perfectly good URI token (image is a 404, though) */
   #ten { background: url({) green; }
   </style>
   <style type="text/css">
   /* perfectly good URI token (image is a 404, though) */
   #eleven { background: url([) green; }
   </style>
   <style type="text/css">
-  /* not a URI token; brace matching is ignored while looking for the closing
-     ')' but is used after that. */
-  #twelve { background: url(}{""{)(}); background-color: green; }
+  /* not a URI token; brace matching should work only after invalid URI token */
+  #twelve { background: url(}{""{)}); background-color: green; }
   </style>
   <style type="text/css">
   /* invalid URI token absorbs the [ */
   #thirteen { background: url([""); background-color: green; }
   </style>
   <style type="text/css">
-  /* not a URI token; the invalid URI token consumes everything up to the
-     next ')'. */
+  /* not a URI token; the opening ( is never matched */
+  #fourteen { background-color: green; }
+  #foo { background: url(() }
   #fourteen { background-color: red; }
-  #foo { background: url(() }
-  #fourteen { background-color: green; }
   </style>
   <!-- The next three tests test that invalid URI tokens absorb [ and { -->
   <style type="text/css">
   #foo { background: url(a()); }
   #fifteen { background-color: green }
   </style>
   <style type="text/css">
   #foo { background: url([()); }
--- a/layout/style/nsCSSScanner.cpp
+++ b/layout/style/nsCSSScanner.cpp
@@ -255,35 +255,23 @@ nsCSSToken::AppendToString(nsString& aBu
       nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
       aBuffer.Append('(');
       break;
 
     case eCSSToken_URL:
     case eCSSToken_Bad_URL:
       aBuffer.AppendLiteral("url(");
       if (mSymbol != char16_t(0)) {
-        if (mType == eCSSToken_URL) {
-          nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
-        } else {
-          // Only things up to mInteger were part of the string.
-          nsStyleUtil::AppendEscapedCSSString(StringHead(mIdent, mInteger),
-                                              aBuffer, mSymbol);
-          MOZ_ASSERT(mInteger2 == 0 || mInteger2 == 1);
-          if (mInteger2 == 1) {
-            // This was a Bad_String; strip off the closing quote.
-            aBuffer.Truncate(aBuffer.Length() - 1);
-          }
-
-          // Now append the remaining garbage.
-          aBuffer.Append(Substring(mIdent, mInteger));
-        }
+        nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
       } else {
         aBuffer.Append(mIdent);
       }
-      aBuffer.Append(char16_t(')'));
+      if (mType == eCSSToken_URL) {
+        aBuffer.Append(char16_t(')'));
+      }
       break;
 
     case eCSSToken_Number:
       if (mIntegerValid) {
         aBuffer.AppendInt(mInteger, 10);
       } else {
         aBuffer.AppendFloat(mNumber);
       }
@@ -1173,19 +1161,16 @@ nsCSSScanner::NextURL(nsCSSToken& aToken
   aToken.mIdent.Truncate();
 
   int32_t ch = Peek();
   // Do we have a string?
   if (ch == '"' || ch == '\'') {
     ScanString(aToken);
     if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
       aToken.mType = eCSSToken_Bad_URL;
-      // Flag us as having been a Bad_String.
-      aToken.mInteger2 = 1;
-      ConsumeBadURLRemnants(aToken);
       return;
     }
     MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
 
   } else {
     // Otherwise, this is the start of a non-quoted url (which may be empty).
     aToken.mSymbol = char16_t(0);
     GatherText(IS_URL_CHAR, aToken.mIdent);
@@ -1199,56 +1184,19 @@ nsCSSScanner::NextURL(nsCSSToken& aToken
     Advance();
     aToken.mType = eCSSToken_URL;
     if (ch < 0) {
       AddEOFCharacters(eEOFCharacters_CloseParen);
     }
   } else {
     mSeenBadToken = true;
     aToken.mType = eCSSToken_Bad_URL;
-    if (aToken.mSymbol != 0) {
-      // Flag us as having been a String, not a Bad_String.
-      aToken.mInteger2 = 0;
-    }
-    ConsumeBadURLRemnants(aToken);
   }
 }
 
-void
-nsCSSScanner::ConsumeBadURLRemnants(nsCSSToken& aToken)
-{
-  aToken.mInteger = aToken.mIdent.Length();
-  int32_t ch = Peek();
-  do {
-    if (ch < 0) {
-      AddEOFCharacters(eEOFCharacters_CloseParen);
-      break;
-    }
-
-    if (ch == '\\' && GatherEscape(aToken.mIdent, false)) {
-      // Nothing else needs to be done here for the moment; we've consumed the
-      // backslash and following escape.
-    } else {
-      // We always want to consume this character.
-      if (IsVertSpace(ch)) {
-        AdvanceLine();
-      } else {
-        Advance();
-      }
-      if (ch == 0) {
-        aToken.mIdent.Append(UCS2_REPLACEMENT_CHAR);
-      } else {
-        aToken.mIdent.Append(ch);
-      }
-    }
-
-    ch = Peek();
-  } while (ch != ')');
-}
-
 /**
  * Primary scanner entry point.  Consume one token and fill in
  * |aToken| accordingly.  Will skip over any number of comments first,
  * and will also skip over rather than return whitespace and comment
  * tokens, depending on the value of |aSkip|.
  *
  * Returns true if it successfully consumed a token, false if EOF has
  * been reached.  Will always advance the current read position by at
--- a/layout/style/nsCSSScanner.h
+++ b/layout/style/nsCSSScanner.h
@@ -54,30 +54,21 @@ enum nsCSSTokenType {
   eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
   eCSSToken_Dimension,      // 24px 8.5in
   eCSSToken_Percentage,     // 85% 1280.4%
 
   // String-like tokens.  In all cases, mIdent holds the text
   // belonging to the string, and mSymbol holds the delimiter
   // character, which may be ', ", or zero (only for unquoted URLs).
   // Bad_String and Bad_URL tokens are emitted when the closing
-  // delimiter was missing.  Bad_URL is also emitted if there was trailing
-  // garbage after the string or unquoted url value.
+  // delimiter or parenthesis was missing.
   eCSSToken_String,         // 'foo bar' "foo bar"
   eCSSToken_Bad_String,     // 'foo bar
   eCSSToken_URL,            // url(foobar) url("foo bar")
-  // For Bad_URL tokens, we need to keep track of the following state:
-  // (1) Was there a quoted string?  If so, was it a String or Bad_String?
-  // (2) Was there trailing garbage, and if so what was it?
-  // We keep track of whether there was a quoted string by setting mSymbol as
-  // described above.  If that's nonzero, then mInteger2 indicates whether we
-  // have a String or Bad_String by taking on the values 0 and 1 respectively.
-  // mInteger indicates the start of trailing garbage in mIdent (and is set to
-  // mIdent.Length() when there is no trailing garbage).
-  eCSSToken_Bad_URL,        // url(foo') url('foo'a) url('foo
+  eCSSToken_Bad_URL,        // url(foo
 
   // Any one-character symbol.  mSymbol holds the character.
   eCSSToken_Symbol,         // . ; { } ! *
 
   // Match operators.  These are single tokens rather than pairs of
   // Symbol tokens because css3-selectors forbids the presence of
   // comments between the two characters.  No value fields are used;
   // the token type indicates which operator.
@@ -259,20 +250,16 @@ class nsCSSScanner {
 
   // Get the body of an URL token (everything after the 'url(').
   // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
   // which, for historical reasons, must make additional function
   // tokens behave like url().  Please do not add new uses to the
   // parser.
   void NextURL(nsCSSToken& aTokenResult);
 
-  // Implement the "consume the remnants of a bad url" algorithm from CSS3
-  // Syntax, except we don't consume the ')'.
-  void ConsumeBadURLRemnants(nsCSSToken& aToken);
-
   // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
   // because "2n-1" is a single DIMENSION token, and "n-1" is a single
   // IDENT token, but the :nth() selector syntax wants to interpret
   // them the same as "2n -1" and "n -1" respectively.  Please do not
   // add new uses to the parser.
   //
   // Note: this function may not be used to back up over a line boundary.
   void Backup(uint32_t n);
--- a/layout/style/test/test_csslexer.js
+++ b/layout/style/test/test_csslexer.js
@@ -50,19 +50,18 @@ var LEX_TESTS = [
   ["23px", ["dimension:px"]],
   ["23%", ["percentage"]],
   ["url(http://example.com)", ["url:http://example.com"]],
   ["url('http://example.com')", ["url:http://example.com"]],
   ["url(  'http://example.com'  )",
              ["url:http://example.com"]],
   // In CSS Level 3, this is an ordinary URL, not a BAD_URL.
   ["url(http://example.com", ["url:http://example.com"]],
-  // We end up losing the whitespace before the '@' because it's skipped by the
-  // lexer before we discover we have a BAD_URL token.
-  ["url(http://example.com @", ["bad_url:http://example.com@"]],
+  // See bug 1153981 to understand why this gets a SYMBOL token.
+  ["url(http://example.com @", ["bad_url:http://example.com", "symbol:@"]],
   ["quo\\ting", ["ident:quoting"]],
   ["'bad string\n", ["bad_string:bad string", "whitespace"]],
   ["~=", ["includes"]],
   ["|=", ["dashmatch"]],
   ["^=", ["beginsmatch"]],
   ["$=", ["endsmatch"]],
   ["*=", ["containsmatch"]],
 
--- a/layout/style/test/test_parser_diagnostics_unprintables.html
+++ b/layout/style/test/test_parser_diagnostics_unprintables.html
@@ -40,17 +40,17 @@ const patterns = [
   { i: "x{@<t>: }",        o: "declaration but found \u2018@<i>\u2019." },
   // _String
   { i: "x{ '<t>'}" ,       o: "declaration but found \u2018'<s>'\u2019." },
   // _Bad_String
   { i: "x{ '<t>\n}",      o: "declaration but found \u2018'<s>\u2019." },
   // _URL
   { i: "x{ url('<t>')}",   o: "declaration but found \u2018url('<s>')\u2019." },
   // _Bad_URL
-  { i: "x{ url('<t>'.)}" , o: "declaration but found \u2018url('<s>'.)\u2019." }
+  { i: "x{ url('<t>'.)}" , o: "declaration but found \u2018url('<s>'\u2019." }
 ];
 
 // Blocks of characters to test, and how they should be escaped when
 // they appear in identifiers and string constants.
 const substitutions = [
   // ASCII printables that _can_ normally appear in identifiers,
   // so should of course _not_ be escaped.
   { t: "-_0123456789",               i: "-_0123456789",