Bug 1546783 - Add a best-effort @import rule scanner for the parser. r=bzbarsky
author: Emilio Cobos Álvarez <emilio@crisal.io>
Mon, 30 Sep 2019 13:29:23 +0000
changeset 495607 78d0e221f6a497003d5a7339d5ac13d0de28f74e
parent 495606 73fae0c9b2306b3485f9ed4c9a5e75d8d507a52e
child 495608 dfccd315582994034ef6c2e1fb2f9f47c865a873
push id: 96719
push user: ealvarez@mozilla.com
push date: Mon, 30 Sep 2019 13:48:22 +0000
treeherder: autoland@56599f0a87a4 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: bzbarsky
bugs: 1546783
milestone: 71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1546783 - Add a best-effort @import rule scanner for the parser. r=bzbarsky

Doesn't recognize all the edge cases, but I think this should be good enough. Let me know if you think something common is missing.

Differential Revision: https://phabricator.services.mozilla.com/D47469
layout/style/ImportScanner.cpp
layout/style/ImportScanner.h
layout/style/moz.build
layout/style/nsICSSDeclaration.cpp
layout/style/test/gtest/ImportScannerTest.cpp
layout/style/test/gtest/moz.build
new file mode 100644
--- /dev/null
+++ b/layout/style/ImportScanner.cpp
@@ -0,0 +1,199 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=78: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ImportScanner.h"
+#include "nsContentUtils.h"
+
+namespace mozilla {
+
+// The scanner's single definition of "whitespace": whatever
+// nsContentUtils::IsHTMLWhitespace accepts for the given UTF-16 code unit.
+static inline bool IsWhitespace(char16_t aChar) {
+  const bool isHtmlSpace = nsContentUtils::IsHTMLWhitespace(aChar);
+  return isHtmlSpace;
+}
+
+// Begins scanning a new <style> element.
+//
+// Whatever the scanner was doing before is abandoned first: Stop() resets the
+// per-rule state, and the list of URLs it returns is dropped on purpose.
+void ImportScanner::Start() {
+  static_cast<void>(Stop());
+  mState = State::Idle;
+}
+
+// Finishes the rule whose value has just been scanned: records the value as a
+// URL if the rule was an @import, then clears the per-rule scratch state so
+// the next rule starts from a clean slate.
+//
+// Must only be called in the AfterRuleValue state.
+void ImportScanner::EmitUrl() {
+  MOZ_ASSERT(mState == State::AfterRuleValue);
+  if (mInImportRule) {
+    const bool isUnquotedUrl = mUrlValueDelimiterClosingChar == ')';
+    if (isUnquotedUrl) {
+      // An unquoted url(...) value may still carry whitespace before the
+      // closing paren; strip it from the end only.
+      // FIXME: Add a convenience function in nsContentUtils or something?
+      mRuleValue.Trim(" \t\n\r\f", false);
+    }
+    mUrlsFound.AppendElement(std::move(mRuleValue));
+    mInImportRule = false;
+  }
+  // Truncate rather than reassign: we try to avoid freeing the buffers here.
+  mRuleValue.Truncate(0);
+  mRuleName.Truncate(0);
+  MOZ_ASSERT(mRuleValue.IsEmpty());
+}
+
+// Ends the current scan and hands the collected URLs to the caller.
+//
+// A rule whose value was fully read but whose terminating ';' never arrived
+// is still emitted. Afterwards the scanner is back in the
+// OutsideOfStyleElement state with all per-rule state cleared.
+nsTArray<nsString> ImportScanner::Stop() {
+  const bool hasPendingRule = mState == State::AfterRuleValue;
+  if (hasPendingRule) {
+    // EmitUrl() asserts on mState, so it has to run before the reset below.
+    EmitUrl();
+  }
+  mRuleValue.Truncate(0);
+  mRuleName.Truncate(0);
+  mInImportRule = false;
+  mState = State::OutsideOfStyleElement;
+  return std::move(mUrlsFound);
+}
+
+// Feeds aFragment to the state machine one UTF-16 code unit at a time,
+// storing each resulting state in mState. Scanning stops as soon as the
+// machine reaches State::Done; the rest of the fragment is ignored.
+//
+// The caller must have checked that a scan is actually in progress.
+void ImportScanner::DoScan(Span<const char16_t> aFragment) {
+  MOZ_ASSERT(mState != State::OutsideOfStyleElement);
+  MOZ_ASSERT(mState != State::Done);
+
+  for (const char16_t codeUnit : aFragment) {
+    mState = Scan(codeUnit);
+    if (mState == State::Done) {
+      break;
+    }
+  }
+}
+
+// Advances the state machine by one UTF-16 code unit and returns the state
+// the scanner should be in afterwards; the caller (DoScan) stores the result
+// in mState.
+//
+// Returning State::Done means "give up on this sheet": something that is not
+// a recognizable @charset / @import prelude was seen, so no further rule is
+// looked at.
+//
+// Side effects: accumulates the rule name in mRuleName and the rule value in
+// mRuleValue, updates mInImportRule / mUrlValueDelimiterClosingChar, and calls
+// EmitUrl() when a rule is terminated by ';'.
+auto ImportScanner::Scan(char16_t aChar) -> State {
+  switch (mState) {
+    case State::OutsideOfStyleElement:
+    case State::Done:
+      MOZ_ASSERT_UNREACHABLE("How?");
+      return mState;
+    case State::Idle: {
+      // TODO(emilio): Maybe worth caring about html-style comments like:
+      // <style>
+      // <!--
+      //   @import url(stuff);
+      // -->
+      // </style>
+      if (IsWhitespace(aChar)) {
+        return mState;
+      }
+      if (aChar == '/') {
+        return State::MaybeAtCommentStart;
+      }
+      if (aChar == '@') {
+        MOZ_ASSERT(mRuleName.IsEmpty());
+        return State::AtRuleName;
+      }
+      // Anything else starts a regular rule, after which imports don't count.
+      return State::Done;
+    }
+    case State::MaybeAtCommentStart: {
+      return aChar == '*' ? State::AtComment : State::Done;
+    }
+    case State::AtComment: {
+      return aChar == '*' ? State::MaybeAtCommentEnd : mState;
+    }
+    case State::MaybeAtCommentEnd: {
+      if (aChar == '/') {
+        return State::Idle;
+      }
+      // In a run of asterisks (e.g. "/** doc **/") every '*' may still be the
+      // one that precedes the closing '/', so stay put; only a different
+      // character puts us back in the middle of the comment.
+      return aChar == '*' ? mState : State::AtComment;
+    }
+    case State::AtRuleName: {
+      if (IsAsciiAlpha(aChar)) {
+        // Neither "import" nor "charset" is longer than kMaxRuleNameLength,
+        // so a longer name can't be a rule we care about.
+        if (mRuleName.Length() > kMaxRuleNameLength - 1) {
+          return State::Done;
+        }
+        mRuleName.Append(aChar);
+        return mState;
+      }
+      if (IsWhitespace(aChar)) {
+        mInImportRule = mRuleName.LowerCaseEqualsLiteral("import");
+        if (mInImportRule || mRuleName.LowerCaseEqualsLiteral("charset")) {
+          MOZ_ASSERT(mRuleValue.IsEmpty());
+          return State::AtRuleValue;
+        }
+      }
+      return State::Done;
+    }
+    case State::AtRuleValue: {
+      // In this state mRuleValue doubles as the progress marker for matching
+      // the "url" function name: it holds "", "u", "ur" or "url".
+      if (mRuleValue.IsEmpty()) {
+        if (IsWhitespace(aChar)) {
+          return mState;
+        }
+        if (aChar == '"' || aChar == '\'') {
+          mUrlValueDelimiterClosingChar = aChar;
+          return State::AtRuleValueDelimited;
+        }
+        // @charset only takes a string; url() is only meaningful for @import.
+        if (!mInImportRule) {
+          return State::Done;
+        }
+        if (aChar == 'u' || aChar == 'U') {
+          mRuleValue.Append('u');
+          return mState;
+        }
+        return State::Done;
+      }
+      if (mRuleValue.Length() == 1) {
+        MOZ_ASSERT(mRuleValue.EqualsLiteral("u"));
+        if (aChar == 'r' || aChar == 'R') {
+          mRuleValue.Append('r');
+          return mState;
+        }
+        return State::Done;
+      }
+      if (mRuleValue.Length() == 2) {
+        MOZ_ASSERT(mRuleValue.EqualsLiteral("ur"));
+        if (aChar == 'l' || aChar == 'L') {
+          mRuleValue.Append('l');
+          return mState;
+        }
+        // Like for the other letters, anything else means this isn't url(.
+        return State::Done;
+      }
+      if (mRuleValue.Length() == 3) {
+        MOZ_ASSERT(mRuleValue.EqualsLiteral("url"));
+        if (aChar == '(') {
+          mUrlValueDelimiterClosingChar = ')';
+          // Drop the "url" marker so that mRuleValue only holds the URL.
+          mRuleValue.Truncate(0);
+          return State::AtRuleValueDelimited;
+        }
+        return State::Done;
+      }
+      MOZ_ASSERT_UNREACHABLE(
+          "How? We should find a paren or a string delimiter");
+      return State::Done;
+    }
+    case State::AtRuleValueDelimited: {
+      if (aChar == mUrlValueDelimiterClosingChar) {
+        return State::AfterRuleValue;
+      }
+      // For @charset we only need to find the end of the value.
+      if (!mInImportRule) {
+        return mState;
+      }
+      if (mUrlValueDelimiterClosingChar == ')' && mRuleValue.IsEmpty()) {
+        // Leading whitespace inside url( ... ) is not part of the URL.
+        if (IsWhitespace(aChar)) {
+          return mState;
+        }
+        if (aChar == '"' || aChar == '\'') {
+          // Handle url("") and url(''): from now on the quote, rather than
+          // the paren, is what ends the value.
+          mUrlValueDelimiterClosingChar = aChar;
+          return mState;
+        }
+      }
+      if (!mRuleValue.Append(aChar, mozilla::fallible)) {
+        // Out of memory: forget the partial URL and stop scanning.
+        mRuleValue.Truncate(0);
+        return State::Done;
+      }
+      return mState;
+    }
+    case State::AfterRuleValue: {
+      if (aChar == ';') {
+        EmitUrl();
+        return State::Idle;
+      }
+      // If there's a selector here and the import was unterminated, just give
+      // up.
+      if (aChar == '{') {
+        return State::Done;
+      }
+      return mState;  // There can be all sorts of stuff here like media
+                      // queries or what not.
+    }
+  }
+  MOZ_ASSERT_UNREACHABLE("Forgot to handle a state?");
+  return State::Done;
+}
+
+}  // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/layout/style/ImportScanner.h
@@ -0,0 +1,90 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 sw=2 et tw=78: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_ImportScanner_h
+#define mozilla_ImportScanner_h
+
+/* A simple best-effort scanner for @import rules for the HTML parser */
+
+#include "nsString.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+
+// Collects the URLs of the leading @import rules of a <style> element while
+// the element's text is fed to it fragment by fragment.
+struct ImportScanner final {
+  ImportScanner() = default;
+
+  // Called when a <style> element starts.
+  //
+  // This has to work from any internal state: <svg:style> elements can nest,
+  // so Start() may be called again before Stop().
+  void Start();
+
+  // Called when a <style> element ends. Returns the list of URLs scanned and
+  // leaves the scanner outside of any style element.
+  nsTArray<nsString> Stop();
+
+  // Scan() should be called when text content is parsed. It does nothing
+  // unless a scan is in progress, that is, when the scanner was never started
+  // or has already given up on the current sheet.
+  void Scan(Span<const char16_t> aFragment) {
+    const bool isScanning =
+        mState != State::OutsideOfStyleElement && mState != State::Done;
+    if (isScanning) {
+      DoScan(aFragment);
+    }
+  }
+
+ private:
+  enum class State {
+    // Initial state; nothing is scanned until Start() is called.
+    OutsideOfStyleElement,
+    // Between rules while scanning the stylesheet: either at the very
+    // beginning or right after a rule has been parsed.
+    Idle,
+    // A '/' has been seen but not the '*' yet, so we don't know whether this
+    // is a comment.
+    MaybeAtCommentStart,
+    // Inside a comment.
+    AtComment,
+    // A '*' has been seen inside a comment, but we don't know yet whether a
+    // '/' comes afterwards (thus ending the comment).
+    MaybeAtCommentEnd,
+    // Parsing the name of an '@' rule.
+    AtRuleName,
+    // Parsing the value of an '@' rule, before its delimiter.
+    AtRuleValue,
+    // Parsing the value of an '@' rule after having seen the delimiter (quote
+    // or url() function) that encloses the url.
+    AtRuleValueDelimited,
+    // The url has been seen, but not the ';' finishing the rule.
+    AfterRuleValue,
+    // Something that is not an @import or a @charset rule has been seen, and
+    // thus further @import / @charset should not be parsed.
+    Done,
+  };
+
+  // Finishes the current rule, recording its URL if it was an @import.
+  void EmitUrl();
+  // Runs the per-character Scan() over a whole fragment.
+  void DoScan(Span<const char16_t> aFragment);
+  // Processes one code unit and returns the state to move to.
+  MOZ_MUST_USE State Scan(char16_t aChar);
+
+  // Length of the longest rule name we care about ("charset"; "import" is
+  // shorter).
+  static constexpr uint32_t kMaxRuleNameLength = 7;
+
+  State mState = State::OutsideOfStyleElement;
+  nsAutoStringN<kMaxRuleNameLength> mRuleName;
+  nsAutoStringN<128> mRuleValue;
+  nsTArray<nsString> mUrlsFound;
+
+  // Conceptually part of the AtRuleValue* / AfterRuleValue states: tells
+  // @import (where we actually care about the value) apart from @charset
+  // (where we don't). A flag is just more convenient than separate states.
+  bool mInImportRule = false;
+  // While in the AtRuleValueDelimited state, the character that will end the
+  // value: either a closing parenthesis (for unquoted urls) or a quote,
+  // single or double.
+  char16_t mUrlValueDelimiterClosingChar = 0;
+};
+
+}  // namespace mozilla
+
+#endif
--- a/layout/style/moz.build
+++ b/layout/style/moz.build
@@ -76,16 +76,17 @@ EXPORTS.mozilla += [
     'CachedInheritingStyles.h',
     'ComputedStyle.h',
     'ComputedStyleInlines.h',
     'CSSEnabledState.h',
     'CSSPropFlags.h',
     'DeclarationBlock.h',
     'DocumentStyleRootIterator.h',
     'GeckoBindings.h',
+    'ImportScanner.h',
     'LayerAnimationInfo.h',
     'MappedDeclarations.h',
     'MediaFeatureChange.h',
     'PostTraversalTask.h',
     'PreferenceSheet.h',
     'PreloadedStyleSheet.h',
     'PseudoStyleType.h',
     'RustCell.h',
@@ -176,16 +177,17 @@ UNIFIED_SOURCES += [
     'DocumentStyleRootIterator.cpp',
     'ErrorReporter.cpp',
     'FontFace.cpp',
     'FontFaceSet.cpp',
     'FontFaceSetIterator.cpp',
     'GeckoBindings.cpp',
     'GroupRule.cpp',
     'ImageLoader.cpp',
+    'ImportScanner.cpp',
     'LayerAnimationInfo.cpp',
     'Loader.cpp',
     'MappedDeclarations.cpp',
     'MediaList.cpp',
     'MediaQueryList.cpp',
     'nsAnimationManager.cpp',
     'nsComputedDOMStyle.cpp',
     'nsCSSAnonBoxes.cpp',
--- a/layout/style/nsICSSDeclaration.cpp
+++ b/layout/style/nsICSSDeclaration.cpp
@@ -4,16 +4,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /* Implementation of non-inline bits of nsICSSDeclaration. */
 
 #include "nsICSSDeclaration.h"
 
 #include "nsINode.h"
+#include "mozilla/css/Rule.h"
 
 using mozilla::dom::DocGroup;
 
 DocGroup* nsICSSDeclaration::GetDocGroup() {
   nsINode* parentNode = GetParentObject();
   if (!parentNode) {
     return nullptr;
   }
new file mode 100644
--- /dev/null
+++ b/layout/style/test/gtest/ImportScannerTest.cpp
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "mozilla/ImportScanner.h"
+
+using namespace mozilla;
+
+// Runs one complete Start() / Scan() / Stop() cycle over aCssCode, fed to the
+// scanner as a single fragment, and returns the URLs it reported.
+static nsTArray<nsString> Scan(const char* aCssCode) {
+  const NS_ConvertUTF8toUTF16 utf16Css(aCssCode);
+  ImportScanner scanner;
+  scanner.Start();
+  scanner.Scan(utf16Css);
+  return scanner.Stop();
+}
+
+// Leading comment, @charset, unquoted url() values (with and without padding)
+// and a final quoted import that is never terminated by ';'.
+TEST(ImportScanner, Simple)
+{
+  static const char kSheet[] =
+      "/* Something something */ "
+      "@charset \"utf-8\";"
+      "@import url(bar);"
+      "@import uRL( baz );"
+      "@import \"bazz)\"";
+  nsTArray<nsString> urls = Scan(kSheet);
+
+  ASSERT_EQ(urls.Length(), 3u);
+  ASSERT_EQ(urls[0], NS_LITERAL_STRING("bar"));
+  ASSERT_EQ(urls[1], NS_LITERAL_STRING("baz"));
+  ASSERT_EQ(urls[2], NS_LITERAL_STRING("bazz)"));
+}
+
+// Quoted strings inside url(), with mixed-case keywords and tab separators.
+// The import that follows the regular "something else {}" rule must not be
+// reported, which is why only three URLs are expected.
+TEST(ImportScanner, UrlWithQuotes)
+{
+  static const char kSheet[] =
+      "/* Something something */ "
+      "@import url(\"bar\");"
+      "@import\tuRL( \"baz\" );"
+      "@imPort\turL( 'bazz' );"
+      "something else {}"
+      "@import\turL( 'bazz' ); ";
+  nsTArray<nsString> urls = Scan(kSheet);
+
+  ASSERT_EQ(urls.Length(), 3u);
+  ASSERT_EQ(urls[0], NS_LITERAL_STRING("bar"));
+  ASSERT_EQ(urls[1], NS_LITERAL_STRING("baz"));
+  ASSERT_EQ(urls[2], NS_LITERAL_STRING("bazz"));
+}
+
+// Whatever follows the URL up to the ';' (media types, media queries) is
+// skipped, and a comment between rules doesn't stop the scan.
+TEST(ImportScanner, MediaIsIgnored)
+{
+  static const char kSheet[] =
+      "@chArset \"utf-8\";"
+      "@import url(\"bar\") print;"
+      "/* Something something */ "
+      "@import\tuRL( \"baz\" ) (min-width: 100px);"
+      "@import\turL( bazz ) (max-width: 100px);";
+  nsTArray<nsString> urls = Scan(kSheet);
+
+  ASSERT_EQ(urls.Length(), 3u);
+  ASSERT_EQ(urls[0], NS_LITERAL_STRING("bar"));
+  ASSERT_EQ(urls[1], NS_LITERAL_STRING("baz"));
+  ASSERT_EQ(urls[2], NS_LITERAL_STRING("bazz"));
+}
--- a/layout/style/test/gtest/moz.build
+++ b/layout/style/test/gtest/moz.build
@@ -2,17 +2,18 @@
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 Library('style-gtest')
 
 UNIFIED_SOURCES = [
-    'StyloParsingBench.cpp'
+    'ImportScannerTest.cpp',
+    'StyloParsingBench.cpp',
 ]
 
 LOCAL_INCLUDES += [
     '/layout/style',
 ]
 
 GENERATED_FILES += [
     'ExampleStylesheet.h',