Bug 1454460 - Buffer the potentially-BOM-related bytes separately and handle them on the fly. r=bz
authorBobby Holley <bobbyholley@gmail.com>
Mon, 16 Apr 2018 12:23:44 -0700
changeset 467673 c92f6b8e882beb93b0f7eb3448aa8132d6cdb28e
parent 467672 376bf7bbb8285b179c3279bc14ab8dd054cc758f
child 467674 985679bb19281871a38e16b330fa63c63881513e
push id9165
push userasasaki@mozilla.com
push dateThu, 26 Apr 2018 21:04:54 +0000
treeherdermozilla-beta@064c3804de2e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbz
bugs1454460
milestone61.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1454460 - Buffer the potentially-BOM-related bytes separately and handle them on the fly. r=bz MozReview-Commit-ID: 5zrKyadBppO
dom/security/SRICheck.cpp
dom/security/SRICheck.h
layout/style/Loader.cpp
layout/style/SheetLoadData.h
layout/style/StreamLoader.cpp
layout/style/StreamLoader.h
--- a/dom/security/SRICheck.cpp
+++ b/dom/security/SRICheck.cpp
@@ -177,17 +177,18 @@ SRICheck::IntegrityMetadata(const nsAStr
     }
   }
   return NS_OK;
 }
 
 /* static */ nsresult
 SRICheck::VerifyIntegrity(const SRIMetadata& aMetadata,
                           nsIChannel* aChannel,
-                          const nsACString& aBytes,
+                          const nsACString& aFirst,
+                          const nsACString& aSecond,
                           const nsACString& aSourceFileURI,
                           nsIConsoleReportCollector* aReporter)
 {
   NS_ENSURE_ARG_POINTER(aReporter);
 
   if (MOZ_LOG_TEST(SRILogHelper::GetSriLog(), mozilla::LogLevel::Debug)) {
     nsAutoCString requestURL;
     nsCOMPtr<nsIURI> originalURI;
@@ -196,17 +197,20 @@ SRICheck::VerifyIntegrity(const SRIMetad
         originalURI) {
       originalURI->GetAsciiSpec(requestURL);
     }
     SRILOG(("SRICheck::VerifyIntegrity (unichar stream)"));
   }
 
   SRICheckDataVerifier verifier(aMetadata, aSourceFileURI, aReporter);
   nsresult rv =
-    verifier.Update(aBytes.Length(), (const uint8_t*)aBytes.BeginReading());
+    verifier.Update(aFirst.Length(), (const uint8_t*)aFirst.BeginReading());
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv =
+    verifier.Update(aSecond.Length(), (const uint8_t*)aSecond.BeginReading());
   NS_ENSURE_SUCCESS(rv, rv);
 
   return verifier.Verify(aMetadata, aChannel, aSourceFileURI, aReporter);
 }
 
 //////////////////////////////////////////////////////////////
 //
 //////////////////////////////////////////////////////////////
--- a/dom/security/SRICheck.h
+++ b/dom/security/SRICheck.h
@@ -31,20 +31,24 @@ public:
   static nsresult IntegrityMetadata(const nsAString& aMetadataList,
                                     const nsACString& aSourceFileURI,
                                     nsIConsoleReportCollector* aReporter,
                                     SRIMetadata* outMetadata);
 
   /**
    * Process the integrity attribute of the element.  A result of false
    * must prevent the resource from loading.
+   *
+   * For uninteresting reasons, the caller may have the data split into two
+   * strings, so we support that here.
    */
   static nsresult VerifyIntegrity(const SRIMetadata& aMetadata,
                                   nsIChannel* aChannel,
-                                  const nsACString& aBytes,
+                                  const nsACString& aFirst,
+                                  const nsACString& aSecond,
                                   const nsACString& aSourceFileURI,
                                   nsIConsoleReportCollector* aReporter);
 };
 
 // The SRICheckDataVerifier can be used in 2 different mode:
 //
 // 1. The streaming mode involves reading bytes from an input, and to use
 //    the |Update| function to stream new bytes, and to use the |Verify|
--- a/layout/style/Loader.cpp
+++ b/layout/style/Loader.cpp
@@ -574,17 +574,18 @@ SheetLoadData::GetReferrerURI()
  * Stream completion code shared by Stylo and the old style system.
  *
  * Here we need to check that the load did not give us an http error
  * page and check the mimetype on the channel to make sure we're not
  * loading non-text/css data in standards mode.
  */
 nsresult
 SheetLoadData::VerifySheetReadyToParse(nsresult aStatus,
-                                       const nsACString& aBytes,
+                                       const nsACString& aBytes1,
+                                       const nsACString& aBytes2,
                                        nsIChannel* aChannel)
 {
   LOG(("SheetLoadData::OnStreamComplete"));
   NS_ASSERTION(!mLoader->mSyncCallback, "Synchronous callback from necko");
 
   if (mIsCancelled) {
     // Just return.  Don't call SheetComplete -- it's already been
     // called and calling it again will lead to an extra NS_RELEASE on
@@ -767,17 +768,17 @@ SheetLoadData::VerifySheetReadyToParse(n
       return NS_OK;
     }
   } else {
     nsAutoCString sourceUri;
     if (mLoader->mDocument && mLoader->mDocument->GetDocumentURI()) {
       mLoader->mDocument->GetDocumentURI()->GetAsciiSpec(sourceUri);
     }
     nsresult rv = SRICheck::VerifyIntegrity(
-      sriMetadata, aChannel, aBytes, sourceUri, mLoader->mReporter);
+      sriMetadata, aChannel, aBytes1, aBytes2, sourceUri, mLoader->mReporter);
 
     nsCOMPtr<nsILoadGroup> loadGroup;
     aChannel->GetLoadGroup(getter_AddRefs(loadGroup));
     if (loadGroup) {
       mLoader->mReporter->FlushConsoleReports(loadGroup);
     } else {
       mLoader->mReporter->FlushConsoleReports(mLoader->mDocument);
     }
--- a/layout/style/SheetLoadData.h
+++ b/layout/style/SheetLoadData.h
@@ -75,18 +75,21 @@ public:
 
   already_AddRefed<nsIURI> GetReferrerURI();
 
   void ScheduleLoadEventIfNeeded();
 
   NotNull<const Encoding*> DetermineNonBOMEncoding(nsACString const& aSegment,
                                                    nsIChannel* aChannel);
 
+  // The caller may have the bytes for the stylesheet split across two strings,
+  // so aBytes1 and aBytes2 refer to those pieces.
   nsresult VerifySheetReadyToParse(nsresult aStatus,
-                                   const nsACString& aBytes,
+                                   const nsACString& aBytes1,
+                                   const nsACString& aBytes2,
                                    nsIChannel* aChannel);
 
   NS_DECL_ISUPPORTS
   NS_DECL_NSIRUNNABLE
   NS_DECL_NSITHREADOBSERVER
 
   // Hold a ref to the CSSLoader so we can call back to it to let it
   // know the load finished
--- a/layout/style/StreamLoader.cpp
+++ b/layout/style/StreamLoader.cpp
@@ -54,86 +54,75 @@ StreamLoader::OnStartRequest(nsIRequest*
 
 NS_IMETHODIMP
 StreamLoader::OnStopRequest(nsIRequest* aRequest,
                             nsISupports* aContext,
                             nsresult aStatus)
 {
   // Decoded data
   nsCString utf8String;
-  // How many bytes of decoded data to skip (3 when skipping UTF-8 BOM needed,
-  // 0 otherwise)
-  size_t skip = 0;
-
-  const Encoding* encoding;
-
-  nsresult rv = NS_OK;
-
   {
     // Hold the nsStringBuffer for the bytes from the stack to ensure release
     // no matter which return branch is taken.
     nsCString bytes(mBytes);
     mBytes.Truncate();
 
     nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
 
     if (NS_FAILED(mStatus)) {
-      mSheetLoadData->VerifySheetReadyToParse(mStatus, EmptyCString(), channel);
+      mSheetLoadData->VerifySheetReadyToParse(mStatus, EmptyCString(), EmptyCString(), channel);
       return mStatus;
     }
 
     nsresult rv =
-      mSheetLoadData->VerifySheetReadyToParse(aStatus, bytes, channel);
+      mSheetLoadData->VerifySheetReadyToParse(aStatus, mBOMBytes, bytes, channel);
     if (rv != NS_OK_PARSE_SHEET) {
       return rv;
     }
-
     rv = NS_OK;
 
-    size_t bomLength;
-    Tie(encoding, bomLength) = Encoding::ForBOM(bytes);
+    // BOM detection generally happens during the write callback, but that won't
+    // have happened if fewer than three bytes were received.
+    if (mEncodingFromBOM.isNothing()) {
+      HandleBOM();
+      MOZ_ASSERT(mEncodingFromBOM.isSome());
+    }
+
+    // The BOM handling has happened, but we still may not have an encoding if
+    // there was no BOM. Ensure we have one.
+    const Encoding* encoding = mEncodingFromBOM.value();
     if (!encoding) {
       // No BOM
       encoding = mSheetLoadData->DetermineNonBOMEncoding(bytes, channel);
+    }
+    mSheetLoadData->mEncoding = encoding;
 
-      rv = encoding->DecodeWithoutBOMHandling(bytes, utf8String);
-    } else if (encoding == UTF_8_ENCODING) {
-      // UTF-8 BOM; handling this manually because mozilla::Encoding
-      // can't handle this without copying with C++ types and uses
-      // infallible allocation with Rust types (which could avoid
-      // the copy).
+    size_t validated = 0;
+    if (encoding == UTF_8_ENCODING) {
+      validated = Encoding::UTF8ValidUpTo(bytes);
+    }
 
-      // First, chop off the BOM.
-      auto tail = Span<const uint8_t>(bytes).From(bomLength);
-      size_t upTo = Encoding::UTF8ValidUpTo(tail);
-      if (upTo == tail.Length()) {
-        // No need to copy
-        skip = bomLength;
-        utf8String.Assign(bytes);
-      } else {
-        rv = encoding->DecodeWithoutBOMHandling(tail, utf8String, upTo);
-      }
+    if (validated == bytes.Length()) {
+     // Either this is UTF-8 and all valid, or it's not UTF-8 but is an
+     // empty string. This assumes that an empty string in any encoding
+     // decodes to empty string, which seems like a plausible assumption.
+      utf8String.Assign(bytes);
     } else {
-      // UTF-16LE or UTF-16BE
-      rv = encoding->DecodeWithBOMRemoval(bytes, utf8String);
+      rv = encoding->DecodeWithoutBOMHandling(bytes, utf8String, validated);
+      NS_ENSURE_SUCCESS(rv, rv);
     }
   } // run destructor for `bytes`
 
-  if (NS_FAILED(rv)) {
-    return rv;
-  }
-
   // For reasons I don't understand, factoring the below lines into
   // a method on SheetLoadData resulted in a linker error. Hence,
   // accessing fields of mSheetLoadData from here.
-  mSheetLoadData->mEncoding = encoding;
   bool dummy;
   return mSheetLoadData->mLoader->ParseSheet(
     EmptyString(),
-    Span<const uint8_t>(utf8String).From(skip),
+    Span<const uint8_t>(utf8String).From(0),
     mSheetLoadData,
     /* aAllowAsync = */ true,
     dummy);
 }
 
 /* nsIStreamListener implementation */
 NS_IMETHODIMP
 StreamLoader::OnDataAvailable(nsIRequest*,
@@ -144,30 +133,66 @@ StreamLoader::OnDataAvailable(nsIRequest
 {
   if (NS_FAILED(mStatus)) {
     return mStatus;
   }
   uint32_t dummy;
   return aInputStream->ReadSegments(WriteSegmentFun, this, aCount, &dummy);
 }
 
+void
+StreamLoader::HandleBOM()
+{
+  MOZ_ASSERT(mEncodingFromBOM.isNothing());
+  MOZ_ASSERT(mBytes.IsEmpty());
+
+  const Encoding* encoding;
+  size_t bomLength;
+  Tie(encoding, bomLength) = Encoding::ForBOM(mBOMBytes);
+  mEncodingFromBOM.emplace(encoding); // Null means no BOM.
+
+  // BOMs are three bytes at most, but may be fewer. Copy over anything
+  // that wasn't part of the BOM to mBytes. Note that we need to track
+  // any BOM bytes as well for SRI handling.
+  mBytes.Append(Substring(mBOMBytes, bomLength));
+  mBOMBytes.Truncate(bomLength);
+}
+
 nsresult
 StreamLoader::WriteSegmentFun(nsIInputStream*,
                               void* aClosure,
                               const char* aSegment,
                               uint32_t,
                               uint32_t aCount,
                               uint32_t* aWriteCount)
 {
+  *aWriteCount = 0;
   StreamLoader* self = static_cast<StreamLoader*>(aClosure);
   if (NS_FAILED(self->mStatus)) {
     return self->mStatus;
   }
+
+  // If we haven't done BOM detection yet, divert bytes into the special buffer.
+  if (self->mEncodingFromBOM.isNothing()) {
+    size_t bytesToCopy = std::min(3 - self->mBOMBytes.Length(), aCount);
+    self->mBOMBytes.Append(aSegment, bytesToCopy);
+    aSegment += bytesToCopy;
+    *aWriteCount += bytesToCopy;
+    aCount -= bytesToCopy;
+
+    if (self->mBOMBytes.Length() == 3) {
+      self->HandleBOM();
+    } else {
+      return NS_OK;
+    }
+  }
+
   if (!self->mBytes.Append(aSegment, aCount, mozilla::fallible_t())) {
     self->mBytes.Truncate();
     return (self->mStatus = NS_ERROR_OUT_OF_MEMORY);
   }
-  *aWriteCount = aCount;
+
+  *aWriteCount += aCount;
   return NS_OK;
 }
 
 } // namespace css
 } // namespace mozilla
--- a/layout/style/StreamLoader.h
+++ b/layout/style/StreamLoader.h
@@ -33,17 +33,25 @@ private:
    */
   static nsresult WriteSegmentFun(nsIInputStream*,
                                   void*,
                                   const char*,
                                   uint32_t,
                                   uint32_t,
                                   uint32_t*);
 
+  void HandleBOM();
+
   RefPtr<mozilla::css::SheetLoadData> mSheetLoadData;
+  nsresult mStatus;
+  Maybe<const Encoding*> mEncodingFromBOM;
+
+  // We store the initial three bytes of the stream into mBOMBytes, and then
+  // use that buffer to detect a BOM. We then shift any non-BOM bytes into
+  // mBytes, and store all subsequent data in that buffer.
   nsCString mBytes;
-  nsresult mStatus;
+  nsAutoCStringN<3> mBOMBytes;
 };
 
 } // namespace css
 } // namespace mozilla
 
 #endif // mozilla_css_StreamLoader_h