Bug 1553502 - Implement ScriptLoader::ConvertToUTF8. r=bzbarsky
author Jeff Walden <jwalden@mit.edu>
Sat, 25 May 2019 19:45:50 +0000
changeset 475598 1337df4d9d115578e65cabfe75f0f679dbb9d7ab
parent 475597 edbf8267dd4f5a786ae660ff9e2fe890cf74c48e
child 475599 20f03bc56440ea70ec402d3c15780d1fb969f829
push id 86381
push user jwalden@mit.edu
push date Sat, 25 May 2019 19:47:01 +0000
treeherder autoland@492f35b04474 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bzbarsky
bugs 1553502
milestone 69.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1553502 - Implement ScriptLoader::ConvertToUTF8. r=bzbarsky
Differential Revision: https://phabricator.services.mozilla.com/D32255
dom/script/ScriptLoader.cpp
dom/script/ScriptLoader.h
--- a/dom/script/ScriptLoader.cpp
+++ b/dom/script/ScriptLoader.cpp
@@ -65,19 +65,21 @@
 #include "nsINetworkPredictor.h"
 #include "nsMimeTypes.h"
 #include "mozilla/ConsoleReportCollector.h"
 #include "mozilla/LoadInfo.h"
 #include "ReferrerInfo.h"
 
 #include "mozilla/AsyncEventDispatcher.h"
 #include "mozilla/Attributes.h"
+#include "mozilla/ScopeExit.h"
 #include "mozilla/Telemetry.h"
 #include "mozilla/TimeStamp.h"
 #include "mozilla/Unused.h"
+#include "mozilla/Utf8.h"  // mozilla::Utf8Unit
 #include "nsIScriptError.h"
 #include "nsIAsyncOutputStream.h"
 
 using JS::SourceText;
 
 using mozilla::Telemetry::LABELS_DOM_SCRIPT_PRELOAD_RESULT;
 
 namespace mozilla {
@@ -3099,22 +3101,83 @@ bool ScriptLoader::ReadyToExecuteParserB
       AddParserBlockingScriptExecutionBlocker();
       return false;
     }
   }
 
   return true;
 }
 
-/* static */
-nsresult ScriptLoader::ConvertToUTF16(nsIChannel* aChannel,
-                                      const uint8_t* aData, uint32_t aLength,
-                                      const nsAString& aHintCharset,
-                                      Document* aDocument, char16_t*& aBufOut,
-                                      size_t& aLengthOut) {
+template <typename Unit>
+struct Conversion;
+
+template <>
+struct Conversion<char16_t> {
+  static CheckedInt<size_t> MaxBufferLength(
+      const UniquePtr<Decoder>& aUnicodeDecoder, size_t aByteLength) {
+    return aUnicodeDecoder->MaxUTF16BufferLength(aByteLength);
+  }
+
+  static size_t DecodeInto(const UniquePtr<Decoder>& aUnicodeDecoder,
+                           const Span<const uint8_t>& aData, char16_t* aDest,
+                           size_t aDestLength) {
+    uint32_t result;
+    size_t read;
+    size_t written;
+    bool hadErrors;
+    Tie(result, read, written, hadErrors) = aUnicodeDecoder->DecodeToUTF16(
+        aData, MakeSpan(aDest, aDestLength), true);
+    MOZ_ASSERT(result == kInputEmpty);
+    MOZ_ASSERT(read == aData.Length());
+    MOZ_ASSERT(written <= aDestLength);
+    Unused << hadErrors;
+
+    return written;
+  }
+};
+
+template <>
+struct Conversion<Utf8Unit> {
+  static CheckedInt<size_t> MaxBufferLength(
+      const UniquePtr<Decoder>& aUnicodeDecoder, size_t aByteLength) {
+    return aUnicodeDecoder->MaxUTF8BufferLength(aByteLength);
+  }
+
+  static size_t DecodeInto(const UniquePtr<Decoder>& aUnicodeDecoder,
+                           const Span<const uint8_t>& aData, Utf8Unit* aDest,
+                           size_t aDestLength) {
+    uint32_t result;
+    size_t read;
+    size_t written;
+    bool hadErrors;
+    // Until C++ char8_t happens, our decoder APIs deal in |uint8_t| while
+    // |Utf8Unit| internally deals with |char|, so there's inevitable impedance
+    // mismatch.  :-(  The written memory will be interpreted through
+    // |char Utf8Unit::mValue| which is *permissible* because any object's
+    // memory can be interpreted as |char|.  Unfortunately, until
+    // twos-complement is mandated, we have to play fast and loose and *hope*
+    // interpreting memory storing |uint8_t| as |char| will pick up the desired
+    // wrapped-around value.  ¯\_(ツ)_/¯
+    Tie(result, read, written, hadErrors) = aUnicodeDecoder->DecodeToUTF8(
+        aData, MakeSpan(reinterpret_cast<uint8_t*>(aDest), aDestLength), true);
+    MOZ_ASSERT(result == kInputEmpty);
+    MOZ_ASSERT(read == aData.Length());
+    MOZ_ASSERT(written <= aDestLength);
+    Unused << hadErrors;
+
+    return written;
+  }
+};
+
+template <typename Unit>
+static nsresult ConvertToUnicode(nsIChannel* aChannel, const uint8_t* aData,
+                                 uint32_t aLength,
+                                 const nsAString& aHintCharset,
+                                 Document* aDocument, Unit*& aBufOut,
+                                 size_t& aLengthOut) {
   if (!aLength) {
     aBufOut = nullptr;
     aLengthOut = 0;
     return NS_OK;
   }
 
   auto data = MakeSpan(aData, aLength);
 
@@ -3152,54 +3215,63 @@ nsresult ScriptLoader::ConvertToUTF16(ns
 
   if (!unicodeDecoder) {
     // Curiously, there are various callers that don't pass aDocument. The
     // fallback in the old code was ISO-8859-1, which behaved like
     // windows-1252.
     unicodeDecoder = WINDOWS_1252_ENCODING->NewDecoderWithoutBOMHandling();
   }
 
-  CheckedInt<size_t> maxLength = unicodeDecoder->MaxUTF16BufferLength(aLength);
-  if (!maxLength.isValid()) {
+  auto signalOOM = mozilla::MakeScopeExit([&aBufOut, &aLengthOut]() {
     aBufOut = nullptr;
     aLengthOut = 0;
+  });
+
+  CheckedInt<size_t> bufferLength =
+      Conversion<Unit>::MaxBufferLength(unicodeDecoder, aLength);
+  if (!bufferLength.isValid()) {
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
-  size_t unicodeLength = maxLength.value();
-
-  maxLength *= sizeof(char16_t);
-
-  if (!maxLength.isValid()) {
-    aBufOut = nullptr;
-    aLengthOut = 0;
+  CheckedInt<size_t> bufferByteSize = bufferLength * sizeof(Unit);
+  if (!bufferByteSize.isValid()) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  aBufOut = static_cast<Unit*>(js_malloc(bufferByteSize.value()));
+  if (!aBufOut) {
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
-  aBufOut = static_cast<char16_t*>(js_malloc(maxLength.value()));
-  if (!aBufOut) {
-    aLengthOut = 0;
-    return NS_ERROR_OUT_OF_MEMORY;
-  }
-
-  uint32_t result;
-  size_t read;
-  size_t written;
-  bool hadErrors;
-  Tie(result, read, written, hadErrors) = unicodeDecoder->DecodeToUTF16(
-      data, MakeSpan(aBufOut, unicodeLength), true);
-  MOZ_ASSERT(result == kInputEmpty);
-  MOZ_ASSERT(read == aLength);
-  MOZ_ASSERT(written <= unicodeLength);
-  Unused << hadErrors;
-  aLengthOut = written;
-
+  signalOOM.release();
+  aLengthOut = Conversion<Unit>::DecodeInto(unicodeDecoder, data, aBufOut,
+                                            bufferLength.value());
   return NS_OK;
 }
 
+/* static */
+nsresult ScriptLoader::ConvertToUTF16(nsIChannel* aChannel,
+                                      const uint8_t* aData, uint32_t aLength,
+                                      const nsAString& aHintCharset,
+                                      Document* aDocument, char16_t*& aBufOut,
+                                      size_t& aLengthOut) {
+  return ConvertToUnicode(aChannel, aData, aLength, aHintCharset, aDocument,
+                          aBufOut, aLengthOut);
+}
+
+/* static */
+nsresult ScriptLoader::ConvertToUTF8(nsIChannel* aChannel, const uint8_t* aData,
+                                     uint32_t aLength,
+                                     const nsAString& aHintCharset,
+                                     Document* aDocument, Utf8Unit*& aBufOut,
+                                     size_t& aLengthOut) {
+  return ConvertToUnicode(aChannel, aData, aLength, aHintCharset, aDocument,
+                          aBufOut, aLengthOut);
+}
+
 nsresult ScriptLoader::OnStreamComplete(
     nsIIncrementalStreamLoader* aLoader, ScriptLoadRequest* aRequest,
     nsresult aChannelStatus, nsresult aSRIStatus,
     SRICheckDataVerifier* aSRIDataVerifier) {
   NS_ASSERTION(aRequest, "null request in stream complete handler");
   NS_ENSURE_TRUE(aRequest, NS_ERROR_FAILURE);
 
   nsresult rv = VerifySRI(aRequest, aLoader, aSRIStatus, aSRIDataVerifier);
--- a/dom/script/ScriptLoader.h
+++ b/dom/script/ScriptLoader.h
@@ -22,16 +22,17 @@
 #include "mozilla/CORSMode.h"
 #include "mozilla/dom/ScriptLoadRequest.h"
 #include "mozilla/dom/SRIMetadata.h"
 #include "mozilla/dom/SRICheck.h"
 #include "mozilla/Maybe.h"
 #include "mozilla/MozPromise.h"
 #include "mozilla/net/ReferrerPolicy.h"
 #include "mozilla/StaticPrefs.h"
+#include "mozilla/Utf8.h"  // mozilla::Utf8Unit
 #include "mozilla/Vector.h"
 
 class nsIURI;
 
 namespace JS {
 
 template <typename UnitT>
 class SourceText;
@@ -176,27 +177,32 @@ class ScriptLoader final : public nsISup
   void RemoveExecuteBlocker() {
     MOZ_ASSERT(mBlockerCount);
     if (!--mBlockerCount) {
       ProcessPendingRequestsAsync();
     }
   }
 
   /**
-   * Convert the given buffer to a UTF-16 string.
+   * Convert the given buffer to a UTF-16 string.  If the buffer begins with a
+   * BOM, it is interpreted as that encoding; otherwise the first of |aChannel|,
+   * |aHintCharset|, or |aDocument| that provides a recognized encoding is used,
+   * or Windows-1252 if none of them do.
+   *
+   * Encoding errors in the buffer are converted to replacement characters, so
+   * allocation failure is the only way this function can fail.
+   *
    * @param aChannel     Channel corresponding to the data. May be null.
    * @param aData        The data to convert
    * @param aLength      Length of the data
-   * @param aHintCharset Hint for the character set (e.g., from a charset
-   *                     attribute). May be the empty string.
-   * @param aDocument    Document which the data is loaded for. Must not be
-   *                     null.
-   * @param aBufOut      [out] char16_t array allocated by ConvertToUTF16 and
-   *                     containing data converted to unicode.  Caller must
-   *                     js_free() this data when no longer needed.
+   * @param aHintCharset Character set hint (e.g., from a charset attribute).
+   * @param aDocument    Document which the data is loaded for. May be null.
+   * @param aBufOut      [out] fresh char16_t array containing data converted to
+   *                     Unicode.  Caller must js_free() this data when finished
+   *                     with it.
    * @param aLengthOut   [out] Length of array returned in aBufOut in number
    *                     of char16_t code units.
    */
   static nsresult ConvertToUTF16(nsIChannel* aChannel, const uint8_t* aData,
                                  uint32_t aLength,
                                  const nsAString& aHintCharset,
                                  Document* aDocument, char16_t*& aBufOut,
                                  size_t& aLengthOut);
@@ -212,16 +218,41 @@ class ScriptLoader final : public nsISup
                                  aDocument, bufOut, aLengthOut);
     if (NS_SUCCEEDED(rv)) {
       aBufOut.reset(bufOut);
     }
     return rv;
   };
 
   /**
+   * Convert the given buffer to a UTF-8 string.  If the buffer begins with a
+   * BOM, it is interpreted as that encoding; otherwise the first of |aChannel|,
+   * |aHintCharset|, or |aDocument| that provides a recognized encoding is used,
+   * or Windows-1252 if none of them do.
+   *
+   * Encoding errors in the buffer are converted to replacement characters, so
+   * allocation failure is the only way this function can fail.
+   *
+   * @param aChannel     Channel corresponding to the data. May be null.
+   * @param aData        The data to convert
+   * @param aLength      Length of the data
+   * @param aHintCharset Character set hint (e.g., from a charset attribute).
+   * @param aDocument    Document which the data is loaded for. May be null.
+   * @param aBufOut      [out] fresh Utf8Unit array containing data converted to
+   *                     Unicode.  Caller must js_free() this data when finished
+   *                     with it.
+   * @param aLengthOut   [out] Length of array returned in aBufOut in UTF-8 code
+   *                     units (i.e. in bytes).
+   */
+  static nsresult ConvertToUTF8(nsIChannel* aChannel, const uint8_t* aData,
+                                uint32_t aLength, const nsAString& aHintCharset,
+                                Document* aDocument, Utf8Unit*& aBufOut,
+                                size_t& aLengthOut);
+
+  /**
    * Handle the completion of a stream.  This is called by the
    * ScriptLoadHandler object which observes the IncrementalStreamLoader
    * loading the script. The streamed content is expected to be stored on the
    * aRequest argument.
    */
   nsresult OnStreamComplete(nsIIncrementalStreamLoader* aLoader,
                             ScriptLoadRequest* aRequest,
                             nsresult aChannelStatus, nsresult aSRIStatus,