Bug 1349528 - Use encoding_rs for normalizing USVString. r=smaug
author Henri Sivonen <hsivonen@hsivonen.fi>
Thu, 16 Aug 2018 10:15:07 +0000
changeset 486998 42d6f6784fc21d04838241d35d3ceb82a1ecc935
parent 486997 30befe133f2375412cd68a44a234e3aa52de6eca
child 486999 906fa624f03ba319ba1286058c13b2ead72678b4
push id 9719
push user ffxbld-merge
push date Fri, 24 Aug 2018 17:49:46 +0000
treeherder mozilla-beta@719ec98fba77 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smaug
bugs 1349528
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1349528 - Use encoding_rs for normalizing USVString. r=smaug

MozReview-Commit-ID: 9uG6j8UdfKR

Differential Revision: https://phabricator.services.mozilla.com/D3413
dom/bindings/BindingUtils.cpp
dom/bindings/FakeString.h
intl/encoding_glue/src/lib.rs
xpcom/string/nsReadableUtils.h
--- a/dom/bindings/BindingUtils.cpp
+++ b/dom/bindings/BindingUtils.cpp
@@ -32,16 +32,17 @@
 #include "WorkerPrivate.h"
 #include "WorkerRunnable.h"
 #include "WrapperFactory.h"
 #include "xpcprivate.h"
 #include "XrayWrapper.h"
 #include "nsPrintfCString.h"
 #include "mozilla/Sprintf.h"
 #include "nsGlobalWindow.h"
+#include "nsReadableUtils.h"
 
 #include "mozilla/dom/ScriptSettings.h"
 #include "mozilla/dom/CustomElementRegistry.h"
 #include "mozilla/dom/DOMException.h"
 #include "mozilla/dom/ElementBinding.h"
 #include "mozilla/dom/HTMLObjectElement.h"
 #include "mozilla/dom/HTMLObjectElementBinding.h"
 #include "mozilla/dom/HTMLEmbedElement.h"
@@ -2776,44 +2777,26 @@ NonVoidByteStringToJsval(JSContext *cx, 
 
     if (!jsStr)
         return false;
 
     rval.setString(jsStr);
     return true;
 }
 
-
-template<typename T> static void
-NormalizeUSVStringInternal(T& aString)
-{
-  char16_t* start = aString.BeginWriting();
-  // Must use const here because we can't pass char** to UTF16CharEnumerator as
-  // it expects const char**.  Unclear why this is illegal...
-  const char16_t* nextChar = start;
-  const char16_t* end = aString.Data() + aString.Length();
-  while (nextChar < end) {
-    uint32_t enumerated = UTF16CharEnumerator::NextChar(&nextChar, end);
-    if (enumerated == UCS2_REPLACEMENT_CHAR) {
-      int32_t lastCharIndex = (nextChar - start) - 1;
-      start[lastCharIndex] = static_cast<char16_t>(enumerated);
-    }
-  }
-}
-
 void
 NormalizeUSVString(nsAString& aString)
 {
-  NormalizeUSVStringInternal(aString);
+  EnsureUTF16Validity(aString);
 }
 
 void
 NormalizeUSVString(binding_detail::FakeString& aString)
 {
-  NormalizeUSVStringInternal(aString);
+  EnsureUTF16ValiditySpan(aString);
 }
 
 bool
 ConvertJSValueToByteString(JSContext* cx, JS::Handle<JS::Value> v,
                            bool nullable, nsACString& result)
 {
   JS::Rooted<JSString*> s(cx);
   if (v.isString()) {
--- a/dom/bindings/FakeString.h
+++ b/dom/bindings/FakeString.h
@@ -5,16 +5,17 @@
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef mozilla_dom_FakeString_h__
 #define mozilla_dom_FakeString_h__
 
 #include "nsString.h"
 #include "nsStringBuffer.h"
 #include "mozilla/RefPtr.h"
+#include "mozilla/Span.h"
 
 namespace mozilla {
 namespace dom {
 namespace binding_detail {
 // A struct that has the same layout as an nsString but much faster
 // constructor and destructor behavior. FakeString uses inline storage
 // for small strings and a nsStringBuffer for longer strings.
 struct FakeString {
@@ -64,24 +65,36 @@ struct FakeString {
 
   const nsString::char_type* Data() const
   {
     return mData;
   }
 
   nsString::char_type* BeginWriting()
   {
+    MOZ_ASSERT(!(mDataFlags & nsString::DataFlags::REFCOUNTED) || 
+               !nsStringBuffer::FromData(mData)->IsReadonly());
     return mData;
   }
 
   nsString::size_type Length() const
   {
     return mLength;
   }
 
+  operator mozilla::Span<const nsString::char_type>() const
+  {
+    return mozilla::MakeSpan(Data(), Length());
+  }
+
+  operator mozilla::Span<nsString::char_type>()
+  {
+    return mozilla::MakeSpan(BeginWriting(), Length());
+  }
+
   // Reserve space to write aLength chars, not including null-terminator.
   bool SetLength(nsString::size_type aLength, mozilla::fallible_t const&) {
     // Use mInlineStorage for small strings.
     if (aLength < sInlineCapacity) {
       SetData(mInlineStorage);
     } else {
       RefPtr<nsStringBuffer> buf = nsStringBuffer::Alloc((aLength + 1) * sizeof(nsString::char_type));
       if (MOZ_UNLIKELY(!buf)) {
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -614,16 +614,26 @@ pub unsafe extern "C" fn encoding_mem_is
 #[no_mangle]
 pub unsafe extern "C" fn encoding_mem_is_str_latin1(buffer: *const u8, len: usize) -> bool {
     encoding_rs::mem::is_str_latin1(::std::str::from_utf8_unchecked(
         ::std::slice::from_raw_parts(buffer, len),
     ))
 }
 
 #[no_mangle]
+pub unsafe extern "C" fn encoding_mem_utf16_valid_up_to(buffer: *const u16, len: usize) -> usize {
+    encoding_rs::mem::utf16_valid_up_to(::std::slice::from_raw_parts(buffer, len))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn encoding_mem_ensure_utf16_validity(buffer: *mut u16, len: usize) {
+    encoding_rs::mem::ensure_utf16_validity(::std::slice::from_raw_parts_mut(buffer, len));
+}
+
+#[no_mangle]
 pub unsafe extern "C" fn encoding_mem_convert_utf16_to_latin1_lossy(
     src: *const u16,
     src_len: usize,
     dst: *mut u8,
     dst_len: usize,
 ) {
     encoding_rs::mem::convert_utf16_to_latin1_lossy(
         ::std::slice::from_raw_parts(src, src_len),
--- a/xpcom/string/nsReadableUtils.h
+++ b/xpcom/string/nsReadableUtils.h
@@ -34,16 +34,22 @@ extern "C" {
   encoding_mem_is_utf8_latin1(uint8_t const* buffer, size_t buffer_len);
 
   bool
   encoding_mem_is_str_latin1(uint8_t const* buffer, size_t buffer_len);
 
   bool
   encoding_mem_is_utf16_latin1(char16_t const* buffer, size_t buffer_len);
 
+  size_t
+  encoding_mem_utf16_valid_up_to(char16_t const* buffer, size_t buffer_len);
+
+  void
+  encoding_mem_ensure_utf16_validity(char16_t* buffer, size_t buffer_len);
+
   void
   encoding_mem_convert_utf16_to_latin1_lossy(const char16_t* src,
                                              size_t src_len,
                                              char* dst,
                                              size_t dst_len);
 
   size_t
   encoding_mem_convert_utf8_to_latin1_lossy(const char* src,
@@ -635,16 +641,55 @@ IsUTF8(mozilla::Span<const char> aString
       }
     }
     return true;
   }
   end:
   return length == encoding_utf8_valid_up_to(ptr, length);
 }
 
+/**
+ * Returns the index of the first unpaired surrogate or
+ * the length of the string if there are none.
+ */
+inline uint32_t
+UTF16ValidUpTo(mozilla::Span<const char16_t> aString)
+{
+  return encoding_mem_utf16_valid_up_to(aString.Elements(), aString.Length());
+}
+
+/**
+ * Replaces unpaired surrogates with U+FFFD in the argument.
+ */
+inline void
+EnsureUTF16ValiditySpan(mozilla::Span<char16_t> aString)
+{
+  encoding_mem_ensure_utf16_validity(aString.Elements(), aString.Length());
+}
+
+/**
+ * Replaces unpaired surrogates with U+FFFD in the argument.
+ *
+ * Copies a shared string buffer or an otherwise read-only
+ * buffer only if there are unpaired surrogates.
+ */
+inline void
+EnsureUTF16Validity(nsAString& aString)
+{
+  uint32_t upTo = UTF16ValidUpTo(aString);
+  uint32_t len = aString.Length();
+  if (upTo == len) {
+    return;
+  }
+  char16_t* ptr = aString.BeginWriting();
+  auto span = mozilla::MakeSpan(ptr, len);
+  span[upTo] = 0xFFFD;
+  EnsureUTF16ValiditySpan(span.From(upTo + 1));
+}
+
 bool ParseString(const nsACString& aAstring, char aDelimiter,
                  nsTArray<nsCString>& aArray);
 
 /**
  * Converts case in place in the argument string.
  */
 void ToUpperCase(nsACString&);