Bug 586838 - Add NEON versions of LossyConvertEncoding. r=erahm
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>
Fri, 12 Jan 2018 15:46:11 +0900
changeset 450810 3db5dd3a6fe64dd31129fdaa2e2015b93801c675
parent 450713 3776f91840a4c641317b4719332a5103e317f6bb
child 450811 86d41b5efe074f6988085082df9ef537eee0813a
push id8543
push userryanvm@gmail.com
push dateTue, 16 Jan 2018 14:33:22 +0000
treeherdermozilla-beta@a6525ed16a32 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerserahm
bugs586838
milestone59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 586838 - Add NEON versions of LossyConvertEncoding. r=erahm
xpcom/string/moz.build
xpcom/string/nsUTF8Utils.h
xpcom/string/nsUTF8UtilsNEON.cpp
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -54,16 +54,20 @@ UNIFIED_SOURCES += [
 # Are we targeting x86 or x86-64?  If so, compile the SSE2 functions for
 # nsUTF8Utils.cpp and nsReadableUtils.cpp.
 if CONFIG['INTEL_ARCHITECTURE']:
     SOURCES += ['nsUTF8UtilsSSE2.cpp']
     SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
     SOURCES += ['nsReadableUtilsSSE2.cpp']
     SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
 
+if CONFIG['BUILD_ARM_NEON'] or CONFIG['CPU_ARCH'] == 'aarch64':
+    SOURCES += ['nsUTF8UtilsNEON.cpp']
+    SOURCES['nsUTF8UtilsNEON.cpp'].flags += CONFIG['NEON_FLAGS']
+
 # MSVC 2017 has a bug that incorrectly generates C5037 warning which
 # hits the template string code. We need to disable this warning as a
 # workaround. See https://developercommunity.visualstudio.com/
 # content/problem/81223/incorrect-error-c5037-with-permissive.html
 if CONFIG['CC_TYPE'] in ('msvc', 'clang-cl'):
     CXXFLAGS += ['-wd5037']
 
 FINAL_LIBRARY = 'xul'
--- a/xpcom/string/nsUTF8Utils.h
+++ b/xpcom/string/nsUTF8Utils.h
@@ -6,17 +6,19 @@
 #ifndef nsUTF8Utils_h_
 #define nsUTF8Utils_h_
 
 // This file may be used in two ways: if MOZILLA_INTERNAL_API is defined, this
 // file will provide signatures for the Mozilla abstract string types. It will
 // use XPCOM assertion/debugging macros, etc.
 
 #include "nscore.h"
+#include "mozilla/arm.h"
 #include "mozilla/Assertions.h"
+#include "mozilla/EndianUtils.h"
 #include "mozilla/SSE.h"
 #include "mozilla/TypeTraits.h"
 
 #include "nsCharTraits.h"
 
 #ifdef MOZILLA_INTERNAL_API
 #define UTF8UTILS_WARNING(msg) NS_WARNING(msg)
 #else
@@ -658,24 +660,34 @@ public:
   write(const char* aSource, uint32_t aSourceLength)
   {
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
     if (mozilla::supports_sse2()) {
       write_sse2(aSource, aSourceLength);
       return;
     }
 #endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+    if (mozilla::supports_neon()) {
+      write_neon(aSource, aSourceLength);
+      return;
+    }
+#endif
     const char* done_writing = aSource + aSourceLength;
     while (aSource < done_writing) {
       *mDestination++ = (char16_t)(unsigned char)(*aSource++);
     }
   }
 
   void
   write_sse2(const char* aSource, uint32_t aSourceLength);
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+  void
+  write_neon(const char* aSource, uint32_t aSourceLength);
+#endif
 
   void
   write_terminator()
   {
     *mDestination = (char16_t)(0);
   }
 
 private:
@@ -702,26 +714,36 @@ public:
   write(const char16_t* aSource, uint32_t aSourceLength)
   {
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
     if (mozilla::supports_sse2()) {
       write_sse2(aSource, aSourceLength);
       return;
     }
 #endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+    if (mozilla::supports_neon()) {
+      write_neon(aSource, aSourceLength);
+      return;
+    }
+#endif
     const char16_t* done_writing = aSource + aSourceLength;
     while (aSource < done_writing) {
       *mDestination++ = (char)(*aSource++);
     }
   }
 
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
   void
   write_sse2(const char16_t* aSource, uint32_t aSourceLength);
 #endif
+#if defined(MOZILLA_MAY_SUPPORT_NEON) && defined(MOZ_LITTLE_ENDIAN)
+  void
+  write_neon(const char16_t* aSource, uint32_t aSourceLength);
+#endif
 
   void
   write_terminator()
   {
     *mDestination = '\0';
   }
 
 private:
new file mode 100644
--- /dev/null
+++ b/xpcom/string/nsUTF8UtilsNEON.cpp
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+#include "nsAlgorithm.h"
+#include "nsUTF8Utils.h"
+
+#include <arm_neon.h>
+
+void
+LossyConvertEncoding16to8::write_neon(const char16_t* aSource,
+                                      uint32_t aSourceLength)
+{
+  char* dest = mDestination;
+
+  // Align source to a 16-byte boundary and destination to 8-bytes boundary.
+  uint32_t i = 0;
+  while (((reinterpret_cast<uintptr_t>(aSource + i) & 0xf) ||
+          (reinterpret_cast<uintptr_t>(dest + i) & 0x7)) &&
+         i < aSourceLength) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+    i++;
+  }
+
+  while ((reinterpret_cast<uintptr_t>(dest + i) & 0xf) &&
+         aSourceLength - i > 7) {
+    // source is aligned, but destination isn't aligned by 16-byte yet
+    uint16x8_t s =
+      vld1q_u16(reinterpret_cast<const uint16_t*>(
+                  __builtin_assume_aligned(aSource + i, 16)));
+    vst1_u8(reinterpret_cast<uint8_t*>(
+              __builtin_assume_aligned(dest + i, 8)),
+            vmovn_u16(s));
+    i += 8;
+  }
+
+  // Align source and destination to a 16-byte boundary.
+  while (aSourceLength - i > 15) {
+    uint16x8_t low =
+      vld1q_u16(reinterpret_cast<const uint16_t*>(
+                  __builtin_assume_aligned(aSource + i, 16)));
+    uint16x8_t high =
+      vld1q_u16(reinterpret_cast<const uint16_t*>(
+                  __builtin_assume_aligned(aSource + i + 8, 16)));
+    vst1q_u8(reinterpret_cast<uint8_t*>(
+               __builtin_assume_aligned(dest + i, 16)),
+             vcombine_u8(vmovn_u16(low), vmovn_u16(high)));
+    i += 16;
+  }
+
+  if (aSourceLength - i > 7) {
+    uint16x8_t s = vld1q_u16(reinterpret_cast<const uint16_t*>(
+                               __builtin_assume_aligned(aSource + i, 16)));
+    vst1_u8(reinterpret_cast<uint8_t*>(
+              __builtin_assume_aligned(dest + i, 8)),
+            vmovn_u16(s));
+    i += 8;
+  }
+
+  // Finish up the rest.
+  for (; i < aSourceLength; ++i) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+  }
+
+  mDestination += i;
+}
+
+void
+LossyConvertEncoding8to16::write_neon(const char* aSource,
+                                      uint32_t aSourceLength)
+{
+  char16_t* dest = mDestination;
+
+  // Align source to a 8-byte boundary and destination to 16-bytes boundary.
+  uint32_t i = 0;
+  while (((reinterpret_cast<uintptr_t>(aSource + i) & 0x7) ||
+          (reinterpret_cast<uintptr_t>(dest + i) & 0xf)) &&
+         i < aSourceLength) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+    i++;
+  }
+
+  if ((uintptr_t(aSource + i) & 0xf) && aSourceLength - i > 7) {
+    // destination is aligned, but source isn't aligned by 16-byte yet
+    uint8x8_t s =
+      vld1_u8(reinterpret_cast<const uint8_t*>(
+                __builtin_assume_aligned(aSource + i, 8)));
+    vst1q_u16(reinterpret_cast<uint16_t*>(
+                __builtin_assume_aligned(dest + i, 16)),
+              vmovl_u8(s));
+    i += 8;
+  }
+
+  // Align source and destination to a 16-byte boundary.
+  while (aSourceLength - i > 15) {
+    uint8x16_t s =
+      vld1q_u8(reinterpret_cast<const uint8_t*>(
+                 __builtin_assume_aligned(aSource + i, 16)));
+    uint16x8_t low = vmovl_u8(vget_low_u8(s));
+    uint16x8_t high = vmovl_u8(vget_high_u8(s));
+    vst1q_u16(reinterpret_cast<uint16_t*>(
+                __builtin_assume_aligned(dest + i, 16)),
+              low);
+    vst1q_u16(reinterpret_cast<uint16_t*>(
+                __builtin_assume_aligned(dest + i + 8, 16)),
+              high);
+    i += 16;
+  }
+
+  if (aSourceLength - i > 7) {
+    uint8x8_t s =
+      vld1_u8(reinterpret_cast<const uint8_t*>(
+                __builtin_assume_aligned(aSource + i, 8)));
+    vst1q_u16(reinterpret_cast<uint16_t*>(
+                __builtin_assume_aligned(dest + i, 16)),
+              vmovl_u8(s));
+    i += 8;
+  }
+
+  // Finish up whatever's left.
+  for (; i < aSourceLength; ++i) {
+    dest[i] = static_cast<unsigned char>(aSource[i]);
+  }
+
+  mDestination += i;
+}