author | Cameron Kaiser <spectre@floodgap.com> |
Wed, 28 Aug 2019 05:03:54 +0000 | |
changeset 554075 | d3fe24ea630981ac7b2fe46912a03aff2665e1b6 |
parent 554074 | d920fb2cbec23e894932d1892fb166aefac39e78 |
child 554076 | 9a111ae683fae0100c145a333e8fdd95086a2ef1 |
push id | 2165 |
push user | ffxbld-merge |
push date | Mon, 14 Oct 2019 16:30:58 +0000 |
treeherder | mozilla-release@0eae18af659f [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | bzbarsky |
bugs | 817058 |
milestone | 70.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/dom/base/moz.build +++ b/dom/base/moz.build @@ -463,16 +463,22 @@ SOURCES += [ ] # Are we targeting x86-32 or x86-64? If so, we want to include SSE2 code for # nsTextFragment.cpp if CONFIG['INTEL_ARCHITECTURE']: SOURCES += ['nsTextFragmentSSE2.cpp'] SOURCES['nsTextFragmentSSE2.cpp'].flags += CONFIG['SSE2_FLAGS'] +# Are we targeting PowerPC? If so, we can enable a SIMD version for +# nsTextFragment.cpp as well. +if CONFIG['CPU_ARCH'].startswith('ppc'): + SOURCES += ['nsTextFragmentVMX.cpp'] + SOURCES['nsTextFragmentVMX.cpp'].flags += CONFIG['PPC_VMX_FLAGS'] + EXTRA_JS_MODULES += [ 'ContentAreaDropListener.jsm', 'DOMRequestHelper.jsm', 'IndexedDBHelper.jsm', 'ProcessSelector.jsm', 'SlowScriptDebug.jsm', ]
--- a/dom/base/nsTextFragment.cpp +++ b/dom/base/nsTextFragment.cpp @@ -14,16 +14,17 @@ #include "nsCRT.h" #include "nsReadableUtils.h" #include "nsMemory.h" #include "nsBidiUtils.h" #include "nsUnicharUtils.h" #include "mozilla/CheckedInt.h" #include "mozilla/MemoryReporting.h" #include "mozilla/SSE.h" +#include "mozilla/ppc.h" #include "nsTextFragmentImpl.h" #include <algorithm> #define TEXTFRAG_WHITE_AFTER_NEWLINE 50 #define TEXTFRAG_MAX_NEWLINES 7 // Static buffer used for common fragments static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1]; @@ -161,28 +162,40 @@ static inline int32_t FirstNon8BitUnvect #ifdef MOZILLA_MAY_SUPPORT_SSE2 namespace mozilla { namespace SSE2 { int32_t FirstNon8Bit(const char16_t* str, const char16_t* end); } // namespace SSE2 } // namespace mozilla #endif +#ifdef __powerpc__ +namespace mozilla { +namespace VMX { +int32_t FirstNon8Bit(const char16_t* str, const char16_t* end); +} // namespace VMX +} // namespace mozilla +#endif + /* * This function returns -1 if all characters in str are 8 bit characters. * Otherwise, it returns a value less than or equal to the index of the first * non-8bit character in str. For example, if first non-8bit character is at * position 25, it may return 25, or for example 24, or 16. But it guarantees * there is no non-8bit character before returned value. */ static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) { #ifdef MOZILLA_MAY_SUPPORT_SSE2 if (mozilla::supports_sse2()) { return mozilla::SSE2::FirstNon8Bit(str, end); } +#elif defined(__powerpc__) + if (mozilla::supports_vmx()) { + return mozilla::VMX::FirstNon8Bit(str, end); + } #endif return FirstNon8BitUnvectorized(str, end); } bool nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi, bool aForce2b) { if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
new file mode 100644 --- /dev/null +++ b/dom/base/nsTextFragmentVMX.cpp @@ -0,0 +1,100 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file should only be compiled if you're on Power ISA. + +#include "nscore.h" +#include "nsAlgorithm.h" +#include "nsTextFragmentImpl.h" +#include <altivec.h> + +namespace mozilla { +namespace VMX { + +int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) { + const uint32_t numUnicharsPerVector = 8; + const uint32_t numCharsPerVector = 16; + // Paranoia. If this assertion is wrong, change the vector loop below. + MOZ_ASSERT((numCharsPerVector / numUnicharsPerVector) == sizeof(char16_t)); + + typedef Non8BitParameters<sizeof(size_t)> p; + const uint32_t alignMask = p::alignMask(); + const size_t mask = p::mask(); + const uint32_t numUnicharsPerWord = p::numUnicharsPerWord(); + + const uint32_t len = end - str; + + // i shall count the index in unichars; i2 shall count the index in chars. + uint32_t i = 0; + uint32_t i2 = 0; + + // Align ourselves to a 16-byte boundary, as required by VMX loads. + uint32_t alignLen = std::min( + len, uint32_t(((-NS_PTR_TO_UINT32(str)) & 0xf) / sizeof(char16_t))); + + if ((len - alignLen) >= numUnicharsPerVector) { + for (; i < alignLen; i++) { + if (str[i] > 255) return i; + } + + // Construct a vector of shorts. +#if __LITTLE_ENDIAN__ + register const vector unsigned short gtcompare = + reinterpret_cast<vector unsigned short>( + vec_mergel(vec_splat_s8(-1), vec_splat_s8(0))); +#else + register const vector unsigned short gtcompare = + reinterpret_cast<vector unsigned short>( + vec_mergel(vec_splat_s8(0), vec_splat_s8(-1))); +#endif + const uint32_t vectWalkEnd = + ((len - i) / numUnicharsPerVector) * numUnicharsPerVector; + i2 = i * sizeof(char16_t); + + while (1) { + register vector unsigned short vect; + + // Check one VMX register (8 unichars) at a time. The vec_any_gt + // intrinsic does exactly what we want. This loop is manually unrolled; + // it yields notable performance improvements this way. +#define CheckForASCII \ + vect = vec_ld(i2, reinterpret_cast<const unsigned short*>(str)); \ + if (vec_any_gt(vect, gtcompare)) return i; \ + i += numUnicharsPerVector; \ + if (!(i < vectWalkEnd)) break; \ + i2 += numCharsPerVector; + + CheckForASCII CheckForASCII + +#undef CheckForASCII + } + } else { + // Align ourselves to a word boundary. + alignLen = std::min(len, uint32_t(((-NS_PTR_TO_UINT32(str)) & alignMask) / + sizeof(char16_t))); + for (; i < alignLen; i++) { + if (str[i] > 255) return i; + } + } + + // Check one word at a time. + const uint32_t wordWalkEnd = + ((len - i) / numUnicharsPerWord) * numUnicharsPerWord; + for (; i < wordWalkEnd; i += numUnicharsPerWord) { + const size_t word = *reinterpret_cast<const size_t*>(str + i); + if (word & mask) return i; + } + + // Take care of the remainder one character at a time. + for (; i < len; i++) { + if (str[i] > 255) { + return i; + } + } + + return -1; +} + +} // namespace VMX +} // namespace mozilla