Bug 1375842 - increase SIMD padding for convolution filter to 31 bytes. r=jrmuizel
author Lee Salzman <lsalzman@mozilla.com>
Fri, 07 Jul 2017 13:32:05 -0400
changeset 605477 31f3099d70826885f85a1d57e3efd77480a3267c
parent 605476 f48de44a55daa4b938e937e502a8ac46da9931a0
child 605478 c30acbc140733a651e3cb3a2b72acba58ca881b8
push id 67415
push user choller@mozilla.com
push date Fri, 07 Jul 2017 19:26:29 +0000
reviewers jrmuizel
bugs 1375842
milestone 56.0a1
Bug 1375842 - increase SIMD padding for convolution filter to 31 bytes. r=jrmuizel

MozReview-Commit-ID: IAwY4xbA0P2
gfx/2d/ConvolutionFilter.h
image/Downscaler.cpp
image/DownscalingFilter.h
--- a/gfx/2d/ConvolutionFilter.h
+++ b/gfx/2d/ConvolutionFilter.h
@@ -32,16 +32,20 @@ public:
     TRIANGLE,
     LANCZOS3,
     HAMMING,
     MITCHELL
   };
 
   bool ComputeResizeFilter(ResizeMethod aResizeMethod, int32_t aSrcSize, int32_t aDstSize);
 
+  static inline size_t PadBytesForSIMD(size_t aBytes) {
+    return (aBytes + 31) & ~31;
+  }
+
 private:
   UniquePtr<SkConvolutionFilter1D> mFilter;
 };
 
 } // namespace gfx
 } // namespace mozilla
 
 #endif /* MOZILLA_GFX_CONVOLUTION_FILTER_H_ */
--- a/image/Downscaler.cpp
+++ b/image/Downscaler.cpp
@@ -100,18 +100,18 @@ Downscaler::BeginFrame(const nsIntSize& 
   auto resizeMethod = gfx::ConvolutionFilter::ResizeMethod::LANCZOS3;
   if (!mXFilter.ComputeResizeFilter(resizeMethod, mOriginalSize.width, mTargetSize.width) ||
       !mYFilter.ComputeResizeFilter(resizeMethod, mOriginalSize.height, mTargetSize.height)) {
     NS_WARNING("Failed to compute filters for image downscaling");
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
   // Allocate the buffer, which contains scanlines of the original image.
-  // pad by 15 to handle overreads by the simd code
-  size_t bufferLen = mOriginalSize.width * sizeof(uint32_t) + 15;
+  // pad to handle overreads by the simd code
+  size_t bufferLen = gfx::ConvolutionFilter::PadBytesForSIMD(mOriginalSize.width * sizeof(uint32_t));
   mRowBuffer.reset(new (fallible) uint8_t[bufferLen]);
   if (MOZ_UNLIKELY(!mRowBuffer)) {
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
   // Zero buffer to keep valgrind happy.
   memset(mRowBuffer.get(), 0, bufferLen);
 
@@ -120,18 +120,18 @@ Downscaler::BeginFrame(const nsIntSize& 
   // filter is separable.)
   mWindowCapacity = mYFilter.MaxFilter();
   mWindow.reset(new (fallible) uint8_t*[mWindowCapacity]);
   if (MOZ_UNLIKELY(!mWindow)) {
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
   bool anyAllocationFailed = false;
-  // pad by 15 to handle overreads by the simd code
-  const int rowSize = mTargetSize.width * sizeof(uint32_t) + 15;
+  // pad to handle overreads by the simd code
+  const size_t rowSize = gfx::ConvolutionFilter::PadBytesForSIMD(mTargetSize.width * sizeof(uint32_t));
   for (int32_t i = 0; i < mWindowCapacity; ++i) {
     mWindow[i] = new (fallible) uint8_t[rowSize];
     anyAllocationFailed = anyAllocationFailed || mWindow[i] == nullptr;
   }
 
   if (MOZ_UNLIKELY(anyAllocationFailed)) {
     // We intentionally iterate through the entire array even if an allocation
     // fails, to ensure that all the pointers in it are either valid or nullptr.
--- a/image/DownscalingFilter.h
+++ b/image/DownscalingFilter.h
@@ -172,17 +172,17 @@ public:
     }
 
     // Allocate the "window" of recent rows that we keep in memory as input for
     // the downscaling code. We intentionally iterate through the entire array
     // even if an allocation fails, to ensure that all the pointers in it are
     // either valid or nullptr. That in turn ensures that ReleaseWindow() can
     // clean up correctly.
     bool anyAllocationFailed = false;
-    const uint32_t windowRowSizeInBytes = PaddedWidthInBytes(outputSize.width);
+    const size_t windowRowSizeInBytes = PaddedWidthInBytes(outputSize.width);
     for (int32_t i = 0; i < mWindowCapacity; ++i) {
       mWindow[i] = new (fallible) uint8_t[windowRowSizeInBytes];
       anyAllocationFailed = anyAllocationFailed || mWindow[i] == nullptr;
     }
 
     if (MOZ_UNLIKELY(anyAllocationFailed)) {
       return NS_ERROR_OUT_OF_MEMORY;
     }
@@ -256,21 +256,21 @@ protected:
 
     return mInputRow < mInputSize.height ? GetRowPointer()
                                          : nullptr;
   }
 
 private:
   uint8_t* GetRowPointer() const { return mRowBuffer.get(); }
 
-  static uint32_t PaddedWidthInBytes(uint32_t aLogicalWidth)
+  static size_t PaddedWidthInBytes(size_t aLogicalWidth)
   {
-    // Convert from width in BGRA/BGRX pixels to width in bytes, padding by 15
+    // Convert from width in BGRA/BGRX pixels to width in bytes, padding
     // to handle overreads by the SIMD code inside Skia.
-    return aLogicalWidth * sizeof(uint32_t) + 15;
+    return gfx::ConvolutionFilter::PadBytesForSIMD(aLogicalWidth * sizeof(uint32_t));
   }
 
   void DownscaleInputRow()
   {
     MOZ_ASSERT(mOutputRow < mNext.InputSize().height,
                "Writing past end of output");
 
     int32_t filterOffset = 0;