Bug 958977. r=Bas, a=sledru
author Markus Stange <mstange@themasta.com>
Fri, 07 Feb 2014 13:24:54 +0100
changeset 176294 784092b96308b1cc04ab2cd4604d8814f3eb86c0
parent 176293 54f798e1ae0f01eb1424fa69027bd43c33788eea
child 176295 1f89dd5f5d38ea7361e472ec5ae547f8c338f04c
child 176297 dd211f258b1b9ccff65a04404bb1ff50b41cb887
push id 445
push user ffxbld
push date Mon, 10 Mar 2014 22:05:19 +0000
treeherder mozilla-release@dc38b741b04e [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Bas, sledru
bugs 958977
milestone 28.0
Bug 958977. r=Bas, a=sledru
gfx/2d/FilterProcessingSIMD-inl.h
--- a/gfx/2d/FilterProcessingSIMD-inl.h
+++ b/gfx/2d/FilterProcessingSIMD-inl.h
@@ -377,33 +377,38 @@ inline void ApplyMorphologyHorizontal_SI
 
   int32_t kernelSize = aRadius + 1 + aRadius;
   MOZ_ASSERT(kernelSize >= 3, "don't call this with aRadius <= 0");
   MOZ_ASSERT(kernelSize % 4 == 1 || kernelSize % 4 == 3);
   int32_t completeKernelSizeForFourPixels = kernelSize + 3;
   MOZ_ASSERT(completeKernelSizeForFourPixels % 4 == 0 ||
              completeKernelSizeForFourPixels % 4 == 2);
 
-  // aSourceData[0] and aDestData[-aRadius] are both aligned to 16 bytes, just
+  // aSourceData[-aRadius] and aDestData[0] are both aligned to 16 bytes, just
   // the way we need them to be.
 
+  IntRect sourceRect = aDestRect;
+  sourceRect.Inflate(aRadius, 0);
+
   for (int32_t y = aDestRect.y; y < aDestRect.YMost(); y++) {
     int32_t kernelStartX = aDestRect.x - aRadius;
     for (int32_t x = aDestRect.x; x < aDestRect.XMost(); x += 4, kernelStartX += 4) {
       // We process four pixels (16 color values) at a time.
       // aSourceData[0] points to the pixel located at aDestRect.TopLeft();
       // source values can be read beyond that because the source is extended
       // by aRadius pixels.
 
       int32_t sourceIndex = y * aSourceStride + 4 * kernelStartX;
       u8x16_t p1234 = simd::Load8<u8x16_t>(&aSourceData[sourceIndex]);
       u8x16_t m1234 = p1234;
 
       for (int32_t i = 4; i < completeKernelSizeForFourPixels; i += 4) {
-        u8x16_t p5678 = simd::Load8<u8x16_t>(&aSourceData[sourceIndex + 4 * i]);
+        u8x16_t p5678 = (kernelStartX + i < sourceRect.XMost()) ?
+          simd::Load8<u8x16_t>(&aSourceData[sourceIndex + 4 * i]) :
+          simd::FromZero8<u8x16_t>();
         u8x16_t p2345 = simd::Rotate8<4>(p1234, p5678);
         u8x16_t p3456 = simd::Rotate8<8>(p1234, p5678);
         m1234 = Morph8<op,u8x16_t>(m1234, p2345);
         m1234 = Morph8<op,u8x16_t>(m1234, p3456);
         if (i + 2 < completeKernelSizeForFourPixels) {
           u8x16_t p4567 = simd::Rotate8<12>(p1234, p5678);
           m1234 = Morph8<op,u8x16_t>(m1234, p4567);
           m1234 = Morph8<op,u8x16_t>(m1234, p5678);