Bug 416305. Part 2: Introduce image flag to indicate that all the color channel values are equal, set it for source-alpha images, and optimize Gaussian blur when the color channel values are known to be all-zero. r=longsonr,sr=mats
author Robert O'Callahan <robert@ocallahan.org>
Mon, 14 Jul 2008 14:49:07 +1200
changeset 15918 baa5a51b7f9072ed35b59a82acefedf0e3b60d83
parent 15917 62d71a45512cbc0c805cbf55bf115e97f9b8eb0d
child 15919 c1eadc4655be4f3e3a7dda6468c62986d7914549
push id 605
push user rocallahan@mozilla.com
push date Mon, 14 Jul 2008 02:49:22 +0000
treeherder autoland@baa5a51b7f90 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers longsonr, mats
bugs 416305
milestone 1.9.1a1pre
Bug 416305. Part 2: Introduce image flag to indicate that all the color channel values are equal, set it for source-alpha images, and optimize Gaussian blur when the color channel values are known to be all-zero. r=longsonr,sr=mats
content/svg/content/src/nsSVGFilters.cpp
content/svg/content/src/nsSVGFilters.h
layout/svg/base/src/nsSVGFilterInstance.cpp
--- a/content/svg/content/src/nsSVGFilters.cpp
+++ b/content/svg/content/src/nsSVGFilters.cpp
@@ -341,21 +341,19 @@ protected:
   enum { RESULT, IN1 };
   nsSVGString mStringAttributes[2];
   static StringInfo sStringInfo[2];
 
 private:
   nsresult GetDXY(PRUint32 *aDX, PRUint32 *aDY, const nsSVGFilterInstance& aInstance);
   void InflateRectForBlur(nsRect* aRect, const nsSVGFilterInstance& aInstance);
 
-  void GaussianBlur(PRUint8 *aInput, PRUint8 *aOutput,
-                    gfxImageSurface *aTarget,
+  void GaussianBlur(const Image *aSource, const Image *aTarget,
                     const nsIntRect& aDataRect,
                     PRUint32 aDX, PRUint32 aDY);
-
 };
 
 nsSVGElement::NumberInfo nsSVGFEGaussianBlurElement::sNumberInfo[2] =
 {
   { &nsGkAtoms::stdDeviation, 0 },
   { &nsGkAtoms::stdDeviation, 0 }
 };
 
@@ -456,17 +454,17 @@ nsSVGFEGaussianBlurElement::SetStdDeviat
 static PRUint32 ComputeScaledDivisor(PRUint32 aDivisor)
 {
   return PR_UINT32_MAX/(255*aDivisor);
 }
   
 static void
 BoxBlur(const PRUint8 *aInput, PRUint8 *aOutput,
         PRInt32 aStrideMinor, PRInt32 aStartMinor, PRInt32 aEndMinor,
-        PRUint32 aLeftLobe, PRUint32 aRightLobe)
+        PRUint32 aLeftLobe, PRUint32 aRightLobe, PRBool aAlphaOnly)
 {
   PRUint32 boxSize = aLeftLobe + aRightLobe + 1;
   PRUint32 scaledDivisor = ComputeScaledDivisor(boxSize);
   PRUint32 sums[4] = {0, 0, 0, 0};
 
   for (PRUint32 i=0; i < boxSize; i++) {
     PRInt32 pos = aStartMinor - aLeftLobe + i;
     pos = PR_MAX(pos, aStartMinor);
@@ -477,52 +475,68 @@ BoxBlur(const PRUint8 *aInput, PRUint8 *
   }
 
   aOutput += aStrideMinor*aStartMinor;
   if (aStartMinor + boxSize <= aEndMinor) {
     const PRUint8 *lastInput = aInput + aStartMinor*aStrideMinor;
     const PRUint8 *nextInput = aInput + (aStartMinor + aRightLobe + 1)*aStrideMinor;
 #define OUTPUT(j)     aOutput[j] = (sums[j]*scaledDivisor) >> 24;
 #define SUM(j)        sums[j] += nextInput[j] - lastInput[j];
+    // process pixels in B, G, R, A order because that's 0, 1, 2, 3 for x86
+#define OUTPUT_PIXEL() \
+        if (!aAlphaOnly) { OUTPUT(GFX_ARGB32_OFFSET_B); \
+                           OUTPUT(GFX_ARGB32_OFFSET_G); \
+                           OUTPUT(GFX_ARGB32_OFFSET_R); } \
+        OUTPUT(GFX_ARGB32_OFFSET_A);
+#define SUM_PIXEL() \
+        if (!aAlphaOnly) { SUM(GFX_ARGB32_OFFSET_B); \
+                           SUM(GFX_ARGB32_OFFSET_G); \
+                           SUM(GFX_ARGB32_OFFSET_R); } \
+        SUM(GFX_ARGB32_OFFSET_A);
     for (PRInt32 minor = aStartMinor; minor < aStartMinor + aLeftLobe; minor++) {
-      OUTPUT(0); OUTPUT(1); OUTPUT(2); OUTPUT(3);
-      SUM(0); SUM(1); SUM(2); SUM(3);
+      OUTPUT_PIXEL();
+      SUM_PIXEL();
       nextInput += aStrideMinor;
       aOutput += aStrideMinor;
     }
     for (PRInt32 minor = aStartMinor + aLeftLobe; minor < aEndMinor - aRightLobe - 1; minor++) {
-      OUTPUT(0); OUTPUT(1); OUTPUT(2); OUTPUT(3);
-      SUM(0); SUM(1); SUM(2); SUM(3);
+      OUTPUT_PIXEL();
+      SUM_PIXEL();
       lastInput += aStrideMinor;
       nextInput += aStrideMinor;
       aOutput += aStrideMinor;
     }
     // nextInput is now aInput + aEndMinor*aStrideMinor. Set it back to
     // aInput + (aEndMinor - 1)*aStrideMinor so we read the last pixel in every
     // iteration of the next loop.
     nextInput -= aStrideMinor;
     for (PRInt32 minor = aEndMinor - aRightLobe - 1; minor < aEndMinor; minor++) {
-      OUTPUT(0); OUTPUT(1); OUTPUT(2); OUTPUT(3);
-      SUM(0); SUM(1); SUM(2); SUM(3);
+      OUTPUT_PIXEL();
+      SUM_PIXEL();
       lastInput += aStrideMinor;
       aOutput += aStrideMinor;
+#undef SUM_PIXEL
 #undef SUM
     }
   } else {
     for (PRInt32 minor = aStartMinor; minor < aEndMinor; minor++) {
       PRInt32 tmp = minor - aLeftLobe;
       PRInt32 last = PR_MAX(tmp, aStartMinor);
       PRInt32 next = PR_MIN(tmp + boxSize, aEndMinor - 1);
 
-      OUTPUT(0); OUTPUT(1); OUTPUT(2); OUTPUT(3);
+      OUTPUT_PIXEL();
 #define SUM(j)     sums[j] += aInput[aStrideMinor*next + j] - \
                               aInput[aStrideMinor*last + j];
-      SUM(0); SUM(1); SUM(2); SUM(3);
+      if (!aAlphaOnly) { SUM(GFX_ARGB32_OFFSET_B);
+                         SUM(GFX_ARGB32_OFFSET_G);
+                         SUM(GFX_ARGB32_OFFSET_R); }
+      SUM(GFX_ARGB32_OFFSET_A);
       aOutput += aStrideMinor;
 #undef SUM
+#undef OUTPUT_PIXEL
 #undef OUTPUT
     }
   }
 }
 
 nsresult
 nsSVGFEGaussianBlurElement::GetDXY(PRUint32 *aDX, PRUint32 *aDY,
                                    const nsSVGFilterInstance& aInstance)
@@ -543,73 +557,82 @@ nsSVGFEGaussianBlurElement::GetDXY(PRUin
   if (stdX == 0 || stdY == 0)
     return NS_ERROR_UNEXPECTED;
 
   *aDX = PRUint32(floor(stdX * 3*sqrt(2*M_PI)/4 + 0.5));
   *aDY = PRUint32(floor(stdY * 3*sqrt(2*M_PI)/4 + 0.5));
   return NS_OK;
 }
 
+static PRBool
+AreAllColorChannelsZero(const nsSVGFE::Image* aTarget)
+{
+  return aTarget->mConstantColorChannels &&
+         aTarget->mImage->GetDataSize() >= 4 &&
+         (*reinterpret_cast<PRUint32*>(aTarget->mImage->Data()) & 0x00FFFFFF) == 0;
+}
+
 void
-nsSVGFEGaussianBlurElement::GaussianBlur(PRUint8 *aInput, PRUint8 *aOutput,
-                                         gfxImageSurface *aTarget,
-                                         const nsIntRect& aDataRect,
+nsSVGFEGaussianBlurElement::GaussianBlur(const Image *aSource,
+                                         const Image *aTarget,                                         const nsIntRect& aDataRect,
                                          PRUint32 aDX, PRUint32 aDY)
 {
-  NS_ASSERTION(nsIntRect(0,0,aTarget->Width(),aTarget->Height()).Contains(aDataRect),
+  NS_ASSERTION(nsIntRect(0,0,aTarget->mImage->Width(),aTarget->mImage->Height()).Contains(aDataRect),
                "aDataRect out of bounds");
 
-  nsAutoArrayPtr<PRUint8> tmp(new PRUint8[aTarget->GetDataSize()]);
-  if (!tmp)
+  nsAutoArrayPtr<PRUint8> tmp(new PRUint8[aTarget->mImage->GetDataSize()]);  if (!tmp)
     return;
-  memset(tmp, 0, aTarget->GetDataSize());
-
-  PRUint32 stride = aTarget->Stride();
+  memset(tmp, 0, aTarget->mImage->GetDataSize());
+
+  PRBool alphaOnly = AreAllColorChannelsZero(aTarget);
+  
+  const PRUint8* sourceData = aSource->mImage->Data();
+  PRUint8* targetData = aTarget->mImage->Data();
+  PRUint32 stride = aTarget->mImage->Stride();
 
   if (aDX == 0) {
-    CopyDataRect(tmp, aInput, stride, aDataRect);
+    CopyDataRect(tmp, sourceData, stride, aDataRect);
   } else {
     PRInt32 longLobe = aDX/2;
     PRInt32 shortLobe = (aDX & 1) ? longLobe : longLobe - 1;
     for (PRInt32 major = aDataRect.y; major < aDataRect.YMost(); ++major) {
       PRInt32 ms = major*stride;
-      BoxBlur(aInput + ms, tmp + ms, 4, aDataRect.x, aDataRect.XMost(), longLobe, shortLobe);
-      BoxBlur(tmp + ms, aOutput + ms, 4, aDataRect.x, aDataRect.XMost(), shortLobe, longLobe);
-      BoxBlur(aOutput + ms, tmp + ms, 4, aDataRect.x, aDataRect.XMost(), longLobe, longLobe);
+      BoxBlur(sourceData + ms, tmp + ms, 4, aDataRect.x, aDataRect.XMost(), longLobe, shortLobe, alphaOnly);
+      BoxBlur(tmp + ms, targetData + ms, 4, aDataRect.x, aDataRect.XMost(), shortLobe, longLobe, alphaOnly);
+      BoxBlur(targetData + ms, tmp + ms, 4, aDataRect.x, aDataRect.XMost(), longLobe, longLobe, alphaOnly);
     }
   }
 
   if (aDY == 0) {
-    CopyDataRect(aOutput, tmp, stride, aDataRect);
+    CopyDataRect(targetData, tmp, stride, aDataRect);
   } else {
     PRInt32 longLobe = aDY/2;
     PRInt32 shortLobe = (aDY & 1) ? longLobe : longLobe - 1;
     for (PRInt32 major = aDataRect.x; major < aDataRect.XMost(); ++major) {
       PRInt32 ms = major*4;
-      BoxBlur(tmp + ms, aOutput + ms, stride, aDataRect.y, aDataRect.YMost(), longLobe, shortLobe);
-      BoxBlur(aOutput + ms, tmp + ms, stride, aDataRect.y, aDataRect.YMost(), shortLobe, longLobe);
-      BoxBlur(tmp + ms, aOutput + ms, stride, aDataRect.y, aDataRect.YMost(), longLobe, longLobe);
+      BoxBlur(tmp + ms, targetData + ms, stride, aDataRect.y, aDataRect.YMost(), longLobe, shortLobe, alphaOnly);
+      BoxBlur(targetData + ms, tmp + ms, stride, aDataRect.y, aDataRect.YMost(), shortLobe, longLobe, alphaOnly);
+      BoxBlur(tmp + ms, targetData + ms, stride, aDataRect.y, aDataRect.YMost(), longLobe, longLobe, alphaOnly);
     }
   }
 }
 
 nsresult
 nsSVGFEGaussianBlurElement::Filter(nsSVGFilterInstance* aInstance,
                                    const nsTArray<const Image*>& aSources,
                                    const Image* aTarget,
                                    const nsIntRect& rect)
 {
   PRUint32 dx, dy;
   nsresult rv = GetDXY(&dx, &dy, *aInstance);
   if (rv == NS_ERROR_UNEXPECTED) // zero std deviation
     return NS_OK;
   if (NS_FAILED(rv))
     return rv;
-  GaussianBlur(aSources[0]->mImage->Data(), aTarget->mImage->Data(),
-               aTarget->mImage, rect, dx, dy);
+  GaussianBlur(aSources[0], aTarget, rect, dx, dy);
   return NS_OK;
 }
 
 void
 nsSVGFEGaussianBlurElement::GetSourceImageNames(nsTArray<nsSVGString*>* aSources)
 {
   aSources->AppendElement(&mStringAttributes[IN1]);
 }
--- a/content/svg/content/src/nsSVGFilters.h
+++ b/content/svg/content/src/nsSVGFilters.h
@@ -77,16 +77,20 @@ public:
   };
 
   struct Image {
     // The device offset of mImage makes it relative to filter space
     nsRefPtr<gfxImageSurface> mImage;
     // The filter primitive subregion bounding this image, in filter space
     gfxRect                   mFilterPrimitiveSubregion;
     ColorModel                mColorModel;
+    // When true, the RGB values are the same for all pixels in mImage
+    PRPackedBool              mConstantColorChannels;
+    
+    Image() : mConstantColorChannels(PR_FALSE) {}
   };
 
 protected:
   nsSVGFE(nsINodeInfo *aNodeInfo) : nsSVGFEBase(aNodeInfo) {}
 
   struct ScaleInfo {
     nsRefPtr<gfxImageSurface> mRealTarget;
     nsRefPtr<gfxImageSurface> mSource;
--- a/layout/svg/base/src/nsSVGFilterInstance.cpp
+++ b/layout/svg/base/src/nsSVGFilterInstance.cpp
@@ -343,16 +343,17 @@ nsSVGFilterInstance::BuildSourceImages()
     const PRUint32* src = reinterpret_cast<PRUint32*>(sourceColorAlpha->Data());
     PRUint32* dest = reinterpret_cast<PRUint32*>(mSourceAlpha.mImage.mImage->Data());
     for (PRInt32 y = 0; y < mSurfaceRect.height; y++) {
       PRUint32 rowOffset = (mSourceAlpha.mImage.mImage->Stride()*y) >> 2;
       for (PRInt32 x = 0; x < mSurfaceRect.width; x++) {
         dest[rowOffset + x] = src[rowOffset + x] & 0xFF000000U;
       }
     }
+    mSourceAlpha.mImage.mConstantColorChannels = PR_TRUE;
   }
   
   return NS_OK;
 }
 
 void
 nsSVGFilterInstance::EnsureColorModel(PrimitiveInfo* aPrimitive,
                                       ColorModel aColorModel)