Bug 1330012 - When caching power table for filters, avoid and optimize out the degenerate cases. r=bas, r=mstange, a=lizzard
authorMilan Sreckovic <milan@mozilla.com>
Wed, 18 Jan 2017 13:36:23 -0500
changeset 378952 dd6cf0b21528034f95953aa6a9b4324fcadb1685
parent 378951 2bbd7a57c79b244198273bb5348dba629af3e12a
child 378953 c9a2f2cda1199ea76b8181f609028c07c906daaa
push id1419
push userjlund@mozilla.com
push dateMon, 10 Apr 2017 20:44:07 +0000
treeherdermozilla-release@5e6801b73ef6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbas, mstange, lizzard
bugs1330012
milestone53.0
Bug 1330012 - When caching power table for filters, avoid and optimize out the degenerate cases. r=bas, r=mstange, a=lizzard MozReview-Commit-ID: GlJYE8SLwJC
gfx/2d/FilterNodeSoftware.cpp
--- a/gfx/2d/FilterNodeSoftware.cpp
+++ b/gfx/2d/FilterNodeSoftware.cpp
@@ -23,75 +23,89 @@
 
 namespace mozilla {
 namespace gfx {
 
 namespace {
 
 /**
  * This class provides a way to get a pow() results in constant-time. It works
- * by caching 256 values for bases between 0 and 1 and a fixed exponent.
+ * by caching 129 ((1 << sCacheIndexPrecisionBits) + 1) values for bases between
+ * 0 and 1 and a fixed exponent.
  **/
 class PowCache
 {
 public:
   PowCache()
+    : mNumPowTablePreSquares(-1)
   {
-    CacheForExponent(0.0f);
   }
 
   void CacheForExponent(Float aExponent)
   {
-    mExponent = aExponent;
+    // Since we are in the world where we only care about
+    // input and results in [0,1], there is no point in
+    // dealing with non-positive exponents.
+    if (aExponent <= 0) {
+      mNumPowTablePreSquares = -1;
+      return;
+    }
     int numPreSquares = 0;
-    while (numPreSquares < 5 && mExponent > (1 << (numPreSquares + 2))) {
+    while (numPreSquares < 5 && aExponent > (1 << (numPreSquares + 2))) {
       numPreSquares++;
     }
     mNumPowTablePreSquares = numPreSquares;
     for (size_t i = 0; i < sCacheSize; i++) {
       // sCacheSize is chosen in such a way that a takes values
       // from 0.0 to 1.0 inclusive.
       Float a = i / Float(1 << sCacheIndexPrecisionBits);
       MOZ_ASSERT(0.0f <= a && a <= 1.0f, "We only want to cache for bases between 0 and 1.");
 
       for (int j = 0; j < mNumPowTablePreSquares; j++) {
         a = sqrt(a);
       }
-      uint32_t cachedInt = pow(a, mExponent) * (1 << sOutputIntPrecisionBits);
+      uint32_t cachedInt = pow(a, aExponent) * (1 << sOutputIntPrecisionBits);
       MOZ_ASSERT(cachedInt < (1 << (sizeof(mPowTable[i]) * 8)), "mPowCache integer type too small");
 
       mPowTable[i] = cachedInt;
     }
   }
 
+  // Only call Pow() if HasPowerTable() would return true, to avoid complicating
+  // this code and having it just return (1 << sOutputIntPrecisionBits))
   uint16_t Pow(uint16_t aBase)
   {
+    MOZ_ASSERT(HasPowerTable());
     // Results should be similar to what the following code would produce:
     // Float x = Float(aBase) / (1 << sInputIntPrecisionBits);
-    // return uint16_t(pow(x, mExponent) * (1 << sOutputIntPrecisionBits));
+    // return uint16_t(pow(x, aExponent) * (1 << sOutputIntPrecisionBits));
 
     MOZ_ASSERT(aBase <= (1 << sInputIntPrecisionBits), "aBase needs to be between 0 and 1!");
 
     uint32_t a = aBase;
     for (int j = 0; j < mNumPowTablePreSquares; j++) {
       a = a * a >> sInputIntPrecisionBits;
     }
     uint32_t i = a >> (sInputIntPrecisionBits - sCacheIndexPrecisionBits);
     MOZ_ASSERT(i < sCacheSize, "out-of-bounds mPowTable access");
     return mPowTable[i];
   }
 
   static const int sInputIntPrecisionBits = 15;
   static const int sOutputIntPrecisionBits = 15;
   static const int sCacheIndexPrecisionBits = 7;
 
+  inline bool HasPowerTable() const
+  {
+    return mNumPowTablePreSquares >= 0;
+  }
+
 private:
   static const size_t sCacheSize = (1 << sCacheIndexPrecisionBits) + 1;
 
-  Float mExponent;
   int mNumPowTablePreSquares;
   uint16_t mPowTable[sCacheSize];
 };
 
 class PointLightSoftware
 {
 public:
   bool SetAttribute(uint32_t aIndex, Float) { return false; }
@@ -3382,24 +3396,33 @@ SpotLightSoftware::GetVectorToLight(cons
 
 uint32_t
 SpotLightSoftware::GetColor(uint32_t aLightColor, const Point3D &aVectorToLight)
 {
   union {
     uint32_t color;
     uint8_t colorC[4];
   };
-  color = aLightColor;
+
   Float dot = -aVectorToLight.DotProduct(mVectorFromFocusPointToLight);
-  uint16_t doti = dot * (dot >= 0) * (1 << PowCache::sInputIntPrecisionBits);
-  uint32_t tmp = mPowCache.Pow(doti) * (dot >= mLimitingConeCos);
-  MOZ_ASSERT(tmp <= (1 << PowCache::sOutputIntPrecisionBits), "pow() result must not exceed 1.0");
-  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] * tmp) >> PowCache::sOutputIntPrecisionBits);
-  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] * tmp) >> PowCache::sOutputIntPrecisionBits);
-  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] * tmp) >> PowCache::sOutputIntPrecisionBits);
+  if (!mPowCache.HasPowerTable()) {
+    dot *= (dot >= mLimitingConeCos);
+    color = aLightColor;
+    colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] *= dot;
+    colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] *= dot;
+    colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] *= dot;
+  } else {
+    color = aLightColor;
+    uint16_t doti = dot * (dot >= 0) * (1 << PowCache::sInputIntPrecisionBits);
+    uint32_t tmp = mPowCache.Pow(doti) * (dot >= mLimitingConeCos);
+    MOZ_ASSERT(tmp <= (1 << PowCache::sOutputIntPrecisionBits), "pow() result must not exceed 1.0");
+    colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] * tmp) >> PowCache::sOutputIntPrecisionBits);
+    colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] * tmp) >> PowCache::sOutputIntPrecisionBits);
+    colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] * tmp) >> PowCache::sOutputIntPrecisionBits);
+  }
   colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_A] = 255;
   return color;
 }
 
 void
 DistantLightSoftware::Prepare()
 {
   const double radPerDeg = M_PI / 180.0;
@@ -3631,16 +3654,19 @@ uint32_t
 SpecularLightingSoftware::LightPixel(const Point3D &aNormal,
                                      const Point3D &aVectorToLight,
                                      uint32_t aColor)
 {
   Point3D vectorToEye(0, 0, 1);
   Point3D halfwayVector = Normalized(aVectorToLight + vectorToEye);
   Float dotNH = aNormal.DotProduct(halfwayVector);
   uint16_t dotNHi = uint16_t(dotNH * (dotNH >= 0) * (1 << PowCache::sInputIntPrecisionBits));
+  // The exponent for specular is in [1,128] range, so we don't need to check and
+  // optimize for the "default power table" scenario here.
+  MOZ_ASSERT(mPowCache.HasPowerTable());
   uint32_t specularNHi = uint32_t(mSpecularConstantInt) * mPowCache.Pow(dotNHi) >> 8;
 
   union {
     uint32_t bgra;
     uint8_t components[4];
   } color = { aColor };
   color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B] =
     umin(