Bug 960178 - Fix lighting filter PowCache. r=Bas, a=abillings
authorMarkus Stange <mstange@themasta.com>
Wed, 05 Mar 2014 18:41:38 +0100
changeset 183178 e6474e309d6e1c2b92d27383326f89cc9c3ea8c7
parent 183177 a0eed30f6cfec6e5d329553b2a0271b6e75eb5f0
child 183179 f081a9d022d66ad09716899b08d3818e5878bbf9
push id3343
push userffxbld
push dateMon, 17 Mar 2014 21:55:32 +0000
treeherdermozilla-beta@2f7d3415f79f [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersBas, abillings
bugs960178
milestone29.0a2
Bug 960178 - Fix lighting filter PowCache. r=Bas, a=abillings
gfx/2d/FilterNodeSoftware.cpp
--- a/gfx/2d/FilterNodeSoftware.cpp
+++ b/gfx/2d/FilterNodeSoftware.cpp
@@ -56,44 +56,55 @@ public:
   void CacheForExponent(Float aExponent)
   {
     mExponent = aExponent;
     int numPreSquares = 0;
     while (numPreSquares < 5 && mExponent > (1 << (numPreSquares + 2))) {
       numPreSquares++;
     }
     mNumPowTablePreSquares = numPreSquares;
-    for (int i = 0; i < sCacheSize; i++) {
-      Float a = i / Float(sCacheSize - 1);
+    for (size_t i = 0; i < sCacheSize; i++) {
+      // sCacheSize is chosen in such a way that a takes values
+      // from 0.0 to 1.0 inclusive.
+      Float a = i / Float(1 << sCacheIndexPrecisionBits);
+      MOZ_ASSERT(0.0f <= a && a <= 1.0f, "We only want to cache for bases between 0 and 1.");
+
       for (int j = 0; j < mNumPowTablePreSquares; j++) {
         a = sqrt(a);
       }
-      mPowTable[i] = uint16_t(pow(a, mExponent) * (1 << sOutputIntPrecisionBits));
+      uint32_t cachedInt = pow(a, mExponent) * (1 << sOutputIntPrecisionBits);
+      MOZ_ASSERT(cachedInt < (1 << (sizeof(mPowTable[i]) * 8)), "mPowCache integer type too small");
+
+      mPowTable[i] = cachedInt;
     }
   }
 
   uint16_t Pow(uint16_t aBase)
   {
     // Results should be similar to what the following code would produce:
-    // double x = double(aBase) / (1 << sInputIntPrecisionBits);
+    // Float x = Float(aBase) / (1 << sInputIntPrecisionBits);
     // return uint16_t(pow(x, mExponent) * (1 << sOutputIntPrecisionBits));
 
+    MOZ_ASSERT(aBase <= (1 << sInputIntPrecisionBits), "aBase needs to be between 0 and 1!");
+
     uint32_t a = aBase;
     for (int j = 0; j < mNumPowTablePreSquares; j++) {
       a = a * a >> sInputIntPrecisionBits;
     }
-    static_assert(sCacheSize == (1 << sInputIntPrecisionBits >> 7), "please fix index calculation below");
-    int i = a >> 7;
+    uint32_t i = a >> (sInputIntPrecisionBits - sCacheIndexPrecisionBits);
+    MOZ_ASSERT(i < sCacheSize, "out-of-bounds mPowTable access");
     return mPowTable[i];
   }
 
+  static const int sInputIntPrecisionBits = 15;
+  static const int sOutputIntPrecisionBits = 15;
+  static const int sCacheIndexPrecisionBits = 7;
+
 private:
-  static const int sCacheSize = 256;
-  static const int sInputIntPrecisionBits = 15;
-  static const int sOutputIntPrecisionBits = 8;
+  static const size_t sCacheSize = (1 << sCacheIndexPrecisionBits) + 1;
 
   Float mExponent;
   int mNumPowTablePreSquares;
   uint16_t mPowTable[sCacheSize];
 };
 
 class PointLightSoftware
 {
@@ -3246,18 +3257,17 @@ PointLightSoftware::GetColor(uint32_t aL
 {
   return aLightColor;
 }
 
 void
 SpotLightSoftware::Prepare()
 {
   mVectorFromFocusPointToLight = Normalized(mPointsAt - mPosition);
-  const float radPerDeg = static_cast<float>(M_PI/180.0);
-  mLimitingConeCos = std::max<double>(cos(mLimitingConeAngle * radPerDeg), 0.0);
+  mLimitingConeCos = std::max<double>(cos(mLimitingConeAngle * M_PI/180.0), 0.0);
   mPowCache.CacheForExponent(mSpecularFocus);
 }
 
 Point3D
 SpotLightSoftware::GetVectorToLight(const Point3D &aTargetPoint)
 {
   return Normalized(mPosition - aTargetPoint);
 }
@@ -3266,21 +3276,22 @@ uint32_t
 SpotLightSoftware::GetColor(uint32_t aLightColor, const Point3D &aVectorToLight)
 {
   union {
     uint32_t color;
     uint8_t colorC[4];
   };
   color = aLightColor;
   Float dot = -aVectorToLight.DotProduct(mVectorFromFocusPointToLight);
-  int16_t doti = dot * (255 << 7);
-  uint16_t tmp = mPowCache.Pow(doti) * (dot >= mLimitingConeCos);
-  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] * tmp) >> 8);
-  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] * tmp) >> 8);
-  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] * tmp) >> 8);
+  uint16_t doti = dot * (dot >= 0) * (1 << PowCache::sInputIntPrecisionBits);
+  uint32_t tmp = mPowCache.Pow(doti) * (dot >= mLimitingConeCos);
+  MOZ_ASSERT(tmp <= (1 << PowCache::sOutputIntPrecisionBits), "pow() result must not exceed 1.0");
+  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_R] * tmp) >> PowCache::sOutputIntPrecisionBits);
+  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_G] * tmp) >> PowCache::sOutputIntPrecisionBits);
+  colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] = uint8_t((colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_B] * tmp) >> PowCache::sOutputIntPrecisionBits);
   colorC[B8G8R8A8_COMPONENT_BYTEOFFSET_A] = 255;
   return color;
 }
 
 void
 DistantLightSoftware::Prepare()
 {
   const double radPerDeg = M_PI / 180.0;
@@ -3497,32 +3508,32 @@ SpecularLightingSoftware::Prepare()
 uint32_t
 SpecularLightingSoftware::LightPixel(const Point3D &aNormal,
                                      const Point3D &aVectorToLight,
                                      uint32_t aColor)
 {
   Point3D vectorToEye(0, 0, 1);
   Point3D halfwayVector = Normalized(aVectorToLight + vectorToEye);
   Float dotNH = aNormal.DotProduct(halfwayVector);
-  uint16_t dotNHi = uint16_t(dotNH * (dotNH >= 0) * (255 << 7));
-  uint32_t specularNHi = mSpecularConstantInt * mPowCache.Pow(dotNHi);
+  uint16_t dotNHi = uint16_t(dotNH * (dotNH >= 0) * (1 << PowCache::sInputIntPrecisionBits));
+  uint32_t specularNHi = uint32_t(mSpecularConstantInt) * mPowCache.Pow(dotNHi) >> 8;
 
   union {
     uint32_t bgra;
     uint8_t components[4];
   } color = { aColor };
   color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B] =
-    umin(FilterProcessing::FastDivideBy255<uint16_t>(
-      specularNHi * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B] >> 8), 255U);
+    umin(
+      (specularNHi * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B]) >> PowCache::sOutputIntPrecisionBits, 255U);
   color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_G] =
-    umin(FilterProcessing::FastDivideBy255<uint16_t>(
-      specularNHi * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_G] >> 8), 255U);
+    umin(
+      (specularNHi * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_G]) >> PowCache::sOutputIntPrecisionBits, 255U);
   color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_R] =
-    umin(FilterProcessing::FastDivideBy255<uint16_t>(
-      specularNHi * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_R] >> 8), 255U);
+    umin(
+      (specularNHi * color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_R]) >> PowCache::sOutputIntPrecisionBits, 255U);
 
   color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_A] =
     umax(color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_B],
       umax(color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_G],
                color.components[B8G8R8A8_COMPONENT_BYTEOFFSET_R]));
   return color.bgra;
 }