Bug 1387399 - Part 2: SIMD optimize ScaleToOuterPixels. r=jrmuizel draft
author Bas Schouten <bschouten@mozilla.com>
Tue, 08 Aug 2017 17:31:19 +0200
changeset 642607 b4e3c00bd920bc8845a23760b84a73d040c3cb04
parent 642606 be561c57d801bd8ea1fb4f5388a6d53002687670
child 725054 e2d809ab0a3d1df739da6ff545ff1426c8031c5d
push id 72819
push user bschouten@mozilla.com
push date Tue, 08 Aug 2017 15:31:50 +0000
reviewers jrmuizel
bugs 1387399
milestone 57.0a1
Bug 1387399 - Part 2: SIMD optimize ScaleToOuterPixels. r=jrmuizel MozReview-Commit-ID: C667VsdFibx
gfx/src/nsRect.h
--- a/gfx/src/nsRect.h
+++ b/gfx/src/nsRect.h
@@ -14,16 +14,17 @@
 #include "mozilla/gfx/2D.h"             // for Factory
 #include "mozilla/gfx/Rect.h"
 #include "nsCoord.h"                    // for nscoord, etc
 #include "nsISupportsImpl.h"            // for MOZ_COUNT_CTOR, etc
 #include "nsPoint.h"                    // for nsIntPoint, nsPoint
 #include "nsMargin.h"                   // for nsIntMargin, nsMargin
 #include "nsSize.h"                     // for IntSize, nsSize
 #include "nscore.h"                     // for NS_BUILD_REFCNT_LOGGING
+#include <mozilla/FloatingPoint.h>
 
 typedef mozilla::gfx::IntRect nsIntRect;
 
 struct nsRect :
   public mozilla::gfx::BaseRect<nscoord, nsRect, nsPoint, nsSize, nsMargin> {
   typedef mozilla::gfx::BaseRect<nscoord, nsRect, nsPoint, nsSize, nsMargin> Super;
 
   static void VERIFY_COORD(nscoord aValue) { ::VERIFY_COORD(aValue); }
@@ -317,23 +318,55 @@ nsRect::ScaleToNearestPixels(float aXSca
 }
 
 // scale the rect but round to smallest containing rect
 inline mozilla::gfx::IntRect
 nsRect::ScaleToOutsidePixels(float aXScale, float aYScale,
                              nscoord aAppUnitsPerPixel) const
 {
   mozilla::gfx::IntRect rect;
+  // x/y are rounded toward -infinity and XMost/YMost toward +infinity, so the
+  // result is the smallest pixel rect containing the scaled rect.
+#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+  // SSE2 path: scale all four edges (x, y, XMost, YMost) in one vector.
+  // NOTE(review): assumes x, y, width, height are the first 16 bytes of
+  // *this and of IntRect, in that order -- confirm against BaseRect.
+  __m128 c1 = _mm_set1_ps(float(aAppUnitsPerPixel));
+  __m128 c2 = _mm_set_ps(aYScale, aXScale, aYScale, aXScale);
+  // _mm_set_ps lists lanes high to low: bias only XMost/YMost (lanes 2-3)
+  // so that rounding down floors x/y but acts as a ceiling for the far edges.
+  __m128 c3 = _mm_set_ps(1 - FLT_EPSILON, 1 - FLT_EPSILON, 0, 0);
+
+  // Remember the caller's rounding mode instead of assuming round-nearest.
+  const unsigned int savedRounding = _MM_GET_ROUNDING_MODE();
+  _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
+
+  __m128i recti = _mm_loadu_si128((const __m128i*)this); // x, y, w, h
+  __m128i widthheight = _mm_slli_si128(recti, 8); // 0, 0, x, y
+  recti = _mm_add_epi32(recti, widthheight); // x, y, XMost(), YMost()
+  __m128 rectf = _mm_mul_ps(_mm_div_ps(_mm_cvtepi32_ps(recti), c1), c2); // scale
+  recti = _mm_cvtps_epi32(_mm_add_ps(rectf, c3)); // r.x, r.y, r.XMost(), r.YMost()
+  _MM_SET_ROUNDING_MODE(savedRounding);
+
+  widthheight = _mm_slli_si128(recti, 8); // 0, 0, r.x, r.y
+  recti = _mm_sub_epi32(recti, widthheight); // r.x, r.y, r.w, r.h
+  // Avoid negative widths and heights due to overflow: zero lanes 2-3 if < 0.
+  __m128i keep = _mm_or_si128(_mm_cmpgt_epi32(recti, _mm_setzero_si128()),
+                              _mm_set_epi32(0, 0, -1, -1));
+  recti = _mm_and_si128(recti, keep);
+  _mm_storeu_si128((__m128i*)&rect, recti);
+#else
   rect.x = NSToIntFloor(NSAppUnitsToFloatPixels(x, float(aAppUnitsPerPixel)) * aXScale);
   rect.y = NSToIntFloor(NSAppUnitsToFloatPixels(y, float(aAppUnitsPerPixel)) * aYScale);
   // Avoid negative widths and heights due to overflow
   rect.width  = std::max(0, NSToIntCeil(NSAppUnitsToFloatPixels(XMost(),
                             float(aAppUnitsPerPixel)) * aXScale) - rect.x);
   rect.height = std::max(0, NSToIntCeil(NSAppUnitsToFloatPixels(YMost(),
                             float(aAppUnitsPerPixel)) * aYScale) - rect.y);
+#endif
   return rect;
 }
 
 // scale the rect but round to largest contained rect
 inline mozilla::gfx::IntRect
 nsRect::ScaleToInsidePixels(float aXScale, float aYScale,
                             nscoord aAppUnitsPerPixel) const
 {