Bug 1541350 - restore SkBlitRow::Color32 optimizations. r=rhunt
author Lee Salzman <lsalzman@mozilla.com>
Mon, 15 Apr 2019 18:46:10 +0000
changeset 469544 03c29de4a29d
parent 469543 2f6e8916a498
child 469545 b1d9126436b5
push id 35874
push user ccoroiu@mozilla.com
push date Tue, 16 Apr 2019 04:04:58 +0000
treeherder mozilla-central@be3f40425b52 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers rhunt
bugs 1541350
milestone 68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1541350 - restore SkBlitRow::Color32 optimizations. r=rhunt Differential Revision: https://phabricator.services.mozilla.com/D27157
gfx/skia/skia/src/core/SkBlitRow_D32.cpp
gfx/skia/skia/src/core/SkOpts.cpp
gfx/skia/skia/src/core/SkOpts.h
gfx/skia/skia/src/opts/SkBlitRow_opts.h
--- a/gfx/skia/skia/src/core/SkBlitRow_D32.cpp
+++ b/gfx/skia/skia/src/core/SkBlitRow_D32.cpp
@@ -306,20 +306,10 @@ SkBlitRow::Proc32 SkBlitRow::Factory32(u
                       : kProcs[flags];
 }
 
 void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[], int count, SkPMColor color) {
     switch (SkGetPackedA32(color)) {
         case   0: memmove(dst, src, count * sizeof(SkPMColor)); return;
         case 255: sk_memset32(dst, color, count);               return;
     }
-
-    unsigned invA = 255 - SkGetPackedA32(color);
-    invA += invA >> 7;
-    SkASSERT(invA < 256);  // We've should have already handled alpha == 0 externally.
-
-    Sk16h colorHighAndRound = (Sk4px::DupPMColor(color).widen() << 8) + Sk16h(128);
-    Sk16b invA_16x(invA);
-
-    Sk4px::MapSrc(count, dst, src, [&](const Sk4px& src4) -> Sk4px {
-        return (src4 * invA_16x).addNarrowHi(colorHighAndRound);
-    });
+    return SkOpts::blit_row_color32(dst, src, count, color);  // via the SkOpts function pointer
 }
--- a/gfx/skia/skia/src/core/SkOpts.cpp
+++ b/gfx/skia/skia/src/core/SkOpts.cpp
@@ -51,16 +51,17 @@ namespace SkOpts {
     // If our global compile options are set high enough, these defaults might even be
     // CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults.
     // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1.
 #define DEFINE_DEFAULT(name) decltype(name) name = SK_OPTS_NS::name
     DEFINE_DEFAULT(create_xfermode);
 
     DEFINE_DEFAULT(blit_mask_d32_a8);
 
+    DEFINE_DEFAULT(blit_row_color32);
     DEFINE_DEFAULT(blit_row_s32a_opaque);
 
     DEFINE_DEFAULT(RGBA_to_BGRA);
     DEFINE_DEFAULT(RGBA_to_rgbA);
     DEFINE_DEFAULT(RGBA_to_bgrA);
     DEFINE_DEFAULT(RGB_to_RGB1);
     DEFINE_DEFAULT(RGB_to_BGR1);
     DEFINE_DEFAULT(gray_to_RGB1);
--- a/gfx/skia/skia/src/core/SkOpts.h
+++ b/gfx/skia/skia/src/core/SkOpts.h
@@ -22,16 +22,17 @@ namespace SkOpts {
     void Init();
 
     // Declare function pointers here...
 
     // May return nullptr if we haven't specialized the given Mode.
     extern SkXfermode* (*create_xfermode)(SkBlendMode);
 
     extern void (*blit_mask_d32_a8)(SkPMColor*, size_t, const SkAlpha*, size_t, SkColor, int, int);
+    extern void (*blit_row_color32)(SkPMColor*, const SkPMColor*, int, SkPMColor);
     extern void (*blit_row_s32a_opaque)(SkPMColor*, const SkPMColor*, int, U8CPU);
 
     // Swizzle input into some sort of 8888 pixel, {premul,unpremul} x {rgba,bgra}.
     typedef void (*Swizzle_8888_u32)(uint32_t*, const uint32_t*, int);
     extern Swizzle_8888_u32 RGBA_to_BGRA,          // i.e. just swap RB
                             RGBA_to_rgbA,          // i.e. just premultiply
                             RGBA_to_bgrA,          // i.e. swap RB and premultiply
                             inverted_CMYK_to_RGB1, // i.e. convert color space
--- a/gfx/skia/skia/src/opts/SkBlitRow_opts.h
+++ b/gfx/skia/skia/src/opts/SkBlitRow_opts.h
@@ -3,16 +3,17 @@
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
 
 #ifndef SkBlitRow_opts_DEFINED
 #define SkBlitRow_opts_DEFINED
 
+#include "Sk4px.h"
 #include "SkColorData.h"
 #include "SkMSAN.h"
 
 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
     #include <immintrin.h>
 
     static inline __m128i SkPMSrcOver_SSE2(const __m128i& src, const __m128i& dst) {
         auto SkAlphaMulQ_SSE2 = [](const __m128i& c, const __m128i& scale) {
@@ -35,16 +36,35 @@
         return _mm_add_epi32(src,
                              SkAlphaMulQ_SSE2(dst, _mm_sub_epi32(_mm_set1_epi32(256),
                                                                  _mm_srli_epi32(src, 24))));
     }
 #endif
 
 namespace SK_OPTS_NS {
 
+// Computes dst[i] from src[i] and the constant `color` for `count` pixels; per channel this is
+// presumably (src * invA + color * 256 + 128) >> 8 (confirm against Sk4px::addNarrowHi).
+// It uses the blend_256_round_alt algorithm from tests/BlendTest.cpp, which is not quite perfect
+// but never wrong in the interesting edge cases, and quite a bit faster than blend_perfect.
+// blend_256_round_alt is our currently blessed algorithm.  Please use it or an analogous one.
+static inline
+void blit_row_color32(SkPMColor* dst, const SkPMColor* src, int count, SkPMColor color) {
+    unsigned invA = 255 - SkGetPackedA32(color);  // inverse of color's alpha
+    invA += invA >> 7;  // adds 1 when invA >= 128; only alpha == 0 would reach 256
+    SkASSERT(invA < 256);  // We should have already handled alpha == 0 externally.
+
+    Sk16h colorHighAndRound = (Sk4px::DupPMColor(color).widen() << 8) + Sk16h(128);
+    Sk16b invA_16x(invA);  // presumably invA replicated across all lanes
+
+    Sk4px::MapSrc(count, dst, src, [&](const Sk4px& src4) -> Sk4px {
+        return (src4 * invA_16x).addNarrowHi(colorHighAndRound);
+    });
+}
+
 #if defined(SK_ARM_HAS_NEON)
 
 // Return a uint8x8_t value, r, computed as r[i] = SkMulDiv255Round(x[i], y[i]), where r[i], x[i],
 // y[i] are the i-th lanes of the corresponding NEON vectors.
 static inline uint8x8_t SkMulDiv255Round_neon8(uint8x8_t x, uint8x8_t y) {
     uint16x8_t prod = vmull_u8(x, y);
     return vraddhn_u16(prod, vrshrq_n_u16(prod, 8));
 }