Bug 1116070 - Use neon to speed up the mask creation. r=longsonr
author: Ethan Lin <etlin@mozilla.com>
Tue, 06 Jan 2015 02:44:00 -0500
changeset 222467 a437d18a4b9b3b3d8c36c8cd32f9f16f76dc47db
parent 222466 adbabd8de78c56a3db8f54a3be87d28d848ad790
child 222468 cb37bece36f37b0e647f273969442f59b44b9113
push id: 53631
push user: ryanvm@gmail.com
push date: Wed, 07 Jan 2015 18:00:19 +0000
treeherder: mozilla-inbound@cb37bece36f3 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: longsonr
bugs: 1116070
milestone: 37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1116070 - Use neon to speed up the mask creation. r=longsonr
layout/reftests/svg/svg-integration/reftest.list
layout/svg/moz.build
layout/svg/nsSVGMaskFrame.cpp
layout/svg/nsSVGMaskFrame.h
layout/svg/nsSVGMaskFrameNEON.cpp
layout/svg/nsSVGMaskFrameNEON.h
--- a/layout/reftests/svg/svg-integration/reftest.list
+++ b/layout/reftests/svg/svg-integration/reftest.list
@@ -22,16 +22,16 @@ fuzzy-if(true,140,70) == clipPath-html-0
 == dynamic-conditions-outer-svg-03.xhtml ../pass.svg
 == dynamic-conditions-outer-svg-04.xhtml ../pass.svg
 == filter-html-01.xhtml filter-html-01-ref.svg
 random-if(B2G) == filter-html-01-extref.xhtml filter-html-01-ref.svg # Bug 1063987
 == filter-html-zoomed-01.xhtml filter-html-01-ref.svg
 == mask-html-01.xhtml mask-html-01-ref.svg
 == mask-html-01-extref-01.xhtml mask-html-01-ref.svg
 random == mask-html-01-extref-02.xhtml mask-html-01-ref.svg # random due to bug 877661
-fuzzy-if(B2G&&browserIsRemote,1,2000) == mask-html-zoomed-01.xhtml mask-html-01-ref.svg
+fuzzy-if(B2G&&browserIsRemote,1,2300) == mask-html-zoomed-01.xhtml mask-html-01-ref.svg
 # Skil XBL test case on B2G
 skip-if(B2G) == mask-html-xbl-bound-01.html mask-html-01-ref.svg
 == mask-transformed-html-01.xhtml ../pass.svg
 == mask-transformed-html-02.xhtml ../pass.svg
 == patterned-svg-under-transformed-html-01.xhtml ../pass.svg
 == patterned-svg-under-transformed-html-02.xhtml ../pass.svg
 
--- a/layout/svg/moz.build
+++ b/layout/svg/moz.build
@@ -43,16 +43,20 @@ UNIFIED_SOURCES += [
     'SVGFEContainerFrame.cpp',
     'SVGFEImageFrame.cpp',
     'SVGFELeafFrame.cpp',
     'SVGFEUnstyledLeafFrame.cpp',
     'SVGTextFrame.cpp',
     'SVGViewFrame.cpp',
 ]
 
+if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
+    SOURCES += ['nsSVGMaskFrameNEON.cpp']
+    SOURCES['nsSVGMaskFrameNEON.cpp'].flags += ['-mfpu=neon']
+
 FAIL_ON_WARNINGS = True
 
 FINAL_LIBRARY = 'xul'
 LOCAL_INCLUDES += [
     '../../widget',
     '../base',
     '../generic',
     '../style',
--- a/layout/svg/nsSVGMaskFrame.cpp
+++ b/layout/svg/nsSVGMaskFrame.cpp
@@ -8,36 +8,25 @@
 
 // Keep others in (case-insensitive) order:
 #include "gfx2DGlue.h"
 #include "gfxContext.h"
 #include "mozilla/gfx/2D.h"
 #include "mozilla/RefPtr.h"
 #include "nsSVGEffects.h"
 #include "mozilla/dom/SVGMaskElement.h"
+#ifdef BUILD_ARM_NEON
+#include "mozilla/arm.h"
+#include "nsSVGMaskFrameNEON.h"
+#endif
 
 using namespace mozilla;
 using namespace mozilla::dom;
 using namespace mozilla::gfx;
 
-/**
- * Byte offsets of channels in a native packed gfxColor or cairo image surface.
- */
-#ifdef IS_BIG_ENDIAN
-#define GFX_ARGB32_OFFSET_A 0
-#define GFX_ARGB32_OFFSET_R 1
-#define GFX_ARGB32_OFFSET_G 2
-#define GFX_ARGB32_OFFSET_B 3
-#else
-#define GFX_ARGB32_OFFSET_A 3
-#define GFX_ARGB32_OFFSET_R 2
-#define GFX_ARGB32_OFFSET_G 1
-#define GFX_ARGB32_OFFSET_B 0
-#endif
-
 // c = n / 255
 // c <= 0.04045 ? c / 12.92 : pow((c + 0.055) / 1.055, 2.4)) * 255 + 0.5
 static const uint8_t gsRGBToLinearRGBMap[256] = {
   0,   0,   0,   0,   0,   0,   0,   1,
   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   2,   2,   2,   2,   2,   2,
   2,   2,   3,   3,   3,   3,   3,   3,
   4,   4,   4,   4,   4,   5,   5,   5,
@@ -71,48 +60,62 @@ 239, 242, 244, 246, 248, 250, 253, 255
 };
 
 static void
 ComputesRGBLuminanceMask(uint8_t *aData,
                          int32_t aStride,
                          const IntSize &aSize,
                          float aOpacity)
 {
+#ifdef BUILD_ARM_NEON
+  if (mozilla::supports_neon()) {
+    ComputesRGBLuminanceMask_NEON(aData, aStride, aSize, aOpacity);
+    return;
+  }
+#endif
+
+  int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
+  int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
+  int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721 * opacity
+  int32_t offset = aStride - 4 * aSize.width;
+  uint8_t *pixel = aData;
+
   for (int32_t y = 0; y < aSize.height; y++) {
     for (int32_t x = 0; x < aSize.width; x++) {
-      uint8_t *pixel = aData + aStride * y + 4 * x;
       uint8_t a = pixel[GFX_ARGB32_OFFSET_A];
 
       uint8_t luminance;
       if (a) {
-        /* sRGB -> intensity (unpremultiply cancels out the
-         * (a/255.0) multiplication with aOpacity */
-        luminance =
-          static_cast<uint8_t>
-                     ((pixel[GFX_ARGB32_OFFSET_R] * 0.2125 +
-                       pixel[GFX_ARGB32_OFFSET_G] * 0.7154 +
-                       pixel[GFX_ARGB32_OFFSET_B] * 0.0721) *
-                      aOpacity);
+        luminance = (redFactor * pixel[GFX_ARGB32_OFFSET_R] +
+                     greenFactor * pixel[GFX_ARGB32_OFFSET_G] +
+                     blueFactor * pixel[GFX_ARGB32_OFFSET_B]) >> 8;
       } else {
         luminance = 0;
       }
       memset(pixel, luminance, 4);
+      pixel += 4;
     }
+    pixel += offset;
   }
 }
 
 static void
 ComputeLinearRGBLuminanceMask(uint8_t *aData,
                               int32_t aStride,
                               const IntSize &aSize,
                               float aOpacity)
 {
+  int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
+  int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
+  int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721 * opacity
+  int32_t offset = aStride - 4 * aSize.width;
+  uint8_t *pixel = aData;
+
   for (int32_t y = 0; y < aSize.height; y++) {
     for (int32_t x = 0; x < aSize.width; x++) {
-      uint8_t *pixel = aData + aStride * y + 4 * x;
       uint8_t a = pixel[GFX_ARGB32_OFFSET_A];
 
       uint8_t luminance;
       // unpremultiply
       if (a) {
         if (a != 255) {
           pixel[GFX_ARGB32_OFFSET_B] =
             (255 * pixel[GFX_ARGB32_OFFSET_B]) / a;
@@ -120,42 +123,48 @@ ComputeLinearRGBLuminanceMask(uint8_t *a
             (255 * pixel[GFX_ARGB32_OFFSET_G]) / a;
           pixel[GFX_ARGB32_OFFSET_R] =
             (255 * pixel[GFX_ARGB32_OFFSET_R]) / a;
         }
 
         /* sRGB -> linearRGB -> intensity */
         luminance =
           static_cast<uint8_t>
-                     ((gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_R]] *
-                       0.2125 +
-                       gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_G]] *
-                       0.7154 +
-                       gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_B]] *
-                       0.0721) * (a / 255.0) * aOpacity);
+                     (((gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_R]] *
+                        redFactor +
+                        gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_G]] *
+                        greenFactor +
+                        gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_B]] *
+                        blueFactor) >> 8) * (a / 255.0f));
       } else {
         luminance = 0;
       }
       memset(pixel, luminance, 4);
+      pixel += 4;
     }
+    pixel += offset;
   }
 }
 
 static void
 ComputeAlphaMask(uint8_t *aData,
                  int32_t aStride,
                  const IntSize &aSize,
                  float aOpacity)
 {
+  int32_t offset = aStride - 4 * aSize.width;
+  uint8_t *pixel = aData;
+
   for (int32_t y = 0; y < aSize.height; y++) {
     for (int32_t x = 0; x < aSize.width; x++) {
-      uint8_t *pixel = aData + aStride * y + 4 * x;
       uint8_t luminance = pixel[GFX_ARGB32_OFFSET_A] * aOpacity;
       memset(pixel, luminance, 4);
+      pixel += 4;
     }
+    pixel += offset;
   }
 }
 
 //----------------------------------------------------------------------
 // Implementation
 
 nsIFrame*
 NS_NewSVGMaskFrame(nsIPresShell* aPresShell, nsStyleContext* aContext)
--- a/layout/svg/nsSVGMaskFrame.h
+++ b/layout/svg/nsSVGMaskFrame.h
@@ -13,16 +13,31 @@
 #include "gfxMatrix.h"
 #include "nsSVGContainerFrame.h"
 #include "nsSVGUtils.h"
 
 class gfxContext;
 
 typedef nsSVGContainerFrame nsSVGMaskFrameBase;
 
+/**
+ * Byte offsets of channels in a native packed gfxColor or cairo image surface.
+ */
+#ifdef IS_BIG_ENDIAN
+#define GFX_ARGB32_OFFSET_A 0
+#define GFX_ARGB32_OFFSET_R 1
+#define GFX_ARGB32_OFFSET_G 2
+#define GFX_ARGB32_OFFSET_B 3
+#else
+#define GFX_ARGB32_OFFSET_A 3
+#define GFX_ARGB32_OFFSET_R 2
+#define GFX_ARGB32_OFFSET_G 1
+#define GFX_ARGB32_OFFSET_B 0
+#endif
+
 class nsSVGMaskFrame MOZ_FINAL : public nsSVGMaskFrameBase
 {
   friend nsIFrame*
   NS_NewSVGMaskFrame(nsIPresShell* aPresShell, nsStyleContext* aContext);
 
   typedef mozilla::gfx::Matrix Matrix;
   typedef mozilla::gfx::SourceSurface SourceSurface;
 
new file mode 100644
--- /dev/null
+++ b/layout/svg/nsSVGMaskFrameNEON.cpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsSVGMaskFrameNEON.h"
+#include "nsSVGMaskFrame.h"
+#include <arm_neon.h>
+
+void
+ComputesRGBLuminanceMask_NEON(uint8_t *aData,
+                              int32_t aStride,
+                              const IntSize &aSize,
+                              float aOpacity)
+{
+  int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
+  int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
+  int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721 * opacity
+  uint8_t *pixel = aData;
+  int32_t offset = aStride - 4 * aSize.width;
+
+  // Zero all four channels of every pixel whose alpha is zero
+  for (int32_t y = 0; y < aSize.height; y++) {
+    for (int32_t x = 0; x < aSize.width; x++) {
+      if (!pixel[GFX_ARGB32_OFFSET_A]) {
+        memset(pixel, 0, 4);
+      }
+      pixel += 4;
+    }
+    pixel += offset;
+  }
+
+  pixel = aData;
+  int32_t remainderWidth = aSize.width % 8;
+  int32_t roundedWidth = aSize.width - remainderWidth;
+  uint16x8_t temp;
+  uint8x8_t gray;
+  uint8x8x4_t result;
+  uint8x8_t redVec = vdup_n_u8(redFactor);
+  uint8x8_t greenVec = vdup_n_u8(greenFactor);
+  uint8x8_t blueVec = vdup_n_u8(blueFactor);
+  for (int32_t y = 0; y < aSize.height; y++) {
+    // Compute the luminance with NEON, 8 pixels per iteration
+    for (int32_t x = 0; x < roundedWidth; x += 8) {
+      uint8x8x4_t argb  = vld4_u8(pixel);
+      temp = vmull_u8(argb.val[GFX_ARGB32_OFFSET_R], redVec); // temp = red * redFactor
+      temp = vmlal_u8(temp, argb.val[GFX_ARGB32_OFFSET_G], greenVec); // temp += green * greenFactor
+      temp = vmlal_u8(temp, argb.val[GFX_ARGB32_OFFSET_B], blueVec); // temp += blue * blueFactor
+      gray = vshrn_n_u16(temp, 8); // gray = temp >> 8
+
+      // Store the gray value into all four channels of the 8 pixels
+      result.val[0] = gray;
+      result.val[1] = gray;
+      result.val[2] = gray;
+      result.val[3] = gray;
+      vst4_u8(pixel, result);
+      pixel += 8 * 4;
+    }
+
+    // Compute the remaining pixels of the row with scalar code
+    for (int32_t x = 0; x < remainderWidth; x++) {
+      pixel[0] = (redFactor * pixel[GFX_ARGB32_OFFSET_R] +
+                  greenFactor * pixel[GFX_ARGB32_OFFSET_G] +
+                  blueFactor * pixel[GFX_ARGB32_OFFSET_B]) >> 8;
+      memset(pixel + 1, pixel[0], 3);
+      pixel += 4;
+    }
+    pixel += offset;
+  }
+}
+
new file mode 100644
--- /dev/null
+++ b/layout/svg/nsSVGMaskFrameNEON.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __NS_SVGMASKFRAMENEON_H__
+#define __NS_SVGMASKFRAMENEON_H__
+
+#include "mozilla/gfx/2D.h"
+
+using namespace mozilla::gfx;
+
+void
+ComputesRGBLuminanceMask_NEON(uint8_t *aData,
+                              int32_t aStride,
+                              const IntSize &aSize,
+                              float aOpacity);
+
+#endif /* __NS_SVGMASKFRAMENEON_H__ */