Bug 1116070 - Use neon to speed up the mask creation. r=longsonr
author Ethan Lin <etlin@mozilla.com>
Tue, 06 Jan 2015 02:44:00 -0500
changeset 239266 a437d18a4b9b3b3d8c36c8cd32f9f16f76dc47db
parent 239265 adbabd8de78c56a3db8f54a3be87d28d848ad790
child 239267 cb37bece36f37b0e647f273969442f59b44b9113
push id 7472
push user raliiev@mozilla.com
push date Mon, 12 Jan 2015 20:36:27 +0000
treeherder mozilla-aurora@300ca104f8fb [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers longsonr
bugs 1116070
milestone 37.0a1
Bug 1116070 - Use neon to speed up the mask creation. r=longsonr
layout/reftests/svg/svg-integration/reftest.list
layout/svg/moz.build
layout/svg/nsSVGMaskFrame.cpp
layout/svg/nsSVGMaskFrame.h
layout/svg/nsSVGMaskFrameNEON.cpp
layout/svg/nsSVGMaskFrameNEON.h
--- a/layout/reftests/svg/svg-integration/reftest.list
+++ b/layout/reftests/svg/svg-integration/reftest.list
@@ -22,16 +22,16 @@ fuzzy-if(true,140,70) == clipPath-html-0
 == dynamic-conditions-outer-svg-03.xhtml ../pass.svg
 == dynamic-conditions-outer-svg-04.xhtml ../pass.svg
 == filter-html-01.xhtml filter-html-01-ref.svg
 random-if(B2G) == filter-html-01-extref.xhtml filter-html-01-ref.svg # Bug 1063987
 == filter-html-zoomed-01.xhtml filter-html-01-ref.svg
 == mask-html-01.xhtml mask-html-01-ref.svg
 == mask-html-01-extref-01.xhtml mask-html-01-ref.svg
 random == mask-html-01-extref-02.xhtml mask-html-01-ref.svg # random due to bug 877661
-fuzzy-if(B2G&&browserIsRemote,1,2000) == mask-html-zoomed-01.xhtml mask-html-01-ref.svg
+fuzzy-if(B2G&&browserIsRemote,1,2300) == mask-html-zoomed-01.xhtml mask-html-01-ref.svg
 # Skil XBL test case on B2G
 skip-if(B2G) == mask-html-xbl-bound-01.html mask-html-01-ref.svg
 == mask-transformed-html-01.xhtml ../pass.svg
 == mask-transformed-html-02.xhtml ../pass.svg
 == patterned-svg-under-transformed-html-01.xhtml ../pass.svg
 == patterned-svg-under-transformed-html-02.xhtml ../pass.svg
 
--- a/layout/svg/moz.build
+++ b/layout/svg/moz.build
@@ -43,16 +43,20 @@ UNIFIED_SOURCES += [
     'SVGFEContainerFrame.cpp',
     'SVGFEImageFrame.cpp',
     'SVGFELeafFrame.cpp',
     'SVGFEUnstyledLeafFrame.cpp',
     'SVGTextFrame.cpp',
     'SVGViewFrame.cpp',
 ]
 
+if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
+    SOURCES += ['nsSVGMaskFrameNEON.cpp']
+    SOURCES['nsSVGMaskFrameNEON.cpp'].flags += ['-mfpu=neon']
+
 FAIL_ON_WARNINGS = True
 
 FINAL_LIBRARY = 'xul'
 LOCAL_INCLUDES += [
     '../../widget',
     '../base',
     '../generic',
     '../style',
--- a/layout/svg/nsSVGMaskFrame.cpp
+++ b/layout/svg/nsSVGMaskFrame.cpp
@@ -8,36 +8,25 @@
 
 // Keep others in (case-insensitive) order:
 #include "gfx2DGlue.h"
 #include "gfxContext.h"
 #include "mozilla/gfx/2D.h"
 #include "mozilla/RefPtr.h"
 #include "nsSVGEffects.h"
 #include "mozilla/dom/SVGMaskElement.h"
+#ifdef BUILD_ARM_NEON
+#include "mozilla/arm.h"
+#include "nsSVGMaskFrameNEON.h"
+#endif
 
 using namespace mozilla;
 using namespace mozilla::dom;
 using namespace mozilla::gfx;
 
-/**
- * Byte offsets of channels in a native packed gfxColor or cairo image surface.
- */
-#ifdef IS_BIG_ENDIAN
-#define GFX_ARGB32_OFFSET_A 0
-#define GFX_ARGB32_OFFSET_R 1
-#define GFX_ARGB32_OFFSET_G 2
-#define GFX_ARGB32_OFFSET_B 3
-#else
-#define GFX_ARGB32_OFFSET_A 3
-#define GFX_ARGB32_OFFSET_R 2
-#define GFX_ARGB32_OFFSET_G 1
-#define GFX_ARGB32_OFFSET_B 0
-#endif
-
 // c = n / 255
 // c <= 0.04045 ? c / 12.92 : pow((c + 0.055) / 1.055, 2.4)) * 255 + 0.5
 static const uint8_t gsRGBToLinearRGBMap[256] = {
   0,   0,   0,   0,   0,   0,   0,   1,
   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   2,   2,   2,   2,   2,   2,
   2,   2,   3,   3,   3,   3,   3,   3,
   4,   4,   4,   4,   4,   5,   5,   5,
@@ -71,48 +60,62 @@ 239, 242, 244, 246, 248, 250, 253, 255
 };
 
 static void
 ComputesRGBLuminanceMask(uint8_t *aData,
                          int32_t aStride,
                          const IntSize &aSize,
                          float aOpacity)
 {
+#ifdef BUILD_ARM_NEON
+  if (mozilla::supports_neon()) {
+    ComputesRGBLuminanceMask_NEON(aData, aStride, aSize, aOpacity);
+    return;
+  }
+#endif
+
+  int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
+  int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
+  int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721
+  int32_t offset = aStride - 4 * aSize.width;
+  uint8_t *pixel = aData;
+
   for (int32_t y = 0; y < aSize.height; y++) {
     for (int32_t x = 0; x < aSize.width; x++) {
-      uint8_t *pixel = aData + aStride * y + 4 * x;
       uint8_t a = pixel[GFX_ARGB32_OFFSET_A];
 
       uint8_t luminance;
       if (a) {
-        /* sRGB -> intensity (unpremultiply cancels out the
-         * (a/255.0) multiplication with aOpacity */
-        luminance =
-          static_cast<uint8_t>
-                     ((pixel[GFX_ARGB32_OFFSET_R] * 0.2125 +
-                       pixel[GFX_ARGB32_OFFSET_G] * 0.7154 +
-                       pixel[GFX_ARGB32_OFFSET_B] * 0.0721) *
-                      aOpacity);
+        luminance = (redFactor * pixel[GFX_ARGB32_OFFSET_R] +
+                     greenFactor * pixel[GFX_ARGB32_OFFSET_G] +
+                     blueFactor * pixel[GFX_ARGB32_OFFSET_B]) >> 8;
       } else {
         luminance = 0;
       }
       memset(pixel, luminance, 4);
+      pixel += 4;
     }
+    pixel += offset;
   }
 }
 
 static void
 ComputeLinearRGBLuminanceMask(uint8_t *aData,
                               int32_t aStride,
                               const IntSize &aSize,
                               float aOpacity)
 {
+  int32_t redFactor = 55 * aOpacity; // 255 * 0.2125 * opacity
+  int32_t greenFactor = 183 * aOpacity; // 255 * 0.7154 * opacity
+  int32_t blueFactor = 18 * aOpacity; // 255 * 0.0721
+  int32_t offset = aStride - 4 * aSize.width;
+  uint8_t *pixel = aData;
+
   for (int32_t y = 0; y < aSize.height; y++) {
     for (int32_t x = 0; x < aSize.width; x++) {
-      uint8_t *pixel = aData + aStride * y + 4 * x;
       uint8_t a = pixel[GFX_ARGB32_OFFSET_A];
 
       uint8_t luminance;
       // unpremultiply
       if (a) {
         if (a != 255) {
           pixel[GFX_ARGB32_OFFSET_B] =
             (255 * pixel[GFX_ARGB32_OFFSET_B]) / a;
@@ -120,42 +123,48 @@ ComputeLinearRGBLuminanceMask(uint8_t *a
             (255 * pixel[GFX_ARGB32_OFFSET_G]) / a;
           pixel[GFX_ARGB32_OFFSET_R] =
             (255 * pixel[GFX_ARGB32_OFFSET_R]) / a;
         }
 
         /* sRGB -> linearRGB -> intensity */
         luminance =
           static_cast<uint8_t>
-                     ((gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_R]] *
-                       0.2125 +
-                       gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_G]] *
-                       0.7154 +
-                       gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_B]] *
-                       0.0721) * (a / 255.0) * aOpacity);
+                     (((gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_R]] *
+                        redFactor +
+                        gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_G]] *
+                        greenFactor +
+                        gsRGBToLinearRGBMap[pixel[GFX_ARGB32_OFFSET_B]] *
+                        blueFactor) >> 8) * (a / 255.0f));
       } else {
         luminance = 0;
       }
       memset(pixel, luminance, 4);
+      pixel += 4;
     }
+    pixel += offset;
   }
 }
 
 static void
 ComputeAlphaMask(uint8_t *aData,
                  int32_t aStride,
                  const IntSize &aSize,
                  float aOpacity)
 {
+  int32_t offset = aStride - 4 * aSize.width;
+  uint8_t *pixel = aData;
+
   for (int32_t y = 0; y < aSize.height; y++) {
     for (int32_t x = 0; x < aSize.width; x++) {
-      uint8_t *pixel = aData + aStride * y + 4 * x;
       uint8_t luminance = pixel[GFX_ARGB32_OFFSET_A] * aOpacity;
       memset(pixel, luminance, 4);
+      pixel += 4;
     }
+    pixel += offset;
   }
 }
 
 //----------------------------------------------------------------------
 // Implementation
 
 nsIFrame*
 NS_NewSVGMaskFrame(nsIPresShell* aPresShell, nsStyleContext* aContext)
--- a/layout/svg/nsSVGMaskFrame.h
+++ b/layout/svg/nsSVGMaskFrame.h
@@ -13,16 +13,31 @@
 #include "gfxMatrix.h"
 #include "nsSVGContainerFrame.h"
 #include "nsSVGUtils.h"
 
 class gfxContext;
 
 typedef nsSVGContainerFrame nsSVGMaskFrameBase;
 
+/**
+ * Byte offsets of channels in a native packed gfxColor or cairo image surface.
+ */
+#ifdef IS_BIG_ENDIAN
+#define GFX_ARGB32_OFFSET_A 0
+#define GFX_ARGB32_OFFSET_R 1
+#define GFX_ARGB32_OFFSET_G 2
+#define GFX_ARGB32_OFFSET_B 3
+#else
+#define GFX_ARGB32_OFFSET_A 3
+#define GFX_ARGB32_OFFSET_R 2
+#define GFX_ARGB32_OFFSET_G 1
+#define GFX_ARGB32_OFFSET_B 0
+#endif
+
 class nsSVGMaskFrame MOZ_FINAL : public nsSVGMaskFrameBase
 {
   friend nsIFrame*
   NS_NewSVGMaskFrame(nsIPresShell* aPresShell, nsStyleContext* aContext);
 
   typedef mozilla::gfx::Matrix Matrix;
   typedef mozilla::gfx::SourceSurface SourceSurface;
 
new file mode 100644
--- /dev/null
+++ b/layout/svg/nsSVGMaskFrameNEON.cpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsSVGMaskFrameNEON.h"
+#include "nsSVGMaskFrame.h"
+#include <arm_neon.h>
+
+void
+ComputesRGBLuminanceMask_NEON(uint8_t *aData,
+                              int32_t aStride,
+                              const IntSize &aSize,
+                              float aOpacity)
+{
+  int32_t redFactor = 55 * aOpacity; // red weight (~0.2125) * opacity, scaled for the >> 8 below
+  int32_t greenFactor = 183 * aOpacity; // green weight (~0.7154) * opacity, same scale
+  int32_t blueFactor = 18 * aOpacity; // blue weight (~0.0721) * opacity, same scale
+  uint8_t *pixel = aData;
+  int32_t offset = aStride - 4 * aSize.width; // bytes between the end of one row and the start of the next
+
+  // First pass: clear all four bytes of every pixel whose alpha byte is zero.
+  for (int32_t y = 0; y < aSize.height; y++) {
+    for (int32_t x = 0; x < aSize.width; x++) {
+      if (!pixel[GFX_ARGB32_OFFSET_A]) {
+        memset(pixel, 0, 4);
+      }
+      pixel += 4;
+    }
+    pixel += offset;
+  }
+
+  pixel = aData; // rewind for the second pass
+  int32_t remainderWidth = aSize.width % 8; // pixels per row left over after the 8-wide NEON loop
+  int32_t roundedWidth = aSize.width - remainderWidth;
+  uint16x8_t temp;
+  uint8x8_t gray;
+  uint8x8x4_t result;
+  uint8x8_t redVec = vdup_n_u8(redFactor); // NOTE(review): int32_t factors are narrowed to uint8_t lanes here
+  uint8x8_t greenVec = vdup_n_u8(greenFactor);
+  uint8x8_t blueVec = vdup_n_u8(blueFactor);
+  for (int32_t y = 0; y < aSize.height; y++) {
+    // Compute luminance with NEON, 8 pixels per iteration.
+    for (int32_t x = 0; x < roundedWidth; x += 8) {
+      uint8x8x4_t argb  = vld4_u8(pixel);
+      temp = vmull_u8(argb.val[GFX_ARGB32_OFFSET_R], redVec); // temp = red * redFactor
+      temp = vmlal_u8(temp, argb.val[GFX_ARGB32_OFFSET_G], greenVec); // temp += green * greenFactor
+      temp = vmlal_u8(temp, argb.val[GFX_ARGB32_OFFSET_B], blueVec); // temp += blue * blueFactor
+      gray = vshrn_n_u16(temp, 8); // gray = temp >> 8
+
+      // Store the gray value into all four channels of the 8 pixels.
+      result.val[0] = gray;
+      result.val[1] = gray;
+      result.val[2] = gray;
+      result.val[3] = gray;
+      vst4_u8(pixel, result);
+      pixel += 8 * 4;
+    }
+
+    // Scalar path for the remaining (width % 8) pixels; zero-alpha pixels were cleared above.
+    for (int32_t x = 0; x < remainderWidth; x++) {
+      pixel[0] = (redFactor * pixel[GFX_ARGB32_OFFSET_R] +
+                  greenFactor * pixel[GFX_ARGB32_OFFSET_G] +
+                  blueFactor * pixel[GFX_ARGB32_OFFSET_B]) >> 8;
+      memset(pixel + 1, pixel[0], 3);
+      pixel += 4;
+    }
+    pixel += offset;
+  }
+}
+
new file mode 100644
--- /dev/null
+++ b/layout/svg/nsSVGMaskFrameNEON.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __NS_SVGMASKFRAMENEON_H__
+#define __NS_SVGMASKFRAMENEON_H__
+
+#include "mozilla/gfx/2D.h"
+
+using namespace mozilla::gfx;
+
+void
+ComputesRGBLuminanceMask_NEON(uint8_t *aData,
+                              int32_t aStride,
+                              const IntSize &aSize,
+                              float aOpacity);
+
+#endif /* __NS_SVGMASKFRAMENEON_H__ */