Bug 751814 - Various Skia fixes for ARM without EDSP and ARMv6+. r=derf
author Mike Hommey <mh+mozilla@glandium.org>
Tue, 15 May 2012 07:46:02 +0200
changeset 96230 2b809c95bd215b587d9be4d22d4f8ea428e7b652
parent 96229 835cdc6f333f2cdaf2c2ad15ce22e6e27f29c38b
child 96231 f91162904ffca1beba2d2220046221bcc0dfbcf9
push id unknown
push user unknown
push date unknown
reviewers derf
bugs 751814
milestone 15.0a1
Bug 751814 - Various Skia fixes for ARM without EDSP and ARMv6+. r=derf
gfx/skia/arm-fixes.patch
gfx/skia/include/core/SkMath.h
gfx/skia/include/core/SkPostConfig.h
gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp
gfx/skia/src/opts/SkBlitRow_opts_arm.cpp
gfx/skia/update.sh
new file mode 100644
--- /dev/null
+++ b/gfx/skia/arm-fixes.patch
@@ -0,0 +1,191 @@
+diff --git a/gfx/skia/include/core/SkMath.h b/gfx/skia/include/core/SkMath.h
+--- a/gfx/skia/include/core/SkMath.h
++++ b/gfx/skia/include/core/SkMath.h
+@@ -148,20 +148,17 @@ static inline bool SkIsPow2(int value) {
+ }
+ 
+ ///////////////////////////////////////////////////////////////////////////////
+ 
+ /** SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t.
+     With this requirement, we can generate faster instructions on some
+     architectures.
+ */
+-#if defined(__arm__) \
+-  && !defined(__thumb__) \
+-  && !defined(__ARM_ARCH_4T__) \
+-  && !defined(__ARM_ARCH_5T__)
++#ifdef SK_ARM_HAS_EDSP
+     static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
+         SkASSERT((int16_t)x == x);
+         SkASSERT((int16_t)y == y);
+         int32_t product;
+         asm("smulbb %0, %1, %2 \n"
+             : "=r"(product)
+             : "r"(x), "r"(y)
+             );
+diff --git a/gfx/skia/include/core/SkPostConfig.h b/gfx/skia/include/core/SkPostConfig.h
+--- a/gfx/skia/include/core/SkPostConfig.h
++++ b/gfx/skia/include/core/SkPostConfig.h
+@@ -300,8 +300,53 @@
+ #endif
+ #endif
+ 
+ //////////////////////////////////////////////////////////////////////
+ 
+ #ifndef SK_ALLOW_STATIC_GLOBAL_INITIALIZERS
+ #define SK_ALLOW_STATIC_GLOBAL_INITIALIZERS 1
+ #endif
++
++//////////////////////////////////////////////////////////////////////
++// ARM defines
++
++#if defined(__GNUC__) && defined(__arm__)
++
++#  define SK_ARM_ARCH 3
++
++#  if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) \
++   || defined(_ARM_ARCH_4)
++#    undef SK_ARM_ARCH
++#    define SK_ARM_ARCH 4
++#  endif
++
++#  if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
++   || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
++   || defined(__ARM_ARCH_5TEJ__) || defined(_ARM_ARCH_5)
++#    undef SK_ARM_ARCH
++#    define SK_ARM_ARCH 5
++#  endif
++ 
++#  if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
++   || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
++   || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
++   || defined(__ARM_ARCH_6M__) || defined(_ARM_ARCH_6)
++#    undef SK_ARM_ARCH
++#    define SK_ARM_ARCH 6
++#  endif
++
++#  if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
++   || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
++   || defined(__ARM_ARCH_7EM__) || defined(_ARM_ARCH_7)
++#    undef SK_ARM_ARCH
++#    define SK_ARM_ARCH 7
++#  endif
++
++#  undef SK_ARM_HAS_EDSP
++#  if defined(__thumb2__) && (SK_ARM_ARCH >= 6) \
++   || !defined(__thumb__) \
++   && ((SK_ARM_ARCH > 5) || defined(__ARM_ARCH_5E__) \
++       || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__))
++#    define SK_ARM_HAS_EDSP 1
++#  endif
++
++#endif
+diff --git a/gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp b/gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp
+--- a/gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp
++++ b/gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp
+@@ -6,17 +6,17 @@
+  * found in the LICENSE file.
+  */
+ 
+ 
+ #include "SkBitmapProcState.h"
+ #include "SkColorPriv.h"
+ #include "SkUtils.h"
+ 
+-#if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
++#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
+ void SI8_D16_nofilter_DX_arm(
+     const SkBitmapProcState& s,
+     const uint32_t* SK_RESTRICT xy,
+     int count,
+     uint16_t* SK_RESTRICT colors) __attribute__((optimize("O1")));
+ 
+ void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s,
+                              const uint32_t* SK_RESTRICT xy,
+@@ -177,17 +177,17 @@ void SI8_opaque_D32_nofilter_DX_arm(cons
+                       : [xx] "+r" (xx), [count] "+r" (count), [colors] "+r" (colors)
+                       : [table] "r" (table), [srcAddr] "r" (srcAddr)
+                       : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
+                       );
+     }
+ 
+     s.fBitmap->getColorTable()->unlockColors(false);
+ }
+-#endif //__ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
++#endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
+ 
+ ///////////////////////////////////////////////////////////////////////////////
+ 
+ /*  If we replace a sampleproc, then we null-out the associated shaderproc,
+     otherwise the shader won't even look at the matrix/sampler
+  */
+ void SkBitmapProcState::platformProcs() {
+     bool doFilter = fDoFilter;
+@@ -195,17 +195,17 @@ void SkBitmapProcState::platformProcs() 
+     bool justDx = false;
+ 
+     if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
+         justDx = true;
+     }
+ 
+     switch (fBitmap->config()) {
+         case SkBitmap::kIndex8_Config:
+-#if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
++#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
+             if (justDx && !doFilter) {
+ #if 0   /* crashing on android device */
+                 fSampleProc16 = SI8_D16_nofilter_DX_arm;
+                 fShaderProc16 = NULL;
+ #endif
+                 if (isOpaque) {
+                     // this one is only very slighty faster than the C version
+                     fSampleProc32 = SI8_opaque_D32_nofilter_DX_arm;
+diff --git a/gfx/skia/src/opts/SkBlitRow_opts_arm.cpp b/gfx/skia/src/opts/SkBlitRow_opts_arm.cpp
+--- a/gfx/skia/src/opts/SkBlitRow_opts_arm.cpp
++++ b/gfx/skia/src/opts/SkBlitRow_opts_arm.cpp
+@@ -669,18 +669,23 @@ static void __attribute((noinline,optimi
+                   /* Double Loop */
+                   "1:                                \n\t" /* <double loop> */
+                   "ldm    %[src]!, {r5, r6}          \n\t" /* loading src pointers into r5 and r6 */
+                   "ldm    %[dst], {r7, r8}           \n\t" /* loading dst pointers into r7 and r8 */
+ 
+                   /* dst1_scale and dst2_scale*/
+                   "lsr    r9, r5, #24                \n\t" /* src >> 24 */
+                   "lsr    r10, r6, #24               \n\t" /* src >> 24 */
++#ifdef SK_ARM_HAS_EDSP
+                   "smulbb r9, r9, %[alpha]           \n\t" /* r9 = SkMulS16 r9 with src_scale */
+                   "smulbb r10, r10, %[alpha]         \n\t" /* r10 = SkMulS16 r10 with src_scale */
++#else
++                  "mul    r9, r9, %[alpha]           \n\t" /* r9 = SkMulS16 r9 with src_scale */
++                  "mul    r10, r10, %[alpha]         \n\t" /* r10 = SkMulS16 r10 with src_scale */
++#endif
+                   "lsr    r9, r9, #8                 \n\t" /* r9 >> 8 */
+                   "lsr    r10, r10, #8               \n\t" /* r10 >> 8 */
+                   "rsb    r9, r9, #256               \n\t" /* dst1_scale = r9 = 255 - r9 + 1 */
+                   "rsb    r10, r10, #256             \n\t" /* dst2_scale = r10 = 255 - r10 + 1 */
+ 
+                   /* ---------------------- */
+ 
+                   /* src1, src1_scale */
+@@ -739,17 +744,21 @@ static void __attribute((noinline,optimi
+                                                            /* else get into the single loop */
+                   /* Single Loop */
+                   "2:                                \n\t" /* <single loop> */
+                   "ldr    r5, [%[src]], #4           \n\t" /* loading src pointer into r5: r5=src */
+                   "ldr    r7, [%[dst]]               \n\t" /* loading dst pointer into r7: r7=dst */
+ 
+                   "lsr    r6, r5, #24                \n\t" /* src >> 24 */
+                   "and    r8, r12, r5, lsr #8        \n\t" /* ag = r8 = r5 masked by r12 lsr by #8 */
++#ifdef SK_ARM_HAS_EDSP
+                   "smulbb r6, r6, %[alpha]           \n\t" /* r6 = SkMulS16 with src_scale */
++#else
++                  "mul    r6, r6, %[alpha]           \n\t" /* r6 = SkMulS16 with src_scale */
++#endif
+                   "and    r9, r12, r5                \n\t" /* rb = r9 = r5 masked by r12 */
+                   "lsr    r6, r6, #8                 \n\t" /* r6 >> 8 */
+                   "mul    r8, r8, %[alpha]           \n\t" /* ag = r8 times scale */
+                   "rsb    r6, r6, #256               \n\t" /* r6 = 255 - r6 + 1 */
+ 
+                   /* src, src_scale */
+                   "mul    r9, r9, %[alpha]           \n\t" /* rb = r9 times scale */
+                   "and    r8, r8, r12, lsl #8        \n\t" /* ag masked by reverse mask (r12) */
--- a/gfx/skia/include/core/SkMath.h
+++ b/gfx/skia/include/core/SkMath.h
@@ -148,20 +148,17 @@ static inline bool SkIsPow2(int value) {
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
 /** SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t.
     With this requirement, we can generate faster instructions on some
     architectures.
 */
-#if defined(__arm__) \
-  && !defined(__thumb__) \
-  && !defined(__ARM_ARCH_4T__) \
-  && !defined(__ARM_ARCH_5T__)
+#ifdef SK_ARM_HAS_EDSP
     static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
         SkASSERT((int16_t)x == x);
         SkASSERT((int16_t)y == y);
         int32_t product;
         asm("smulbb %0, %1, %2 \n"
             : "=r"(product)
             : "r"(x), "r"(y)
             );
--- a/gfx/skia/include/core/SkPostConfig.h
+++ b/gfx/skia/include/core/SkPostConfig.h
@@ -300,8 +300,53 @@
 #endif
 #endif
 
 //////////////////////////////////////////////////////////////////////
 
 #ifndef SK_ALLOW_STATIC_GLOBAL_INITIALIZERS
 #define SK_ALLOW_STATIC_GLOBAL_INITIALIZERS 1
 #endif
+
+//////////////////////////////////////////////////////////////////////
+// ARM defines
+
+#if defined(__GNUC__) && defined(__arm__)
+
+#  define SK_ARM_ARCH 3
+
+#  if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) \
+   || defined(_ARM_ARCH_4)
+#    undef SK_ARM_ARCH
+#    define SK_ARM_ARCH 4
+#  endif
+
+#  if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+   || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+   || defined(__ARM_ARCH_5TEJ__) || defined(_ARM_ARCH_5)
+#    undef SK_ARM_ARCH
+#    define SK_ARM_ARCH 5
+#  endif
+ 
+#  if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+   || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+   || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+   || defined(__ARM_ARCH_6M__) || defined(_ARM_ARCH_6)
+#    undef SK_ARM_ARCH
+#    define SK_ARM_ARCH 6
+#  endif
+
+#  if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+   || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+   || defined(__ARM_ARCH_7EM__) || defined(_ARM_ARCH_7)
+#    undef SK_ARM_ARCH
+#    define SK_ARM_ARCH 7
+#  endif
+
+#  undef SK_ARM_HAS_EDSP
+#  if defined(__thumb2__) && (SK_ARM_ARCH >= 6) \
+   || !defined(__thumb__) \
+   && ((SK_ARM_ARCH > 5) || defined(__ARM_ARCH_5E__) \
+       || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__))
+#    define SK_ARM_HAS_EDSP 1
+#  endif
+
+#endif
--- a/gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp
+++ b/gfx/skia/src/opts/SkBitmapProcState_opts_arm.cpp
@@ -6,17 +6,17 @@
  * found in the LICENSE file.
  */
 
 
 #include "SkBitmapProcState.h"
 #include "SkColorPriv.h"
 #include "SkUtils.h"
 
-#if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
+#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
 void SI8_D16_nofilter_DX_arm(
     const SkBitmapProcState& s,
     const uint32_t* SK_RESTRICT xy,
     int count,
     uint16_t* SK_RESTRICT colors) __attribute__((optimize("O1")));
 
 void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s,
                              const uint32_t* SK_RESTRICT xy,
@@ -177,17 +177,17 @@ void SI8_opaque_D32_nofilter_DX_arm(cons
                       : [xx] "+r" (xx), [count] "+r" (count), [colors] "+r" (colors)
                       : [table] "r" (table), [srcAddr] "r" (srcAddr)
                       : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
                       );
     }
 
     s.fBitmap->getColorTable()->unlockColors(false);
 }
-#endif //__ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
+#endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
 
 ///////////////////////////////////////////////////////////////////////////////
 
 /*  If we replace a sampleproc, then we null-out the associated shaderproc,
     otherwise the shader won't even look at the matrix/sampler
  */
 void SkBitmapProcState::platformProcs() {
     bool doFilter = fDoFilter;
@@ -195,17 +195,17 @@ void SkBitmapProcState::platformProcs() 
     bool justDx = false;
 
     if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
         justDx = true;
     }
 
     switch (fBitmap->config()) {
         case SkBitmap::kIndex8_Config:
-#if __ARM_ARCH__ >= 6 && !defined(SK_CPU_BENDIAN)
+#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
             if (justDx && !doFilter) {
 #if 0   /* crashing on android device */
                 fSampleProc16 = SI8_D16_nofilter_DX_arm;
                 fShaderProc16 = NULL;
 #endif
                 if (isOpaque) {
                     // this one is only very slighty faster than the C version
                     fSampleProc32 = SI8_opaque_D32_nofilter_DX_arm;
--- a/gfx/skia/src/opts/SkBlitRow_opts_arm.cpp
+++ b/gfx/skia/src/opts/SkBlitRow_opts_arm.cpp
@@ -669,18 +669,23 @@ static void __attribute((noinline,optimi
                   /* Double Loop */
                   "1:                                \n\t" /* <double loop> */
                   "ldm    %[src]!, {r5, r6}          \n\t" /* loading src pointers into r5 and r6 */
                   "ldm    %[dst], {r7, r8}           \n\t" /* loading dst pointers into r7 and r8 */
 
                   /* dst1_scale and dst2_scale*/
                   "lsr    r9, r5, #24                \n\t" /* src >> 24 */
                   "lsr    r10, r6, #24               \n\t" /* src >> 24 */
+#ifdef SK_ARM_HAS_EDSP
                   "smulbb r9, r9, %[alpha]           \n\t" /* r9 = SkMulS16 r9 with src_scale */
                   "smulbb r10, r10, %[alpha]         \n\t" /* r10 = SkMulS16 r10 with src_scale */
+#else
+                  "mul    r9, r9, %[alpha]           \n\t" /* r9 = SkMulS16 r9 with src_scale */
+                  "mul    r10, r10, %[alpha]         \n\t" /* r10 = SkMulS16 r10 with src_scale */
+#endif
                   "lsr    r9, r9, #8                 \n\t" /* r9 >> 8 */
                   "lsr    r10, r10, #8               \n\t" /* r10 >> 8 */
                   "rsb    r9, r9, #256               \n\t" /* dst1_scale = r9 = 255 - r9 + 1 */
                   "rsb    r10, r10, #256             \n\t" /* dst2_scale = r10 = 255 - r10 + 1 */
 
                   /* ---------------------- */
 
                   /* src1, src1_scale */
@@ -739,17 +744,21 @@ static void __attribute((noinline,optimi
                                                            /* else get into the single loop */
                   /* Single Loop */
                   "2:                                \n\t" /* <single loop> */
                   "ldr    r5, [%[src]], #4           \n\t" /* loading src pointer into r5: r5=src */
                   "ldr    r7, [%[dst]]               \n\t" /* loading dst pointer into r7: r7=dst */
 
                   "lsr    r6, r5, #24                \n\t" /* src >> 24 */
                   "and    r8, r12, r5, lsr #8        \n\t" /* ag = r8 = r5 masked by r12 lsr by #8 */
+#ifdef SK_ARM_HAS_EDSP
                   "smulbb r6, r6, %[alpha]           \n\t" /* r6 = SkMulS16 with src_scale */
+#else
+                  "mul    r6, r6, %[alpha]           \n\t" /* r6 = SkMulS16 with src_scale */
+#endif
                   "and    r9, r12, r5                \n\t" /* rb = r9 = r5 masked by r12 */
                   "lsr    r6, r6, #8                 \n\t" /* r6 >> 8 */
                   "mul    r8, r8, %[alpha]           \n\t" /* ag = r8 times scale */
                   "rsb    r6, r6, #256               \n\t" /* r6 = 255 - r6 + 1 */
 
                   /* src, src_scale */
                   "mul    r9, r9, %[alpha]           \n\t" /* rb = r9 times scale */
                   "and    r8, r8, r12, lsl #8        \n\t" /* ag masked by reverse mask (r12) */
--- a/gfx/skia/update.sh
+++ b/gfx/skia/update.sh
@@ -109,8 +109,10 @@ patch -p3 < user-config.patch
 # Bug 715718 - Unitialized variable 'margin' in compute_bounds : SkDraw.cpp
 patch -p3 < uninitialized-margin.patch
 # Bug 722011 - Fix comma at end of enum list
 patch -p3 < fix-comma-end-enum-list.patch
 # Bug 719872 - Fix crash on Android by reverting to older FontHost impl
 patch -p3 < old-android-fonthost.patch
 # Bug 731384 - Fix compile errors on older versions of clang
 patch -p3 < SkPostConfig.patch
+# Bug 751814 - Various Skia fixes for ARM without EDSP and ARMv6+
+patch -p3 < arm-fixes.patch