Bug 583958 - Add NEON detection to SSE.h - r=jmuizelaar
author: Tim Terribe <tterribe@vt.edu>
Sat, 09 Apr 2011 06:57:52 +1200
changeset 67675 3b33622916e25577951983861865fb9358afff4d
parent 67674 ce528cff2f8f96bba6a3df8ef3897bab32395989
child 67676 acf9030b80b4c6478bfe7ec469ce9de370bf40a1
push id: unknown
push user: unknown
push date: unknown
reviewers: jmuizelaar
bugs: 583958
milestone: 2.2a1pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 583958 - Add NEON detection to SSE.h - r=jmuizelaar
gfx/ycbcr/ycbcr_to_rgb565.cpp
gfx/ycbcr/ycbcr_to_rgb565.h
xpcom/glue/Makefile.in
xpcom/glue/arm.cpp
xpcom/glue/arm.h
xpcom/glue/nomozalloc/Makefile.in
xpcom/glue/objs.mk
--- a/gfx/ycbcr/ycbcr_to_rgb565.cpp
+++ b/gfx/ycbcr/ycbcr_to_rgb565.cpp
@@ -33,88 +33,23 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "ycbcr_to_rgb565.h"
 
-//The logic for have_ycbcr_to_rgb565 is taken from pixman-cpu.c
-
-#if !defined (HAVE_ARM_NEON)
+#if !defined (MOZILLA_MAY_SUPPORT_NEON)
 
 int have_ycbcr_to_rgb565 ()
 {
     return 0;
 }
 
 #else
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <elf.h>
-
-#ifdef ANDROID
-
 int have_ycbcr_to_rgb565 ()
 {
-    static int have_ycbcr_to_rgb565_initialized = 0;
-    static int arm_has_neon = 0;
-    if (!have_ycbcr_to_rgb565_initialized)
-    {
-        have_ycbcr_to_rgb565_initialized = 1;
-
-        char buf[1024];
-        const char* ver_token = "CPU architecture: ";
-        FILE* f = fopen("/proc/cpuinfo", "r");
-        if (!f) {
-	        return 0;
-        }
-
-        fread(buf, sizeof(char), 1024, f);
-        arm_has_neon = strstr(buf, "neon") != NULL;
-        fclose(f);
-    }
-    return arm_has_neon;
+    return mozilla::supports_neon();
 }
 
-#else
-
-int have_ycbcr_to_rgb565 ()
-{
-    static int have_ycbcr_to_rgb565_initialized = 0;
-    static int arm_has_neon = 0;
-    if (!have_ycbcr_to_rgb565_initialized)
-    {
-        have_ycbcr_to_rgb565_initialized = 1;
-        int fd;
-        Elf32_auxv_t aux;
-
-        fd = open ("/proc/self/auxv", O_RDONLY);
-        if (fd >= 0)
-        {
-            while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
-            {
-                if (aux.a_type == AT_HWCAP)
-                {
-                    uint32_t hwcap = aux.a_un.a_val;
-                    arm_has_neon = (hwcap & 4096) != 0;
-                    break;
-                }
-            }
-            close (fd);
-         }
-    }
-
-    return arm_has_neon;
-}
-
-#endif //ANDROID
-
-#endif //_MSC_VER
-
+#endif //MOZILLA_MAY_SUPPORT_NEON
--- a/gfx/ycbcr/ycbcr_to_rgb565.h
+++ b/gfx/ycbcr/ycbcr_to_rgb565.h
@@ -1,5 +1,10 @@
 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
+#ifndef MEDIA_BASE_YCBCR_TO_RGB565_H_
+#define MEDIA_BASE_YCBCR_TO_RGB565_H_
+#include "mozilla/arm.h"
 
 int have_ycbcr_to_rgb565();
+
+#endif // MEDIA_BASE_YCBCR_TO_RGB565_H_
--- a/xpcom/glue/Makefile.in
+++ b/xpcom/glue/Makefile.in
@@ -127,16 +127,17 @@ EXPORTS_mozilla = \
   CondVar.h \
   DeadlockDetector.h \
   FileUtils.h \
   GenericFactory.h \
   IntentionalCrash.h \
   Monitor.h \
   Mutex.h \
   SSE.h \
+  arm.h \
   unused.h \
   $(NULL)
 
 EXPORTS_mozilla/threads = \
   nsThreadIDs.h \
   $(NULL)
 
 SDK_LIBRARY     =                        \
new file mode 100644
--- /dev/null
+++ b/xpcom/glue/arm.cpp
@@ -0,0 +1,200 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is arm.cpp
+ *
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Timothy B. Terriberry <tterriberry@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+/* compile-time and runtime tests for whether to use various ARM extensions */
+
+#include "arm.h"
+
+namespace {
+
+// arm.h has parallel #ifs which declare MOZILLA_ARM_HAVE_CPUID_DETECTION.
+// We don't check it here so that we get compile errors if it's defined, but
+// we don't compile one of these detection methods. The detection code here is
+// based on the CPU detection in libtheora.
+
+#if defined(_MSC_VER)
+//For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.
+#  define WIN32_LEAN_AND_MEAN
+#  define WIN32_EXTRA_LEAN
+#  include <windows.h>
+
+static bool
+check_edsp(void)
+{
+#  if defined(MOZILLA_MAY_SUPPORT_EDSP)
+  __try
+  {
+    //PLD [r13]
+    __emit(0xF5DDF000);
+    return true;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION)
+  {
+    //Ignore exception.
+  }
+#  endif
+  return false;
+}
+
+static bool
+check_armv6(void)
+{
+#  if defined(MOZILLA_MAY_SUPPORT_ARMV6)
+  __try
+  {
+    //SHADD8 r3,r3,r3
+    __emit(0xE6333F93);
+    return true;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION)
+  {
+    //Ignore exception.
+  }
+#  endif
+  return false;
+}
+
+static bool
+check_neon(void)
+{
+#  if defined(MOZILLA_MAY_SUPPORT_NEON)
+  __try
+  {
+    //VORR q0,q0,q0
+    __emit(0xF2200150);
+    return true;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION)
+  {
+    //Ignore exception.
+  }
+#  endif
+  return false;
+}
+
+#elif defined(__linux__) || defined(ANDROID)
+#  include <stdio.h>
+#  include <stdlib.h>
+#  include <string.h>
+
+enum{
+  MOZILLA_HAS_EDSP_FLAG=1,
+  MOZILLA_HAS_ARMV6_FLAG=2,
+  MOZILLA_HAS_NEON_FLAG=4
+};
+
+static unsigned
+get_arm_cpu_flags(void)
+{
+  unsigned  flags;
+  FILE     *fin;
+  flags = 0;
+  /*Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+    Android. This also means that detection will fail in Scratchbox, which is
+    desirable, as NEON does not work in the qemu shipped with the Maemo 5 SDK.
+    I don't know if /proc/self/auxv would do any better in that case, anyway,
+    or if it would return random flags from the host CPU.*/
+  fin = fopen ("/proc/cpuinfo","r");
+  if (fin != NULL)
+  {
+    /*512 should be enough for anybody (it's even enough for all the flags that
+      x86 has accumulated... so far).*/
+    char buf[512];
+    while (fgets(buf, 511, fin) != NULL)
+    {
+      if (memcmp(buf, "Features", 8) == 0)
+      {
+        char *p;
+        p = strstr(buf, " edsp");
+        if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= MOZILLA_HAS_EDSP_FLAG;
+        p = strstr(buf, " neon");
+        if( p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= MOZILLA_HAS_NEON_FLAG;
+      }
+      if (memcmp(buf, "CPU architecture:", 17) == 0)
+      {
+        int version;
+        version = atoi(buf + 17);
+        if (version >= 6)
+          flags |= MOZILLA_HAS_ARMV6_FLAG;
+      }
+    }
+    fclose(fin);
+  }
+  return flags;
+}
+
+// Cache a local copy so we only have to read /proc/cpuinfo once.
+static unsigned arm_cpu_flags = get_arm_cpu_flags();
+
+static bool
+check_edsp(void)
+{
+  return (arm_cpu_flags & MOZILLA_HAS_EDSP_FLAG) != 0;
+}
+
+static bool
+check_armv6(void)
+{
+  return (arm_cpu_flags & MOZILLA_HAS_ARMV6_FLAG) != 0;
+}
+
+static bool
+check_neon(void)
+{
+  return (arm_cpu_flags & MOZILLA_HAS_NEON_FLAG) != 0;
+}
+
+#endif
+
+}
+
+namespace mozilla {
+  namespace arm_private {
+#if defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+#if !defined(MOZILLA_PRESUME_EDSP)
+    bool edsp_enabled = check_edsp();
+#endif
+#if !defined(MOZILLA_PRESUME_ARMV6)
+    bool armv6_enabled = check_armv6();
+#endif
+#if !defined(MOZILLA_PRESUME_NEON)
+    bool neon_enabled = check_neon();
+#endif
+#endif
+  } // namespace arm_private
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/xpcom/glue/arm.h
@@ -0,0 +1,180 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is arm.h
+ *
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Timothy B. Terriberry <tterriberry@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+/* compile-time and runtime tests for whether to use various ARM extensions */
+
+#ifndef mozilla_arm_h_
+#define mozilla_arm_h_
+
+// for definition of NS_COM_GLUE
+#include "nscore.h"
+
+/* This is patterned after SSE.h, but provides ARMv5E, ARMv6, and NEON
+   detection. For reasons similar to the SSE code, code using NEON (even just
+   in inline asm) needs to be in a separate compilation unit from the regular
+   code, because it requires an ".fpu neon" directive which can't be undone.
+   ARMv5E and ARMv6 code may also require an .arch directive, since by default
+   the assembler refuses to generate code for opcodes outside of its current
+   .arch setting.
+
+   TODO: Add Thumb, Thumb2, VFP, iwMMX, etc. detection, if we need it. */
+
+#if defined(__GNUC__) && defined(__arm__)
+
+#  define MOZILLA_ARM_ARCH 3
+
+#  if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) \
+   || defined(_ARM_ARCH_4)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 4
+#  endif
+
+#  if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+   || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+   || defined(__ARM_ARCH_5TEJ__) || defined(_ARM_ARCH_5)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 5
+#  endif
+
+#  if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+   || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+   || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+   || defined(__ARM_ARCH_6M__) || defined(_ARM_ARCH_6)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 6
+#  endif
+
+#  if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+   || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+   || defined(__ARM_ARCH_7EM__) || defined(_ARM_ARCH_7)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 7
+#  endif
+
+
+#  if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)
+#    define MOZILLA_MAY_SUPPORT_EDSP 1
+#  endif
+
+#  if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#    if defined(HAVE_ARM_SIMD)
+#      define MOZILLA_MAY_SUPPORT_ARMV6 1
+#    endif
+#  endif
+
+  // Technically 4.2.x only works in the CodeSourcery releases, but I don't
+  // know how to detect those separately from mainline gcc (which got support
+  // in 4.3). The Maemo version 5 SDK shipped with the CodeSourcery 4.2.1
+  // release, which we need to work.
+#  if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)
+#    if defined(HAVE_ARM_NEON)
+#      define MOZILLA_MAY_SUPPORT_NEON 1
+#    endif
+#  endif
+
+  // Currently we only have CPU detection for Linux via /proc/cpuinfo
+#  if defined(__linux__) || defined(ANDROID)
+#    define MOZILLA_ARM_HAVE_CPUID_DETECTION 1
+#  endif
+
+#elif defined(_MSC_VER) && defined(_M_ARM)
+
+#  define MOZILLA_ARM_HAVE_CPUID_DETECTION 1
+  // I don't know how to do arch detection at compile time for MSVC, so assume
+  // the worst for now.
+#  define MOZILLA_ARM_ARCH 3
+
+  // MSVC only allows external asm for ARM, so we don't have to rely on
+  // compiler support.
+#  define MOZILLA_MAY_SUPPORT_EDSP 1
+#  if defined(HAVE_ARM_SIMD)
+#    define MOZILLA_MAY_SUPPORT_ARMV6 1
+#  endif
+#  if defined(HAVE_ARM_NEON)  // NEON has its own flag; HAVE_ARM_SIMD gates ARMv6
+#    define MOZILLA_MAY_SUPPORT_NEON 1
+#  endif
+
+#endif
+
+namespace mozilla {
+
+  namespace arm_private {
+#if defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+#if !defined(MOZILLA_PRESUME_EDSP)
+    extern bool NS_COM_GLUE edsp_enabled;
+#endif
+#if !defined(MOZILLA_PRESUME_ARMV6)
+    extern bool NS_COM_GLUE armv6_enabled;
+#endif
+#if !defined(MOZILLA_PRESUME_NEON)
+    extern bool NS_COM_GLUE neon_enabled;
+#endif
+#endif
+  }
+
+#if defined(MOZILLA_PRESUME_EDSP)
+#  define MOZILLA_MAY_SUPPORT_EDSP 1
+  inline bool supports_edsp() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_EDSP) \
+   && defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+  inline bool supports_edsp() { return arm_private::edsp_enabled; }
+#else
+  inline bool supports_edsp() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_ARMV6)
+#  define MOZILLA_MAY_SUPPORT_ARMV6 1
+  inline bool supports_armv6() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_ARMV6) \
+   && defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+  inline bool supports_armv6() { return arm_private::armv6_enabled; }
+#else
+  inline bool supports_armv6() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_NEON)
+#  define MOZILLA_MAY_SUPPORT_NEON 1
+  inline bool supports_neon() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_NEON) \
+   && defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+  inline bool supports_neon() { return arm_private::neon_enabled; }
+#else
+  inline bool supports_neon() { return false; }
+#endif
+
+}
+
+#endif /* !defined(mozilla_arm_h_) */
--- a/xpcom/glue/nomozalloc/Makefile.in
+++ b/xpcom/glue/nomozalloc/Makefile.in
@@ -64,17 +64,17 @@ CPPSRCS		= \
 		nsStringAPI.cpp \
 		GenericModule.cpp \
 		$(NULL)
 
 SDK_LIBRARY     =                        \
 		$(LIB_PREFIX)xpcomglue_s_nomozalloc.$(LIB_SUFFIX) \
 		$(NULL)
 
-GARBAGE += $(CSRCS) $(CPPSRCS) DeadlockDetector.h SSE.h
+GARBAGE += $(CSRCS) $(CPPSRCS) DeadlockDetector.h SSE.h arm.h
 
 # we don't want the shared lib, but we want to force the creation of a static lib.
 FORCE_STATIC_LIB = 1
 
 # Force use of PIC
 FORCE_USE_PIC	= 1
 
 # Pretend we're statically linking the CRT, even though we might not be: this
@@ -89,16 +89,16 @@ include $(topsrcdir)/config/rules.mk
 
 ifdef _MSC_VER
 # Don't include directives about which CRT to use
 OS_COMPILE_CXXFLAGS += -Zl
 OS_COMPILE_CFLAGS += -Zl
 DEFINES += -D_USE_ANSI_CPP
 endif
 
-export:: $(XPCOM_GLUE_SRC_CSRCS) $(XPCOM_GLUE_SRC_CPPSRCS) $(XPCOM_GLUENS_SRC_CPPSRCS) $(topsrcdir)/xpcom/glue/nsStringAPI.cpp $(topsrcdir)/xpcom/glue/GenericModule.cpp $(topsrcdir)/xpcom/glue/DeadlockDetector.h $(topsrcdir)/xpcom/glue/SSE.h
+export:: $(XPCOM_GLUE_SRC_CSRCS) $(XPCOM_GLUE_SRC_CPPSRCS) $(XPCOM_GLUENS_SRC_CPPSRCS) $(topsrcdir)/xpcom/glue/nsStringAPI.cpp $(topsrcdir)/xpcom/glue/GenericModule.cpp $(topsrcdir)/xpcom/glue/DeadlockDetector.h $(topsrcdir)/xpcom/glue/SSE.h $(topsrcdir)/xpcom/glue/arm.h
 	$(INSTALL) $^ .
 
 ifdef TARGET_XPCOM_ABI
 DEFINES += -DTARGET_XPCOM_ABI=\"$(TARGET_XPCOM_ABI)\"
 endif
 
 DEFINES += -DMOZ_NO_MOZALLOC
--- a/xpcom/glue/objs.mk
+++ b/xpcom/glue/objs.mk
@@ -70,16 +70,17 @@ XPCOM_GLUE_SRC_LCPPSRCS =        \
   $(NULL)
 
 XPCOM_GLUE_SRC_CPPSRCS = $(addprefix $(topsrcdir)/xpcom/glue/, $(XPCOM_GLUE_SRC_LCPPSRCS))
 
 XPCOM_GLUENS_SRC_LCPPSRCS =      \
   BlockingResourceBase.cpp       \
   DeadlockDetector.cpp           \
   SSE.cpp                        \
+  arm.cpp                        \
   unused.cpp                     \
   nsProxyRelease.cpp             \
   nsTextFormatter.cpp            \
   GenericFactory.cpp             \
   FileUtils.cpp                  \
   $(NULL)
 
 XPCOM_GLUENS_SRC_CPPSRCS = $(addprefix $(topsrcdir)/xpcom/glue/,$(XPCOM_GLUENS_SRC_LCPPSRCS))