bug 1171649 - Implement arm/iOS support in JS JITs. r=jandem
author Ted Mielczarek <ted@mielczarek.org>
Wed, 23 Sep 2015 10:21:41 -0400
changeset 264153 cbceedb4370fbe729def191742ed1b5a4df34fa3
parent 264152 ebbd463ff0d6971ada8c67a7a31c09874d342e51
child 264154 50de26148bc6780056d33fb5a2abbb9a0ebfa659
push id 29429
push user cbook@mozilla.com
push date Thu, 24 Sep 2015 10:05:08 +0000
treeherder mozilla-central@001942e4617b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jandem
bugs 1171649
milestone 44.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 1171649 - Implement arm/iOS support in JS JITs. r=jandem This patch includes some assembly files from llvm's compiler-rt to implement __aeabi_idivmod and __aeabi_uidivmod.
js/src/asmjs/AsmJSSignalHandlers.cpp
js/src/irregexp/NativeRegExpMacroAssembler.cpp
js/src/jit/ExecutableAllocator.h
js/src/jit/arm/Architecture-arm.cpp
js/src/jit/arm/Architecture-arm.h
js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S
js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S
js/src/jit/arm/llvm-compiler-rt/assembly.h
js/src/moz.build
--- a/js/src/asmjs/AsmJSSignalHandlers.cpp
+++ b/js/src/asmjs/AsmJSSignalHandlers.cpp
@@ -196,16 +196,17 @@ class AutoSetHandlingSignal
 # if defined(__FreeBSD__) && defined(__arm__)
 #  define R15_sig(p) ((p)->uc_mcontext.__gregs[_REG_R15])
 # else
 #  define R15_sig(p) ((p)->uc_mcontext.mc_r15)
 # endif
 #elif defined(XP_DARWIN)
 # define EIP_sig(p) ((p)->uc_mcontext->__ss.__eip)
 # define RIP_sig(p) ((p)->uc_mcontext->__ss.__rip)
+# define R15_sig(p) ((p)->uc_mcontext->__ss.__pc)
 #else
 # error "Don't know how to read/write to the thread state via the mcontext_t."
 #endif
 
 #if defined(XP_WIN)
 # include "jswin.h"
 #else
 # include <signal.h>
@@ -315,22 +316,30 @@ enum { REG_EIP = 14 };
 // sigaction-style signal handler.
 #if defined(XP_DARWIN) && defined(ASMJS_MAY_USE_SIGNAL_HANDLERS_FOR_OOB)
 # if defined(JS_CODEGEN_X64)
 struct macos_x64_context {
     x86_thread_state64_t thread;
     x86_float_state64_t float_;
 };
 #  define EMULATOR_CONTEXT macos_x64_context
-# else
+# elif defined(JS_CODEGEN_X86)
 struct macos_x86_context {
     x86_thread_state_t thread;
     x86_float_state_t float_;
 };
 #  define EMULATOR_CONTEXT macos_x86_context
+# elif defined(JS_CODEGEN_ARM)
+struct macos_arm_context {
+    arm_thread_state_t thread;
+    arm_neon_state_t float_;
+};
+#  define EMULATOR_CONTEXT macos_arm_context
+# else
+#  error Unsupported architecture
 # endif
 #else
 # define EMULATOR_CONTEXT CONTEXT
 #endif
 
 #if defined(JS_CPU_X64)
 # define PC_sig(p) RIP_sig(p)
 #elif defined(JS_CPU_X86)
@@ -798,20 +807,26 @@ AsmJSFaultHandler(LPEXCEPTION_POINTERS e
 
 static uint8_t**
 ContextToPC(EMULATOR_CONTEXT* context)
 {
 # if defined(JS_CPU_X64)
     static_assert(sizeof(context->thread.__rip) == sizeof(void*),
                   "stored IP should be compile-time pointer-sized");
     return reinterpret_cast<uint8_t**>(&context->thread.__rip);
-# else
+# elif defined(JS_CPU_X86)
     static_assert(sizeof(context->thread.uts.ts32.__eip) == sizeof(void*),
                   "stored IP should be compile-time pointer-sized");
     return reinterpret_cast<uint8_t**>(&context->thread.uts.ts32.__eip);
+# elif defined(JS_CPU_ARM)
+    static_assert(sizeof(context->thread.__pc) == sizeof(void*),
+                  "stored IP should be compile-time pointer-sized");
+    return reinterpret_cast<uint8_t**>(&context->thread.__pc);
+# else
+#  error Unsupported architecture
 # endif
 }
 
 // This definition was generated by mig (the Mach Interface Generator) for the
 // routine 'exception_raise' (exc.defs).
 #pragma pack(4)
 typedef struct {
     mach_msg_header_t Head;
@@ -847,21 +862,28 @@ HandleMachException(JSRuntime* rt, const
 
     // Read out the JSRuntime thread's register state.
     EMULATOR_CONTEXT context;
 # if defined(JS_CODEGEN_X64)
     unsigned int thread_state_count = x86_THREAD_STATE64_COUNT;
     unsigned int float_state_count = x86_FLOAT_STATE64_COUNT;
     int thread_state = x86_THREAD_STATE64;
     int float_state = x86_FLOAT_STATE64;
-# else
+# elif defined(JS_CODEGEN_X86)
     unsigned int thread_state_count = x86_THREAD_STATE_COUNT;
     unsigned int float_state_count = x86_FLOAT_STATE_COUNT;
     int thread_state = x86_THREAD_STATE;
     int float_state = x86_FLOAT_STATE;
+# elif defined(JS_CODEGEN_ARM)
+    unsigned int thread_state_count = ARM_THREAD_STATE_COUNT;
+    unsigned int float_state_count = ARM_NEON_STATE_COUNT;
+    int thread_state = ARM_THREAD_STATE;
+    int float_state = ARM_NEON_STATE;
+# else
+#  error Unsupported architecture
 # endif
     kern_return_t kret;
     kret = thread_get_state(rtThread, thread_state,
                             (thread_state_t)&context.thread, &thread_state_count);
     if (kret != KERN_SUCCESS)
         return false;
     kret = thread_get_state(rtThread, float_state,
                             (thread_state_t)&context.float_, &float_state_count);
--- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
@@ -128,16 +128,24 @@ NativeRegExpMacroAssembler::GenerateCode
 
     // Push non-volatile registers which might be modified by jitcode.
     size_t pushedNonVolatileRegisters = 0;
     for (GeneralRegisterForwardIterator iter(savedNonVolatileRegisters); iter.more(); ++iter) {
         masm.Push(*iter);
         pushedNonVolatileRegisters++;
     }
 
+#if defined(XP_IOS) && defined(JS_CODEGEN_ARM)
+    // The stack is 4-byte aligned on iOS, force 8-byte alignment.
+    masm.movePtr(StackPointer, temp0);      // keep the incoming (possibly unaligned) SP in temp0
+    masm.andPtr(Imm32(~7), StackPointer);   // clear the low three bits: SP rounded down to a multiple of 8
+    masm.push(temp0);                       // stash the original SP on the realigned stack
+    masm.push(temp0);                       // second copy; presumably padding so two 4-byte words keep SP 8-byte aligned
+#endif
+
 #ifndef JS_CODEGEN_X86
     // The InputOutputData* is stored as an argument, save it on the stack
     // above the frame.
     masm.Push(IntArgReg0);
 #endif
 
     size_t frameSize = sizeof(FrameData) + num_registers_ * sizeof(void*);
     frameSize = JS_ROUNDUP(frameSize + masm.framePushed(), ABIStackAlignment) - masm.framePushed();
@@ -364,16 +372,21 @@ NativeRegExpMacroAssembler::GenerateCode
 
 #ifndef JS_CODEGEN_X86
     // Include the InputOutputData* when adjusting the stack size.
     masm.freeStack(frameSize + sizeof(void*));
 #else
     masm.freeStack(frameSize);
 #endif
 
+#if defined(XP_IOS) && defined(JS_CODEGEN_ARM)
+    masm.pop(temp0);                        // reload the original SP that the prologue pushed after realigning
+    masm.movePtr(temp0, StackPointer);      // switch back to it, discarding the alignment padding
+#endif
+
     // Restore non-volatile registers which were saved on entry.
     for (GeneralRegisterBackwardIterator iter(savedNonVolatileRegisters); iter.more(); ++iter)
         masm.Pop(*iter);
 
     masm.abiret();
 
     // Backtrack code (branch target for conditional backtracks).
     if (backtrack_label_.used()) {
--- a/js/src/jit/ExecutableAllocator.h
+++ b/js/src/jit/ExecutableAllocator.h
@@ -54,16 +54,20 @@ static void sync_instruction_memory(cadd
 extern  "C" void sync_instruction_memory(caddr_t v, u_int len);
 #endif
 #endif
 
 #if defined(JS_CODEGEN_MIPS32) && defined(__linux__) && !defined(JS_SIMULATOR_MIPS32)
 #include <sys/cachectl.h>
 #endif
 
+#if defined(JS_CODEGEN_ARM) && defined(XP_IOS)
+#include <libkern/OSCacheControl.h>
+#endif
+
 namespace JS {
     struct CodeSizes;
 } // namespace JS
 
 namespace js {
 namespace jit {
   enum CodeKind { ION_CODE = 0, BASELINE_CODE, REGEXP_CODE, OTHER_CODE };
 
@@ -404,16 +408,21 @@ class ExecutableAllocator
         _flush_cache(reinterpret_cast<char*>(code), size, BCACHE);
 #endif
     }
 #elif defined(JS_CODEGEN_ARM) && (defined(__FreeBSD__) || defined(__NetBSD__))
     static void cacheFlush(void* code, size_t size)
     {
         __clear_cache(code, reinterpret_cast<char*>(code) + size);
     }
+#elif defined(JS_CODEGEN_ARM) && defined(XP_IOS)
+    static void cacheFlush(void* code, size_t size)
+    {
+        sys_icache_invalidate(code, size);
+    }
 #elif defined(JS_CODEGEN_ARM) && (defined(__linux__) || defined(ANDROID)) && defined(__GNUC__)
     static void cacheFlush(void* code, size_t size)
     {
         asm volatile (
             "push    {r7}\n"
             "mov     r0, %0\n"
             "mov     r1, %1\n"
             "mov     r7, #0xf0000\n"
--- a/js/src/jit/arm/Architecture-arm.cpp
+++ b/js/src/jit/arm/Architecture-arm.cpp
@@ -1,17 +1,17 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "jit/arm/Architecture-arm.h"
 
-#ifndef JS_SIMULATOR_ARM
+#if !defined(JS_SIMULATOR_ARM) && !defined(__APPLE__)  // <elf.h> is skipped for simulator and Apple builds
 #include <elf.h>
 #endif
 
 #include <fcntl.h>
 #include <unistd.h>
 
 #include "jit/arm/Assembler-arm.h"
 #include "jit/RegisterSets.h"
@@ -240,16 +240,25 @@ InitARMFlags()
     flags |= HWCAP_VFP;
 #endif
 
 #if defined(__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
     // Compiled to use ARMv7 instructions so assume the ARMv7 arch.
     flags |= HWCAP_ARMv7;
 #endif
 
+#if defined(__APPLE__)  // Apple targets: derive capabilities from compiler-predefined macros.
+    #if defined(__ARM_NEON__)
+        flags |= HWCAP_NEON;
+    #endif
+    #if defined(__ARMVFPV3__)  // NOTE(review): clang spells this __ARM_VFPV3__ -- confirm the macro name.
+        flags |= HWCAP_VFPv3 | HWCAP_VFPD32;
+    #endif
+#endif
+
 #endif // JS_SIMULATOR_ARM
 
     armHwCapFlags = CanonicalizeARMHwCapFlags(flags);
 
     JitSpew(JitSpew_Codegen, "ARM HWCAP: 0x%x\n", armHwCapFlags);
     return;
 }
 
--- a/js/src/jit/arm/Architecture-arm.h
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -10,18 +10,19 @@
 #include "mozilla/MathAlgorithms.h"
 
 #include <limits.h>
 #include <stdint.h>
 
 #include "js/Utility.h"
 
 // GCC versions 4.6 and above define __ARM_PCS_VFP to denote a hard-float
-// ABI target.
-#if defined(__ARM_PCS_VFP)
+// ABI target. The iOS toolchain doesn't define anything specific here,
+// but iOS always supports VFP.
+#if defined(__ARM_PCS_VFP) || defined(XP_IOS)
 #define JS_CODEGEN_ARM_HARDFP
 #endif
 
 namespace js {
 namespace jit {
 
 // In bytes: slots needed for potential memory->memory move spills.
 //   +8 for cycles
@@ -104,25 +105,32 @@ class Registers
 
     static const SetType AllMask = (1 << Total) - 1;
     static const SetType ArgRegMask = (1 << r0) | (1 << r1) | (1 << r2) | (1 << r3);
 
     static const SetType VolatileMask =
         (1 << r0) |
         (1 << r1) |
         (1 << Registers::r2) |
-        (1 << Registers::r3);
+        (1 << Registers::r3)
+#if defined(XP_IOS)
+        // per https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html#//apple_ref/doc/uid/TP40009021-SW4
+        | (1 << Registers::r9)
+#endif
+              ;
 
     static const SetType NonVolatileMask =
         (1 << Registers::r4) |
         (1 << Registers::r5) |
         (1 << Registers::r6) |
         (1 << Registers::r7) |
         (1 << Registers::r8) |
+#if !defined(XP_IOS)
         (1 << Registers::r9) |
+#endif
         (1 << Registers::r10) |
         (1 << Registers::r11) |
         (1 << Registers::r12) |
         (1 << Registers::r14);
 
     static const SetType WrapperMask =
         VolatileMask |         // = arguments
         (1 << Registers::r4) | // = outReg
new file mode 100644
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S
@@ -0,0 +1,27 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+//   int rem, quot;
+//   quot = __divmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+        .syntax unified
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+        push    { lr }
+        sub     sp, sp, #4
+        mov     r2, sp
+        bl      SYMBOL_NAME(__divmodsi4)
+        ldr     r1, [sp]
+        add     sp, sp, #4
+        pop     { pc }
new file mode 100644
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S
@@ -0,0 +1,28 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+//        __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+//   unsigned rem, quot;
+//   quot = __udivmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+        .syntax unified
+        .align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+        push    { lr }
+        sub     sp, sp, #4
+        mov     r2, sp
+        bl      SYMBOL_NAME(__udivmodsi4)
+        ldr     r1, [sp]
+        add     sp, sp, #4
+        pop     { pc }
new file mode 100644
--- /dev/null
+++ b/js/src/jit/arm/llvm-compiler-rt/assembly.h
@@ -0,0 +1,70 @@
+/* ===-- assembly.h - compiler-rt assembler support macros -----------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file defines macros for use in compiler-rt assembler source.
+ * This file is not part of the interface of this library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__)
+#define SEPARATOR @
+#else
+#define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+#define HIDDEN_DIRECTIVE .private_extern
+#define LOCAL_LABEL(name) L_##name
+#else
+#define HIDDEN_DIRECTIVE .hidden
+#define LOCAL_LABEL(name) .L_##name
+#endif
+
+#define GLUE2(a, b) a ## b
+#define GLUE(a, b) GLUE2(a, b)
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+#define DECLARE_SYMBOL_VISIBILITY(name)                    \
+  HIDDEN_DIRECTIVE SYMBOL_NAME(name) SEPARATOR
+#else
+#define DECLARE_SYMBOL_VISIBILITY(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name)                   \
+  .globl SYMBOL_NAME(name) SEPARATOR                       \
+  DECLARE_SYMBOL_VISIBILITY(name)                          \
+  SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name)           \
+  .globl SYMBOL_NAME(name) SEPARATOR                       \
+  HIDDEN_DIRECTIVE SYMBOL_NAME(name) SEPARATOR             \
+  SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+  .globl name SEPARATOR                                    \
+  HIDDEN_DIRECTIVE name SEPARATOR                          \
+  name:
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)     \
+  .globl SYMBOL_NAME(name) SEPARATOR                       \
+  .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
+
+#if defined (__ARM_EABI__)
+# define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)      \
+  DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+# define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#endif /* COMPILERRT_ASSEMBLY_H */
--- a/js/src/moz.build
+++ b/js/src/moz.build
@@ -433,16 +433,21 @@ elif CONFIG['JS_CODEGEN_ARM']:
         'jit/arm/MoveEmitter-arm.cpp',
         'jit/arm/SharedIC-arm.cpp',
         'jit/arm/Trampoline-arm.cpp',
     ]
     if CONFIG['JS_SIMULATOR_ARM']:
         UNIFIED_SOURCES += [
             'jit/arm/Simulator-arm.cpp'
         ]
+    elif CONFIG['OS_ARCH'] == 'Darwin':
+        SOURCES += [
+            'jit/arm/llvm-compiler-rt/arm/aeabi_idivmod.S',
+            'jit/arm/llvm-compiler-rt/arm/aeabi_uidivmod.S',
+        ]
 elif CONFIG['JS_CODEGEN_ARM64']:
     UNIFIED_SOURCES += [
         'jit/arm64/Architecture-arm64.cpp',
         'jit/arm64/Assembler-arm64.cpp',
         'jit/arm64/Bailouts-arm64.cpp',
         'jit/arm64/BaselineIC-arm64.cpp',
         'jit/arm64/CodeGenerator-arm64.cpp',
         'jit/arm64/Lowering-arm64.cpp',