Bug 986981 - OdinMonkey: Optimize addresses with constant offsets on x86 and x64 r=luke
author: Dan Gohman <sunfish@mozilla.com>
date: Tue, 24 Feb 2015 07:50:14 -0800
changeset 230537 11a0fa1a0122a33151329be8353f57d19aa081ae
parent 230536 12ea42444af916a344a3975dfbd1a25e6418da79
child 230538 a8f5d952aff54a740ea82680b3ab816e0d23aea6
push id: 14067
push user: kwierso@gmail.com
push date: Tue, 24 Feb 2015 23:43:35 +0000
treeherder: b2g-inbound@5f26f19972f5
reviewers: luke
bugs: 986981
milestone: 39.0a1
Bug 986981 - OdinMonkey: Optimize addresses with constant offsets on x86 and x64 r=luke
js/src/asmjs/AsmJSModule.cpp
js/src/asmjs/AsmJSSignalHandlers.cpp
js/src/asmjs/AsmJSValidate.h
js/src/jit-test/tests/asm.js/testAddressErrors.js
js/src/jit-test/tests/asm.js/testZOOB.js
js/src/jit/CodeGenerator.cpp
js/src/jit/Disassembler.h
js/src/jit/EffectiveAddressAnalysis.cpp
js/src/jit/MIR.h
js/src/jit/MIRGenerator.h
js/src/jit/MIRGraph.cpp
js/src/jit/arm/Architecture-arm.h
js/src/jit/mips/Architecture-mips.h
js/src/jit/shared/Assembler-shared.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/CodeGenerator-shared-inl.h
js/src/jit/shared/CodeGenerator-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/Lowering-shared-inl.h
js/src/jit/shared/Lowering-shared.h
js/src/jit/x64/Architecture-x64.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/Lowering-x64.cpp
js/src/jit/x86/Architecture-x86.h
js/src/jit/x86/Assembler-x86.h
js/src/jit/x86/CodeGenerator-x86.cpp
js/src/jit/x86/CodeGenerator-x86.h
js/src/jit/x86/Lowering-x86.cpp
--- a/js/src/asmjs/AsmJSModule.cpp
+++ b/js/src/asmjs/AsmJSModule.cpp
@@ -245,17 +245,17 @@ AsmJSModule::lookupCodeRange(void *pc) c
     return &codeRanges_[match];
 }
 
 struct HeapAccessOffset
 {
     const AsmJSHeapAccessVector &accesses;
     explicit HeapAccessOffset(const AsmJSHeapAccessVector &accesses) : accesses(accesses) {}
     uintptr_t operator[](size_t index) const {
-        return accesses[index].offset();
+        return accesses[index].insnOffset();
     }
 };
 
 const AsmJSHeapAccess *
 AsmJSModule::lookupHeapAccess(void *pc) const
 {
     MOZ_ASSERT(isFinished());
     MOZ_ASSERT(containsFunctionPC(pc));
@@ -323,17 +323,17 @@ AsmJSModule::finish(ExclusiveContext *cx
     // Call-site metadata used for stack unwinding.
     callSites_ = masm.extractCallSites();
 
 #if defined(JS_CODEGEN_ARM)
     // ARM requires the offsets to be updated.
     pod.functionBytes_ = masm.actualOffset(pod.functionBytes_);
     for (size_t i = 0; i < heapAccesses_.length(); i++) {
         AsmJSHeapAccess &a = heapAccesses_[i];
-        a.setOffset(masm.actualOffset(a.offset()));
+        a.setInsnOffset(masm.actualOffset(a.insnOffset()));
     }
     for (unsigned i = 0; i < numExportedFunctions(); i++) {
         if (!exportedFunction(i).isChangeHeap())
             exportedFunction(i).updateCodeOffset(masm);
     }
     for (unsigned i = 0; i < numExits(); i++)
         exit(i).updateOffsets(masm);
     for (size_t i = 0; i < callSites_.length(); i++) {
@@ -769,96 +769,97 @@ AsmJSModule::staticallyLink(ExclusiveCon
         exitDatum.exit = interpExitTrampoline(exits_[i]);
         exitDatum.fun = nullptr;
         exitDatum.baselineScript = nullptr;
     }
 
     MOZ_ASSERT(isStaticallyLinked());
 }
 
-#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
-static size_t
-ByteSizeOfHeapAccess(const jit::AsmJSHeapAccess access)
-{
-    Scalar::Type type = access.type();
-    if (Scalar::isSimdType(type))
-        return Scalar::scalarByteSize(type) * access.numSimdElems();
-    return TypedArrayElemSize(type);
-}
-#endif
 void
 AsmJSModule::initHeap(Handle<ArrayBufferObjectMaybeShared *> heap, JSContext *cx)
 {
     MOZ_ASSERT_IF(heap->is<ArrayBufferObject>(), heap->as<ArrayBufferObject>().isAsmJS());
     MOZ_ASSERT(IsValidAsmJSHeapLength(heap->byteLength()));
     MOZ_ASSERT(dynamicallyLinked_);
     MOZ_ASSERT(!maybeHeap_);
 
     maybeHeap_ = heap;
     heapDatum() = heap->dataPointer();
 
 #if defined(JS_CODEGEN_X86)
     uint8_t *heapOffset = heap->dataPointer();
+    uint32_t heapLength = heap->byteLength();
     for (unsigned i = 0; i < heapAccesses_.length(); i++) {
         const jit::AsmJSHeapAccess &access = heapAccesses_[i];
-        if (access.hasLengthCheck()) {
-            // An access is out-of-bounds iff
-            //      ptr + data-type-byte-size > heapLength
-            // i.e. ptr >= heapLength + 1 - data-type-byte-size
-            // (Note that we need >= as this is what codegen uses.)
-            size_t scalarByteSize = ByteSizeOfHeapAccess(access);
-            X86Encoding::SetPointer(access.patchLengthAt(code_),
-                                    (void*)(heap->byteLength() + 1 - scalarByteSize));
-        }
-        void *addr = access.patchOffsetAt(code_);
+        // An access is out-of-bounds iff
+        //      ptr + offset + data-type-byte-size > heapLength
+        // i.e. ptr > heapLength - data-type-byte-size - offset.
+        // data-type-byte-size and offset are already included in the addend
+        // so we just have to add the heap length here.
+        if (access.hasLengthCheck())
+            X86Encoding::AddInt32(access.patchLengthAt(code_), heapLength);
+        void *addr = access.patchHeapPtrImmAt(code_);
         uint32_t disp = reinterpret_cast<uint32_t>(X86Encoding::GetPointer(addr));
         MOZ_ASSERT(disp <= INT32_MAX);
         X86Encoding::SetPointer(addr, (void *)(heapOffset + disp));
     }
 #elif defined(JS_CODEGEN_X64)
     // Even with signal handling being used for most bounds checks, there may be
     // atomic operations that depend on explicit checks.
     //
     // If we have any explicit bounds checks, we need to patch the heap length
     // checks at the right places. All accesses that have been recorded are the
     // only ones that need bound checks (see also
     // CodeGeneratorX64::visitAsmJS{Load,Store,CompareExchange,AtomicBinop}Heap)
-    int32_t heapLength = int32_t(intptr_t(heap->byteLength()));
+    uint32_t heapLength = heap->byteLength();
     for (size_t i = 0; i < heapAccesses_.length(); i++) {
         const jit::AsmJSHeapAccess &access = heapAccesses_[i];
-        if (access.hasLengthCheck()) {
-            // See comment above for x86 codegen.
-            size_t scalarByteSize = ByteSizeOfHeapAccess(access);
-            X86Encoding::SetInt32(access.patchLengthAt(code_), heapLength + 1 - scalarByteSize);
-        }
+        // See comment above for x86 codegen.
+        if (access.hasLengthCheck())
+            X86Encoding::AddInt32(access.patchLengthAt(code_), heapLength);
     }
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
     uint32_t heapLength = heap->byteLength();
     for (unsigned i = 0; i < heapAccesses_.length(); i++) {
         jit::Assembler::UpdateBoundsCheck(heapLength,
-                                          (jit::Instruction*)(heapAccesses_[i].offset() + code_));
+                                          (jit::Instruction*)(heapAccesses_[i].insnOffset() + code_));
     }
 #endif
 }
 
 void
 AsmJSModule::restoreHeapToInitialState(ArrayBufferObjectMaybeShared *maybePrevBuffer)
 {
 #if defined(JS_CODEGEN_X86)
     if (maybePrevBuffer) {
         // Subtract out the base-pointer added by AsmJSModule::initHeap.
         uint8_t *ptrBase = maybePrevBuffer->dataPointer();
+        uint32_t heapLength = maybePrevBuffer->byteLength();
         for (unsigned i = 0; i < heapAccesses_.length(); i++) {
             const jit::AsmJSHeapAccess &access = heapAccesses_[i];
-            void *addr = access.patchOffsetAt(code_);
+            // Subtract the heap length back out, leaving the raw displacement in place.
+            if (access.hasLengthCheck())
+                X86Encoding::AddInt32(access.patchLengthAt(code_), -heapLength);
+            void *addr = access.patchHeapPtrImmAt(code_);
             uint8_t *ptr = reinterpret_cast<uint8_t*>(X86Encoding::GetPointer(addr));
             MOZ_ASSERT(ptr >= ptrBase);
             X86Encoding::SetPointer(addr, (void *)(ptr - ptrBase));
         }
     }
+#elif defined(JS_CODEGEN_X64)
+    if (maybePrevBuffer) {
+        uint32_t heapLength = maybePrevBuffer->byteLength();
+        for (unsigned i = 0; i < heapAccesses_.length(); i++) {
+            const jit::AsmJSHeapAccess &access = heapAccesses_[i];
+            // See comment above for x86 codegen.
+            if (access.hasLengthCheck())
+                X86Encoding::AddInt32(access.patchLengthAt(code_), -heapLength);
+        }
+    }
 #endif
 
     maybeHeap_ = nullptr;
     heapDatum() = nullptr;
 }
 
 void
 AsmJSModule::restoreToInitialState(ArrayBufferObjectMaybeShared *maybePrevBuffer,
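
The patching above works because the length-check immediate is emitted with an addend of -(constant offset + access size); adding the heap length in place, as initHeap now does, leaves exactly the bound stated in the comment, and restoreHeapToInitialState undoes it by adding -heapLength. Below is a minimal standalone sketch of that arithmetic, with illustrative helper names rather than the real SpiderMonkey interfaces:

    #include <cassert>
    #include <cstdint>

    // Emitted at compile time: the immediate starts out as -(constOffset + accessSize).
    int32_t EmitLengthCheckImmediate(uint32_t constOffset, uint32_t accessSize)
    {
        return -int32_t(constOffset + accessSize);
    }

    // Patched at link time; corresponds to X86Encoding::AddInt32(patchLengthAt, heapLength).
    int32_t PatchLengthCheck(int32_t imm, uint32_t heapLength)
    {
        return imm + int32_t(heapLength);
    }

    // Out of bounds iff ptr + constOffset + accessSize > heapLength,
    // i.e. ptr > heapLength - accessSize - constOffset (the patched immediate).
    // The generated code does this with a single cmp/branch; the 64-bit
    // arithmetic here just sidesteps the encoding details.
    bool IsOutOfBounds(uint32_t ptr, int32_t patchedImm)
    {
        return int64_t(ptr) > int64_t(patchedImm);
    }

    int main()
    {
        // Example: a 4-byte access at constant offset 16 against a 64 KiB heap.
        int32_t imm = EmitLengthCheckImmediate(16, 4);
        imm = PatchLengthCheck(imm, 64 * 1024);
        assert(!IsOutOfBounds(65516, imm));  // 65516 + 16 + 4 == 65536: in bounds
        assert(IsOutOfBounds(65517, imm));   // one byte further is rejected
        return 0;
    }
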
--- a/js/src/asmjs/AsmJSSignalHandlers.cpp
+++ b/js/src/asmjs/AsmJSSignalHandlers.cpp
@@ -17,16 +17,17 @@
  */
 
 #include "asmjs/AsmJSSignalHandlers.h"
 
 #include "mozilla/DebugOnly.h"
 #include "mozilla/PodOperations.h"
 
 #include "asmjs/AsmJSModule.h"
+#include "jit/Disassembler.h"
 #include "vm/Runtime.h"
 
 using namespace js;
 using namespace js::jit;
 
 using JS::GenericNaN;
 using mozilla::DebugOnly;
 using mozilla::PodArrayZero;
@@ -303,16 +304,38 @@ enum { REG_EIP = 14 };
 #  endif  // defined(__i386__)
 # endif  // !defined(__BIONIC_HAVE_UCONTEXT_T)
 #endif // defined(ANDROID)
 
 #if !defined(XP_WIN)
 # define CONTEXT ucontext_t
 #endif
 
+// Define a context type for use in the emulator code. This is usually just
+// the same as CONTEXT, but on Mac we use a different structure since we call
+// into the emulator code from a Mach exception handler rather than a
+// sigaction-style signal handler.
+#if defined(XP_MACOSX)
+# if defined(JS_CODEGEN_X64)
+struct macos_x64_context {
+    x86_thread_state64_t thread;
+    x86_float_state64_t float_;
+};
+#  define EMULATOR_CONTEXT macos_x64_context
+# else
+struct macos_x86_context {
+    x86_thread_state_t thread;
+    x86_float_state_t float_;
+};
+#  define EMULATOR_CONTEXT macos_x86_context
+# endif
+#else
+# define EMULATOR_CONTEXT CONTEXT
+#endif
+
 #if defined(JS_CPU_X64)
 # define PC_sig(p) RIP_sig(p)
 #elif defined(JS_CPU_X86)
 # define PC_sig(p) EIP_sig(p)
 #elif defined(JS_CPU_ARM)
 # define PC_sig(p) R15_sig(p)
 #elif defined(JS_CPU_MIPS)
 # define PC_sig(p) EPC_sig(p)
@@ -324,106 +347,378 @@ ContextToPC(CONTEXT *context)
 #ifdef JS_CODEGEN_NONE
     MOZ_CRASH();
 #else
      return reinterpret_cast<uint8_t**>(&PC_sig(context));
 #endif
 }
 
 #if defined(JS_CODEGEN_X64)
-template <class T>
-static void
-SetXMMRegToNaN(Scalar::Type viewType, T *xmm_reg)
+MOZ_COLD static void
+SetFPRegToNaN(size_t size, void *fp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
+    memset(fp_reg, 0, Simd128DataSize);
+    switch (size) {
+      case 4: *static_cast<float *>(fp_reg) = GenericNaN(); break;
+      case 8: *static_cast<double *>(fp_reg) = GenericNaN(); break;
+      default:
+        // All SIMD accesses throw on OOB.
+        MOZ_CRASH("unexpected size in SetFPRegToNaN");
+    }
+}
+
+MOZ_COLD static void
+SetGPRegToZero(void *gp_reg)
+{
+    memset(gp_reg, 0, sizeof(intptr_t));
+}
+
+MOZ_COLD static void
+SetFPRegToLoadedValue(const void *addr, size_t size, void *fp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
+    memset(fp_reg, 0, Simd128DataSize);
+    memcpy(fp_reg, addr, size);
+}
+
+MOZ_COLD static void
+SetGPRegToLoadedValue(const void *addr, size_t size, void *gp_reg)
 {
-    switch (viewType) {
-      case Scalar::Float32: {
-        JS_STATIC_ASSERT(sizeof(T) == 4 * sizeof(float));
-        float *floats = reinterpret_cast<float*>(xmm_reg);
-        floats[0] = GenericNaN();
-        floats[1] = 0;
-        floats[2] = 0;
-        floats[3] = 0;
-        break;
-      }
-      case Scalar::Float64: {
-        JS_STATIC_ASSERT(sizeof(T) == 2 * sizeof(double));
-        double *dbls = reinterpret_cast<double*>(xmm_reg);
-        dbls[0] = GenericNaN();
-        dbls[1] = 0;
-        break;
-      }
-      // Float32x4 and Int32x4 out of bounds are handled with the OutOfBounds stub.
-      case Scalar::Float32x4:
-      case Scalar::Int32x4:
-      case Scalar::Int8:
-      case Scalar::Uint8:
-      case Scalar::Int16:
-      case Scalar::Uint16:
-      case Scalar::Int32:
-      case Scalar::Uint32:
-      case Scalar::Uint8Clamped:
-      case Scalar::MaxTypedArrayViewType:
-        MOZ_CRASH("unexpected type in SetXMMRegToNaN");
-    }
+    MOZ_RELEASE_ASSERT(size <= sizeof(void *));
+    memset(gp_reg, 0, sizeof(void *));
+    memcpy(gp_reg, addr, size);
+}
+
+MOZ_COLD static void
+SetGPRegToLoadedValueSext32(const void *addr, size_t size, void *gp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= sizeof(int32_t));
+    int8_t msb = static_cast<const int8_t *>(addr)[size - 1];
+    memset(gp_reg, 0, sizeof(void *));
+    memset(gp_reg, msb >> 7, sizeof(int32_t));
+    memcpy(gp_reg, addr, size);
+}
+
+MOZ_COLD static void
+StoreValueFromFPReg(void *addr, size_t size, const void *fp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
+    memcpy(addr, fp_reg, size);
+}
+
+MOZ_COLD static void
+StoreValueFromGPReg(void *addr, size_t size, const void *gp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= sizeof(void *));
+    memcpy(addr, gp_reg, size);
+}
+
+MOZ_COLD static void
+StoreValueFromGPImm(void *addr, size_t size, int32_t imm)
+{
+    MOZ_RELEASE_ASSERT(size <= sizeof(imm));
+    memcpy(addr, &imm, size);
 }
 
 # if !defined(XP_MACOSX)
-static void
-SetRegisterToCoercedUndefined(CONTEXT *context, Scalar::Type viewType, AnyRegister reg)
+MOZ_COLD static void *
+AddressOfFPRegisterSlot(CONTEXT *context, FloatRegisters::Code code)
+{
+    switch (code) {
+      case X86Encoding::xmm0:  return &XMM_sig(context, 0);
+      case X86Encoding::xmm1:  return &XMM_sig(context, 1);
+      case X86Encoding::xmm2:  return &XMM_sig(context, 2);
+      case X86Encoding::xmm3:  return &XMM_sig(context, 3);
+      case X86Encoding::xmm4:  return &XMM_sig(context, 4);
+      case X86Encoding::xmm5:  return &XMM_sig(context, 5);
+      case X86Encoding::xmm6:  return &XMM_sig(context, 6);
+      case X86Encoding::xmm7:  return &XMM_sig(context, 7);
+      case X86Encoding::xmm8:  return &XMM_sig(context, 8);
+      case X86Encoding::xmm9:  return &XMM_sig(context, 9);
+      case X86Encoding::xmm10: return &XMM_sig(context, 10);
+      case X86Encoding::xmm11: return &XMM_sig(context, 11);
+      case X86Encoding::xmm12: return &XMM_sig(context, 12);
+      case X86Encoding::xmm13: return &XMM_sig(context, 13);
+      case X86Encoding::xmm14: return &XMM_sig(context, 14);
+      case X86Encoding::xmm15: return &XMM_sig(context, 15);
+      default: break;
+    }
+    MOZ_CRASH();
+}
+
+MOZ_COLD static void *
+AddressOfGPRegisterSlot(EMULATOR_CONTEXT *context, Registers::Code code)
 {
-    if (reg.isFloat()) {
-        switch (reg.fpu().code()) {
-          case X86Encoding::xmm0:  SetXMMRegToNaN(viewType, &XMM_sig(context, 0)); break;
-          case X86Encoding::xmm1:  SetXMMRegToNaN(viewType, &XMM_sig(context, 1)); break;
-          case X86Encoding::xmm2:  SetXMMRegToNaN(viewType, &XMM_sig(context, 2)); break;
-          case X86Encoding::xmm3:  SetXMMRegToNaN(viewType, &XMM_sig(context, 3)); break;
-          case X86Encoding::xmm4:  SetXMMRegToNaN(viewType, &XMM_sig(context, 4)); break;
-          case X86Encoding::xmm5:  SetXMMRegToNaN(viewType, &XMM_sig(context, 5)); break;
-          case X86Encoding::xmm6:  SetXMMRegToNaN(viewType, &XMM_sig(context, 6)); break;
-          case X86Encoding::xmm7:  SetXMMRegToNaN(viewType, &XMM_sig(context, 7)); break;
-          case X86Encoding::xmm8:  SetXMMRegToNaN(viewType, &XMM_sig(context, 8)); break;
-          case X86Encoding::xmm9:  SetXMMRegToNaN(viewType, &XMM_sig(context, 9)); break;
-          case X86Encoding::xmm10: SetXMMRegToNaN(viewType, &XMM_sig(context, 10)); break;
-          case X86Encoding::xmm11: SetXMMRegToNaN(viewType, &XMM_sig(context, 11)); break;
-          case X86Encoding::xmm12: SetXMMRegToNaN(viewType, &XMM_sig(context, 12)); break;
-          case X86Encoding::xmm13: SetXMMRegToNaN(viewType, &XMM_sig(context, 13)); break;
-          case X86Encoding::xmm14: SetXMMRegToNaN(viewType, &XMM_sig(context, 14)); break;
-          case X86Encoding::xmm15: SetXMMRegToNaN(viewType, &XMM_sig(context, 15)); break;
-          default: MOZ_CRASH();
-        }
-    } else {
-        switch (reg.gpr().code()) {
-          case X86Encoding::rax: RAX_sig(context) = 0; break;
-          case X86Encoding::rcx: RCX_sig(context) = 0; break;
-          case X86Encoding::rdx: RDX_sig(context) = 0; break;
-          case X86Encoding::rbx: RBX_sig(context) = 0; break;
-          case X86Encoding::rsp: RSP_sig(context) = 0; break;
-          case X86Encoding::rbp: RBP_sig(context) = 0; break;
-          case X86Encoding::rsi: RSI_sig(context) = 0; break;
-          case X86Encoding::rdi: RDI_sig(context) = 0; break;
-          case X86Encoding::r8:  R8_sig(context)  = 0; break;
-          case X86Encoding::r9:  R9_sig(context)  = 0; break;
-          case X86Encoding::r10: R10_sig(context) = 0; break;
-          case X86Encoding::r11: R11_sig(context) = 0; break;
-          case X86Encoding::r12: R12_sig(context) = 0; break;
-          case X86Encoding::r13: R13_sig(context) = 0; break;
-          case X86Encoding::r14: R14_sig(context) = 0; break;
-          case X86Encoding::r15: R15_sig(context) = 0; break;
-          default: MOZ_CRASH();
-        }
+    switch (code) {
+      case X86Encoding::rax: return &RAX_sig(context);
+      case X86Encoding::rcx: return &RCX_sig(context);
+      case X86Encoding::rdx: return &RDX_sig(context);
+      case X86Encoding::rbx: return &RBX_sig(context);
+      case X86Encoding::rsp: return &RSP_sig(context);
+      case X86Encoding::rbp: return &RBP_sig(context);
+      case X86Encoding::rsi: return &RSI_sig(context);
+      case X86Encoding::rdi: return &RDI_sig(context);
+      case X86Encoding::r8:  return &R8_sig(context);
+      case X86Encoding::r9:  return &R9_sig(context);
+      case X86Encoding::r10: return &R10_sig(context);
+      case X86Encoding::r11: return &R11_sig(context);
+      case X86Encoding::r12: return &R12_sig(context);
+      case X86Encoding::r13: return &R13_sig(context);
+      case X86Encoding::r14: return &R14_sig(context);
+      case X86Encoding::r15: return &R15_sig(context);
+      default: break;
     }
+    MOZ_CRASH();
+}
+# else
+MOZ_COLD static void *
+AddressOfFPRegisterSlot(EMULATOR_CONTEXT *context, FloatRegisters::Code code)
+{
+    switch (code) {
+      case X86Encoding::xmm0:  return &context->float_.__fpu_xmm0;
+      case X86Encoding::xmm1:  return &context->float_.__fpu_xmm1;
+      case X86Encoding::xmm2:  return &context->float_.__fpu_xmm2;
+      case X86Encoding::xmm3:  return &context->float_.__fpu_xmm3;
+      case X86Encoding::xmm4:  return &context->float_.__fpu_xmm4;
+      case X86Encoding::xmm5:  return &context->float_.__fpu_xmm5;
+      case X86Encoding::xmm6:  return &context->float_.__fpu_xmm6;
+      case X86Encoding::xmm7:  return &context->float_.__fpu_xmm7;
+      case X86Encoding::xmm8:  return &context->float_.__fpu_xmm8;
+      case X86Encoding::xmm9:  return &context->float_.__fpu_xmm9;
+      case X86Encoding::xmm10: return &context->float_.__fpu_xmm10;
+      case X86Encoding::xmm11: return &context->float_.__fpu_xmm11;
+      case X86Encoding::xmm12: return &context->float_.__fpu_xmm12;
+      case X86Encoding::xmm13: return &context->float_.__fpu_xmm13;
+      case X86Encoding::xmm14: return &context->float_.__fpu_xmm14;
+      case X86Encoding::xmm15: return &context->float_.__fpu_xmm15;
+      default: break;
+    }
+    MOZ_CRASH();
+}
+
+MOZ_COLD static void *
+AddressOfGPRegisterSlot(EMULATOR_CONTEXT *context, Registers::Code code)
+{
+    switch (code) {
+      case X86Encoding::rax: return &context->thread.__rax;
+      case X86Encoding::rcx: return &context->thread.__rcx;
+      case X86Encoding::rdx: return &context->thread.__rdx;
+      case X86Encoding::rbx: return &context->thread.__rbx;
+      case X86Encoding::rsp: return &context->thread.__rsp;
+      case X86Encoding::rbp: return &context->thread.__rbp;
+      case X86Encoding::rsi: return &context->thread.__rsi;
+      case X86Encoding::rdi: return &context->thread.__rdi;
+      case X86Encoding::r8:  return &context->thread.__r8;
+      case X86Encoding::r9:  return &context->thread.__r9;
+      case X86Encoding::r10: return &context->thread.__r10;
+      case X86Encoding::r11: return &context->thread.__r11;
+      case X86Encoding::r12: return &context->thread.__r12;
+      case X86Encoding::r13: return &context->thread.__r13;
+      case X86Encoding::r14: return &context->thread.__r14;
+      case X86Encoding::r15: return &context->thread.__r15;
+      default: break;
+    }
+    MOZ_CRASH();
 }
 # endif  // !XP_MACOSX
 
-static void
-RedirectToOutOfBoundsLabel(uint8_t **ppc, const AsmJSModule &module)
+MOZ_COLD static void
+SetRegisterToCoercedUndefined(EMULATOR_CONTEXT *context, size_t size,
+                              const Disassembler::OtherOperand &value)
+{
+    if (value.kind() == Disassembler::OtherOperand::FPR)
+        SetFPRegToNaN(size, AddressOfFPRegisterSlot(context, value.fpr()));
+    else
+        SetGPRegToZero(AddressOfGPRegisterSlot(context, value.gpr()));
+}
+
+MOZ_COLD static void
+SetRegisterToLoadedValue(EMULATOR_CONTEXT *context, const void *addr, size_t size,
+                         const Disassembler::OtherOperand &value)
+{
+    if (value.kind() == Disassembler::OtherOperand::FPR)
+        SetFPRegToLoadedValue(addr, size, AddressOfFPRegisterSlot(context, value.fpr()));
+    else
+        SetGPRegToLoadedValue(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
+}
+
+MOZ_COLD static void
+SetRegisterToLoadedValueSext32(EMULATOR_CONTEXT *context, const void *addr, size_t size,
+                               const Disassembler::OtherOperand &value)
+{
+    SetGPRegToLoadedValueSext32(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
+}
+
+MOZ_COLD static void
+StoreValueFromRegister(EMULATOR_CONTEXT *context, void *addr, size_t size,
+                       const Disassembler::OtherOperand &value)
+{
+    if (value.kind() == Disassembler::OtherOperand::FPR)
+        StoreValueFromFPReg(addr, size, AddressOfFPRegisterSlot(context, value.fpr()));
+    else if (value.kind() == Disassembler::OtherOperand::GPR)
+        StoreValueFromGPReg(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
+    else
+        StoreValueFromGPImm(addr, size, value.imm());
+}
+
+MOZ_COLD static uint8_t *
+ComputeAccessAddress(EMULATOR_CONTEXT *context, const Disassembler::ComplexAddress &address)
+{
+    MOZ_RELEASE_ASSERT(!address.isPCRelative(), "PC-relative addresses not supported yet");
+
+    uintptr_t result = address.disp();
+
+    if (address.base() != Registers::Invalid) {
+        uintptr_t base;
+        StoreValueFromGPReg(&base, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.base()));
+        result += base;
+    }
+
+    if (address.index() != Registers::Invalid) {
+        uintptr_t index;
+        StoreValueFromGPReg(&index, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.index()));
+        result += index * (1 << address.scale());
+    }
+
+    return reinterpret_cast<uint8_t *>(result);
+}
+
+MOZ_COLD static uint8_t *
+EmulateHeapAccess(EMULATOR_CONTEXT *context, uint8_t *pc, uint8_t *faultingAddress,
+                  const AsmJSHeapAccess *heapAccess, const AsmJSModule &module)
 {
-    MOZ_ASSERT(module.containsFunctionPC(*ppc));
-    *ppc = module.outOfBoundsExit();
+    MOZ_RELEASE_ASSERT(module.containsFunctionPC(pc));
+    MOZ_RELEASE_ASSERT(module.usesSignalHandlersForOOB());
+    MOZ_RELEASE_ASSERT(!heapAccess->hasLengthCheck());
+    MOZ_RELEASE_ASSERT(heapAccess->insnOffset() == (pc - module.codeBase()));
+
+    // Disassemble the instruction which caused the trap so that we can extract
+    // information about it and decide what to do.
+    Disassembler::HeapAccess access;
+    uint8_t *end = Disassembler::DisassembleHeapAccess(pc, &access);
+    const Disassembler::ComplexAddress &address = access.address();
+    MOZ_RELEASE_ASSERT(end > pc);
+    MOZ_RELEASE_ASSERT(module.containsFunctionPC(end));
+
+#if defined(JS_CODEGEN_X64)
+    // Check x64 asm.js heap access invariants.
+    MOZ_RELEASE_ASSERT(address.disp() >= 0);
+    MOZ_RELEASE_ASSERT(address.base() == HeapReg.code());
+    MOZ_RELEASE_ASSERT(address.index() != HeapReg.code());
+    MOZ_RELEASE_ASSERT(address.scale() == 0);
+    if (address.base() != Registers::Invalid) {
+        uintptr_t base;
+        StoreValueFromGPReg(&base, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.base()));
+        MOZ_RELEASE_ASSERT(reinterpret_cast<uint8_t *>(base) == module.maybeHeap());
+    }
+    if (address.index() != Registers::Invalid) {
+        uintptr_t index;
+        StoreValueFromGPReg(&index, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.index()));
+        MOZ_RELEASE_ASSERT(uint32_t(index) == index);
+    }
+#endif
+
+    // Determine the actual effective address of the faulting access. We can't
+    // rely on the faultingAddress given to us by the OS, because we need the
+    // address of the start of the access, and the OS may sometimes give us an
+    // address somewhere in the middle of the heap access.
+    uint8_t *accessAddress = ComputeAccessAddress(context, address);
+    MOZ_RELEASE_ASSERT(size_t(faultingAddress - accessAddress) < access.size(),
+                       "Given faulting address does not appear to be within computed "
+                       "faulting address range");
+    MOZ_RELEASE_ASSERT(accessAddress >= module.maybeHeap(),
+                       "Access begins outside the asm.js heap");
+    MOZ_RELEASE_ASSERT(accessAddress + access.size() <= module.maybeHeap() + AsmJSMappedSize,
+                       "Access extends beyond the asm.js heap guard region");
+    MOZ_RELEASE_ASSERT(accessAddress + access.size() > module.maybeHeap() + module.heapLength(),
+                       "Computed access address is not actually out of bounds");
+
+    // The basic sandbox model is that all heap accesses are a heap base
+    // register plus an index, and the index is always computed with 32-bit
+    // operations, so we know it can only be 4 GiB off of the heap base.
+    //
+    // However, we wish to support the optimization of folding immediates
+    // and scaled indices into addresses, and any address arithmetic we fold
+    // gets done at full pointer width, so it doesn't get properly wrapped.
+    // We support this by extending AsmJSMappedSize to the greatest size
+    // that could be reached by such an unwrapped address, and then when we
+    // arrive here in the signal handler for such an access, we compute the
+    // fully wrapped address, and perform the load or store on it.
+    //
+    // Taking a signal is really slow, but in theory programs really shouldn't
+    // be hitting this anyway.
+    intptr_t unwrappedOffset = accessAddress - module.maybeHeap();
+    uint32_t wrappedOffset = uint32_t(unwrappedOffset);
+    size_t size = access.size();
+    MOZ_RELEASE_ASSERT(wrappedOffset + size > wrappedOffset);
+    bool inBounds = wrappedOffset < module.heapLength() &&
+                    wrappedOffset + size < module.heapLength();
+
+    // If this is storing Z of an XYZ, check whether X is also in bounds, so
+    // that we don't store anything before throwing.
+    MOZ_RELEASE_ASSERT(unwrappedOffset > heapAccess->offsetWithinWholeSimdVector());
+    uint32_t wrappedBaseOffset = uint32_t(unwrappedOffset - heapAccess->offsetWithinWholeSimdVector());
+    if (wrappedBaseOffset >= module.heapLength())
+        inBounds = false;
+
+    if (inBounds) {
+        // We now know that this is an access that is actually in bounds when
+        // properly wrapped. Complete the load or store with the wrapped
+        // address.
+        uint8_t *wrappedAddress = module.maybeHeap() + wrappedOffset;
+        MOZ_RELEASE_ASSERT(wrappedAddress >= module.maybeHeap());
+        MOZ_RELEASE_ASSERT(wrappedAddress + size > wrappedAddress);
+        MOZ_RELEASE_ASSERT(wrappedAddress + size <= module.maybeHeap() + module.heapLength());
+        switch (access.kind()) {
+          case Disassembler::HeapAccess::Load:
+            SetRegisterToLoadedValue(context, wrappedAddress, size, access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::LoadSext32:
+            SetRegisterToLoadedValueSext32(context, wrappedAddress, size, access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::Store:
+            StoreValueFromRegister(context, wrappedAddress, size, access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::Unknown:
+            MOZ_CRASH("Failed to disassemble instruction");
+        }
+    } else {
+        // We now know that this is an out-of-bounds access made by an asm.js
+        // load/store that we should handle.
+
+        if (heapAccess->throwOnOOB())
+            return module.outOfBoundsExit();
+
+        switch (access.kind()) {
+          case Disassembler::HeapAccess::Load:
+          case Disassembler::HeapAccess::LoadSext32:
+            // Assign the JS-defined result value to the destination register
+            // (ToInt32(undefined) or ToNumber(undefined), determined by the
+            // type of the destination register). Very conveniently, we can
+            // infer the type from the register class, since all SIMD accesses
+            // throw on out of bounds (see above), so the only types using FP
+            // registers are float32 and double.
+            SetRegisterToCoercedUndefined(context, access.size(), access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::Store:
+            // Do nothing.
+            break;
+          case Disassembler::HeapAccess::Unknown:
+            MOZ_CRASH("Failed to disassemble instruction");
+        }
+    }
+
+    return end;
 }
+
 #endif // JS_CODEGEN_X64
 
 #if defined(XP_WIN)
 
 static bool
 HandleFault(PEXCEPTION_POINTERS exception)
 {
     EXCEPTION_RECORD *record = exception->ExceptionRecord;
@@ -448,17 +743,17 @@ HandleFault(PEXCEPTION_POINTERS exceptio
     if (!activation)
         return false;
 
 # if defined(JS_CODEGEN_X64)
     const AsmJSModule &module = activation->module();
 
     // These checks aren't necessary, but, since we can, check anyway to make
     // sure we aren't covering up a real bug.
-    void *faultingAddress = (void*)record->ExceptionInformation[1];
+    uint8_t *faultingAddress = reinterpret_cast<uint8_t *>(record->ExceptionInformation[1]);
     if (!module.maybeHeap() ||
         faultingAddress < module.maybeHeap() ||
         faultingAddress >= module.maybeHeap() + AsmJSMappedSize)
     {
         return false;
     }
 
     if (!module.containsFunctionPC(pc)) {
@@ -479,36 +774,17 @@ HandleFault(PEXCEPTION_POINTERS exceptio
         }
         return false;
     }
 
     const AsmJSHeapAccess *heapAccess = module.lookupHeapAccess(pc);
     if (!heapAccess)
         return false;
 
-    // We now know that this is an out-of-bounds access made by an asm.js
-    // load/store that we should handle.
-
-    // SIMD out-of-bounds loads and stores just need to throw.
-    if (Scalar::isSimdType(heapAccess->type())) {
-        RedirectToOutOfBoundsLabel(ppc, module);
-        return true;
-    }
-
-    // Also not necessary, but, since we can, do.
-    if (heapAccess->isLoad() != !record->ExceptionInformation[0])
-        return false;
-
-    // If this is a load, assign the JS-defined result value to the destination
-    // register (ToInt32(undefined) or ToNumber(undefined), determined by the
-    // type of the destination register) and set the PC to the next op. Upon
-    // return from the handler, execution will resume at this next PC.
-    if (heapAccess->isLoad())
-        SetRegisterToCoercedUndefined(context, heapAccess->type(), heapAccess->loadedReg());
-    *ppc += heapAccess->opLength();
+    *ppc = EmulateHeapAccess(context, pc, faultingAddress, heapAccess, module);
 
     return true;
 # else
     return false;
 # endif
 }
 
 static LONG WINAPI
@@ -520,92 +796,29 @@ AsmJSFaultHandler(LPEXCEPTION_POINTERS e
     // No need to worry about calling other handlers, the OS does this for us.
     return EXCEPTION_CONTINUE_SEARCH;
 }
 
 #elif defined(XP_MACOSX)
 # include <mach/exc.h>
 
 static uint8_t **
-ContextToPC(x86_thread_state_t &state)
+ContextToPC(EMULATOR_CONTEXT *context)
 {
 # if defined(JS_CPU_X64)
-    static_assert(sizeof(state.uts.ts64.__rip) == sizeof(void*),
+    static_assert(sizeof(context->thread.__rip) == sizeof(void*),
                   "stored IP should be compile-time pointer-sized");
-    return reinterpret_cast<uint8_t**>(&state.uts.ts64.__rip);
+    return reinterpret_cast<uint8_t**>(&context->thread.__rip);
 # else
-    static_assert(sizeof(state.uts.ts32.__eip) == sizeof(void*),
+    static_assert(sizeof(context->thread.uts.ts32.__eip) == sizeof(void*),
                   "stored IP should be compile-time pointer-sized");
-    return reinterpret_cast<uint8_t**>(&state.uts.ts32.__eip);
-# endif
+    return reinterpret_cast<uint8_t**>(&context->thread.uts.ts32.__eip);
+#endif
 }
 
-# if defined(JS_CODEGEN_X64)
-static bool
-SetRegisterToCoercedUndefined(mach_port_t rtThread, x86_thread_state64_t &state,
-                              const AsmJSHeapAccess &heapAccess)
-{
-    if (heapAccess.loadedReg().isFloat()) {
-        kern_return_t kret;
-
-        x86_float_state64_t fstate;
-        unsigned int count = x86_FLOAT_STATE64_COUNT;
-        kret = thread_get_state(rtThread, x86_FLOAT_STATE64, (thread_state_t) &fstate, &count);
-        if (kret != KERN_SUCCESS)
-            return false;
-
-        Scalar::Type viewType = heapAccess.type();
-        switch (heapAccess.loadedReg().fpu().code()) {
-          case X86Encoding::xmm0:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm0); break;
-          case X86Encoding::xmm1:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm1); break;
-          case X86Encoding::xmm2:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm2); break;
-          case X86Encoding::xmm3:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm3); break;
-          case X86Encoding::xmm4:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm4); break;
-          case X86Encoding::xmm5:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm5); break;
-          case X86Encoding::xmm6:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm6); break;
-          case X86Encoding::xmm7:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm7); break;
-          case X86Encoding::xmm8:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm8); break;
-          case X86Encoding::xmm9:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm9); break;
-          case X86Encoding::xmm10: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm10); break;
-          case X86Encoding::xmm11: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm11); break;
-          case X86Encoding::xmm12: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm12); break;
-          case X86Encoding::xmm13: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm13); break;
-          case X86Encoding::xmm14: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm14); break;
-          case X86Encoding::xmm15: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm15); break;
-          default: MOZ_CRASH();
-        }
-
-        kret = thread_set_state(rtThread, x86_FLOAT_STATE64, (thread_state_t)&fstate, x86_FLOAT_STATE64_COUNT);
-        if (kret != KERN_SUCCESS)
-            return false;
-    } else {
-        switch (heapAccess.loadedReg().gpr().code()) {
-          case X86Encoding::rax: state.__rax = 0; break;
-          case X86Encoding::rcx: state.__rcx = 0; break;
-          case X86Encoding::rdx: state.__rdx = 0; break;
-          case X86Encoding::rbx: state.__rbx = 0; break;
-          case X86Encoding::rsp: state.__rsp = 0; break;
-          case X86Encoding::rbp: state.__rbp = 0; break;
-          case X86Encoding::rsi: state.__rsi = 0; break;
-          case X86Encoding::rdi: state.__rdi = 0; break;
-          case X86Encoding::r8:  state.__r8  = 0; break;
-          case X86Encoding::r9:  state.__r9  = 0; break;
-          case X86Encoding::r10: state.__r10 = 0; break;
-          case X86Encoding::r11: state.__r11 = 0; break;
-          case X86Encoding::r12: state.__r12 = 0; break;
-          case X86Encoding::r13: state.__r13 = 0; break;
-          case X86Encoding::r14: state.__r14 = 0; break;
-          case X86Encoding::r15: state.__r15 = 0; break;
-          default: MOZ_CRASH();
-        }
-    }
-    return true;
-}
-# endif
-
 // This definition was generated by mig (the Mach Interface Generator) for the
 // routine 'exception_raise' (exc.defs).
 #pragma pack(4)
 typedef struct {
     mach_msg_header_t Head;
     /* start of the kernel processed data */
     mach_msg_body_t msgh_body;
     mach_msg_port_descriptor_t thread;
@@ -632,72 +845,74 @@ HandleMachException(JSRuntime *rt, const
     if (rt->handlingSignal)
         return false;
     AutoSetHandlingSignal handling(rt);
 
     // Get the port of the JSRuntime's thread from the message.
     mach_port_t rtThread = request.body.thread.name;
 
     // Read out the JSRuntime thread's register state.
-    x86_thread_state_t state;
-    unsigned int count = x86_THREAD_STATE_COUNT;
+    EMULATOR_CONTEXT context;
+# if defined(JS_CODEGEN_X64)
+    unsigned int thread_state_count = x86_THREAD_STATE64_COUNT;
+    unsigned int float_state_count = x86_FLOAT_STATE64_COUNT;
+    int thread_state = x86_THREAD_STATE64;
+    int float_state = x86_FLOAT_STATE64;
+# else
+    unsigned int thread_state_count = x86_THREAD_STATE_COUNT;
+    unsigned int float_state_count = x86_FLOAT_STATE_COUNT;
+    int thread_state = x86_THREAD_STATE;
+    int float_state = x86_FLOAT_STATE;
+# endif
     kern_return_t kret;
-    kret = thread_get_state(rtThread, x86_THREAD_STATE, (thread_state_t)&state, &count);
+    kret = thread_get_state(rtThread, thread_state,
+                            (thread_state_t)&context.thread, &thread_state_count);
+    if (kret != KERN_SUCCESS)
+        return false;
+    kret = thread_get_state(rtThread, float_state,
+                            (thread_state_t)&context.float_, &float_state_count);
     if (kret != KERN_SUCCESS)
         return false;
 
-    uint8_t **ppc = ContextToPC(state);
+    uint8_t **ppc = ContextToPC(&context);
     uint8_t *pc = *ppc;
 
     if (request.body.exception != EXC_BAD_ACCESS || request.body.codeCnt != 2)
         return false;
 
     AsmJSActivation *activation = rt->asmJSActivationStack();
     if (!activation)
         return false;
 
     const AsmJSModule &module = activation->module();
     if (!module.containsFunctionPC(pc))
         return false;
 
-# if defined(JS_CPU_X64)
+# if defined(JS_CODEGEN_X64)
     // These checks aren't necessary, but, since we can, check anyway to make
     // sure we aren't covering up a real bug.
-    void *faultingAddress = (void*)request.body.code[1];
+    uint8_t *faultingAddress = reinterpret_cast<uint8_t *>(request.body.code[1]);
     if (!module.maybeHeap() ||
         faultingAddress < module.maybeHeap() ||
         faultingAddress >= module.maybeHeap() + AsmJSMappedSize)
     {
         return false;
     }
 
     const AsmJSHeapAccess *heapAccess = module.lookupHeapAccess(pc);
     if (!heapAccess)
         return false;
 
-    // We now know that this is an out-of-bounds access made by an asm.js
-    // load/store that we should handle.
+    *ppc = EmulateHeapAccess(&context, pc, faultingAddress, heapAccess, module);
 
-    if (Scalar::isSimdType(heapAccess->type())) {
-        // SIMD out-of-bounds loads and stores just need to throw.
-        RedirectToOutOfBoundsLabel(ppc, module);
-    } else {
-        // If this is a load, assign the JS-defined result value to the destination
-        // register (ToInt32(undefined) or ToNumber(undefined), determined by the
-        // type of the destination register) and set the PC to the next op. Upon
-        // return from the handler, execution will resume at this next PC.
-        if (heapAccess->isLoad()) {
-            if (!SetRegisterToCoercedUndefined(rtThread, state.uts.ts64, *heapAccess))
-                return false;
-        }
-        *ppc += heapAccess->opLength();
-    }
-
-    // Update the thread state with the new pc.
-    kret = thread_set_state(rtThread, x86_THREAD_STATE, (thread_state_t)&state, x86_THREAD_STATE_COUNT);
+    // Update the thread state with the new pc and register values.
+    kret = thread_set_state(rtThread, float_state, (thread_state_t)&context.float_, float_state_count);
+    if (kret != KERN_SUCCESS)
+        return false;
+    kret = thread_set_state(rtThread, thread_state, (thread_state_t)&context.thread, thread_state_count);
     if (kret != KERN_SUCCESS)
         return false;
 
     return true;
 # else
     return false;
 # endif
 }
@@ -876,44 +1091,29 @@ HandleFault(int signum, siginfo_t *info,
 
     const AsmJSModule &module = activation->module();
     if (!module.containsFunctionPC(pc))
         return false;
 
 # if defined(JS_CODEGEN_X64)
     // These checks aren't necessary, but, since we can, check anyway to make
     // sure we aren't covering up a real bug.
-    void *faultingAddress = info->si_addr;
+    uint8_t *faultingAddress = static_cast<uint8_t *>(info->si_addr);
     if (!module.maybeHeap() ||
         faultingAddress < module.maybeHeap() ||
         faultingAddress >= module.maybeHeap() + AsmJSMappedSize)
     {
         return false;
     }
 
     const AsmJSHeapAccess *heapAccess = module.lookupHeapAccess(pc);
     if (!heapAccess)
         return false;
 
-    // We now know that this is an out-of-bounds access made by an asm.js
-    // load/store that we should handle.
-
-    // SIMD out-of-bounds loads and stores just need to throw.
-    if (Scalar::isSimdType(heapAccess->type())) {
-        RedirectToOutOfBoundsLabel(ppc, module);
-        return true;
-    }
-
-    // If this is a load, assign the JS-defined result value to the destination
-    // register (ToInt32(undefined) or ToNumber(undefined), determined by the
-    // type of the destination register) and set the PC to the next op. Upon
-    // return from the handler, execution will resume at this next PC.
-    if (heapAccess->isLoad())
-        SetRegisterToCoercedUndefined(context, heapAccess->type(), heapAccess->loadedReg());
-    *ppc += heapAccess->opLength();
+    *ppc = EmulateHeapAccess(context, pc, faultingAddress, heapAccess, module);
 
     return true;
 # else
     return false;
 # endif
 }
 
 static struct sigaction sPrevSEGVHandler;
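
EmulateHeapAccess above turns on re-wrapping the unwrapped effective address back into 32 bits before deciding how the fault should be resolved. The sketch below restates that computation outside the signal-handler machinery; WrapHeapAccess and its parameters are made-up names for the example, and the real code recovers accessAddress by re-evaluating the disassembled base + index*scale + disp from the saved register state:

    #include <cstddef>
    #include <cstdint>

    struct WrappedAccess {
        uint32_t offset;   // heap offset after 32-bit wrapping
        bool inBounds;     // whether the wrapped access lies entirely within the heap
    };

    // Mirrors the wrappedOffset/inBounds computation in EmulateHeapAccess().
    WrappedAccess WrapHeapAccess(uintptr_t accessAddress, uintptr_t heapBase,
                                 uint32_t heapLength, size_t accessSize)
    {
        // The faulting address lies past the heap because the folded constant
        // offset was added at full pointer width; truncate back to 32 bits to
        // recover the index the program actually computed.
        intptr_t unwrappedOffset = intptr_t(accessAddress - heapBase);
        uint32_t wrappedOffset = uint32_t(unwrappedOffset);
        bool inBounds = wrappedOffset < heapLength &&
                        wrappedOffset + accessSize < heapLength;
        return { wrappedOffset, inBounds };
    }

If the wrapped access is in bounds, the handler completes the load or store at the wrapped address; otherwise it redirects to the out-of-bounds exit for accesses that throw on OOB, or materializes the coerced-undefined result for loads.
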
--- a/js/src/asmjs/AsmJSValidate.h
+++ b/js/src/asmjs/AsmJSValidate.h
@@ -20,16 +20,17 @@
 #define jit_AsmJS_h
 
 #include "mozilla/MathAlgorithms.h"
 
 #include <stddef.h>
 
 #include "jsutil.h"
 
+#include "jit/Registers.h"
 #include "js/TypeDecls.h"
 #include "vm/NativeObject.h"
 
 namespace js {
 
 class ExclusiveContext;
 namespace frontend {
     template <typename ParseHandler> class Parser;
@@ -48,23 +49,35 @@ typedef frontend::ParseContext<frontend:
 // amount and the entire function should be reparsed from the beginning.
 extern bool
 ValidateAsmJS(ExclusiveContext *cx, AsmJSParser &parser, frontend::ParseNode *stmtList,
              bool *validated);
 
 // The assumed page size; dynamically checked in ValidateAsmJS.
 const size_t AsmJSPageSize = 4096;
 
+// Targets define AsmJSImmediateRange to be the size of an address immediate,
+// and AsmJSCheckedImmediateRange, to be the size of an address immediate that
+// can be supported by signal-handler OOB handling.
+static_assert(jit::AsmJSCheckedImmediateRange <= jit::AsmJSImmediateRange,
+              "AsmJSImmediateRange should be the size of an unconstrained "
+              "address immediate");
+
 #ifdef JS_CPU_X64
 // On x64, the internal ArrayBuffer data array is inflated to 4GiB (only the
 // byteLength portion of which is accessible) so that out-of-bounds accesses
 // (made using a uint32 index) are guaranteed to raise a SIGSEGV.
-// Unaligned accesses and mask optimizations might also try to access a few
-// bytes after this limit, so just inflate it by AsmJSPageSize.
-static const size_t AsmJSMappedSize = 4 * 1024ULL * 1024ULL * 1024ULL + AsmJSPageSize;
+// Then, an additional extent is added to permit folding of small immediate
+// values into addresses. And finally, unaligned accesses and mask optimizations
+// might also try to access a few bytes after this limit, so just inflate it by
+// AsmJSPageSize.
+static const size_t AsmJSMappedSize = 4 * 1024ULL * 1024ULL * 1024ULL +
+                                      jit::AsmJSCheckedImmediateRange +
+                                      AsmJSPageSize;
+
 #endif
 
 // From the asm.js spec Linking section:
 //  the heap object's byteLength must be either
 //    2^n for n in [12, 24)
 //  or
 //    2^24 * n for n >= 1.
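
The enlarged reservation above has to cover the furthest address an unwrapped 32-bit index plus a signal-handled constant offset can reach. A back-of-the-envelope check, using a placeholder value for AsmJSCheckedImmediateRange (the real value is target-defined; see the Architecture-*.h changes in this patch):

    #include <cstdint>

    constexpr uint64_t FourGiB = 4ULL * 1024 * 1024 * 1024;
    constexpr uint64_t PageSize = 4096;                    // AsmJSPageSize
    constexpr uint64_t CheckedImmediateRange = 4096;       // placeholder, not the real constant

    constexpr uint64_t MappedSize = FourGiB + CheckedImmediateRange + PageSize;

    // Worst case: the largest 32-bit index, plus the largest constant offset the
    // signal handler is expected to cover, plus a little unaligned-access slop
    // (which is what the extra AsmJSPageSize absorbs).
    constexpr uint64_t WorstCaseEnd = (FourGiB - 1) + CheckedImmediateRange + 16;

    static_assert(WorstCaseEnd <= MappedSize,
                  "worst-case unwrapped access still faults inside the reservation");
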
 
copy from js/src/jit-test/tests/asm.js/testZOOB.js
copy to js/src/jit-test/tests/asm.js/testAddressErrors.js
--- a/js/src/jit-test/tests/asm.js/testZOOB.js
+++ b/js/src/jit-test/tests/asm.js/testAddressErrors.js
@@ -41,98 +41,8 @@ assertEq(asmLink(asmCompile('glob', 'imp
 assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {arr[0] = 1; return arr[(0xffffffff+1)>>>0]|0 } return f'), this, null, buf)(), 1);
 
 // A non-intish shifted literal constant index should cause an error compiling.
 assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b); function f() {return arr[0x100000000>>0]|0 } return f');
 assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x100000000>>2]|0 } return f');
 
 // Folded non-intish constant expressions should cause an error compiling.
 assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff+1]|0 } return f');
-
-var ab = new ArrayBuffer(BUF_MIN);
-var arr = new Int32Array(BUF_MIN);
-for (var i = 0; i < arr.length; i++)
-    arr[i] = i;
-
-function testInt(ctor, shift, scale, disp) {
-    var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i) {i=i|0; return arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']|0 } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), arr[((i<<scale)+disp)>>shift]|0);
-
-    for (var i of [-Math.pow(2,28),Math.pow(2,28),-Math.pow(2,29),Math.pow(2,29),-Math.pow(2,30),Math.pow(2,30),-Math.pow(2,31),Math.pow(2,31),-Math.pow(2,32),Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), arr[(((i+j)<<scale)+disp)>>shift]|0);
-    }
-
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i,j) {i=i|0;j=j|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
-        var index = ((i<<scale)+disp)>>shift;
-        var v = arr[index]|0;
-        arr[index] = 0;
-        f(i, v);
-        assertEq(arr[index]|0, v);
-    }
-
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = arr[index]|0;
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(arr[index]|0, v);
-        }
-    }
-}
-
-function testFloat(ctor, shift, scale, disp, coercion) {
-    var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i) {i=i|0; return ' + coercion + '(arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']) } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), +arr[((i<<scale)+disp)>>shift]);
-
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), +arr[(((i+j)<<scale)+disp)>>shift]);
-    }
-
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i,j) {i=i|0;j=+j; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
-        var index = ((i<<scale)+disp)>>shift;
-        var v = +arr[index];
-        arr[index] = 0;
-        f(i, v);
-        assertEq(+arr[index], v);
-    }
-
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = +arr[index];
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(+arr[index], v);
-        }
-    }
-}
-
-function testFloat32(ctor, shift, scale, disp) {
-    testFloat(ctor, shift, scale, disp, "toF");
-}
-function testFloat64(ctor, shift, scale, disp) {
-    testFloat(ctor, shift, scale, disp, "+");
-}
-
-function test(tester, ctor, shift) {
-    for (scale of [0,1,2,3]) {
-        for (disp of [0,1,8,Math.pow(2,31)-1,Math.pow(2,31),Math.pow(2,32)-1])
-            tester(ctor, shift, scale, disp);
-    }
-}
-
-test(testInt, Int8Array, 0);
-test(testInt, Uint8Array, 0);
-test(testInt, Int16Array, 1);
-test(testInt, Uint16Array, 1);
-test(testInt, Int32Array, 2);
-test(testInt, Uint32Array, 2);
-test(testFloat32, Float32Array, 2);
-test(testFloat64, Float64Array, 3);
--- a/js/src/jit-test/tests/asm.js/testZOOB.js
+++ b/js/src/jit-test/tests/asm.js/testZOOB.js
@@ -1,138 +1,244 @@
 // |jit-test| test-also-noasmjs
 load(libdir + "asm.js");
+load(libdir + "asserts.js");
 
 setIonCheckGraphCoherency(false);
 setCachingEnabled(false);
 
-// constants
-var buf = new ArrayBuffer(BUF_MIN);
-
-// An unshifted literal constant byte index in the range 0 to 2^31-1 inclusive should give a link failure.
-assertAsmLinkFail(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x7fffffff]|0 } return f'), this, null, buf);
-assertAsmLinkFail(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x1fffffff]|0 } return f'), this, null, buf);
-
-
-// An unshifted literal constant byte index outside the range 0 to 2^31-1 inclusive should cause an error compiling.
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x20000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x3fffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x40000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x7fffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x80000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x8fffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0xffffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x100000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x80000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x100000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int16Array(b); function f() {return arr[-1]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[-2]|0 } return f');
+var ab = new ArrayBuffer(BUF_MIN);
 
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[10-12]|0 } return f');
-
-// An intish shifted literal constant index should not fail to compile or link.
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x3fffffff>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x3fffffff>>2]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0xffffffff>>2]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[-1>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[-1>>2]|0 } return f'), this, null, buf)(), 0);
-// Unsigned (intish) folded constant index.
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff>>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {arr[0] = 1; return arr[(0xffffffff+1)>>>0]|0 } return f'), this, null, buf)(), 1);
-
-// A non-intish shifted literal constant index should cause an error compiling.
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b); function f() {return arr[0x100000000>>0]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x100000000>>2]|0 } return f');
-
-// Folded non-intish constant expressions should cause an error compiling.
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff+1]|0 } return f');
-
-var ab = new ArrayBuffer(BUF_MIN);
-var arr = new Int32Array(BUF_MIN);
-for (var i = 0; i < arr.length; i++)
-    arr[i] = i;
+// Compute a set of interesting indices.
+indices = [0]
+for (var i of [4,1024,BUF_MIN,Math.pow(2,30),Math.pow(2,31),Math.pow(2,32),Math.pow(2,33)]) {
+    for (var j of [-2,-1,0,1,2]) {
+        for (var k of [1,-1])
+            indices.push((i+j)*k);
+    }
+}
 
 function testInt(ctor, shift, scale, disp) {
     var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i) {i=i|0; return arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']|0 } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), arr[((i<<scale)+disp)>>shift]|0);
 
-    for (var i of [-Math.pow(2,28),Math.pow(2,28),-Math.pow(2,29),Math.pow(2,29),-Math.pow(2,30),Math.pow(2,30),-Math.pow(2,31),Math.pow(2,31),-Math.pow(2,32),Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), arr[(((i+j)<<scale)+disp)>>shift]|0);
-    }
+    var c = asmCompile('glob', 'imp', 'b',
+                       USE_ASM +
+                       'var arr=new glob.' + ctor.name + '(b); ' +
+                       'function load(i) {i=i|0; return arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']|0 } ' +
+                       'function store(i,j) {i=i|0;j=j|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } ' +
+                       'function storeZero(i) {i=i|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = 0 } ' +
+                       'function storeNegOne(i) {i=i|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = -1 } ' +
+                       'return { load: load, store: store, storeZero: storeZero, storeNegOne: storeNegOne }');
+    var f = asmLink(c, this, null, ab);
 
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i,j) {i=i|0;j=j|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
+    var v = arr[0];
+    arr[0] = -1;
+    var negOne = arr[0]|0;
+    arr[0] = v;
+
+    for (var i of indices) {
         var index = ((i<<scale)+disp)>>shift;
-        var v = arr[index]|0;
-        arr[index] = 0;
-        f(i, v);
-        assertEq(arr[index]|0, v);
-    }
+        v = arr[index]|0;
+
+        // Loads
+        assertEq(f.load(i), v);
 
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = arr[index]|0;
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(arr[index]|0, v);
-        }
+        // Stores of immediates
+        arr[index] = 1;
+        f.storeZero(i);
+        assertEq(arr[index]|0, 0);
+        f.storeNegOne(i);
+        assertEq(arr[index]|0, index>>>0 < arr.length ? negOne : 0);
+
+        // Stores
+        arr[index] = ~v;
+        f.store(i, v);
+        assertEq(arr[index]|0, v);
     }
 }
 
 function testFloat(ctor, shift, scale, disp, coercion) {
     var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i) {i=i|0; return ' + coercion + '(arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']) } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), +arr[((i<<scale)+disp)>>shift]);
 
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), +arr[(((i+j)<<scale)+disp)>>shift]);
-    }
+    var c = asmCompile('glob', 'imp', 'b',
+                       USE_ASM +
+                       'var arr=new glob.' + ctor.name + '(b); ' +
+                       'var toF = glob.Math.fround; ' +
+                       'function load(i) {i=i|0; return ' + coercion + '(arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']) } ' +
+                       'function store(i,j) {i=i|0;j=+j; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } ' +
+                       'return { load: load, store: store }');
+    var f = asmLink(c, this, null, ab);
 
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i,j) {i=i|0;j=+j; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
+    for (var i of indices) {
         var index = ((i<<scale)+disp)>>shift;
         var v = +arr[index];
-        arr[index] = 0;
-        f(i, v);
-        assertEq(+arr[index], v);
-    }
+
+        // Loads
+        assertEq(f.load(i), v);
 
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = +arr[index];
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(+arr[index], v);
-        }
+        // Stores
+        arr[index] = ~v;
+        f.store(i, v);
+        assertEq(+arr[index], v);
     }
 }
 
 function testFloat32(ctor, shift, scale, disp) {
     testFloat(ctor, shift, scale, disp, "toF");
 }
 function testFloat64(ctor, shift, scale, disp) {
     testFloat(ctor, shift, scale, disp, "+");
 }
 
+function assertEqX4(observed, expected) {
+    assertEq(observed.x, expected.x);
+    assertEq(observed.y, expected.y);
+    assertEq(observed.z, expected.z);
+    assertEq(observed.w, expected.w);
+}
+
+function testSimdX4(ctor, shift, scale, disp, simdName, simdCtor) {
+    var arr = new ctor(ab);
+
+    var c = asmCompile('glob', 'imp', 'b',
+                       USE_ASM +
+                       'var arr=new glob.' + ctor.name + '(b); ' +
+                       'var SIMD_' + simdName + ' = glob.SIMD.' + simdName + '; ' +
+                       'var SIMD_' + simdName + '_check = SIMD_' + simdName + '.check; ' +
+                       'var SIMD_' + simdName + '_load = SIMD_' + simdName + '.load; ' +
+                       'var SIMD_' + simdName + '_loadXYZ = SIMD_' + simdName + '.loadXYZ; ' +
+                       'var SIMD_' + simdName + '_loadXY = SIMD_' + simdName + '.loadXY; ' +
+                       'var SIMD_' + simdName + '_loadX = SIMD_' + simdName + '.loadX; ' +
+                       'var SIMD_' + simdName + '_store = SIMD_' + simdName + '.store; ' +
+                       'var SIMD_' + simdName + '_storeXYZ = SIMD_' + simdName + '.storeXYZ; ' +
+                       'var SIMD_' + simdName + '_storeXY = SIMD_' + simdName + '.storeXY; ' +
+                       'var SIMD_' + simdName + '_storeX = SIMD_' + simdName + '.storeX; ' +
+                       'function load(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_load(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function loadXYZ(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_loadXYZ(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function loadXY(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_loadXY(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function loadX(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_loadX(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function store(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_store(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'function storeXYZ(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_storeXYZ(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'function storeXY(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_storeXY(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'function storeX(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_storeX(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'return { load: load, loadXYZ: loadXYZ, loadXY: loadXY, loadX: loadX, store: store, storeXYZ: storeXYZ, storeXY : storeXY, storeX : storeX }');
+    var f = asmLink(c, this, null, ab);
+
+    for (var i of indices) {
+        var index = ((i<<scale)+disp)>>shift;
+
+        var v, vXYZ, vXY, vX;
+        var t = false, tXYZ = false, tXY = false, tX = false;
+        try { v = simdCtor.load(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            t = true;
+        }
+        try { vXYZ = simdCtor.loadXYZ(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            tXYZ = true;
+        }
+        try { vXY = simdCtor.loadXY(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            tXY = true;
+        }
+        try { vX = simdCtor.loadX(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            tX = true;
+        }
+
+        // Loads
+        var l, lXYZ, lXY, lX;
+        var r = false, rXYZ = false, rXY = false, rX = false;
+        try { l = f.load(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            r = true;
+        }
+        try { lXYZ = f.loadXYZ(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            rXYZ = true;
+        }
+        try { lXY = f.loadXY(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            rXY = true;
+        }
+        try { lX = f.loadX(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            rX = true;
+        }
+        assertEq(t, r);
+        assertEq(tXYZ, rXYZ);
+        assertEq(tXY, rXY);
+        assertEq(tX, rX);
+        if (!t) assertEqX4(v, l);
+        if (!tXYZ) assertEqX4(vXYZ, lXYZ);
+        if (!tXY) assertEqX4(vXY, lXY);
+        if (!tX) assertEqX4(vX, lX);
+
+        // Stores
+        if (!t) {
+            simdCtor.store(arr, index, simdCtor.not(v));
+            f.store(i, v);
+            assertEqX4(simdCtor.load(arr, index), v);
+        } else
+            assertThrowsInstanceOf(() => f.store(i, simdCtor()), RangeError);
+        if (!tXYZ) {
+            simdCtor.storeXYZ(arr, index, simdCtor.not(vXYZ));
+            f.storeXYZ(i, vXYZ);
+            assertEqX4(simdCtor.loadXYZ(arr, index), vXYZ);
+        } else
+            assertThrowsInstanceOf(() => f.storeXYZ(i, simdCtor()), RangeError);
+        if (!tXY) {
+            simdCtor.storeXY(arr, index, simdCtor.not(vXY));
+            f.storeXY(i, vXY);
+            assertEqX4(simdCtor.loadXY(arr, index), vXY);
+        } else
+            assertThrowsInstanceOf(() => f.storeXY(i, simdCtor()), RangeError);
+        if (!tX) {
+            simdCtor.storeX(arr, index, simdCtor.not(vX));
+            f.storeX(i, vX);
+            assertEqX4(simdCtor.loadX(arr, index), vX);
+        } else
+            assertThrowsInstanceOf(() => f.storeX(i, simdCtor()), RangeError);
+    }
+}
+
+function testFloat32x4(ctor, shift, scale, disp) {
+    testSimdX4(ctor, shift, scale, disp, 'float32x4', SIMD.float32x4);
+}
+function testInt32x4(ctor, shift, scale, disp) {
+    testSimdX4(ctor, shift, scale, disp, 'int32x4', SIMD.int32x4);
+}
+
 function test(tester, ctor, shift) {
+    var arr = new ctor(ab);
+    for (var i = 0; i < arr.length; i++)
+        arr[i] = Math.imul(i, Math.imul((i & 1), 2) - 1);
     for (scale of [0,1,2,3]) {
-        for (disp of [0,1,8,Math.pow(2,31)-1,Math.pow(2,31),Math.pow(2,32)-1])
+        for (disp of [0,1,2,8,Math.pow(2,31)-1,Math.pow(2,31),Math.pow(2,32)-1])
             tester(ctor, shift, scale, disp);
     }
+    for (var i = 0; i < arr.length; i++) {
+        var v = arr[i];
+        arr[i] = Math.imul(i, Math.imul((i & 1), 2) - 1);
+        assertEq(arr[i], v);
+    }
 }
 
 test(testInt, Int8Array, 0);
 test(testInt, Uint8Array, 0);
 test(testInt, Int16Array, 1);
 test(testInt, Uint16Array, 1);
 test(testInt, Int32Array, 2);
 test(testInt, Uint32Array, 2);
 test(testFloat32, Float32Array, 2);
 test(testFloat64, Float64Array, 3);
+if (typeof SIMD !== 'undefined' && isSimdAvailable()) {
+    test(testInt32x4, Uint8Array, 0);
+    test(testFloat32x4, Uint8Array, 0);
+}
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -648,17 +648,17 @@ CodeGenerator::getJumpLabelForBranch(MBa
     if (!labelForBackedgeWithImplicitCheck(block))
         return block->lir()->label();
 
     // We need to use a patchable jump for this backedge, but want to treat
     // this as a normal label target to simplify codegen. Efficiency isn't so
     // important here as these tests are extremely unlikely to be used in loop
     // backedges, so emit inline code for the patchable jump. Heap allocating
     // the label allows it to be used by out of line blocks.
-    Label *res = GetJitContext()->temp->lifoAlloc()->new_<Label>();
+    Label *res = alloc().lifoAlloc()->new_<Label>();
     Label after;
     masm.jump(&after);
     masm.bind(res);
     jumpToBlock(block);
     masm.bind(&after);
     return res;
 }
 
--- a/js/src/jit/Disassembler.h
+++ b/js/src/jit/Disassembler.h
@@ -252,17 +252,16 @@ void DumpHeapAccess(const HeapAccess &ac
 inline void
 VerifyHeapAccess(uint8_t *begin, uint8_t *end, const HeapAccess &expected)
 {
     HeapAccess disassembled;
     uint8_t *e = DisassembleHeapAccess(begin, &disassembled);
     MOZ_ASSERT(e == end);
     MOZ_ASSERT(disassembled == expected);
 }
-
 #endif
 
 } // namespace Disassembler
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_Disassembler_h */
--- a/js/src/jit/EffectiveAddressAnalysis.cpp
+++ b/js/src/jit/EffectiveAddressAnalysis.cpp
@@ -84,16 +84,78 @@ AnalyzeLsh(TempAllocator &alloc, MLsh *l
         return;
     }
 
     MEffectiveAddress *eaddr = MEffectiveAddress::New(alloc, base, index, scale, displacement);
     last->replaceAllUsesWith(eaddr);
     last->block()->insertAfter(last, eaddr);
 }
 
+static bool
+IsAlignmentMask(uint32_t m)
+{
+    // Test whether m is just leading ones and trailing zeros.
+    return (-m & ~m) == 0;
+}
+
+template<typename MAsmJSHeapAccessType>
+static void
+AnalyzeAsmHeapAccess(MAsmJSHeapAccessType *ins, MIRGraph &graph)
+{
+    MDefinition *ptr = ins->ptr();
+
+    if (ptr->isConstantValue()) {
+        // Look for heap[i] where i is a constant offset, and fold the offset.
+        // By doing the folding now, we simplify the task of codegen; the offset
+        // is always the address mode immediate. This also allows it to avoid
+        // a situation where the sum of a constant pointer value and a non-zero
+        // offset doesn't actually fit into the address mode immediate.
+        int32_t imm = ptr->constantValue().toInt32();
+        if (imm != 0 && ins->tryAddDisplacement(imm)) {
+            MInstruction *zero = MConstant::New(graph.alloc(), Int32Value(0));
+            ins->block()->insertBefore(ins, zero);
+            ins->replacePtr(zero);
+        }
+    } else if (ptr->isAdd()) {
+        // Look for heap[a+i] where i is a constant offset, and fold the offset.
+        MDefinition *op0 = ptr->toAdd()->getOperand(0);
+        MDefinition *op1 = ptr->toAdd()->getOperand(1);
+        if (op0->isConstantValue())
+            mozilla::Swap(op0, op1);
+        if (op1->isConstantValue()) {
+            int32_t imm = op1->constantValue().toInt32();
+            if (ins->tryAddDisplacement(imm))
+                ins->replacePtr(op0);
+        }
+    } else if (ptr->isBitAnd() && ptr->hasOneUse()) {
+        // Transform heap[(a+i)&m] to heap[(a&m)+i] so that we can fold i into
+        // the access. Since we currently just mutate the BitAnd in place, this
+        // requires that we are its only user.
+        MDefinition *lhs = ptr->toBitAnd()->getOperand(0);
+        MDefinition *rhs = ptr->toBitAnd()->getOperand(1);
+        int lhsIndex = 0;
+        if (lhs->isConstantValue()) {
+            mozilla::Swap(lhs, rhs);
+            lhsIndex = 1;
+        }
+        if (lhs->isAdd() && rhs->isConstantValue()) {
+            MDefinition *op0 = lhs->toAdd()->getOperand(0);
+            MDefinition *op1 = lhs->toAdd()->getOperand(1);
+            if (op0->isConstantValue())
+                mozilla::Swap(op0, op1);
+            if (op1->isConstantValue()) {
+                uint32_t i = op1->constantValue().toInt32();
+                uint32_t m = rhs->constantValue().toInt32();
+                if (IsAlignmentMask(m) && ((i & m) == i) && ins->tryAddDisplacement(i))
+                    ptr->toBitAnd()->replaceOperand(lhsIndex, op0);
+            }
+        }
+    }
+}
+
 // This analysis converts patterns of the form:
 //   truncate(x + (y << {0,1,2,3}))
 //   truncate(x + (y << {0,1,2,3}) + imm32)
 // into a single lea instruction, and patterns of the form:
 //   asmload(x + imm32)
 //   asmload(x << {0,1,2,3})
 //   asmload((x << {0,1,2,3}) + imm32)
 //   asmload((x << {0,1,2,3}) & mask)            (where mask is redundant with shift)
@@ -103,14 +165,21 @@ AnalyzeLsh(TempAllocator &alloc, MLsh *l
 // Additionally, we should consider the general forms:
 //   truncate(x + y + imm32)
 //   truncate((y << {0,1,2,3}) + imm32)
 bool
 EffectiveAddressAnalysis::analyze()
 {
     for (ReversePostorderIterator block(graph_.rpoBegin()); block != graph_.rpoEnd(); block++) {
         for (MInstructionIterator i = block->begin(); i != block->end(); i++) {
+            // Note that we don't check for MAsmJSCompareExchangeHeap
+            // or MAsmJSAtomicBinopHeap, because the backend and the OOB
+            // mechanism don't support non-zero offsets for them yet.
             if (i->isLsh())
                 AnalyzeLsh(graph_.alloc(), i->toLsh());
+            else if (i->isAsmJSLoadHeap())
+                AnalyzeAsmHeapAccess(i->toAsmJSLoadHeap(), graph_);
+            else if (i->isAsmJSStoreHeap())
+                AnalyzeAsmHeapAccess(i->toAsmJSStoreHeap(), graph_);
         }
     }
     return true;
 }
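
A standalone sketch (not part of this changeset; the helper name and sample values are invented) of the reasoning behind the BitAnd case in AnalyzeAsmHeapAccess above: a mask qualifies when it is a run of leading ones followed by trailing zeros, and once the constant i satisfies (i & m) == i, rewriting (a + i) & m as (a & m) + i is exact modulo 2^32, which is what lets i migrate into the access displacement.

    #include <cassert>
    #include <cstdint>

    // Same predicate as IsAlignmentMask above: leading ones, trailing zeros.
    static bool IsAlignmentMaskSketch(uint32_t m) {
        return (-m & ~m) == 0;
    }

    int main() {
        assert(IsAlignmentMaskSketch(0xfffffff8));   // ~7: an alignment mask
        assert(!IsAlignmentMaskSketch(0x0000fff0));  // interior run of ones: rejected

        // With (i & m) == i, i.e. i a multiple of the alignment, the rewrite is exact.
        const uint32_t m = 0xfffffff8, i = 16;
        const uint32_t bases[] = { 0, 5, 1024, 0xfffffffd };
        for (uint32_t a : bases)
            assert(((a + i) & m) == ((a & m) + i)); // heap[(a+i)&m] == heap[(a&m)+i]
        return 0;
    }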
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -12176,31 +12176,67 @@ class MAsmJSNeg
     INSTRUCTION_HEADER(AsmJSNeg)
     static MAsmJSNeg *NewAsmJS(TempAllocator &alloc, MDefinition *op, MIRType type) {
         return new(alloc) MAsmJSNeg(op, type);
     }
 };
 
 class MAsmJSHeapAccess
 {
-    Scalar::Type accessType_;
+    int32_t offset_;
+    Scalar::Type accessType_ : 8;
     bool needsBoundsCheck_;
     unsigned numSimdElems_;
 
   public:
     MAsmJSHeapAccess(Scalar::Type accessType, bool needsBoundsCheck, unsigned numSimdElems = 0)
-      : accessType_(accessType), needsBoundsCheck_(needsBoundsCheck), numSimdElems_(numSimdElems)
+      : offset_(0), accessType_(accessType),
+        needsBoundsCheck_(needsBoundsCheck), numSimdElems_(numSimdElems)
     {
         MOZ_ASSERT(numSimdElems <= ScalarTypeToLength(accessType));
     }
 
+    int32_t offset() const { return offset_; }
+    int32_t endOffset() const { return offset() + byteSize(); }
     Scalar::Type accessType() const { return accessType_; }
+    unsigned byteSize() const {
+        return Scalar::isSimdType(accessType())
+               ? Scalar::scalarByteSize(accessType()) * numSimdElems()
+               : TypedArrayElemSize(accessType());
+    }
     bool needsBoundsCheck() const { return needsBoundsCheck_; }
     void removeBoundsCheck() { needsBoundsCheck_ = false; }
     unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(accessType_)); return numSimdElems_; }
+
+    bool tryAddDisplacement(int32_t o) {
+        // Compute the new offset. Check for overflow and negative. In theory it
+        // ought to be possible to support negative offsets, but it'd require
+        // more elaborate bounds checking mechanisms than we currently have.
+        MOZ_ASSERT(offset_ >= 0);
+        int32_t newOffset = uint32_t(offset_) + o;
+        if (newOffset < 0)
+            return false;
+
+        // Compute the new offset to the end of the access. Check for overflow
+        // and negative here also.
+        int32_t newEnd = uint32_t(newOffset) + byteSize();
+        if (newEnd < 0)
+            return false;
+        MOZ_ASSERT(uint32_t(newEnd) >= uint32_t(newOffset));
+
+        // If we need bounds checking, keep it within the more restrictive
+        // AsmJSCheckedImmediateRange. Otherwise, just keep it within what
+        // the instruction set can support.
+        size_t range = needsBoundsCheck() ? AsmJSCheckedImmediateRange : AsmJSImmediateRange;
+        if (size_t(newEnd) > range)
+            return false;
+
+        offset_ = newOffset;
+        return true;
+    }
 };
 
 class MAsmJSLoadHeap
   : public MUnaryInstruction,
     public MAsmJSHeapAccess,
     public NoTypePolicy::Data
 {
     MemoryBarrierBits barrierBefore_;
@@ -12254,16 +12290,17 @@ class MAsmJSLoadHeap
                                MemoryBarrierBits barrierBefore = MembarNobits,
                                MemoryBarrierBits barrierAfter = MembarNobits)
     {
         return new(alloc) MAsmJSLoadHeap(accessType, ptr, needsBoundsCheck,
                                          numSimdElems, barrierBefore, barrierAfter);
     }
 
     MDefinition *ptr() const { return getOperand(0); }
+    void replacePtr(MDefinition *newPtr) { replaceOperand(0, newPtr); }
     MemoryBarrierBits barrierBefore() const { return barrierBefore_; }
     MemoryBarrierBits barrierAfter() const { return barrierAfter_; }
 
     bool congruentTo(const MDefinition *ins) const MOZ_OVERRIDE;
     AliasSet getAliasSet() const MOZ_OVERRIDE {
         return AliasSet::Load(AliasSet::AsmJSHeap);
     }
     bool mightAlias(const MDefinition *def) const MOZ_OVERRIDE;
@@ -12297,16 +12334,17 @@ class MAsmJSStoreHeap
                                 MemoryBarrierBits barrierBefore = MembarNobits,
                                 MemoryBarrierBits barrierAfter = MembarNobits)
     {
         return new(alloc) MAsmJSStoreHeap(accessType, ptr, v, needsBoundsCheck,
                                           numSimdElems, barrierBefore, barrierAfter);
     }
 
     MDefinition *ptr() const { return getOperand(0); }
+    void replacePtr(MDefinition *newPtr) { replaceOperand(0, newPtr); }
     MDefinition *value() const { return getOperand(1); }
     MemoryBarrierBits barrierBefore() const { return barrierBefore_; }
     MemoryBarrierBits barrierAfter() const { return barrierAfter_; }
 
     AliasSet getAliasSet() const MOZ_OVERRIDE {
         return AliasSet::Store(AliasSet::AsmJSHeap);
     }
 };
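
A standalone sketch (not part of this changeset) of the arithmetic tryAddDisplacement above performs. The names below are hypothetical stand-ins; for concreteness it assumes the x64 values of AsmJSCheckedImmediateRange and AsmJSImmediateRange and a fixed 4-byte access.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Stand-ins for the x64 constants added in Architecture-x64.h.
    static const size_t kCheckedRange   = 4096;
    static const size_t kUncheckedRange = UINT32_C(0x80000000);

    // Mirrors the shape of MAsmJSHeapAccess::tryAddDisplacement for a 4-byte access.
    static bool TryAddDisplacementSketch(int32_t &offset, int32_t o, bool needsBoundsCheck) {
        int32_t newOffset = uint32_t(offset) + o;   // wrapping add
        if (newOffset < 0)
            return false;                           // negative offsets unsupported
        int32_t newEnd = uint32_t(newOffset) + 4;   // end of the 4-byte access
        if (newEnd < 0)
            return false;                           // end overflowed INT32_MAX
        size_t range = needsBoundsCheck ? kCheckedRange : kUncheckedRange;
        if (size_t(newEnd) > range)
            return false;                           // doesn't fit the immediate range
        offset = newOffset;
        return true;
    }

    int main() {
        int32_t off = 0;
        assert(TryAddDisplacementSketch(off, 100, true));      // 0 -> 100, fits checked range
        assert(!TryAddDisplacementSketch(off, 5000, true));     // end 5104 > 4096: rejected
        assert(TryAddDisplacementSketch(off, 5000, false));     // fine when no bounds check
        assert(!TryAddDisplacementSketch(off, -200000, true));  // would go negative: rejected
        return 0;
    }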
--- a/js/src/jit/MIRGenerator.h
+++ b/js/src/jit/MIRGenerator.h
@@ -34,17 +34,17 @@ class MStart;
 class OptimizationInfo;
 
 class MIRGenerator
 {
   public:
     MIRGenerator(CompileCompartment *compartment, const JitCompileOptions &options,
                  TempAllocator *alloc, MIRGraph *graph,
                  CompileInfo *info, const OptimizationInfo *optimizationInfo,
-                 Label *outOfBoundsLabel = nullptr, bool usesSignalHandlersForOOB = false);
+                 Label *outOfBoundsLabel = nullptr, bool usesSignalHandlersForAsmJSOOB = false);
 
     TempAllocator &alloc() {
         return *alloc_;
     }
     MIRGraph &graph() {
         return *graph_;
     }
     bool ensureBallast() {
@@ -196,17 +196,17 @@ class MIRGenerator
 
     // List of nursery objects used by this compilation. Can be traced by a
     // minor GC while compilation happens off-thread. This Vector should only
     // be accessed on the main thread (IonBuilder, nursery GC or
     // CodeGenerator::link).
     ObjectVector nurseryObjects_;
 
     Label *outOfBoundsLabel_;
-    bool usesSignalHandlersForOOB_;
+    bool usesSignalHandlersForAsmJSOOB_;
 
     void addAbortedNewScriptPropertiesGroup(ObjectGroup *type);
     void setForceAbort() {
         shouldForceAbort_ = true;
     }
     bool shouldForceAbort() {
         return shouldForceAbort_;
     }
@@ -222,20 +222,28 @@ class MIRGenerator
     const JitCompileOptions options;
 
     void traceNurseryObjects(JSTracer *trc);
 
     const ObjectVector &nurseryObjects() const {
         return nurseryObjects_;
     }
 
-    bool usesSignalHandlersForOOB() const {
-        return usesSignalHandlersForOOB_;
-    }
     Label *outOfBoundsLabel() const {
         return outOfBoundsLabel_;
     }
+    bool needsAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access) const {
+        // A heap access needs a bounds-check branch if we're not relying on signal
+        // handlers to catch errors, and if it's not proven to be within bounds.
+        // We use signal-handlers on x64, but on x86 there isn't enough address
+        // space for a guard region.
+#ifdef JS_CODEGEN_X64
+        if (usesSignalHandlersForAsmJSOOB_)
+            return false;
+#endif
+        return access->needsBoundsCheck();
+    }
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_MIRGenerator_h */
--- a/js/src/jit/MIRGraph.cpp
+++ b/js/src/jit/MIRGraph.cpp
@@ -15,17 +15,17 @@
 
 using namespace js;
 using namespace js::jit;
 using mozilla::Swap;
 
 MIRGenerator::MIRGenerator(CompileCompartment *compartment, const JitCompileOptions &options,
                            TempAllocator *alloc, MIRGraph *graph, CompileInfo *info,
                            const OptimizationInfo *optimizationInfo,
-                           Label *outOfBoundsLabel, bool usesSignalHandlersForOOB)
+                           Label *outOfBoundsLabel, bool usesSignalHandlersForAsmJSOOB)
   : compartment(compartment),
     info_(info),
     optimizationInfo_(optimizationInfo),
     alloc_(alloc),
     graph_(graph),
     abortReason_(AbortReason_NoAbort),
     shouldForceAbort_(false),
     abortedNewScriptPropertiesGroups_(*alloc_),
@@ -37,17 +37,17 @@ MIRGenerator::MIRGenerator(CompileCompar
     usesSimd_(false),
     usesSimdCached_(false),
     minAsmJSHeapLength_(0),
     modifiesFrameArguments_(false),
     instrumentedProfiling_(false),
     instrumentedProfilingIsCached_(false),
     nurseryObjects_(*alloc),
     outOfBoundsLabel_(outOfBoundsLabel),
-    usesSignalHandlersForOOB_(usesSignalHandlersForOOB),
+    usesSignalHandlersForAsmJSOOB_(usesSignalHandlersForAsmJSOOB),
     options(options)
 { }
 
 bool
 MIRGenerator::usesSimd()
 {
     if (usesSimdCached_)
         return usesSimd_;
--- a/js/src/jit/arm/Architecture-arm.h
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -578,12 +578,18 @@ static inline bool UseHardFpABI()
 #if defined(JS_CODEGEN_ARM_HARDFP)
     return true;
 #else
     return false;
 #endif
 }
 #endif
 
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+// TODO: Implement this for ARM. Note that it requires Codegen to respect the
+// offset field of AsmJSHeapAccess.
+static const size_t AsmJSCheckedImmediateRange = 0;
+static const size_t AsmJSImmediateRange = 0;
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_arm_Architecture_arm_h */
--- a/js/src/jit/mips/Architecture-mips.h
+++ b/js/src/jit/mips/Architecture-mips.h
@@ -496,12 +496,18 @@ hasUnaliasedDouble() {
 // On MIPS, fn-double aliases both fn-float32 and fn+1-float32, so if you need
 // to convert a float32 to a double as a temporary, you need a temporary
 // double register.
 inline bool
 hasMultiAlias() {
     return true;
 }
 
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+// TODO: Implement this for MIPS. Note that it requires Codegen to respect the
+// offset field of AsmJSHeapAccess.
+static const size_t AsmJSCheckedImmediateRange = 0;
+static const size_t AsmJSImmediateRange = 0;
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_mips_Architecture_mips_h */
--- a/js/src/jit/shared/Assembler-shared.h
+++ b/js/src/jit/shared/Assembler-shared.h
@@ -311,16 +311,19 @@ struct PatchedAbsoluteAddress
     void *addr;
 
     explicit PatchedAbsoluteAddress()
       : addr(nullptr)
     { }
     explicit PatchedAbsoluteAddress(const void *addr)
       : addr(const_cast<void*>(addr))
     { }
+    explicit PatchedAbsoluteAddress(uintptr_t addr)
+      : addr(reinterpret_cast<void*>(addr))
+    { }
 };
 
 // Specifies an address computed in the form of a register base and a constant,
 // 32-bit offset.
 struct Address
 {
     Register base;
     int32_t offset;
@@ -759,89 +762,87 @@ static const unsigned AsmJSNaN32GlobalDa
 
 // Summarizes a heap access made by asm.js code that needs to be patched later
 // and/or looked up by the asm.js signal handlers. Different architectures need
 // to know different things (x64: offset and length, ARM: where to patch in
 // heap length, x86: where to patch in heap length and base) hence the massive
 // #ifdefery.
 class AsmJSHeapAccess
 {
+#if defined(JS_CODEGEN_X64)
+  public:
+    enum WhatToDoOnOOB {
+        CarryOn, // loads return undefined, stores do nothing.
+        Throw    // throw a RangeError
+    };
+#endif
+
   private:
-    uint32_t offset_;
+    uint32_t insnOffset_;
+#if defined(JS_CODEGEN_X86)
+    uint8_t opLength_;  // the length of the load/store instruction
+#endif
+#if defined(JS_CODEGEN_X64)
+    uint8_t offsetWithinWholeSimdVector_; // e.g. the offset of the Z in an XYZ access
+    bool throwOnOOB_;   // should we throw on OOB?
+#endif
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     uint8_t cmpDelta_;  // the number of bytes from the cmp to the load/store instruction
-    uint8_t opLength_;  // the length of the load/store instruction
-    uint8_t numSimdElems_; // the number of SIMD lanes to load/store at once
-    Scalar::Type type_;
-    AnyRegister::Code loadedReg_ : 8;
 #endif
 
     JS_STATIC_ASSERT(AnyRegister::Total < UINT8_MAX);
 
   public:
     AsmJSHeapAccess() {}
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     static const uint32_t NoLengthCheck = UINT32_MAX;
+#endif
 
-    // If 'cmp' equals 'offset' or if it is not supplied then the
+#if defined(JS_CODEGEN_X86)
+    // If 'cmp' equals 'insnOffset' or if it is not supplied then the
     // cmpDelta_ is zero indicating that there is no length to patch.
-    AsmJSHeapAccess(uint32_t offset, uint32_t after, Scalar::Type type, AnyRegister loadedReg,
-                    uint32_t cmp = NoLengthCheck)
-      : offset_(offset),
-        cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
-        opLength_(after - offset),
-        numSimdElems_(UINT8_MAX),
-        type_(type),
-        loadedReg_(loadedReg.code())
+    AsmJSHeapAccess(uint32_t insnOffset, uint32_t after, uint32_t cmp = NoLengthCheck)
+      : insnOffset_(insnOffset),
+        opLength_(after - insnOffset),
+        cmpDelta_(cmp == NoLengthCheck ? 0 : insnOffset - cmp)
+    {}
+#elif defined(JS_CODEGEN_X64)
+    // If 'cmp' equals 'insnOffset' or if it is not supplied then the
+    // cmpDelta_ is zero indicating that there is no length to patch.
+    AsmJSHeapAccess(uint32_t insnOffset, WhatToDoOnOOB oob,
+                    uint32_t cmp = NoLengthCheck,
+                    uint32_t offsetWithinWholeSimdVector = 0)
+      : insnOffset_(insnOffset),
+        offsetWithinWholeSimdVector_(offsetWithinWholeSimdVector),
+        throwOnOOB_(oob == Throw),
+        cmpDelta_(cmp == NoLengthCheck ? 0 : insnOffset - cmp)
     {
-        MOZ_ASSERT(!Scalar::isSimdType(type));
-    }
-    AsmJSHeapAccess(uint32_t offset, uint8_t after, Scalar::Type type, uint32_t cmp = NoLengthCheck)
-      : offset_(offset),
-        cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
-        opLength_(after - offset),
-        numSimdElems_(UINT8_MAX),
-        type_(type),
-        loadedReg_(UINT8_MAX)
-    {
-        MOZ_ASSERT(!Scalar::isSimdType(type));
-    }
-    // SIMD loads / stores
-    AsmJSHeapAccess(uint32_t offset, uint32_t after, unsigned numSimdElems, Scalar::Type type,
-                    uint32_t cmp = NoLengthCheck)
-      : offset_(offset),
-        cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
-        opLength_(after - offset),
-        numSimdElems_(numSimdElems),
-        type_(type),
-        loadedReg_(UINT8_MAX)
-    {
-        MOZ_ASSERT(Scalar::isSimdType(type));
+        MOZ_ASSERT(offsetWithinWholeSimdVector_ == offsetWithinWholeSimdVector);
     }
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
-    explicit AsmJSHeapAccess(uint32_t offset)
-      : offset_(offset)
+    explicit AsmJSHeapAccess(uint32_t insnOffset)
+      : insnOffset_(insnOffset)
     {}
 #endif
 
-    uint32_t offset() const { return offset_; }
-    void setOffset(uint32_t offset) { offset_ = offset; }
+    uint32_t insnOffset() const { return insnOffset_; }
+    void setInsnOffset(uint32_t insnOffset) { insnOffset_ = insnOffset; }
 #if defined(JS_CODEGEN_X86)
-    void *patchOffsetAt(uint8_t *code) const { return code + (offset_ + opLength_); }
+    void *patchHeapPtrImmAt(uint8_t *code) const { return code + (insnOffset_ + opLength_); }
 #endif
 #if defined(JS_CODEGEN_X64)
-    unsigned opLength() const { MOZ_ASSERT(!Scalar::isSimdType(type_)); return opLength_; }
-    bool isLoad() const { MOZ_ASSERT(!Scalar::isSimdType(type_)); return loadedReg_ != UINT8_MAX; }
+    bool throwOnOOB() const { return throwOnOOB_; }
+    uint32_t offsetWithinWholeSimdVector() const { return offsetWithinWholeSimdVector_; }
 #endif
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     bool hasLengthCheck() const { return cmpDelta_ > 0; }
-    void *patchLengthAt(uint8_t *code) const { return code + (offset_ - cmpDelta_); }
-    unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(type_)); return numSimdElems_; }
-    Scalar::Type type() const { return type_; }
-    AnyRegister loadedReg() const { return AnyRegister::FromCode(loadedReg_); }
+    void *patchLengthAt(uint8_t *code) const {
+        MOZ_ASSERT(hasLengthCheck());
+        return code + (insnOffset_ - cmpDelta_);
+    }
 #endif
 };
 
 typedef Vector<AsmJSHeapAccess, 0, SystemAllocPolicy> AsmJSHeapAccessVector;
 
 struct AsmJSGlobalAccess
 {
     CodeOffsetLabel patchAt;
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -65,16 +65,20 @@ class Operand
       : kind_(MEM_REG_DISP),
         base_(reg.code()),
         disp_(disp)
     { }
     explicit Operand(AbsoluteAddress address)
       : kind_(MEM_ADDRESS32),
         disp_(X86Encoding::AddressImmediate(address.addr))
     { }
+    explicit Operand(PatchedAbsoluteAddress address)
+      : kind_(MEM_ADDRESS32),
+        disp_(X86Encoding::AddressImmediate(address.addr))
+    { }
 
     Address toAddress() const {
         MOZ_ASSERT(kind() == MEM_REG_DISP);
         return Address(Register::FromCode(base()), disp());
     }
 
     BaseIndex toBaseIndex() const {
         MOZ_ASSERT(kind() == MEM_SCALE);
--- a/js/src/jit/shared/CodeGenerator-shared-inl.h
+++ b/js/src/jit/shared/CodeGenerator-shared-inl.h
@@ -181,18 +181,18 @@ CodeGeneratorShared::restoreLiveVolatile
     MOZ_ASSERT(!ins->isCall());
     LSafepoint *safepoint = ins->safepoint();
     RegisterSet regs = RegisterSet::Intersect(safepoint->liveRegs(), RegisterSet::Volatile());
     masm.PopRegsInMask(regs);
 }
 
 void
 CodeGeneratorShared::verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, bool isLoad,
-                                                 Scalar::Type type, const Operand &mem,
-                                                 LAllocation alloc)
+                                                 Scalar::Type type, unsigned numElems,
+                                                 const Operand &mem, LAllocation alloc)
 {
 #ifdef DEBUG
     using namespace Disassembler;
 
     OtherOperand op;
     Disassembler::HeapAccess::Kind kind = isLoad ? HeapAccess::Load : HeapAccess::Store;
     switch (type) {
       case Scalar::Int8:
@@ -224,18 +224,20 @@ CodeGeneratorShared::verifyHeapAccessDis
       case Scalar::Int32x4:
         op = OtherOperand(ToFloatRegister(alloc).code());
         break;
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("Unexpected array type");
     }
 
+    size_t size = Scalar::isSimdType(type)
+                  ? Scalar::scalarByteSize(type) * numElems
+                  : TypedArrayElemSize(type);
     masm.verifyHeapAccessDisassembly(begin, end,
-                                     HeapAccess(kind, TypedArrayElemSize(type),
-                                     ComplexAddress(mem), op));
+                                     HeapAccess(kind, size, ComplexAddress(mem), op));
 #endif
 }
 
 } // ion
 } // js
 
 #endif /* jit_shared_CodeGenerator_shared_inl_h */
--- a/js/src/jit/shared/CodeGenerator-shared.h
+++ b/js/src/jit/shared/CodeGenerator-shared.h
@@ -544,18 +544,18 @@ class CodeGeneratorShared : public LElem
     void emitTracelogIonStop() {
 #ifdef JS_TRACE_LOGGING
         emitTracelogStopEvent(TraceLogger_IonMonkey);
         emitTracelogScriptStop();
 #endif
     }
 
     inline void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, bool isLoad,
-                                            Scalar::Type type, const Operand &mem,
-                                            LAllocation alloc);
+                                            Scalar::Type type, unsigned numElems,
+                                            const Operand &mem, LAllocation alloc);
 };
 
 // An out-of-line path is generated at the end of the function.
 class OutOfLineCode : public TempObject
 {
     Label entry_;
     Label rejoin_;
     uint32_t framePushed_;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -358,16 +358,92 @@ CodeGeneratorX86Shared::visitOutOfLineLo
       case Scalar::Uint8Clamped:
         Register destReg = ool->dest().gpr();
         masm.mov(ImmWord(0), destReg);
         break;
     }
     masm.jmp(ool->rejoin());
 }
 
+void
+CodeGeneratorX86Shared::visitOffsetBoundsCheck(OffsetBoundsCheck *oolCheck)
+{
+    // The access is heap[ptr + offset]. The inline code checks that
+    // ptr < heap.length - offset. We get here when that fails. We need to check
+    // for the case where ptr + offset >= 0, in which case the access is still
+    // in bounds.
+    MOZ_ASSERT(oolCheck->offset() != 0,
+               "An access without a constant offset doesn't need a separate OffsetBoundsCheck");
+    masm.cmp32(oolCheck->ptrReg(), Imm32(-uint32_t(oolCheck->offset())));
+    masm.j(Assembler::Below, oolCheck->outOfBounds());
+
+#ifdef JS_CODEGEN_X64
+    // In order to get the offset to wrap properly, we must sign-extend the
+    // pointer to 32-bits. We'll zero out the sign extension immediately
+    // after the access to restore asm.js invariants.
+    masm.movslq(oolCheck->ptrReg(), oolCheck->ptrReg());
+#endif
+
+    masm.jmp(oolCheck->rejoin());
+}
+
+uint32_t
+CodeGeneratorX86Shared::emitAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access,
+                                                   const MInstruction *mir,
+                                                   Register ptr, Label *fail)
+{
+    // Emit a bounds-checking branch for |access|.
+
+    MOZ_ASSERT(gen->needsAsmJSBoundsCheckBranch(access));
+
+    Label *pass = nullptr;
+
+    // If we have a non-zero offset, it's possible that |ptr| itself is out of
+    // bounds, while adding the offset computes an in-bounds address. To catch
+    // this case, we need a second branch, which we emit out of line since it's
+    // unlikely to be needed in normal programs.
+    if (access->offset() != 0) {
+        OffsetBoundsCheck *oolCheck = new(alloc()) OffsetBoundsCheck(fail, ptr, access->offset());
+        fail = oolCheck->entry();
+        pass = oolCheck->rejoin();
+        addOutOfLineCode(oolCheck, mir);
+    }
+
+    // The bounds check is a comparison with an immediate value. The asm.js
+    // module linking process will add the length of the heap to the immediate
+    // field, so -access->endOffset() will turn into
+    // (heapLength - access->endOffset()), allowing us to test whether the end
+    // of the access is beyond the end of the heap.
+    uint32_t maybeCmpOffset = masm.cmp32WithPatch(ptr, Imm32(-access->endOffset())).offset();
+    masm.j(Assembler::Above, fail);
+
+    if (pass)
+        masm.bind(pass);
+
+    return maybeCmpOffset;
+}
+
+void
+CodeGeneratorX86Shared::cleanupAfterAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access,
+                                                           Register ptr)
+{
+    // Clean up after performing a heap access checked by a branch.
+
+    MOZ_ASSERT(gen->needsAsmJSBoundsCheckBranch(access));
+
+#ifdef JS_CODEGEN_X64
+    // If the offset is 0, we don't use an OffsetBoundsCheck.
+    if (access->offset() != 0) {
+        // Zero out the high 32 bits, in case the OffsetBoundsCheck code had to
+        // sign-extend (movslq) the pointer value to get wraparound to work.
+        masm.movl(ptr, ptr);
+    }
+#endif
+}
+
 bool
 CodeGeneratorX86Shared::generateOutOfLineCode()
 {
     if (!CodeGeneratorShared::generateOutOfLineCode())
         return false;
 
     if (deoptLabel_.used()) {
         // All non-table-based bailouts will go here.
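
A standalone sketch (not part of this changeset; helpers and numbers are invented) of the address arithmetic behind emitAsmJSBoundsCheckBranch and visitOffsetBoundsCheck above: after linking patches the immediate to heapLength - endOffset, one unsigned compare covers the whole access, and the out-of-line path accepts pointers whose 32-bit sum with the offset wraps back to a small in-bounds address.

    #include <cassert>
    #include <cstdint>

    // Returns true when the inline j(Above, fail) would not be taken, i.e. the
    // end of the access lies within the heap.
    static bool InlineCheckPasses(uint32_t ptr, uint32_t offset, uint32_t byteSize,
                                  uint32_t heapLength) {
        uint32_t endOffset = offset + byteSize;
        return !(ptr > heapLength - endOffset);
    }

    // The OffsetBoundsCheck path: the inline check failed, but if ptr >= -offset
    // (unsigned), then (ptr + offset) mod 2^32 lands in [0, offset), which is
    // still in bounds; the codegen then sign-extends ptr so the wraparound
    // actually happens in the 64-bit address computation.
    static bool OffsetCheckRecovers(uint32_t ptr, uint32_t offset) {
        return ptr >= uint32_t(0) - offset;
    }

    int main() {
        const uint32_t heapLength = 0x10000, offset = 16, size = 4;
        assert(InlineCheckPasses(0, offset, size, heapLength));           // plainly in bounds
        assert(!InlineCheckPasses(heapLength, offset, size, heapLength)); // runs off the end
        uint32_t ptr = uint32_t(0) - 8;                                   // a "negative" index
        assert(!InlineCheckPasses(ptr, offset, size, heapLength));        // inline check fails...
        assert(OffsetCheckRecovers(ptr, offset));                         // ...but ptr+offset == 8
        return 0;
    }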
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -27,17 +27,16 @@ class CodeGeneratorX86Shared : public Co
     CodeGeneratorX86Shared *thisFromCtor() {
         return this;
     }
 
     template <typename T>
     void bailout(const T &t, LSnapshot *snapshot);
 
   protected:
-
     // Load a NaN or zero into a register for an out of bounds AsmJS or static
     // typed array load.
     class OutOfLineLoadTypedArrayOutOfBounds : public OutOfLineCodeBase<CodeGeneratorX86Shared>
     {
         AnyRegister dest_;
         Scalar::Type viewType_;
       public:
         OutOfLineLoadTypedArrayOutOfBounds(AnyRegister dest, Scalar::Type viewType)
@@ -46,16 +45,41 @@ class CodeGeneratorX86Shared : public Co
 
         AnyRegister dest() const { return dest_; }
         Scalar::Type viewType() const { return viewType_; }
         void accept(CodeGeneratorX86Shared *codegen) {
             codegen->visitOutOfLineLoadTypedArrayOutOfBounds(this);
         }
     };
 
+    // Additional bounds checking for heap accesses with constant offsets.
+    class OffsetBoundsCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
+    {
+        Label *outOfBounds_;
+        Register ptrReg_;
+        int32_t offset_;
+      public:
+        OffsetBoundsCheck(Label *outOfBounds, Register ptrReg, int32_t offset)
+          : outOfBounds_(outOfBounds), ptrReg_(ptrReg), offset_(offset)
+        {}
+
+        Label *outOfBounds() const { return outOfBounds_; }
+        Register ptrReg() const { return ptrReg_; }
+        int32_t offset() const { return offset_; }
+        void accept(CodeGeneratorX86Shared *codegen) {
+            codegen->visitOffsetBoundsCheck(this);
+        }
+    };
+
+    // Functions for emitting bounds-checking code with branches.
+    MOZ_WARN_UNUSED_RESULT
+    uint32_t emitAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access, const MInstruction *mir,
+                                        Register ptr, Label *fail);
+    void cleanupAfterAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access, Register ptr);
+
     // Label for the common return path.
     NonAssertingLabel returnLabel_;
     NonAssertingLabel deoptLabel_;
 
     inline Operand ToOperand(const LAllocation &a) {
         if (a.isGeneralReg())
             return Operand(a.toGeneralReg()->reg());
         if (a.isFloatReg())
@@ -209,16 +233,17 @@ class CodeGeneratorX86Shared : public Co
     virtual void visitGuardObjectGroup(LGuardObjectGroup *guard);
     virtual void visitGuardClass(LGuardClass *guard);
     virtual void visitEffectiveAddress(LEffectiveAddress *ins);
     virtual void visitUDivOrMod(LUDivOrMod *ins);
     virtual void visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
     virtual void visitMemoryBarrier(LMemoryBarrier *ins);
 
     void visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds *ool);
+    void visitOffsetBoundsCheck(OffsetBoundsCheck *oolCheck);
 
     void visitNegI(LNegI *lir);
     void visitNegD(LNegD *lir);
     void visitNegF(LNegF *lir);
 
     // SIMD operators
     void visitSimdValueInt32x4(LSimdValueInt32x4 *lir);
     void visitSimdValueFloat32x4(LSimdValueFloat32x4 *lir);
--- a/js/src/jit/shared/Lowering-shared-inl.h
+++ b/js/src/jit/shared/Lowering-shared-inl.h
@@ -271,20 +271,20 @@ LAllocation
 LIRGeneratorShared::useRegisterOrConstantAtStart(MDefinition *mir)
 {
     if (mir->isConstant())
         return LAllocation(mir->toConstant()->vp());
     return useRegisterAtStart(mir);
 }
 
 LAllocation
-LIRGeneratorShared::useRegisterOrNonNegativeConstantAtStart(MDefinition *mir)
+LIRGeneratorShared::useRegisterOrZeroAtStart(MDefinition *mir)
 {
-    if (mir->isConstant() && mir->toConstant()->value().toInt32() >= 0)
-        return LAllocation(mir->toConstant()->vp());
+    if (mir->isConstant() && mir->toConstant()->value().isInt32(0))
+        return LAllocation();
     return useRegisterAtStart(mir);
 }
 
 LAllocation
 LIRGeneratorShared::useRegisterOrNonDoubleConstant(MDefinition *mir)
 {
     if (mir->isConstant() && mir->type() != MIRType_Double && mir->type() != MIRType_Float32)
         return LAllocation(mir->toConstant()->vp());
--- a/js/src/jit/shared/Lowering-shared.h
+++ b/js/src/jit/shared/Lowering-shared.h
@@ -103,17 +103,17 @@ class LIRGeneratorShared : public MDefin
     // "Storable" is architecture dependend, and will include registers and
     // constants on X86 and only registers on ARM.  This is a generic "things
     // we can expect to write into memory in 1 instruction".
     inline LAllocation useStorable(MDefinition *mir);
     inline LAllocation useStorableAtStart(MDefinition *mir);
     inline LAllocation useKeepaliveOrConstant(MDefinition *mir);
     inline LAllocation useRegisterOrConstant(MDefinition *mir);
     inline LAllocation useRegisterOrConstantAtStart(MDefinition *mir);
-    inline LAllocation useRegisterOrNonNegativeConstantAtStart(MDefinition *mir);
+    inline LAllocation useRegisterOrZeroAtStart(MDefinition *mir);
     inline LAllocation useRegisterOrNonDoubleConstant(MDefinition *mir);
 
     inline LUse useRegisterForTypedLoad(MDefinition *mir, MIRType type);
 
 #ifdef JS_NUNBOX32
     inline LUse useType(MDefinition *mir, LUse::Policy policy);
     inline LUse usePayload(MDefinition *mir, LUse::Policy policy);
     inline LUse usePayloadAtStart(MDefinition *mir, LUse::Policy policy);
--- a/js/src/jit/x64/Architecture-x64.h
+++ b/js/src/jit/x64/Architecture-x64.h
@@ -257,12 +257,17 @@ hasUnaliasedDouble()
 // On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32
 // to a double as a temporary, you need a temporary double register.
 inline bool
 hasMultiAlias()
 {
     return false;
 }
 
+// Support some constant-offset addressing.
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+static const size_t AsmJSCheckedImmediateRange = 4096;
+static const size_t AsmJSImmediateRange = UINT32_C(0x80000000);
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_x64_Architecture_x64_h */
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -293,124 +293,118 @@ CodeGeneratorX64::loadSimd(Scalar::Type 
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
 CodeGeneratorX64::emitSimdLoad(LAsmJSLoadHeap *ins)
 {
-    MAsmJSLoadHeap *mir = ins->mir();
+    const MAsmJSLoadHeap *mir = ins->mir();
     Scalar::Type type = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
     FloatRegister out = ToFloatRegister(ins->output());
-    Operand srcAddr(HeapReg);
-
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        srcAddr = Operand(HeapReg, ptrImm);
-    } else {
-        srcAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *ptr = ins->ptr();
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, gen->outOfBoundsLabel()); // Throws RangeError
-    }
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
     unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
-        Operand shiftedOffset(HeapReg);
-        if (ptr->isConstant())
-            shiftedOffset = Operand(HeapReg, ptr->toConstant()->toInt32() + 2 * sizeof(float));
-        else
-            shiftedOffset = Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float));
+        Operand srcAddrZ =
+            ptr->isBogus()
+            ? Operand(HeapReg, 2 * sizeof(float) + mir->offset())
+            : Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float) + mir->offset());
 
         // Load XY
         uint32_t before = masm.size();
         loadSimd(type, 2, srcAddr, out);
         uint32_t after = masm.size();
-        // We're noting a load of 3 elements, so that the bounds check checks
-        // for 3 elements.
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, type, 2, srcAddr,
+                                    *ins->output()->output());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset));
 
         // Load Z (W is zeroed)
+        // This is still in bounds: either the bounds check above covered the
+        // whole access, or the check was removed because it's known to fit.
         before = after;
-        loadSimd(type, 1, shiftedOffset, ScratchSimdReg);
+        loadSimd(type, 1, srcAddrZ, ScratchSimdReg);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 1, type));
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, type, 1, srcAddrZ, LFloatReg(ScratchSimdReg));
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw,
+                                    AsmJSHeapAccess::NoLengthCheck, 8));
 
         // Move ZW atop XY
         masm.vmovlhps(ScratchSimdReg, out, out);
-        return;
+    } else {
+        uint32_t before = masm.size();
+        loadSimd(type, numElems, srcAddr, out);
+        uint32_t after = masm.size();
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, type, numElems, srcAddr, *ins->output()->output());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset));
     }
 
-    uint32_t before = masm.size();
-    loadSimd(type, numElems, srcAddr, out);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
 {
-    MAsmJSLoadHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
-    const LDefinition *out = ins->output();
-    Operand srcAddr(HeapReg);
+    const MAsmJSLoadHeap *mir = ins->mir();
+    Scalar::Type accessType = mir->accessType();
 
-    if (Scalar::isSimdType(vt))
+    if (Scalar::isSimdType(accessType))
         return emitSimdLoad(ins);
 
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        srcAddr = Operand(HeapReg, ptrImm);
-    } else {
-        srcAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *ptr = ins->ptr();
+    const LDefinition *out = ins->output();
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
-    memoryBarrier(ins->mir()->barrierBefore());
+    memoryBarrier(mir->barrierBefore());
     OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        CodeOffsetLabel cmp = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0));
-        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
-        addOutOfLineCode(ool, ins->mir());
-        masm.j(Assembler::AboveOrEqual, ool->entry());
-        maybeCmpOffset = cmp.offset();
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
+        addOutOfLineCode(ool, mir);
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), ool->entry());
     }
 
     uint32_t before = masm.size();
-    switch (vt) {
+    switch (accessType) {
       case Scalar::Int8:      masm.movsbl(srcAddr, ToRegister(out)); break;
       case Scalar::Uint8:     masm.movzbl(srcAddr, ToRegister(out)); break;
       case Scalar::Int16:     masm.movswl(srcAddr, ToRegister(out)); break;
       case Scalar::Uint16:    masm.movzwl(srcAddr, ToRegister(out)); break;
       case Scalar::Int32:
       case Scalar::Uint32:    masm.movl(srcAddr, ToRegister(out)); break;
       case Scalar::Float32:   masm.loadFloat32(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float64:   masm.loadDouble(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float32x4:
       case Scalar::Int32x4:   MOZ_CRASH("SIMD loads should be handled in emitSimdLoad");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
           MOZ_CRASH("unexpected array type");
     }
     uint32_t after = masm.size();
-    verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, vt, srcAddr, *out->output());
-    if (ool)
+    verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, accessType, 0, srcAddr, *out->output());
+    if (ool) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(ool->rejoin());
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, vt, ToAnyRegister(out), maybeCmpOffset));
+    }
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::CarryOn, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in,
                             const Operand &dstAddr)
 {
     switch (type) {
       case Scalar::Float32x4: {
@@ -447,222 +441,228 @@ CodeGeneratorX64::storeSimd(Scalar::Type
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
 CodeGeneratorX64::emitSimdStore(LAsmJSStoreHeap *ins)
 {
-    MAsmJSStoreHeap *mir = ins->mir();
+    const MAsmJSStoreHeap *mir = ins->mir();
     Scalar::Type type = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
     FloatRegister in = ToFloatRegister(ins->value());
-    Operand dstAddr(HeapReg);
-
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        dstAddr = Operand(HeapReg, ptrImm);
-    } else {
-        dstAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, gen->outOfBoundsLabel()); // Throws RangeError
-    }
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
     unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
-        Operand shiftedOffset(HeapReg);
-        if (ptr->isConstant())
-            shiftedOffset = Operand(HeapReg, ptr->toConstant()->toInt32() + 2 * sizeof(float));
-        else
-            shiftedOffset = Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float));
+        Operand dstAddrZ =
+            ptr->isBogus()
+            ? Operand(HeapReg, 2 * sizeof(float) + mir->offset())
+            : Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float) + mir->offset());
 
-        // Store Z first: it would be observable to store XY first, in the
-        // case XY can be stored in bounds but Z can't (in this case, we'd throw
-        // without restoring the values previously stored before XY).
+        // It's possible that the Z could be out of bounds when the XY is in
+        // bounds. To avoid storing the XY before the exception is thrown, we
+        // store the Z first, and record its offset in the AsmJSHeapAccess so
+        // that the signal handler knows to check the bounds of the full
+        // access, rather than just the Z.
         masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
         uint32_t before = masm.size();
-        storeSimd(type, 1, ScratchSimdReg, shiftedOffset);
+        storeSimd(type, 1, ScratchSimdReg, dstAddrZ);
         uint32_t after = masm.size();
-        // We're noting a store of 3 elements, so that the bounds check checks
-        // for 3 elements.
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, type, 1, dstAddrZ, LFloatReg(ScratchSimdReg));
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset, 8));
 
         // Store XY
         before = after;
         storeSimd(type, 2, in, dstAddr);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 2, type));
-        return;
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, type, 2, dstAddr, *ins->value());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw));
+    } else {
+        uint32_t before = masm.size();
+        storeSimd(type, numElems, in, dstAddr);
+        uint32_t after = masm.size();
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, type, numElems, dstAddr, *ins->value());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset));
     }
 
-    uint32_t before = masm.size();
-    storeSimd(type, numElems, in, dstAddr);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX64::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
 {
-    MAsmJSStoreHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
-    Operand dstAddr(HeapReg);
+    const MAsmJSStoreHeap *mir = ins->mir();
+    Scalar::Type accessType = mir->accessType();
 
-    if (Scalar::isSimdType(vt))
+    if (Scalar::isSimdType(accessType))
         return emitSimdStore(ins);
 
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        dstAddr = Operand(HeapReg, ptrImm);
-    } else {
-        dstAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *value = ins->value();
+    const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
-    memoryBarrier(ins->mir()->barrierBefore());
-    Label rejoin;
+    memoryBarrier(mir->barrierBefore());
+    Label *rejoin = nullptr;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        CodeOffsetLabel cmp = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0));
-        masm.j(Assembler::AboveOrEqual, &rejoin);
-        maybeCmpOffset = cmp.offset();
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        rejoin = alloc().lifoAlloc()->new_<Label>();
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), rejoin);
     }
 
     uint32_t before = masm.size();
-    if (ins->value()->isConstant()) {
-        switch (vt) {
+    if (value->isConstant()) {
+        switch (accessType) {
           case Scalar::Int8:
-          case Scalar::Uint8:        masm.movb(Imm32(ToInt32(ins->value())), dstAddr); break;
+          case Scalar::Uint8:        masm.movb(Imm32(ToInt32(value)), dstAddr); break;
           case Scalar::Int16:
-          case Scalar::Uint16:       masm.movw(Imm32(ToInt32(ins->value())), dstAddr); break;
+          case Scalar::Uint16:       masm.movw(Imm32(ToInt32(value)), dstAddr); break;
           case Scalar::Int32:
-          case Scalar::Uint32:       masm.movl(Imm32(ToInt32(ins->value())), dstAddr); break;
+          case Scalar::Uint32:       masm.movl(Imm32(ToInt32(value)), dstAddr); break;
           case Scalar::Float32:
           case Scalar::Float64:
           case Scalar::Float32x4:
           case Scalar::Int32x4:
           case Scalar::Uint8Clamped:
           case Scalar::MaxTypedArrayViewType:
               MOZ_CRASH("unexpected array type");
         }
     } else {
-        switch (vt) {
+        switch (accessType) {
           case Scalar::Int8:
-          case Scalar::Uint8:        masm.movb(ToRegister(ins->value()), dstAddr); break;
+          case Scalar::Uint8:        masm.movb(ToRegister(value), dstAddr); break;
           case Scalar::Int16:
-          case Scalar::Uint16:       masm.movw(ToRegister(ins->value()), dstAddr); break;
+          case Scalar::Uint16:       masm.movw(ToRegister(value), dstAddr); break;
           case Scalar::Int32:
-          case Scalar::Uint32:       masm.movl(ToRegister(ins->value()), dstAddr); break;
-          case Scalar::Float32:      masm.storeFloat32(ToFloatRegister(ins->value()), dstAddr); break;
-          case Scalar::Float64:      masm.storeDouble(ToFloatRegister(ins->value()), dstAddr); break;
+          case Scalar::Uint32:       masm.movl(ToRegister(value), dstAddr); break;
+          case Scalar::Float32:      masm.storeFloat32(ToFloatRegister(value), dstAddr); break;
+          case Scalar::Float64:      masm.storeDouble(ToFloatRegister(value), dstAddr); break;
           case Scalar::Float32x4:
           case Scalar::Int32x4:      MOZ_CRASH("SIMD stores must be handled in emitSimdStore");
           case Scalar::Uint8Clamped:
           case Scalar::MaxTypedArrayViewType:
               MOZ_CRASH("unexpected array type");
         }
     }
     uint32_t after = masm.size();
-    verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, vt, dstAddr, *ins->value());
-    if (rejoin.used())
-        masm.bind(&rejoin);
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, vt, maybeCmpOffset));
+    verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, accessType, 0, dstAddr, *value);
+    if (rejoin) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
+        masm.bind(rejoin);
+    }
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::CarryOn, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap *ins)
 {
     MAsmJSCompareExchangeHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
 
     MOZ_ASSERT(ptr->isRegister());
-    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne);
+    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     Register oldval = ToRegister(ins->oldValue());
     Register newval = ToRegister(ins->newValue());
 
+    // Note that we can't use
+    // needsAsmJSBoundsCheckBranch/emitAsmJSBoundsCheckBranch/cleanupAfterAsmJSBoundsCheckBranch
+    // since signal-handler bounds checking is not yet implemented for atomic accesses.
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out, out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
-    masm.compareExchangeToTypedIntArray(vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+    masm.compareExchangeToTypedIntArray(accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         srcAddr,
                                         oldval,
                                         newval,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     uint32_t after = masm.size();
     if (rejoin.used())
         masm.bind(&rejoin);
-    masm.append(AsmJSHeapAccess(after, after, mir->accessType(), maybeCmpOffset));
+    MOZ_ASSERT(mir->offset() == 0,
+               "The AsmJS signal handler doesn't yet support emulating "
+               "atomic accesses in the case of a fault from an unwrapped offset");
+    masm.append(AsmJSHeapAccess(after, AsmJSHeapAccess::Throw, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap *ins)
 {
     MAsmJSAtomicBinopHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
     Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
     const LAllocation* value = ins->value();
     AtomicOp op = mir->operation();
 
     MOZ_ASSERT(ptr->isRegister());
-    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne);
+    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
+    // Note that we can't use
+    // needsAsmJSBoundsCheckBranch/emitAsmJSBoundsCheckBranch/cleanupAfterAsmJSBoundsCheckBranch
+    // since signal-handler bounds checking is not yet implemented for atomic accesses.
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out,out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
     if (value->isConstant()) {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         Imm32(ToInt32(value)),
                                         srcAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     } else {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         ToRegister(value),
                                         srcAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     }
     uint32_t after = masm.size();
     if (rejoin.used())
         masm.bind(&rejoin);
-    masm.append(AsmJSHeapAccess(after, after, mir->accessType(), maybeCmpOffset));
+    MOZ_ASSERT(mir->offset() == 0,
+               "The AsmJS signal handler doesn't yet support emulating "
+               "atomic accesses in the case of a fault from an unwrapped offset");
+    masm.append(AsmJSHeapAccess(after, AsmJSHeapAccess::Throw, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins)
 {
     MAsmJSLoadGlobalVar *mir = ins->mir();
 
     MIRType type = mir->type();
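
The atomic paths above compare the pointer against a patched immediate of -endOffset (instead of 0) and take the in-bounds path on BelowOrEqual (instead of Below). A minimal standalone C++ sketch of the resulting condition, assuming the immediate ends up holding heapLength - endOffset after dynamic linking and that validation keeps endOffset within the minimum heap length:

    #include <cstdint>
    #include <cstdio>

    // endOffset = constant offset + access size. One unsigned compare of the
    // pointer against (heapLength - endOffset) covers the whole access.
    static bool
    AccessIsInBounds(uint32_t ptr, uint32_t offset, uint32_t size, uint32_t heapLength)
    {
        uint32_t endOffset = offset + size;      // assumed not to overflow
        if (endOffset > heapLength)
            return false;                        // assumed ruled out by validation
        // The emitted check: "cmp ptr, heapLength - endOffset; jbe in-bounds".
        return ptr <= heapLength - endOffset;
    }

    int main()
    {
        printf("%d\n", AccessIsInBounds(65524, 8, 4, 65536));  // 1: last byte is 65535
        printf("%d\n", AccessIsInBounds(65525, 8, 4, 65536));  // 0: would touch 65536
        return 0;
    }
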
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@@ -148,42 +148,36 @@ LIRGeneratorX64::visitAsmJSUnsignedToFlo
 }
 
 void
 LIRGeneratorX64::visitAsmJSLoadHeap(MAsmJSLoadHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    // Only a positive index is accepted because a negative offset encoded as an
-    // offset in the addressing mode would not wrap back into the protected area
-    // reserved for the heap. For simplicity (and since we don't care about
-    // getting maximum performance in these cases) only allow constant
-    // operands when skipping bounds checks.
-    LAllocation ptrAlloc = ins->needsBoundsCheck()
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
                            ? useRegisterAtStart(ptr)
-                           : useRegisterOrNonNegativeConstantAtStart(ptr);
+                           : useRegisterOrZeroAtStart(ptr);
 
     define(new(alloc()) LAsmJSLoadHeap(ptrAlloc), ins);
 }
 
 void
 LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    // Only a positive index is accepted because a negative offset encoded as an
-    // offset in the addressing mode would not wrap back into the protected area
-    // reserved for the heap. For simplicity (and since we don't care about
-    // getting maximum performance in these cases) only allow constant
-    // opererands when skipping bounds checks.
-    LAllocation ptrAlloc = ins->needsBoundsCheck()
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
                            ? useRegisterAtStart(ptr)
-                           : useRegisterOrNonNegativeConstantAtStart(ptr);
+                           : useRegisterOrZeroAtStart(ptr);
 
     LAsmJSStoreHeap *lir = nullptr;  // initialize to silence GCC warning
     switch (ins->accessType()) {
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
       case Scalar::Uint16:
       case Scalar::Int32:
@@ -195,17 +189,16 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAs
       case Scalar::Float32x4:
       case Scalar::Int32x4:
         lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useRegisterAtStart(ins->value()));
         break;
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
-
     add(lir, ins);
 }
 
 void
 LIRGeneratorX64::visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins)
 {
     define(new(alloc()) LAsmJSLoadFuncPtr(useRegister(ins->index()), temp()), ins);
 }
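
The lowering above can get away with useRegisterOrZeroAtStart because, by this point, any constant part of the index has already been folded into the access's offset (EffectiveAddressAnalysis.cpp is part of this change); a fully constant index folds down to a zero pointer, which is what ptr->isBogus() sees in the code generators. A hypothetical sketch of that folding, with made-up names:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical MIR-like node: a pointer index plus a constant byte offset.
    struct Access {
        bool ptrIsConstant;   // whole index is a compile-time constant
        uint32_t ptrValue;    // value of the constant index, if any
        uint32_t offset;      // constant byte offset folded into the access
    };

    // Move a constant index entirely into the offset, leaving a zero pointer
    // (the "bogus" allocation seen in the code generators above).
    static Access FoldConstantPtr(Access a)
    {
        if (a.ptrIsConstant) {
            a.offset += a.ptrValue;   // assume validation ruled out overflow
            a.ptrValue = 0;
        }
        return a;
    }

    int main()
    {
        Access a = { true, 1024, 16 };
        a = FoldConstantPtr(a);
        printf("ptr=%u offset=%u\n", a.ptrValue, a.offset);  // ptr=0 offset=1040
        return 0;
    }
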
--- a/js/src/jit/x86/Architecture-x86.h
+++ b/js/src/jit/x86/Architecture-x86.h
@@ -234,12 +234,17 @@ hasUnaliasedDouble()
 // On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32
 // to a double as a temporary, you need a temporary double register.
 inline bool
 hasMultiAlias()
 {
     return false;
 }
 
+// Support some constant-offset addressing.
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+static const size_t AsmJSCheckedImmediateRange = 4096;
+static const size_t AsmJSImmediateRange = UINT32_C(0x80000000);
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_x86_Architecture_x86_h */
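
A hedged reading of these two limits (the authoritative description is the AsmJSMappedSize comment in AsmJSValidate.h referenced above): offsets below AsmJSCheckedImmediateRange can be folded into an access whose explicit bounds check is widened by endOffset, while AsmJSImmediateRange (2^31) bounds what can ever be folded into a single 32-bit displacement. A sketch of such a predicate, under those assumptions:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static const size_t AsmJSCheckedImmediateRange = 4096;
    static const size_t AsmJSImmediateRange = UINT32_C(0x80000000);

    // Hypothetical predicate: may a constant byte offset be folded into a heap
    // access? `explicitCheck` means a bounds-check branch will be emitted and
    // widened to cover ptr + endOffset. (Assumed semantics; see AsmJSValidate.h.)
    static bool
    OffsetFoldable(uint64_t endOffset, bool explicitCheck)
    {
        return endOffset < (explicitCheck ? AsmJSCheckedImmediateRange
                                          : AsmJSImmediateRange);
    }

    int main()
    {
        printf("%d\n", OffsetFoldable(4100, true));    // 0: too large for a widened check
        printf("%d\n", OffsetFoldable(4100, false));   // 1: still fits a disp32
        return 0;
    }
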
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -437,108 +437,288 @@ class Assembler : public AssemblerX86Sha
     // Move a 32-bit immediate into a register where the immediate can be
     // patched.
     CodeOffsetLabel movlWithPatch(Imm32 imm, Register dest) {
         masm.movl_i32r(imm.value, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
 
     // Load from *(base + disp32) where disp32 can be patched.
-    CodeOffsetLabel movsblWithPatch(Address src, Register dest) {
-        masm.movsbl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movsblWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movsbl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movsbl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movzblWithPatch(Address src, Register dest) {
-        masm.movzbl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movzblWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movzbl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movzbl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movswlWithPatch(Address src, Register dest) {
-        masm.movswl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movswlWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movswl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movswl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movzwlWithPatch(Address src, Register dest) {
-        masm.movzwl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movzwlWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movzwl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movzwl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movlWithPatch(Address src, Register dest) {
-        masm.movl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movlWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovssWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovssWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovss_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovss_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovss_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovdWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovd_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovd_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovd_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovqWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovqWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovq_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovq_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovq_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovsdWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovsdWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovsd_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovsd_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovsd_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovupsWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovupsWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovups_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovups_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovups_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdquWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovdquWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovdqu_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovdqu_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovdqu_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
 
     // Store to *(base + disp32) where disp32 can be patched.
-    CodeOffsetLabel movbWithPatch(Register src, Address dest) {
-        masm.movb_rm_disp32(src.code(), dest.offset, dest.base.code());
+    CodeOffsetLabel movbWithPatch(Register src, const Operand &dest) {
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movb_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movb_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movwWithPatch(Register src, Address dest) {
-        masm.movw_rm_disp32(src.code(), dest.offset, dest.base.code());
+    CodeOffsetLabel movwWithPatch(Register src, const Operand &dest) {
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movw_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movw_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movlWithPatch(Register src, Address dest) {
-        masm.movl_rm_disp32(src.code(), dest.offset, dest.base.code());
+    CodeOffsetLabel movlWithPatch(Register src, const Operand &dest) {
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movl_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movl_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovdWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovd_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovd_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovd_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovqWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovqWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovq_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovq_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovq_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovssWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovssWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovss_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovss_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovss_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovsdWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovsdWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovsd_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovsd_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovsd_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovupsWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovupsWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovups_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovups_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovups_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdquWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovdquWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovdqu_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovdqu_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovdqu_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
 
     // Load from *(addr + index*scale) where addr can be patched.
     CodeOffsetLabel movlWithPatch(PatchedAbsoluteAddress addr, Register index, Scale scale,
                                   Register dest)
     {
         masm.movl_mr(addr.addr, index.code(), scale, dest.code());
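
Each of these helpers returns a CodeOffsetLabel at the end of the emitted instruction so the 32-bit displacement (or absolute address) can be rewritten once the heap's base address is known. A simplified, standalone model of that rewrite, not the actual AsmJSModule::initHeap, and assuming the displacement is the last four bytes of the instruction:

    #include <cstdint>
    #include <cstring>
    #include <cstdio>
    #include <vector>

    // Add the heap base into the little-endian disp32 that ends at `endOffset`
    // in the code buffer (the offset the CodeOffsetLabel records).
    static void
    PatchDisp32(std::vector<uint8_t> &code, size_t endOffset, uint32_t heapBase)
    {
        uint32_t disp;
        memcpy(&disp, &code[endOffset - 4], 4);
        disp += heapBase;
        memcpy(&code[endOffset - 4], &disp, 4);
    }

    int main()
    {
        // "movl 0x00000010(%eax), %ecx" with the constant offset 16 already
        // encoded; after patching, the displacement becomes heapBase + 16.
        std::vector<uint8_t> code = { 0x8b, 0x88, 0x10, 0x00, 0x00, 0x00 };
        PatchDisp32(code, code.size(), 0x20000000);
        uint32_t disp;
        memcpy(&disp, &code[2], 4);
        printf("patched disp32 = 0x%08x\n", disp);  // 0x20000010
        return 0;
    }
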
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -250,78 +250,66 @@ CodeGeneratorX86::visitAsmJSUInt32ToFloa
 
     if (input != temp)
         masm.mov(input, temp);
 
     // Beware: convertUInt32ToFloat32 clobbers input.
     masm.convertUInt32ToFloat32(temp, output);
 }
 
-template<typename T>
 void
-CodeGeneratorX86::load(Scalar::Type vt, const T &srcAddr, const LDefinition *out)
+CodeGeneratorX86::load(Scalar::Type accessType, const Operand &srcAddr, const LDefinition *out)
 {
-    switch (vt) {
+    switch (accessType) {
       case Scalar::Int8:         masm.movsblWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Uint8Clamped:
       case Scalar::Uint8:        masm.movzblWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Int16:        masm.movswlWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Uint16:       masm.movzwlWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Int32:
       case Scalar::Uint32:       masm.movlWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Float32:      masm.vmovssWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float64:      masm.vmovsdWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float32x4:
       case Scalar::Int32x4:      MOZ_CRASH("SIMD load should be handled in their own function");
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
 }
 
-template<typename T>
-void
-CodeGeneratorX86::loadAndNoteViewTypeElement(Scalar::Type vt, const T &srcAddr,
-                                             const LDefinition *out)
-{
-    uint32_t before = masm.size();
-    load(vt, srcAddr, out);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, vt, ToAnyRegister(out)));
-}
-
 void
 CodeGeneratorX86::visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic *ins)
 {
     const MLoadTypedArrayElementStatic *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
-    MOZ_ASSERT_IF(vt == Scalar::Float32, mir->type() == MIRType_Float32);
+    Scalar::Type accessType = mir->accessType();
+    MOZ_ASSERT_IF(accessType == Scalar::Float32, mir->type() == MIRType_Float32);
 
     Register ptr = ToRegister(ins->ptr());
     const LDefinition *out = ins->output();
     OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
     uint32_t offset = mir->offset();
 
     if (mir->needsBoundsCheck()) {
         MOZ_ASSERT(offset == 0);
         if (!mir->fallible()) {
-            ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
+            ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
             addOutOfLineCode(ool, ins->mir());
         }
 
         masm.cmpPtr(ptr, ImmWord(mir->length()));
         if (ool)
             masm.j(Assembler::AboveOrEqual, ool->entry());
         else
             bailoutIf(Assembler::AboveOrEqual, ins->snapshot());
     }
 
-    Address srcAddr(ptr, int32_t(mir->base()) + int32_t(offset));
-    load(vt, srcAddr, out);
-    if (vt == Scalar::Float64)
+    Operand srcAddr(ptr, int32_t(mir->base()) + int32_t(offset));
+    load(accessType, srcAddr, out);
+    if (accessType == Scalar::Float64)
         masm.canonicalizeDouble(ToFloatRegister(out));
-    if (vt == Scalar::Float32)
+    if (accessType == Scalar::Float32)
         masm.canonicalizeFloat(ToFloatRegister(out));
     if (ool)
         masm.bind(ool->rejoin());
 }
 
 void
 CodeGeneratorX86::visitAsmJSCall(LAsmJSCall *ins)
 {
@@ -349,19 +337,19 @@ CodeGeneratorX86::visitAsmJSCall(LAsmJSC
 
 void
 CodeGeneratorX86::memoryBarrier(MemoryBarrierBits barrier)
 {
     if (barrier & MembarStoreLoad)
         masm.storeLoadFence();
 }
 
-template<typename T>
 void
-CodeGeneratorX86::loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, FloatRegister out)
+CodeGeneratorX86::loadSimd(Scalar::Type type, unsigned numElems, const Operand &srcAddr,
+                           FloatRegister out)
 {
     switch (type) {
       case Scalar::Float32x4: {
         switch (numElems) {
           // In memory-to-register mode, movss zeroes out the high lanes.
           case 1: masm.vmovssWithPatch(srcAddr, out); break;
           // See comment above, which also applies to movsd.
           case 2: masm.vmovsdWithPatch(srcAddr, out); break;
@@ -369,17 +357,17 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
           // In memory-to-register mode, movd zeroes out the high lanes.
           case 1: masm.vmovdWithPatch(srcAddr, out); break;
-          // See comment above, which also applies to movsd.
+          // See comment above, which also applies to movq.
           case 2: masm.vmovqWithPatch(srcAddr, out); break;
           case 4: masm.vmovdquWithPatch(srcAddr, out); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
@@ -391,218 +379,169 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
-CodeGeneratorX86::emitSimdLoad(Scalar::Type type, unsigned numElems, const LAllocation *ptr,
-                               FloatRegister out, bool needsBoundsCheck /* = false */,
-                               Label *oobLabel /* = nullptr */)
+CodeGeneratorX86::emitSimdLoad(LAsmJSLoadHeap *ins)
 {
-    if (ptr->isConstant()) {
-        MOZ_ASSERT(!needsBoundsCheck);
-
-        if (numElems == 3) {
-            MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
-
-            // Load XY
-            emitSimdLoad(type, 2, ptr, out);
-
-            // Load Z (W is zeroed)
-            // This add won't overflow, as we've checked that we have at least
-            // room for loading 4 elements during asm.js validation.
-            PatchedAbsoluteAddress srcAddr((void *) (ptr->toConstant()->toInt32() + 2 * sizeof(float)));
-            uint32_t before = masm.size();
-            loadSimd(type, 1, srcAddr, ScratchSimdReg);
-            uint32_t after = masm.size();
-            masm.append(AsmJSHeapAccess(before, after, 1, type));
+    const MAsmJSLoadHeap *mir = ins->mir();
+    Scalar::Type type = mir->accessType();
+    FloatRegister out = ToFloatRegister(ins->output());
+    const LAllocation *ptr = ins->ptr();
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-            // Move ZW atop XY
-            masm.vmovlhps(ScratchSimdReg, out, out);
-            return;
-        }
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
-        PatchedAbsoluteAddress srcAddr((void *) ptr->toConstant()->toInt32());
-        uint32_t before = masm.size();
-        loadSimd(type, numElems, srcAddr, out);
-        uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, numElems, type));
-        return;
-    }
-
-    Register ptrReg = ToRegister(ptr);
-    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (needsBoundsCheck) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, oobLabel); // Throws RangeError
-    }
-
-    uint32_t before = masm.size();
+    unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
+        Operand srcAddrZ =
+            ptr->isBogus()
+            ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
+            : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
+
         // Load XY
-        Address addr(ptrReg, 0);
-        before = masm.size();
-        loadSimd(type, 2, addr, out);
+        uint32_t before = masm.size();
+        loadSimd(type, 2, srcAddr, out);
         uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
         // Load Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
-        Address shiftedAddr(ptrReg, 2 * sizeof(float));
         before = after;
-        loadSimd(type, 1, shiftedAddr, ScratchSimdReg);
+        loadSimd(type, 1, srcAddrZ, ScratchSimdReg);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 1, type));
+        masm.append(AsmJSHeapAccess(before, after));
 
         // Move ZW atop XY
         masm.vmovlhps(ScratchSimdReg, out, out);
-        return;
+    } else {
+        uint32_t before = masm.size();
+        loadSimd(type, numElems, srcAddr, out);
+        uint32_t after = masm.size();
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
     }
 
-    Address addr(ptrReg, 0);
-    loadSimd(type, numElems, addr, out);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
 {
     const MAsmJSLoadHeap *mir = ins->mir();
     Scalar::Type accessType = mir->accessType();
+
+    if (Scalar::isSimdType(accessType))
+        return emitSimdLoad(ins);
+
     const LAllocation *ptr = ins->ptr();
     const LDefinition *out = ins->output();
-
-    if (Scalar::isSimdType(accessType)) {
-        return emitSimdLoad(accessType, mir->numSimdElems(), ptr, ToFloatRegister(out),
-                            mir->needsBoundsCheck(), gen->outOfBoundsLabel());
-    }
-
-    memoryBarrier(ins->mir()->barrierBefore());
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-    if (ptr->isConstant()) {
-        // The constant displacement still needs to be added to the as-yet-unknown
-        // base address of the heap. For now, embed the displacement as an
-        // immediate in the instruction. This displacement will fixed up when the
-        // base address is known during dynamic linking (AsmJSModule::initHeap).
-        PatchedAbsoluteAddress srcAddr((void *) ptr->toConstant()->toInt32());
-        loadAndNoteViewTypeElement(accessType, srcAddr, out);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
+    memoryBarrier(mir->barrierBefore());
+    OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
+        addOutOfLineCode(ool, mir);
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), ool->entry());
     }
 
-    Register ptrReg = ToRegister(ptr);
-    Address srcAddr(ptrReg, 0);
-
-    if (!mir->needsBoundsCheck()) {
-        loadAndNoteViewTypeElement(accessType, srcAddr, out);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
-    }
-
-    OutOfLineLoadTypedArrayOutOfBounds *ool =
-        new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
-    CodeOffsetLabel cmp = masm.cmp32WithPatch(ptrReg, Imm32(0));
-    addOutOfLineCode(ool, mir);
-    masm.j(Assembler::AboveOrEqual, ool->entry());
-
     uint32_t before = masm.size();
     load(accessType, srcAddr, out);
     uint32_t after = masm.size();
-    if (ool)
+    if (ool) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(ool->rejoin());
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, accessType, ToAnyRegister(out), cmp.offset()));
+    }
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 }
 
-template<typename T>
 void
-CodeGeneratorX86::store(Scalar::Type vt, const LAllocation *value, const T &dstAddr)
+CodeGeneratorX86::store(Scalar::Type accessType, const LAllocation *value, const Operand &dstAddr)
 {
-    switch (vt) {
+    switch (accessType) {
       case Scalar::Int8:
       case Scalar::Uint8Clamped:
       case Scalar::Uint8:        masm.movbWithPatch(ToRegister(value), dstAddr); break;
       case Scalar::Int16:
       case Scalar::Uint16:       masm.movwWithPatch(ToRegister(value), dstAddr); break;
       case Scalar::Int32:
       case Scalar::Uint32:       masm.movlWithPatch(ToRegister(value), dstAddr); break;
       case Scalar::Float32:      masm.vmovssWithPatch(ToFloatRegister(value), dstAddr); break;
       case Scalar::Float64:      masm.vmovsdWithPatch(ToFloatRegister(value), dstAddr); break;
       case Scalar::Float32x4:
       case Scalar::Int32x4:      MOZ_CRASH("SIMD stores should be handled in emitSimdStore");
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
 }
 
-template<typename T>
-void
-CodeGeneratorX86::storeAndNoteViewTypeElement(Scalar::Type vt, const LAllocation *value,
-                                              const T &dstAddr)
-{
-    uint32_t before = masm.size();
-    store(vt, value, dstAddr);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, vt));
-}
-
 void
 CodeGeneratorX86::visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic *ins)
 {
     MStoreTypedArrayElementStatic *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     Register ptr = ToRegister(ins->ptr());
     const LAllocation *value = ins->value();
     uint32_t offset = mir->offset();
 
     if (!mir->needsBoundsCheck()) {
-        Address dstAddr(ptr, int32_t(mir->base()) + int32_t(offset));
-        store(vt, value, dstAddr);
+        Operand dstAddr(ptr, int32_t(mir->base()) + int32_t(offset));
+        store(accessType, value, dstAddr);
         return;
     }
 
     MOZ_ASSERT(offset == 0);
     masm.cmpPtr(ptr, ImmWord(mir->length()));
     Label rejoin;
     masm.j(Assembler::AboveOrEqual, &rejoin);
 
-    Address dstAddr(ptr, (int32_t) mir->base());
-    store(vt, value, dstAddr);
+    Operand dstAddr(ptr, (int32_t) mir->base());
+    store(accessType, value, dstAddr);
     masm.bind(&rejoin);
 }
 
-template<typename T>
 void
-CodeGeneratorX86::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, T destAddr)
+CodeGeneratorX86::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in,
+                            const Operand &dstAddr)
 {
     switch (type) {
       case Scalar::Float32x4: {
         switch (numElems) {
           // In memory-to-register mode, movss zeroes out the high lanes.
-          case 1: masm.vmovssWithPatch(in, destAddr); break;
+          case 1: masm.vmovssWithPatch(in, dstAddr); break;
           // See comment above, which also applies to movsd.
-          case 2: masm.vmovsdWithPatch(in, destAddr); break;
-          case 4: masm.vmovupsWithPatch(in, destAddr); break;
+          case 2: masm.vmovsdWithPatch(in, dstAddr); break;
+          case 4: masm.vmovupsWithPatch(in, dstAddr); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
-          // In memory-to-register mode, movd zeroes destAddr the high lanes.
-          case 1: masm.vmovdWithPatch(in, destAddr); break;
+          // In memory-to-register mode, movd zeroes out the high lanes.
+          case 1: masm.vmovdWithPatch(in, dstAddr); break;
           // See comment above, which also applies to movsd.
-          case 2: masm.vmovqWithPatch(in, destAddr); break;
-          case 4: masm.vmovdquWithPatch(in, destAddr); break;
+          case 2: masm.vmovqWithPatch(in, dstAddr); break;
+          case 4: masm.vmovdquWithPatch(in, dstAddr); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
       case Scalar::Uint16:
@@ -612,224 +551,190 @@ CodeGeneratorX86::storeSimd(Scalar::Type
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
-CodeGeneratorX86::emitSimdStore(Scalar::Type type, unsigned numElems, FloatRegister in,
-                                const LAllocation *ptr, bool needsBoundsCheck /* = false */,
-                                Label *oobLabel /* = nullptr */)
+CodeGeneratorX86::emitSimdStore(LAsmJSStoreHeap *ins)
 {
-    if (ptr->isConstant()) {
-        MOZ_ASSERT(!needsBoundsCheck);
-
-        if (numElems == 3) {
-            MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
-
-            // Store XY
-            emitSimdStore(type, 2, in, ptr);
-
-            masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
+    const MAsmJSStoreHeap *mir = ins->mir();
+    Scalar::Type type = mir->accessType();
+    FloatRegister in = ToFloatRegister(ins->value());
+    const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-            // Store Z
-            // This add won't overflow, as we've checked that we have at least
-            // room for loading 4 elements during asm.js validation.
-            PatchedAbsoluteAddress dstAddr((void *) (ptr->toConstant()->toInt32() + 2 * sizeof(float)));
-            uint32_t before = masm.size();
-            storeSimd(type, 1, ScratchSimdReg, dstAddr);
-            uint32_t after = masm.size();
-            masm.append(AsmJSHeapAccess(before, after, 1, type));
-            return;
-        }
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
-        PatchedAbsoluteAddress dstAddr((void *) ptr->toConstant()->toInt32());
-        uint32_t before = masm.size();
-        storeSimd(type, numElems, in, dstAddr);
-        uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 3, type));
-        return;
-    }
-
-    Register ptrReg = ToRegister(ptr);
-    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (needsBoundsCheck) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, oobLabel); // Throws RangeError
-    }
-
-    uint32_t before = masm.size();
+    unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
+        Operand dstAddrZ =
+            ptr->isBogus()
+            ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
+            : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
+
         // Store XY
-        Address addr(ptrReg, 0);
-        before = masm.size();
-        storeSimd(type, 2, in, addr);
+        uint32_t before = masm.size();
+        storeSimd(type, 2, in, dstAddr);
         uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
         masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
 
         // Store Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
-        Address shiftedAddr(ptrReg, 2 * sizeof(float));
         before = masm.size();
-        storeSimd(type, 1, ScratchSimdReg, shiftedAddr);
+        storeSimd(type, 1, ScratchSimdReg, dstAddrZ);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 1, type));
-        return;
+        masm.append(AsmJSHeapAccess(before, after));
+    } else {
+        uint32_t before = masm.size();
+        storeSimd(type, numElems, in, dstAddr);
+        uint32_t after = masm.size();
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
     }
 
-    Address addr(ptrReg, 0);
-    storeSimd(type, numElems, in, addr);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
 {
-    MAsmJSStoreHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    const MAsmJSStoreHeap *mir = ins->mir();
+    Scalar::Type accessType = mir->accessType();
+
+    if (Scalar::isSimdType(accessType))
+        return emitSimdStore(ins);
+
     const LAllocation *value = ins->value();
     const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-    if (Scalar::isSimdType(vt)) {
-        return emitSimdStore(vt, mir->numSimdElems(), ToFloatRegister(value), ptr,
-                             mir->needsBoundsCheck(), gen->outOfBoundsLabel());
+    memoryBarrier(mir->barrierBefore());
+    Label *rejoin = nullptr;
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        rejoin = alloc().lifoAlloc()->new_<Label>();
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), rejoin);
     }
 
-    memoryBarrier(ins->mir()->barrierBefore());
-
-    if (ptr->isConstant()) {
-        // The constant displacement still needs to be added to the as-yet-unknown
-        // base address of the heap. For now, embed the displacement as an
-        // immediate in the instruction. This displacement will fixed up when the
-        // base address is known during dynamic linking (AsmJSModule::initHeap).
-        PatchedAbsoluteAddress dstAddr((void *) ptr->toConstant()->toInt32());
-        storeAndNoteViewTypeElement(vt, value, dstAddr);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
+    uint32_t before = masm.size();
+    store(accessType, value, dstAddr);
+    uint32_t after = masm.size();
+    if (rejoin) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
+        masm.bind(rejoin);
     }
-
-    Register ptrReg = ToRegister(ptr);
-    Address dstAddr(ptrReg, 0);
-
-    if (!mir->needsBoundsCheck()) {
-        storeAndNoteViewTypeElement(vt, value, dstAddr);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
-    }
-
-    CodeOffsetLabel cmp = masm.cmp32WithPatch(ptrReg, Imm32(0));
-    Label rejoin;
-    masm.j(Assembler::AboveOrEqual, &rejoin);
-
-    uint32_t before = masm.size();
-    store(vt, value, dstAddr);
-    uint32_t after = masm.size();
-    masm.bind(&rejoin);
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, vt, cmp.offset()));
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX86::visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap *ins)
 {
     MAsmJSCompareExchangeHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
     Register oldval = ToRegister(ins->oldValue());
     Register newval = ToRegister(ins->newValue());
 
     MOZ_ASSERT(ptr->isRegister());
     // Set up the offset within the heap in the pointer reg.
     Register ptrReg = ToRegister(ptr);
 
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
 
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out,out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
 
     // Add in the actual heap pointer explicitly, to avoid opening up
     // the abstraction that is compareExchangeToTypedIntArray at this time.
     uint32_t before = masm.size();
-    masm.addlWithPatch(Imm32(0), ptrReg);
+    masm.addlWithPatch(Imm32(mir->offset()), ptrReg);
     uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, mir->accessType(), maybeCmpOffset));
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
-    Address memAddr(ToRegister(ptr), 0);
-    masm.compareExchangeToTypedIntArray(vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+    Address memAddr(ToRegister(ptr), mir->offset());
+    masm.compareExchangeToTypedIntArray(accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         memAddr,
                                         oldval,
                                         newval,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     if (rejoin.used())
         masm.bind(&rejoin);
 }
 
 void
 CodeGeneratorX86::visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap *ins)
 {
     MAsmJSAtomicBinopHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
     Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
     const LAllocation* value = ins->value();
     AtomicOp op = mir->operation();
 
     MOZ_ASSERT(ptr->isRegister());
     // Set up the offset within the heap in the pointer reg.
     Register ptrReg = ToRegister(ptr);
 
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
 
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out,out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
 
     // Add in the actual heap pointer explicitly, to avoid opening up
     // the abstraction that is atomicBinopToTypedIntArray at this time.
     uint32_t before = masm.size();
-    masm.addlWithPatch(Imm32(0), ptrReg);
+    masm.addlWithPatch(Imm32(mir->offset()), ptrReg);
     uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, mir->accessType(), maybeCmpOffset));
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
-    Address memAddr(ptrReg, 0);
+    Address memAddr(ptrReg, mir->offset());
     if (value->isConstant()) {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         Imm32(ToInt32(value)),
                                         memAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     } else {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         ToRegister(value),
                                         memAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     }
     if (rejoin.used())
         masm.bind(&rejoin);
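
As in the compare-exchange case, the pointer register is turned into a real heap
address by an add whose 32-bit immediate is rewritten once the heap is attached, so
the generic atomicBinopToTypedIntArray helper can keep taking a plain Address. A
rough sketch of what rewriting such an embedded immediate looks like (the helper is
illustrative and assumes a little-endian host; the real work goes through the
module's x86 patching code, not this function):

    #include <cstdint>
    #include <cstring>

    // Overwrite a 32-bit immediate embedded in x86 code with a new value.
    // x86 stores immediates little-endian, so a plain memcpy suffices on a
    // little-endian host.
    static void patchImm32(uint8_t *immPtr, uint32_t newValue)
    {
        std::memcpy(immPtr, &newValue, sizeof(newValue));
    }
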
--- a/js/src/jit/x86/CodeGenerator-x86.h
+++ b/js/src/jit/x86/CodeGenerator-x86.h
@@ -23,35 +23,24 @@ class CodeGeneratorX86 : public CodeGene
         return this;
     }
 
   protected:
     ValueOperand ToValue(LInstruction *ins, size_t pos);
     ValueOperand ToOutValue(LInstruction *ins);
     ValueOperand ToTempValue(LInstruction *ins, size_t pos);
 
-    template<typename T>
-    void loadAndNoteViewTypeElement(Scalar::Type vt, const T &srcAddr, const LDefinition *out);
-    template<typename T>
-    void load(Scalar::Type vt, const T &srcAddr, const LDefinition *out);
-    template<typename T>
-    void storeAndNoteViewTypeElement(Scalar::Type vt, const LAllocation *value, const T &dstAddr);
-    template<typename T>
-    void store(Scalar::Type vt, const LAllocation *value, const T &dstAddr);
+    void load(Scalar::Type vt, const Operand &srcAddr, const LDefinition *out);
+    void store(Scalar::Type vt, const LAllocation *value, const Operand &dstAddr);
 
-    template<typename T>
-    void loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, FloatRegister out);
-    void emitSimdLoad(Scalar::Type type, unsigned numElems, const LAllocation *ptr,
-                      FloatRegister out, bool needsBoundsCheck = false, Label *oobLabel = nullptr);
+    void loadSimd(Scalar::Type type, unsigned numElems, const Operand &srcAddr, FloatRegister out);
+    void emitSimdLoad(LAsmJSLoadHeap *ins);
 
-    template<typename T>
-    void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, T destAddr);
-    void emitSimdStore(Scalar::Type type, unsigned numElems, FloatRegister in,
-                       const LAllocation *ptr, bool needsBoundsCheck = false,
-                       Label *oobLabel = nullptr);
+    void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand &dstAddr);
+    void emitSimdStore(LAsmJSStoreHeap *ins);
 
     void memoryBarrier(MemoryBarrierBits barrier);
 
   public:
     CodeGeneratorX86(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm);
 
   public:
     void visitBox(LBox *box);
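
The header drops the address-type template parameters: with constant offsets folded
into the addressing mode, both the register-pointer and constant-pointer cases can
be described by a single operand carrying an optional base register plus a 32-bit
displacement, so one signature per helper is enough. A toy stand-in illustrating
that unification (this is not the real jit::Operand, just the shape of the idea):

    #include <cstdint>
    #include <cstdio>

    // Toy x86-style memory operand: optional base register plus a displacement.
    struct MemOperand {
        int     baseReg;   // -1 means no base register (displacement-only address)
        int32_t disp;
    };

    // One helper covers both cases where the old code needed a template
    // instantiated per address type.
    static void describeLoad(const MemOperand &src)
    {
        if (src.baseReg < 0)
            std::printf("load [%d]\n", src.disp);
        else
            std::printf("load [r%d + %d]\n", src.baseReg, src.disp);
    }

    int main()
    {
        describeLoad(MemOperand{0, 16});     // pointer in a register, constant offset 16
        describeLoad(MemOperand{-1, 4096});  // constant pointer folded into the displacement
        return 0;
    }
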
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -197,79 +197,57 @@ LIRGeneratorX86::visitAsmJSUnsignedToFlo
     LAsmJSUInt32ToFloat32 *lir = new(alloc()) LAsmJSUInt32ToFloat32(useRegisterAtStart(ins->input()), temp());
     define(lir, ins);
 }
 
 void
 LIRGeneratorX86::visitAsmJSLoadHeap(MAsmJSLoadHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
-    LAllocation ptrAlloc;
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    // For the x86 it is best to keep the 'ptr' in a register if a bounds check is needed.
-    if (ptr->isConstant() && !ins->needsBoundsCheck()) {
-        // A bounds check is only skipped for a positive index.
-        MOZ_ASSERT(ptr->toConstant()->value().toInt32() >= 0);
-        ptrAlloc = LAllocation(ptr->toConstant()->vp());
-    } else {
-        ptrAlloc = useRegisterAtStart(ptr);
-    }
-    LAsmJSLoadHeap *lir = new(alloc()) LAsmJSLoadHeap(ptrAlloc);
-    define(lir, ins);
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
+                           ? useRegisterAtStart(ptr)
+                           : useRegisterOrZeroAtStart(ptr);
+
+    define(new(alloc()) LAsmJSLoadHeap(ptrAlloc), ins);
 }
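
The new lowering policy is simple: if a bounds-check branch will be emitted, the
pointer must live in a register so the branch can compare it against the patched
limit; otherwise a constant-zero pointer may stay an immediate and the whole
address collapses into the displacement. A minimal sketch of that decision, with
hypothetical types standing in for the LIR allocation machinery:

    // Hypothetical stand-ins; the real code uses LAllocation and
    // useRegisterAtStart / useRegisterOrZeroAtStart.
    enum class PtrAlloc { Register, ConstantZero };

    static PtrAlloc choosePtrAlloc(bool needsBoundsCheckBranch, bool ptrIsConstantZero)
    {
        // A branch needs a register to compare against; a zero pointer with no
        // branch can be folded away entirely.
        if (needsBoundsCheckBranch || !ptrIsConstantZero)
            return PtrAlloc::Register;
        return PtrAlloc::ConstantZero;
    }
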
 
 void
 LIRGeneratorX86::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
-    LAsmJSStoreHeap *lir;
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    if (ptr->isConstant() && !ins->needsBoundsCheck()) {
-        MOZ_ASSERT(ptr->toConstant()->value().toInt32() >= 0);
-        LAllocation ptrAlloc = LAllocation(ptr->toConstant()->vp());
-        switch (ins->accessType()) {
-          case Scalar::Int8: case Scalar::Uint8:
-            // See comment below.
-            lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useFixed(ins->value(), eax));
-            break;
-          case Scalar::Int16: case Scalar::Uint16:
-          case Scalar::Int32: case Scalar::Uint32:
-          case Scalar::Float32: case Scalar::Float64:
-          case Scalar::Float32x4: case Scalar::Int32x4:
-            // See comment below.
-            lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useRegisterAtStart(ins->value()));
-            break;
-          case Scalar::Uint8Clamped:
-          case Scalar::MaxTypedArrayViewType:
-            MOZ_CRASH("unexpected array type");
-        }
-        add(lir, ins);
-        return;
-    }
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
+                           ? useRegisterAtStart(ptr)
+                           : useRegisterOrZeroAtStart(ptr);
 
+    LAsmJSStoreHeap *lir = nullptr;
     switch (ins->accessType()) {
       case Scalar::Int8: case Scalar::Uint8:
         // See comment for LIRGeneratorX86::useByteOpRegister.
-        lir = new(alloc()) LAsmJSStoreHeap(useRegister(ins->ptr()), useFixed(ins->value(), eax));
+        lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useFixed(ins->value(), eax));
         break;
       case Scalar::Int16: case Scalar::Uint16:
       case Scalar::Int32: case Scalar::Uint32:
       case Scalar::Float32: case Scalar::Float64:
       case Scalar::Float32x4: case Scalar::Int32x4:
        // For now, don't allow constant values. The immediate operand
        // affects instruction layout, which affects patching.
-        lir = new(alloc()) LAsmJSStoreHeap(useRegisterAtStart(ptr), useRegisterAtStart(ins->value()));
+        lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useRegisterAtStart(ins->value()));
         break;
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
-
     add(lir, ins);
 }
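
The remaining restriction, noted in the comment above, is that store values may not
be constants: an x86 store with an immediate source is encoded differently (and is
longer) than a register store, and the heap-access machinery that later patches the
memory operand and records each instruction's extent relies on a predictable layout.
A standalone illustration of the size difference between the two hand-assembled
forms (the byte sequences are ordinary x86 encodings, not something this patch
emits):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        // mov dword ptr [eax], 0x12345678  ->  C7 00 78 56 34 12  (6 bytes)
        const uint8_t movImmToMem[] = { 0xC7, 0x00, 0x78, 0x56, 0x34, 0x12 };
        // mov dword ptr [eax], ecx         ->  89 08              (2 bytes)
        const uint8_t movRegToMem[] = { 0x89, 0x08 };

        // The immediate form is three times longer and places its bytes
        // differently, which is why the lowering keeps store values in registers.
        assert(sizeof(movImmToMem) == 6 && sizeof(movRegToMem) == 2);
        return 0;
    }
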
 
 void
 LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins)
 {
     // The code generated for StoreTypedArrayElementStatic is identical to that
     // for AsmJSStoreHeap, and the same concerns apply.