Bug 986981 - OdinMonkey: Optimize addresses with constant offsets on x86 and x64 r=luke
author Dan Gohman <sunfish@mozilla.com>
date Tue, 24 Feb 2015 07:50:14 -0800
changeset 259361 11a0fa1a0122a33151329be8353f57d19aa081ae
parent 259360 12ea42444af916a344a3975dfbd1a25e6418da79
child 259362 a8f5d952aff54a740ea82680b3ab816e0d23aea6
push id 4718
push user raliiev@mozilla.com
push date Mon, 11 May 2015 18:39:53 +0000
reviewers luke
bugs 986981
milestone 39.0a1
Bug 986981 - OdinMonkey: Optimize addresses with constant offsets on x86 and x64 r=luke
js/src/asmjs/AsmJSModule.cpp
js/src/asmjs/AsmJSSignalHandlers.cpp
js/src/asmjs/AsmJSValidate.h
js/src/jit-test/tests/asm.js/testAddressErrors.js
js/src/jit-test/tests/asm.js/testZOOB.js
js/src/jit/CodeGenerator.cpp
js/src/jit/Disassembler.h
js/src/jit/EffectiveAddressAnalysis.cpp
js/src/jit/MIR.h
js/src/jit/MIRGenerator.h
js/src/jit/MIRGraph.cpp
js/src/jit/arm/Architecture-arm.h
js/src/jit/mips/Architecture-mips.h
js/src/jit/shared/Assembler-shared.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/CodeGenerator-shared-inl.h
js/src/jit/shared/CodeGenerator-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/Lowering-shared-inl.h
js/src/jit/shared/Lowering-shared.h
js/src/jit/x64/Architecture-x64.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/Lowering-x64.cpp
js/src/jit/x86/Architecture-x86.h
js/src/jit/x86/Assembler-x86.h
js/src/jit/x86/CodeGenerator-x86.cpp
js/src/jit/x86/CodeGenerator-x86.h
js/src/jit/x86/Lowering-x86.cpp
--- a/js/src/asmjs/AsmJSModule.cpp
+++ b/js/src/asmjs/AsmJSModule.cpp
@@ -245,17 +245,17 @@ AsmJSModule::lookupCodeRange(void *pc) c
     return &codeRanges_[match];
 }
 
 struct HeapAccessOffset
 {
     const AsmJSHeapAccessVector &accesses;
     explicit HeapAccessOffset(const AsmJSHeapAccessVector &accesses) : accesses(accesses) {}
     uintptr_t operator[](size_t index) const {
-        return accesses[index].offset();
+        return accesses[index].insnOffset();
     }
 };
 
 const AsmJSHeapAccess *
 AsmJSModule::lookupHeapAccess(void *pc) const
 {
     MOZ_ASSERT(isFinished());
     MOZ_ASSERT(containsFunctionPC(pc));
@@ -323,17 +323,17 @@ AsmJSModule::finish(ExclusiveContext *cx
     // Call-site metadata used for stack unwinding.
     callSites_ = masm.extractCallSites();
 
 #if defined(JS_CODEGEN_ARM)
     // ARM requires the offsets to be updated.
     pod.functionBytes_ = masm.actualOffset(pod.functionBytes_);
     for (size_t i = 0; i < heapAccesses_.length(); i++) {
         AsmJSHeapAccess &a = heapAccesses_[i];
-        a.setOffset(masm.actualOffset(a.offset()));
+        a.setInsnOffset(masm.actualOffset(a.insnOffset()));
     }
     for (unsigned i = 0; i < numExportedFunctions(); i++) {
         if (!exportedFunction(i).isChangeHeap())
             exportedFunction(i).updateCodeOffset(masm);
     }
     for (unsigned i = 0; i < numExits(); i++)
         exit(i).updateOffsets(masm);
     for (size_t i = 0; i < callSites_.length(); i++) {
@@ -769,96 +769,97 @@ AsmJSModule::staticallyLink(ExclusiveCon
         exitDatum.exit = interpExitTrampoline(exits_[i]);
         exitDatum.fun = nullptr;
         exitDatum.baselineScript = nullptr;
     }
 
     MOZ_ASSERT(isStaticallyLinked());
 }
 
-#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
-static size_t
-ByteSizeOfHeapAccess(const jit::AsmJSHeapAccess access)
-{
-    Scalar::Type type = access.type();
-    if (Scalar::isSimdType(type))
-        return Scalar::scalarByteSize(type) * access.numSimdElems();
-    return TypedArrayElemSize(type);
-}
-#endif
 void
 AsmJSModule::initHeap(Handle<ArrayBufferObjectMaybeShared *> heap, JSContext *cx)
 {
     MOZ_ASSERT_IF(heap->is<ArrayBufferObject>(), heap->as<ArrayBufferObject>().isAsmJS());
     MOZ_ASSERT(IsValidAsmJSHeapLength(heap->byteLength()));
     MOZ_ASSERT(dynamicallyLinked_);
     MOZ_ASSERT(!maybeHeap_);
 
     maybeHeap_ = heap;
     heapDatum() = heap->dataPointer();
 
 #if defined(JS_CODEGEN_X86)
     uint8_t *heapOffset = heap->dataPointer();
+    uint32_t heapLength = heap->byteLength();
     for (unsigned i = 0; i < heapAccesses_.length(); i++) {
         const jit::AsmJSHeapAccess &access = heapAccesses_[i];
-        if (access.hasLengthCheck()) {
-            // An access is out-of-bounds iff
-            //      ptr + data-type-byte-size > heapLength
-            // i.e. ptr >= heapLength + 1 - data-type-byte-size
-            // (Note that we need >= as this is what codegen uses.)
-            size_t scalarByteSize = ByteSizeOfHeapAccess(access);
-            X86Encoding::SetPointer(access.patchLengthAt(code_),
-                                    (void*)(heap->byteLength() + 1 - scalarByteSize));
-        }
-        void *addr = access.patchOffsetAt(code_);
+        // An access is out-of-bounds iff
+        //      ptr + offset + data-type-byte-size > heapLength
+        // i.e. ptr > heapLength - data-type-byte-size - offset.
+        // data-type-byte-size and offset are already included in the addend
+        // so we just have to add the heap length here.
+        if (access.hasLengthCheck())
+            X86Encoding::AddInt32(access.patchLengthAt(code_), heapLength);
+        void *addr = access.patchHeapPtrImmAt(code_);
         uint32_t disp = reinterpret_cast<uint32_t>(X86Encoding::GetPointer(addr));
         MOZ_ASSERT(disp <= INT32_MAX);
         X86Encoding::SetPointer(addr, (void *)(heapOffset + disp));
     }
 #elif defined(JS_CODEGEN_X64)
     // Even with signal handling being used for most bounds checks, there may be
     // atomic operations that depend on explicit checks.
     //
     // If we have any explicit bounds checks, we need to patch the heap length
     // checks at the right places. All accesses that have been recorded are the
     // only ones that need bound checks (see also
     // CodeGeneratorX64::visitAsmJS{Load,Store,CompareExchange,AtomicBinop}Heap)
-    int32_t heapLength = int32_t(intptr_t(heap->byteLength()));
+    uint32_t heapLength = heap->byteLength();
     for (size_t i = 0; i < heapAccesses_.length(); i++) {
         const jit::AsmJSHeapAccess &access = heapAccesses_[i];
-        if (access.hasLengthCheck()) {
-            // See comment above for x86 codegen.
-            size_t scalarByteSize = ByteSizeOfHeapAccess(access);
-            X86Encoding::SetInt32(access.patchLengthAt(code_), heapLength + 1 - scalarByteSize);
-        }
+        // See comment above for x86 codegen.
+        if (access.hasLengthCheck())
+            X86Encoding::AddInt32(access.patchLengthAt(code_), heapLength);
     }
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
     uint32_t heapLength = heap->byteLength();
     for (unsigned i = 0; i < heapAccesses_.length(); i++) {
         jit::Assembler::UpdateBoundsCheck(heapLength,
-                                          (jit::Instruction*)(heapAccesses_[i].offset() + code_));
+                                          (jit::Instruction*)(heapAccesses_[i].insnOffset() + code_));
     }
 #endif
 }
 
 void
 AsmJSModule::restoreHeapToInitialState(ArrayBufferObjectMaybeShared *maybePrevBuffer)
 {
 #if defined(JS_CODEGEN_X86)
     if (maybePrevBuffer) {
         // Subtract out the base-pointer added by AsmJSModule::initHeap.
         uint8_t *ptrBase = maybePrevBuffer->dataPointer();
+        uint32_t heapLength = maybePrevBuffer->byteLength();
         for (unsigned i = 0; i < heapAccesses_.length(); i++) {
             const jit::AsmJSHeapAccess &access = heapAccesses_[i];
-            void *addr = access.patchOffsetAt(code_);
+            // Subtract the heap length back out, leaving the raw displacement in place.
+            if (access.hasLengthCheck())
+                X86Encoding::AddInt32(access.patchLengthAt(code_), -heapLength);
+            void *addr = access.patchHeapPtrImmAt(code_);
             uint8_t *ptr = reinterpret_cast<uint8_t*>(X86Encoding::GetPointer(addr));
             MOZ_ASSERT(ptr >= ptrBase);
             X86Encoding::SetPointer(addr, (void *)(ptr - ptrBase));
         }
     }
+#elif defined(JS_CODEGEN_X64)
+    if (maybePrevBuffer) {
+        uint32_t heapLength = maybePrevBuffer->byteLength();
+        for (unsigned i = 0; i < heapAccesses_.length(); i++) {
+            const jit::AsmJSHeapAccess &access = heapAccesses_[i];
+            // See comment above for x86 codegen.
+            if (access.hasLengthCheck())
+                X86Encoding::AddInt32(access.patchLengthAt(code_), -heapLength);
+        }
+    }
 #endif
 
     maybeHeap_ = nullptr;
     heapDatum() = nullptr;
 }
 
 void
 AsmJSModule::restoreToInitialState(ArrayBufferObjectMaybeShared *maybePrevBuffer,
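(Aside.) The comments in initHeap above describe the new bounds-check arithmetic: the access's byte size and any folded constant offset are baked into the compare immediate at compile time, and linking/unlinking the heap merely adds or subtracts the heap length in place. Below is a minimal standalone sketch of that model; PatchedLengthCheck and its members are hypothetical illustration, not SpiderMonkey API, and the sketch models the comparison rather than the exact emitted x86/x64 code.

#include <cstdint>

struct PatchedLengthCheck {
    // Baked in at compile time: -(constantOffset + byteSizeOfAccess).
    int32_t imm;

    // AsmJSModule::initHeap() conceptually does this via X86Encoding::AddInt32.
    void link(uint32_t heapLength)   { imm += int32_t(heapLength); }
    // AsmJSModule::restoreHeapToInitialState() undoes it, leaving the raw addend.
    void unlink(uint32_t heapLength) { imm -= int32_t(heapLength); }

    // After linking, imm == heapLength - byteSize - constantOffset, so the
    // access "ptr + constantOffset, byteSize bytes" is out of bounds iff:
    bool outOfBounds(uint32_t ptr) const { return int64_t(ptr) > int64_t(imm); }
};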
--- a/js/src/asmjs/AsmJSSignalHandlers.cpp
+++ b/js/src/asmjs/AsmJSSignalHandlers.cpp
@@ -17,16 +17,17 @@
  */
 
 #include "asmjs/AsmJSSignalHandlers.h"
 
 #include "mozilla/DebugOnly.h"
 #include "mozilla/PodOperations.h"
 
 #include "asmjs/AsmJSModule.h"
+#include "jit/Disassembler.h"
 #include "vm/Runtime.h"
 
 using namespace js;
 using namespace js::jit;
 
 using JS::GenericNaN;
 using mozilla::DebugOnly;
 using mozilla::PodArrayZero;
@@ -303,16 +304,38 @@ enum { REG_EIP = 14 };
 #  endif  // defined(__i386__)
 # endif  // !defined(__BIONIC_HAVE_UCONTEXT_T)
 #endif // defined(ANDROID)
 
 #if !defined(XP_WIN)
 # define CONTEXT ucontext_t
 #endif
 
+// Define a context type for use in the emulator code. This is usually just
+// the same as CONTEXT, but on Mac we use a different structure since we call
+// into the emulator code from a Mach exception handler rather than a
+// sigaction-style signal handler.
+#if defined(XP_MACOSX)
+# if defined(JS_CODEGEN_X64)
+struct macos_x64_context {
+    x86_thread_state64_t thread;
+    x86_float_state64_t float_;
+};
+#  define EMULATOR_CONTEXT macos_x64_context
+# else
+struct macos_x86_context {
+    x86_thread_state_t thread;
+    x86_float_state_t float_;
+};
+#  define EMULATOR_CONTEXT macos_x86_context
+# endif
+#else
+# define EMULATOR_CONTEXT CONTEXT
+#endif
+
 #if defined(JS_CPU_X64)
 # define PC_sig(p) RIP_sig(p)
 #elif defined(JS_CPU_X86)
 # define PC_sig(p) EIP_sig(p)
 #elif defined(JS_CPU_ARM)
 # define PC_sig(p) R15_sig(p)
 #elif defined(JS_CPU_MIPS)
 # define PC_sig(p) EPC_sig(p)
@@ -324,106 +347,378 @@ ContextToPC(CONTEXT *context)
 #ifdef JS_CODEGEN_NONE
     MOZ_CRASH();
 #else
      return reinterpret_cast<uint8_t**>(&PC_sig(context));
 #endif
 }
 
 #if defined(JS_CODEGEN_X64)
-template <class T>
-static void
-SetXMMRegToNaN(Scalar::Type viewType, T *xmm_reg)
+MOZ_COLD static void
+SetFPRegToNaN(size_t size, void *fp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
+    memset(fp_reg, 0, Simd128DataSize);
+    switch (size) {
+      case 4: *static_cast<float *>(fp_reg) = GenericNaN(); break;
+      case 8: *static_cast<double *>(fp_reg) = GenericNaN(); break;
+      default:
+        // All SIMD accesses throw on OOB.
+        MOZ_CRASH("unexpected size in SetFPRegToNaN");
+    }
+}
+
+MOZ_COLD static void
+SetGPRegToZero(void *gp_reg)
+{
+    memset(gp_reg, 0, sizeof(intptr_t));
+}
+
+MOZ_COLD static void
+SetFPRegToLoadedValue(const void *addr, size_t size, void *fp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
+    memset(fp_reg, 0, Simd128DataSize);
+    memcpy(fp_reg, addr, size);
+}
+
+MOZ_COLD static void
+SetGPRegToLoadedValue(const void *addr, size_t size, void *gp_reg)
 {
-    switch (viewType) {
-      case Scalar::Float32: {
-        JS_STATIC_ASSERT(sizeof(T) == 4 * sizeof(float));
-        float *floats = reinterpret_cast<float*>(xmm_reg);
-        floats[0] = GenericNaN();
-        floats[1] = 0;
-        floats[2] = 0;
-        floats[3] = 0;
-        break;
-      }
-      case Scalar::Float64: {
-        JS_STATIC_ASSERT(sizeof(T) == 2 * sizeof(double));
-        double *dbls = reinterpret_cast<double*>(xmm_reg);
-        dbls[0] = GenericNaN();
-        dbls[1] = 0;
-        break;
-      }
-      // Float32x4 and Int32x4 out of bounds are handled with the OutOfBounds stub.
-      case Scalar::Float32x4:
-      case Scalar::Int32x4:
-      case Scalar::Int8:
-      case Scalar::Uint8:
-      case Scalar::Int16:
-      case Scalar::Uint16:
-      case Scalar::Int32:
-      case Scalar::Uint32:
-      case Scalar::Uint8Clamped:
-      case Scalar::MaxTypedArrayViewType:
-        MOZ_CRASH("unexpected type in SetXMMRegToNaN");
-    }
+    MOZ_RELEASE_ASSERT(size <= sizeof(void *));
+    memset(gp_reg, 0, sizeof(void *));
+    memcpy(gp_reg, addr, size);
+}
+
+MOZ_COLD static void
+SetGPRegToLoadedValueSext32(const void *addr, size_t size, void *gp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= sizeof(int32_t));
+    int8_t msb = static_cast<const int8_t *>(addr)[size - 1];
+    memset(gp_reg, 0, sizeof(void *));
+    memset(gp_reg, msb >> 7, sizeof(int32_t));
+    memcpy(gp_reg, addr, size);
+}
+
+MOZ_COLD static void
+StoreValueFromFPReg(void *addr, size_t size, const void *fp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= Simd128DataSize);
+    memcpy(addr, fp_reg, size);
+}
+
+MOZ_COLD static void
+StoreValueFromGPReg(void *addr, size_t size, const void *gp_reg)
+{
+    MOZ_RELEASE_ASSERT(size <= sizeof(void *));
+    memcpy(addr, gp_reg, size);
+}
+
+MOZ_COLD static void
+StoreValueFromGPImm(void *addr, size_t size, int32_t imm)
+{
+    MOZ_RELEASE_ASSERT(size <= sizeof(imm));
+    memcpy(addr, &imm, size);
 }
 
 # if !defined(XP_MACOSX)
-static void
-SetRegisterToCoercedUndefined(CONTEXT *context, Scalar::Type viewType, AnyRegister reg)
+MOZ_COLD static void *
+AddressOfFPRegisterSlot(CONTEXT *context, FloatRegisters::Code code)
+{
+    switch (code) {
+      case X86Encoding::xmm0:  return &XMM_sig(context, 0);
+      case X86Encoding::xmm1:  return &XMM_sig(context, 1);
+      case X86Encoding::xmm2:  return &XMM_sig(context, 2);
+      case X86Encoding::xmm3:  return &XMM_sig(context, 3);
+      case X86Encoding::xmm4:  return &XMM_sig(context, 4);
+      case X86Encoding::xmm5:  return &XMM_sig(context, 5);
+      case X86Encoding::xmm6:  return &XMM_sig(context, 6);
+      case X86Encoding::xmm7:  return &XMM_sig(context, 7);
+      case X86Encoding::xmm8:  return &XMM_sig(context, 8);
+      case X86Encoding::xmm9:  return &XMM_sig(context, 9);
+      case X86Encoding::xmm10: return &XMM_sig(context, 10);
+      case X86Encoding::xmm11: return &XMM_sig(context, 11);
+      case X86Encoding::xmm12: return &XMM_sig(context, 12);
+      case X86Encoding::xmm13: return &XMM_sig(context, 13);
+      case X86Encoding::xmm14: return &XMM_sig(context, 14);
+      case X86Encoding::xmm15: return &XMM_sig(context, 15);
+      default: break;
+    }
+    MOZ_CRASH();
+}
+
+MOZ_COLD static void *
+AddressOfGPRegisterSlot(EMULATOR_CONTEXT *context, Registers::Code code)
 {
-    if (reg.isFloat()) {
-        switch (reg.fpu().code()) {
-          case X86Encoding::xmm0:  SetXMMRegToNaN(viewType, &XMM_sig(context, 0)); break;
-          case X86Encoding::xmm1:  SetXMMRegToNaN(viewType, &XMM_sig(context, 1)); break;
-          case X86Encoding::xmm2:  SetXMMRegToNaN(viewType, &XMM_sig(context, 2)); break;
-          case X86Encoding::xmm3:  SetXMMRegToNaN(viewType, &XMM_sig(context, 3)); break;
-          case X86Encoding::xmm4:  SetXMMRegToNaN(viewType, &XMM_sig(context, 4)); break;
-          case X86Encoding::xmm5:  SetXMMRegToNaN(viewType, &XMM_sig(context, 5)); break;
-          case X86Encoding::xmm6:  SetXMMRegToNaN(viewType, &XMM_sig(context, 6)); break;
-          case X86Encoding::xmm7:  SetXMMRegToNaN(viewType, &XMM_sig(context, 7)); break;
-          case X86Encoding::xmm8:  SetXMMRegToNaN(viewType, &XMM_sig(context, 8)); break;
-          case X86Encoding::xmm9:  SetXMMRegToNaN(viewType, &XMM_sig(context, 9)); break;
-          case X86Encoding::xmm10: SetXMMRegToNaN(viewType, &XMM_sig(context, 10)); break;
-          case X86Encoding::xmm11: SetXMMRegToNaN(viewType, &XMM_sig(context, 11)); break;
-          case X86Encoding::xmm12: SetXMMRegToNaN(viewType, &XMM_sig(context, 12)); break;
-          case X86Encoding::xmm13: SetXMMRegToNaN(viewType, &XMM_sig(context, 13)); break;
-          case X86Encoding::xmm14: SetXMMRegToNaN(viewType, &XMM_sig(context, 14)); break;
-          case X86Encoding::xmm15: SetXMMRegToNaN(viewType, &XMM_sig(context, 15)); break;
-          default: MOZ_CRASH();
-        }
-    } else {
-        switch (reg.gpr().code()) {
-          case X86Encoding::rax: RAX_sig(context) = 0; break;
-          case X86Encoding::rcx: RCX_sig(context) = 0; break;
-          case X86Encoding::rdx: RDX_sig(context) = 0; break;
-          case X86Encoding::rbx: RBX_sig(context) = 0; break;
-          case X86Encoding::rsp: RSP_sig(context) = 0; break;
-          case X86Encoding::rbp: RBP_sig(context) = 0; break;
-          case X86Encoding::rsi: RSI_sig(context) = 0; break;
-          case X86Encoding::rdi: RDI_sig(context) = 0; break;
-          case X86Encoding::r8:  R8_sig(context)  = 0; break;
-          case X86Encoding::r9:  R9_sig(context)  = 0; break;
-          case X86Encoding::r10: R10_sig(context) = 0; break;
-          case X86Encoding::r11: R11_sig(context) = 0; break;
-          case X86Encoding::r12: R12_sig(context) = 0; break;
-          case X86Encoding::r13: R13_sig(context) = 0; break;
-          case X86Encoding::r14: R14_sig(context) = 0; break;
-          case X86Encoding::r15: R15_sig(context) = 0; break;
-          default: MOZ_CRASH();
-        }
+    switch (code) {
+      case X86Encoding::rax: return &RAX_sig(context);
+      case X86Encoding::rcx: return &RCX_sig(context);
+      case X86Encoding::rdx: return &RDX_sig(context);
+      case X86Encoding::rbx: return &RBX_sig(context);
+      case X86Encoding::rsp: return &RSP_sig(context);
+      case X86Encoding::rbp: return &RBP_sig(context);
+      case X86Encoding::rsi: return &RSI_sig(context);
+      case X86Encoding::rdi: return &RDI_sig(context);
+      case X86Encoding::r8:  return &R8_sig(context);
+      case X86Encoding::r9:  return &R9_sig(context);
+      case X86Encoding::r10: return &R10_sig(context);
+      case X86Encoding::r11: return &R11_sig(context);
+      case X86Encoding::r12: return &R12_sig(context);
+      case X86Encoding::r13: return &R13_sig(context);
+      case X86Encoding::r14: return &R14_sig(context);
+      case X86Encoding::r15: return &R15_sig(context);
+      default: break;
     }
+    MOZ_CRASH();
+}
+# else
+MOZ_COLD static void *
+AddressOfFPRegisterSlot(EMULATOR_CONTEXT *context, FloatRegisters::Code code)
+{
+    switch (code) {
+      case X86Encoding::xmm0:  return &context->float_.__fpu_xmm0;
+      case X86Encoding::xmm1:  return &context->float_.__fpu_xmm1;
+      case X86Encoding::xmm2:  return &context->float_.__fpu_xmm2;
+      case X86Encoding::xmm3:  return &context->float_.__fpu_xmm3;
+      case X86Encoding::xmm4:  return &context->float_.__fpu_xmm4;
+      case X86Encoding::xmm5:  return &context->float_.__fpu_xmm5;
+      case X86Encoding::xmm6:  return &context->float_.__fpu_xmm6;
+      case X86Encoding::xmm7:  return &context->float_.__fpu_xmm7;
+      case X86Encoding::xmm8:  return &context->float_.__fpu_xmm8;
+      case X86Encoding::xmm9:  return &context->float_.__fpu_xmm9;
+      case X86Encoding::xmm10: return &context->float_.__fpu_xmm10;
+      case X86Encoding::xmm11: return &context->float_.__fpu_xmm11;
+      case X86Encoding::xmm12: return &context->float_.__fpu_xmm12;
+      case X86Encoding::xmm13: return &context->float_.__fpu_xmm13;
+      case X86Encoding::xmm14: return &context->float_.__fpu_xmm14;
+      case X86Encoding::xmm15: return &context->float_.__fpu_xmm15;
+      default: break;
+    }
+    MOZ_CRASH();
+}
+
+MOZ_COLD static void *
+AddressOfGPRegisterSlot(EMULATOR_CONTEXT *context, Registers::Code code)
+{
+    switch (code) {
+      case X86Encoding::rax: return &context->thread.__rax;
+      case X86Encoding::rcx: return &context->thread.__rcx;
+      case X86Encoding::rdx: return &context->thread.__rdx;
+      case X86Encoding::rbx: return &context->thread.__rbx;
+      case X86Encoding::rsp: return &context->thread.__rsp;
+      case X86Encoding::rbp: return &context->thread.__rbp;
+      case X86Encoding::rsi: return &context->thread.__rsi;
+      case X86Encoding::rdi: return &context->thread.__rdi;
+      case X86Encoding::r8:  return &context->thread.__r8;
+      case X86Encoding::r9:  return &context->thread.__r9;
+      case X86Encoding::r10: return &context->thread.__r10;
+      case X86Encoding::r11: return &context->thread.__r11;
+      case X86Encoding::r12: return &context->thread.__r12;
+      case X86Encoding::r13: return &context->thread.__r13;
+      case X86Encoding::r14: return &context->thread.__r14;
+      case X86Encoding::r15: return &context->thread.__r15;
+      default: break;
+    }
+    MOZ_CRASH();
 }
 # endif  // !XP_MACOSX
 
-static void
-RedirectToOutOfBoundsLabel(uint8_t **ppc, const AsmJSModule &module)
+MOZ_COLD static void
+SetRegisterToCoercedUndefined(EMULATOR_CONTEXT *context, size_t size,
+                              const Disassembler::OtherOperand &value)
+{
+    if (value.kind() == Disassembler::OtherOperand::FPR)
+        SetFPRegToNaN(size, AddressOfFPRegisterSlot(context, value.fpr()));
+    else
+        SetGPRegToZero(AddressOfGPRegisterSlot(context, value.gpr()));
+}
+
+MOZ_COLD static void
+SetRegisterToLoadedValue(EMULATOR_CONTEXT *context, const void *addr, size_t size,
+                         const Disassembler::OtherOperand &value)
+{
+    if (value.kind() == Disassembler::OtherOperand::FPR)
+        SetFPRegToLoadedValue(addr, size, AddressOfFPRegisterSlot(context, value.fpr()));
+    else
+        SetGPRegToLoadedValue(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
+}
+
+MOZ_COLD static void
+SetRegisterToLoadedValueSext32(EMULATOR_CONTEXT *context, const void *addr, size_t size,
+                               const Disassembler::OtherOperand &value)
+{
+    SetGPRegToLoadedValueSext32(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
+}
+
+MOZ_COLD static void
+StoreValueFromRegister(EMULATOR_CONTEXT *context, void *addr, size_t size,
+                       const Disassembler::OtherOperand &value)
+{
+    if (value.kind() == Disassembler::OtherOperand::FPR)
+        StoreValueFromFPReg(addr, size, AddressOfFPRegisterSlot(context, value.fpr()));
+    else if (value.kind() == Disassembler::OtherOperand::GPR)
+        StoreValueFromGPReg(addr, size, AddressOfGPRegisterSlot(context, value.gpr()));
+    else
+        StoreValueFromGPImm(addr, size, value.imm());
+}
+
+MOZ_COLD static uint8_t *
+ComputeAccessAddress(EMULATOR_CONTEXT *context, const Disassembler::ComplexAddress &address)
+{
+    MOZ_RELEASE_ASSERT(!address.isPCRelative(), "PC-relative addresses not supported yet");
+
+    uintptr_t result = address.disp();
+
+    if (address.base() != Registers::Invalid) {
+        uintptr_t base;
+        StoreValueFromGPReg(&base, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.base()));
+        result += base;
+    }
+
+    if (address.index() != Registers::Invalid) {
+        uintptr_t index;
+        StoreValueFromGPReg(&index, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.index()));
+        result += index * (1 << address.scale());
+    }
+
+    return reinterpret_cast<uint8_t *>(result);
+}
+
+MOZ_COLD static uint8_t *
+EmulateHeapAccess(EMULATOR_CONTEXT *context, uint8_t *pc, uint8_t *faultingAddress,
+                  const AsmJSHeapAccess *heapAccess, const AsmJSModule &module)
 {
-    MOZ_ASSERT(module.containsFunctionPC(*ppc));
-    *ppc = module.outOfBoundsExit();
+    MOZ_RELEASE_ASSERT(module.containsFunctionPC(pc));
+    MOZ_RELEASE_ASSERT(module.usesSignalHandlersForOOB());
+    MOZ_RELEASE_ASSERT(!heapAccess->hasLengthCheck());
+    MOZ_RELEASE_ASSERT(heapAccess->insnOffset() == (pc - module.codeBase()));
+
+    // Disassemble the instruction which caused the trap so that we can extract
+    // information about it and decide what to do.
+    Disassembler::HeapAccess access;
+    uint8_t *end = Disassembler::DisassembleHeapAccess(pc, &access);
+    const Disassembler::ComplexAddress &address = access.address();
+    MOZ_RELEASE_ASSERT(end > pc);
+    MOZ_RELEASE_ASSERT(module.containsFunctionPC(end));
+
+#if defined(JS_CODEGEN_X64)
+    // Check x64 asm.js heap access invariants.
+    MOZ_RELEASE_ASSERT(address.disp() >= 0);
+    MOZ_RELEASE_ASSERT(address.base() == HeapReg.code());
+    MOZ_RELEASE_ASSERT(address.index() != HeapReg.code());
+    MOZ_RELEASE_ASSERT(address.scale() == 0);
+    if (address.base() != Registers::Invalid) {
+        uintptr_t base;
+        StoreValueFromGPReg(&base, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.base()));
+        MOZ_RELEASE_ASSERT(reinterpret_cast<uint8_t *>(base) == module.maybeHeap());
+    }
+    if (address.index() != Registers::Invalid) {
+        uintptr_t index;
+        StoreValueFromGPReg(&index, sizeof(uintptr_t),
+                            AddressOfGPRegisterSlot(context, address.index()));
+        MOZ_RELEASE_ASSERT(uint32_t(index) == index);
+    }
+#endif
+
+    // Determine the actual effective address of the faulting access. We can't
+    // rely on the faultingAddress given to us by the OS, because we need the
+    // address of the start of the access, and the OS may sometimes give us an
+    // address somewhere in the middle of the heap access.
+    uint8_t *accessAddress = ComputeAccessAddress(context, address);
+    MOZ_RELEASE_ASSERT(size_t(faultingAddress - accessAddress) < access.size(),
+                       "Given faulting address does not appear to be within computed "
+                       "faulting address range");
+    MOZ_RELEASE_ASSERT(accessAddress >= module.maybeHeap(),
+                       "Access begins outside the asm.js heap");
+    MOZ_RELEASE_ASSERT(accessAddress + access.size() <= module.maybeHeap() + AsmJSMappedSize,
+                       "Access extends beyond the asm.js heap guard region");
+    MOZ_RELEASE_ASSERT(accessAddress + access.size() > module.maybeHeap() + module.heapLength(),
+                       "Computed access address is not actually out of bounds");
+
+    // The basic sandbox model is that all heap accesses are a heap base
+    // register plus an index, and the index is always computed with 32-bit
+    // operations, so we know it can only be 4 GiB off of the heap base.
+    //
+    // However, we wish to support the optimization of folding immediates
+    // and scaled indices into addresses, and any address arithmetic we fold
+    // gets done at full pointer width, so it doesn't get properly wrapped.
+    // We support this by extending AsmJSMappedSize to the greatest size
+    // that could be reached by such an unwrapped address, and then when we
+    // arrive here in the signal handler for such an access, we compute the
+    // fully wrapped address, and perform the load or store on it.
+    //
+    // Taking a signal is really slow, but in theory programs really shouldn't
+    // be hitting this anyway.
+    intptr_t unwrappedOffset = accessAddress - module.maybeHeap();
+    uint32_t wrappedOffset = uint32_t(unwrappedOffset);
+    size_t size = access.size();
+    MOZ_RELEASE_ASSERT(wrappedOffset + size > wrappedOffset);
+    bool inBounds = wrappedOffset < module.heapLength() &&
+                    wrappedOffset + size < module.heapLength();
+
+    // If this is storing Z of an XYZ, check whether X is also in bounds, so
+    // that we don't store anything before throwing.
+    MOZ_RELEASE_ASSERT(unwrappedOffset > heapAccess->offsetWithinWholeSimdVector());
+    uint32_t wrappedBaseOffset = uint32_t(unwrappedOffset - heapAccess->offsetWithinWholeSimdVector());
+    if (wrappedBaseOffset >= module.heapLength())
+        inBounds = false;
+
+    if (inBounds) {
+        // We now know that this is an access that is actually in bounds when
+        // properly wrapped. Complete the load or store with the wrapped
+        // address.
+        uint8_t *wrappedAddress = module.maybeHeap() + wrappedOffset;
+        MOZ_RELEASE_ASSERT(wrappedAddress >= module.maybeHeap());
+        MOZ_RELEASE_ASSERT(wrappedAddress + size > wrappedAddress);
+        MOZ_RELEASE_ASSERT(wrappedAddress + size <= module.maybeHeap() + module.heapLength());
+        switch (access.kind()) {
+          case Disassembler::HeapAccess::Load:
+            SetRegisterToLoadedValue(context, wrappedAddress, size, access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::LoadSext32:
+            SetRegisterToLoadedValueSext32(context, wrappedAddress, size, access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::Store:
+            StoreValueFromRegister(context, wrappedAddress, size, access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::Unknown:
+            MOZ_CRASH("Failed to disassemble instruction");
+        }
+    } else {
+        // We now know that this is an out-of-bounds access made by an asm.js
+        // load/store that we should handle.
+
+        if (heapAccess->throwOnOOB())
+            return module.outOfBoundsExit();
+
+        switch (access.kind()) {
+          case Disassembler::HeapAccess::Load:
+          case Disassembler::HeapAccess::LoadSext32:
+            // Assign the JS-defined result value to the destination register
+            // (ToInt32(undefined) or ToNumber(undefined), determined by the
+            // type of the destination register). Very conveniently, we can
+            // infer the type from the register class, since all SIMD accesses
+            // throw on out of bounds (see above), so the only types using FP
+            // registers are float32 and double.
+            SetRegisterToCoercedUndefined(context, access.size(), access.otherOperand());
+            break;
+          case Disassembler::HeapAccess::Store:
+            // Do nothing.
+            break;
+          case Disassembler::HeapAccess::Unknown:
+            MOZ_CRASH("Failed to disassemble instruction");
+        }
+    }
+
+    return end;
 }
+
 #endif // JS_CODEGEN_X64
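(Aside.) EmulateHeapAccess above recovers from a fault by disassembling the access, recomputing its effective address from the register state, and re-wrapping the heap offset to 32 bits before deciding whether the access is genuinely out of bounds. The following is a minimal sketch of just that wrapping decision, assuming a simplified heap descriptor; FakeHeap and WrapAccessAddress are hypothetical, not the real AsmJSModule interface, and the partial-SIMD-vector check (offsetWithinWholeSimdVector) is omitted.

#include <cstddef>
#include <cstdint>

struct FakeHeap {            // stand-in for module.maybeHeap()/module.heapLength()
    uint8_t *base;
    uint32_t length;
};

// Returns the properly wrapped address at which to perform the load/store, or
// nullptr when the wrapped access is genuinely out of bounds (the caller would
// then either redirect to the OOB exit or coerce the result to undefined).
static uint8_t *
WrapAccessAddress(const FakeHeap &heap, uint8_t *accessAddress, size_t size)
{
    // Folded address arithmetic was done at full pointer width, so the raw
    // offset may exceed 4 GiB; wrap it the way 32-bit index math would have.
    intptr_t unwrappedOffset = accessAddress - heap.base;
    uint32_t wrappedOffset = uint32_t(unwrappedOffset);
    bool inBounds = wrappedOffset < heap.length &&
                    wrappedOffset + size < heap.length;
    return inBounds ? heap.base + wrappedOffset : nullptr;
}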
 
 #if defined(XP_WIN)
 
 static bool
 HandleFault(PEXCEPTION_POINTERS exception)
 {
     EXCEPTION_RECORD *record = exception->ExceptionRecord;
@@ -448,17 +743,17 @@ HandleFault(PEXCEPTION_POINTERS exceptio
     if (!activation)
         return false;
 
 # if defined(JS_CODEGEN_X64)
     const AsmJSModule &module = activation->module();
 
     // These checks aren't necessary, but, since we can, check anyway to make
     // sure we aren't covering up a real bug.
-    void *faultingAddress = (void*)record->ExceptionInformation[1];
+    uint8_t *faultingAddress = reinterpret_cast<uint8_t *>(record->ExceptionInformation[1]);
     if (!module.maybeHeap() ||
         faultingAddress < module.maybeHeap() ||
         faultingAddress >= module.maybeHeap() + AsmJSMappedSize)
     {
         return false;
     }
 
     if (!module.containsFunctionPC(pc)) {
@@ -479,36 +774,17 @@ HandleFault(PEXCEPTION_POINTERS exceptio
         }
         return false;
     }
 
     const AsmJSHeapAccess *heapAccess = module.lookupHeapAccess(pc);
     if (!heapAccess)
         return false;
 
-    // We now know that this is an out-of-bounds access made by an asm.js
-    // load/store that we should handle.
-
-    // SIMD out-of-bounds loads and stores just need to throw.
-    if (Scalar::isSimdType(heapAccess->type())) {
-        RedirectToOutOfBoundsLabel(ppc, module);
-        return true;
-    }
-
-    // Also not necessary, but, since we can, do.
-    if (heapAccess->isLoad() != !record->ExceptionInformation[0])
-        return false;
-
-    // If this is a load, assign the JS-defined result value to the destination
-    // register (ToInt32(undefined) or ToNumber(undefined), determined by the
-    // type of the destination register) and set the PC to the next op. Upon
-    // return from the handler, execution will resume at this next PC.
-    if (heapAccess->isLoad())
-        SetRegisterToCoercedUndefined(context, heapAccess->type(), heapAccess->loadedReg());
-    *ppc += heapAccess->opLength();
+    *ppc = EmulateHeapAccess(context, pc, faultingAddress, heapAccess, module);
 
     return true;
 # else
     return false;
 # endif
 }
 
 static LONG WINAPI
@@ -520,92 +796,29 @@ AsmJSFaultHandler(LPEXCEPTION_POINTERS e
     // No need to worry about calling other handlers, the OS does this for us.
     return EXCEPTION_CONTINUE_SEARCH;
 }
 
 #elif defined(XP_MACOSX)
 # include <mach/exc.h>
 
 static uint8_t **
-ContextToPC(x86_thread_state_t &state)
+ContextToPC(EMULATOR_CONTEXT *context)
 {
 # if defined(JS_CPU_X64)
-    static_assert(sizeof(state.uts.ts64.__rip) == sizeof(void*),
+    static_assert(sizeof(context->thread.__rip) == sizeof(void*),
                   "stored IP should be compile-time pointer-sized");
-    return reinterpret_cast<uint8_t**>(&state.uts.ts64.__rip);
+    return reinterpret_cast<uint8_t**>(&context->thread.__rip);
 # else
-    static_assert(sizeof(state.uts.ts32.__eip) == sizeof(void*),
+    static_assert(sizeof(context->thread.uts.ts32.__eip) == sizeof(void*),
                   "stored IP should be compile-time pointer-sized");
-    return reinterpret_cast<uint8_t**>(&state.uts.ts32.__eip);
-# endif
+    return reinterpret_cast<uint8_t**>(&context->thread.uts.ts32.__eip);
+#endif
 }
 
-# if defined(JS_CODEGEN_X64)
-static bool
-SetRegisterToCoercedUndefined(mach_port_t rtThread, x86_thread_state64_t &state,
-                              const AsmJSHeapAccess &heapAccess)
-{
-    if (heapAccess.loadedReg().isFloat()) {
-        kern_return_t kret;
-
-        x86_float_state64_t fstate;
-        unsigned int count = x86_FLOAT_STATE64_COUNT;
-        kret = thread_get_state(rtThread, x86_FLOAT_STATE64, (thread_state_t) &fstate, &count);
-        if (kret != KERN_SUCCESS)
-            return false;
-
-        Scalar::Type viewType = heapAccess.type();
-        switch (heapAccess.loadedReg().fpu().code()) {
-          case X86Encoding::xmm0:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm0); break;
-          case X86Encoding::xmm1:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm1); break;
-          case X86Encoding::xmm2:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm2); break;
-          case X86Encoding::xmm3:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm3); break;
-          case X86Encoding::xmm4:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm4); break;
-          case X86Encoding::xmm5:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm5); break;
-          case X86Encoding::xmm6:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm6); break;
-          case X86Encoding::xmm7:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm7); break;
-          case X86Encoding::xmm8:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm8); break;
-          case X86Encoding::xmm9:  SetXMMRegToNaN(viewType, &fstate.__fpu_xmm9); break;
-          case X86Encoding::xmm10: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm10); break;
-          case X86Encoding::xmm11: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm11); break;
-          case X86Encoding::xmm12: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm12); break;
-          case X86Encoding::xmm13: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm13); break;
-          case X86Encoding::xmm14: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm14); break;
-          case X86Encoding::xmm15: SetXMMRegToNaN(viewType, &fstate.__fpu_xmm15); break;
-          default: MOZ_CRASH();
-        }
-
-        kret = thread_set_state(rtThread, x86_FLOAT_STATE64, (thread_state_t)&fstate, x86_FLOAT_STATE64_COUNT);
-        if (kret != KERN_SUCCESS)
-            return false;
-    } else {
-        switch (heapAccess.loadedReg().gpr().code()) {
-          case X86Encoding::rax: state.__rax = 0; break;
-          case X86Encoding::rcx: state.__rcx = 0; break;
-          case X86Encoding::rdx: state.__rdx = 0; break;
-          case X86Encoding::rbx: state.__rbx = 0; break;
-          case X86Encoding::rsp: state.__rsp = 0; break;
-          case X86Encoding::rbp: state.__rbp = 0; break;
-          case X86Encoding::rsi: state.__rsi = 0; break;
-          case X86Encoding::rdi: state.__rdi = 0; break;
-          case X86Encoding::r8:  state.__r8  = 0; break;
-          case X86Encoding::r9:  state.__r9  = 0; break;
-          case X86Encoding::r10: state.__r10 = 0; break;
-          case X86Encoding::r11: state.__r11 = 0; break;
-          case X86Encoding::r12: state.__r12 = 0; break;
-          case X86Encoding::r13: state.__r13 = 0; break;
-          case X86Encoding::r14: state.__r14 = 0; break;
-          case X86Encoding::r15: state.__r15 = 0; break;
-          default: MOZ_CRASH();
-        }
-    }
-    return true;
-}
-# endif
-
 // This definition was generated by mig (the Mach Interface Generator) for the
 // routine 'exception_raise' (exc.defs).
 #pragma pack(4)
 typedef struct {
     mach_msg_header_t Head;
     /* start of the kernel processed data */
     mach_msg_body_t msgh_body;
     mach_msg_port_descriptor_t thread;
@@ -632,72 +845,74 @@ HandleMachException(JSRuntime *rt, const
     if (rt->handlingSignal)
         return false;
     AutoSetHandlingSignal handling(rt);
 
     // Get the port of the JSRuntime's thread from the message.
     mach_port_t rtThread = request.body.thread.name;
 
     // Read out the JSRuntime thread's register state.
-    x86_thread_state_t state;
-    unsigned int count = x86_THREAD_STATE_COUNT;
+    EMULATOR_CONTEXT context;
+# if defined(JS_CODEGEN_X64)
+    unsigned int thread_state_count = x86_THREAD_STATE64_COUNT;
+    unsigned int float_state_count = x86_FLOAT_STATE64_COUNT;
+    int thread_state = x86_THREAD_STATE64;
+    int float_state = x86_FLOAT_STATE64;
+# else
+    unsigned int thread_state_count = x86_THREAD_STATE_COUNT;
+    unsigned int float_state_count = x86_FLOAT_STATE_COUNT;
+    int thread_state = x86_THREAD_STATE;
+    int float_state = x86_FLOAT_STATE;
+# endif
     kern_return_t kret;
-    kret = thread_get_state(rtThread, x86_THREAD_STATE, (thread_state_t)&state, &count);
+    kret = thread_get_state(rtThread, thread_state,
+                            (thread_state_t)&context.thread, &thread_state_count);
+    if (kret != KERN_SUCCESS)
+        return false;
+    kret = thread_get_state(rtThread, float_state,
+                            (thread_state_t)&context.float_, &float_state_count);
     if (kret != KERN_SUCCESS)
         return false;
 
-    uint8_t **ppc = ContextToPC(state);
+    uint8_t **ppc = ContextToPC(&context);
     uint8_t *pc = *ppc;
 
     if (request.body.exception != EXC_BAD_ACCESS || request.body.codeCnt != 2)
         return false;
 
     AsmJSActivation *activation = rt->asmJSActivationStack();
     if (!activation)
         return false;
 
     const AsmJSModule &module = activation->module();
     if (!module.containsFunctionPC(pc))
         return false;
 
-# if defined(JS_CPU_X64)
+# if defined(JS_CODEGEN_X64)
     // These checks aren't necessary, but, since we can, check anyway to make
     // sure we aren't covering up a real bug.
-    void *faultingAddress = (void*)request.body.code[1];
+    uint8_t *faultingAddress = reinterpret_cast<uint8_t *>(request.body.code[1]);
     if (!module.maybeHeap() ||
         faultingAddress < module.maybeHeap() ||
         faultingAddress >= module.maybeHeap() + AsmJSMappedSize)
     {
         return false;
     }
 
     const AsmJSHeapAccess *heapAccess = module.lookupHeapAccess(pc);
     if (!heapAccess)
         return false;
 
-    // We now know that this is an out-of-bounds access made by an asm.js
-    // load/store that we should handle.
+    *ppc = EmulateHeapAccess(&context, pc, faultingAddress, heapAccess, module);
 
-    if (Scalar::isSimdType(heapAccess->type())) {
-        // SIMD out-of-bounds loads and stores just need to throw.
-        RedirectToOutOfBoundsLabel(ppc, module);
-    } else {
-        // If this is a load, assign the JS-defined result value to the destination
-        // register (ToInt32(undefined) or ToNumber(undefined), determined by the
-        // type of the destination register) and set the PC to the next op. Upon
-        // return from the handler, execution will resume at this next PC.
-        if (heapAccess->isLoad()) {
-            if (!SetRegisterToCoercedUndefined(rtThread, state.uts.ts64, *heapAccess))
-                return false;
-        }
-        *ppc += heapAccess->opLength();
-    }
-
-    // Update the thread state with the new pc.
-    kret = thread_set_state(rtThread, x86_THREAD_STATE, (thread_state_t)&state, x86_THREAD_STATE_COUNT);
+    // Update the thread state with the new pc and register values.
+    kret = thread_set_state(rtThread, float_state, (thread_state_t)&context.float_, float_state_count);
+    if (kret != KERN_SUCCESS)
+        return false;
+    kret = thread_set_state(rtThread, thread_state, (thread_state_t)&context.thread, thread_state_count);
     if (kret != KERN_SUCCESS)
         return false;
 
     return true;
 # else
     return false;
 # endif
 }
@@ -876,44 +1091,29 @@ HandleFault(int signum, siginfo_t *info,
 
     const AsmJSModule &module = activation->module();
     if (!module.containsFunctionPC(pc))
         return false;
 
 # if defined(JS_CODEGEN_X64)
     // These checks aren't necessary, but, since we can, check anyway to make
     // sure we aren't covering up a real bug.
-    void *faultingAddress = info->si_addr;
+    uint8_t *faultingAddress = static_cast<uint8_t *>(info->si_addr);
     if (!module.maybeHeap() ||
         faultingAddress < module.maybeHeap() ||
         faultingAddress >= module.maybeHeap() + AsmJSMappedSize)
     {
         return false;
     }
 
     const AsmJSHeapAccess *heapAccess = module.lookupHeapAccess(pc);
     if (!heapAccess)
         return false;
 
-    // We now know that this is an out-of-bounds access made by an asm.js
-    // load/store that we should handle.
-
-    // SIMD out-of-bounds loads and stores just need to throw.
-    if (Scalar::isSimdType(heapAccess->type())) {
-        RedirectToOutOfBoundsLabel(ppc, module);
-        return true;
-    }
-
-    // If this is a load, assign the JS-defined result value to the destination
-    // register (ToInt32(undefined) or ToNumber(undefined), determined by the
-    // type of the destination register) and set the PC to the next op. Upon
-    // return from the handler, execution will resume at this next PC.
-    if (heapAccess->isLoad())
-        SetRegisterToCoercedUndefined(context, heapAccess->type(), heapAccess->loadedReg());
-    *ppc += heapAccess->opLength();
+    *ppc = EmulateHeapAccess(context, pc, faultingAddress, heapAccess, module);
 
     return true;
 # else
     return false;
 # endif
 }
 
 static struct sigaction sPrevSEGVHandler;
--- a/js/src/asmjs/AsmJSValidate.h
+++ b/js/src/asmjs/AsmJSValidate.h
@@ -20,16 +20,17 @@
 #define jit_AsmJS_h
 
 #include "mozilla/MathAlgorithms.h"
 
 #include <stddef.h>
 
 #include "jsutil.h"
 
+#include "jit/Registers.h"
 #include "js/TypeDecls.h"
 #include "vm/NativeObject.h"
 
 namespace js {
 
 class ExclusiveContext;
 namespace frontend {
     template <typename ParseHandler> class Parser;
@@ -48,23 +49,35 @@ typedef frontend::ParseContext<frontend:
 // amount and the entire function should be reparsed from the beginning.
 extern bool
 ValidateAsmJS(ExclusiveContext *cx, AsmJSParser &parser, frontend::ParseNode *stmtList,
              bool *validated);
 
 // The assumed page size; dynamically checked in ValidateAsmJS.
 const size_t AsmJSPageSize = 4096;
 
+// Targets define AsmJSImmediateRange to be the size of an address immediate,
+// and AsmJSCheckedImmediateRange, to be the size of an address immediate that
+// can be supported by signal-handler OOB handling.
+static_assert(jit::AsmJSCheckedImmediateRange <= jit::AsmJSImmediateRange,
+              "AsmJSImmediateRange should be the size of an unconstrained "
+              "address immediate");
+
 #ifdef JS_CPU_X64
 // On x64, the internal ArrayBuffer data array is inflated to 4GiB (only the
 // byteLength portion of which is accessible) so that out-of-bounds accesses
 // (made using a uint32 index) are guaranteed to raise a SIGSEGV.
-// Unaligned accesses and mask optimizations might also try to access a few
-// bytes after this limit, so just inflate it by AsmJSPageSize.
-static const size_t AsmJSMappedSize = 4 * 1024ULL * 1024ULL * 1024ULL + AsmJSPageSize;
+// Then, an additional extent is added to permit folding of small immediate
+// values into addresses. And finally, unaligned accesses and mask optimizations
+// might also try to access a few bytes after this limit, so just inflate it by
+// AsmJSPageSize.
+static const size_t AsmJSMappedSize = 4 * 1024ULL * 1024ULL * 1024ULL +
+                                      jit::AsmJSCheckedImmediateRange +
+                                      AsmJSPageSize;
+
 #endif
 
 // From the asm.js spec Linking section:
 //  the heap object's byteLength must be either
 //    2^n for n in [12, 24)
 //  or
 //    2^24 * n for n >= 1.
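(Aside.) The mapped-size arithmetic above can be sanity-checked with a couple of constants. The concrete value of jit::AsmJSCheckedImmediateRange is target-defined, so the 4096 used below is only an assumed placeholder for illustration; the sketch assumes a 64-bit size_t, as on x64, the only target that uses AsmJSMappedSize.

#include <cstddef>
#include <cstdint>

constexpr size_t kPageSize              = 4096;                 // AsmJSPageSize
constexpr size_t kCheckedImmediateRange = 4096;                 // assumed placeholder
constexpr size_t kMappedSize = 4 * 1024ULL * 1024ULL * 1024ULL  // whole uint32 index space
                             + kCheckedImmediateRange           // folded constant offsets
                             + kPageSize;                       // unaligned/mask slop

// Any uint32 index plus any checked immediate still lands inside the mapping,
// so a wild access faults in the guard region instead of touching other memory.
static_assert(size_t(UINT32_MAX) + kCheckedImmediateRange < kMappedSize,
              "index + checked immediate stays within the reserved mapping");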
 
copy from js/src/jit-test/tests/asm.js/testZOOB.js
copy to js/src/jit-test/tests/asm.js/testAddressErrors.js
--- a/js/src/jit-test/tests/asm.js/testZOOB.js
+++ b/js/src/jit-test/tests/asm.js/testAddressErrors.js
@@ -41,98 +41,8 @@ assertEq(asmLink(asmCompile('glob', 'imp
 assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {arr[0] = 1; return arr[(0xffffffff+1)>>>0]|0 } return f'), this, null, buf)(), 1);
 
 // A non-intish shifted literal constant index should cause an error compiling.
 assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b); function f() {return arr[0x100000000>>0]|0 } return f');
 assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x100000000>>2]|0 } return f');
 
 // Folded non-intish constant expressions should cause an error compiling.
 assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff+1]|0 } return f');
-
-var ab = new ArrayBuffer(BUF_MIN);
-var arr = new Int32Array(BUF_MIN);
-for (var i = 0; i < arr.length; i++)
-    arr[i] = i;
-
-function testInt(ctor, shift, scale, disp) {
-    var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i) {i=i|0; return arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']|0 } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), arr[((i<<scale)+disp)>>shift]|0);
-
-    for (var i of [-Math.pow(2,28),Math.pow(2,28),-Math.pow(2,29),Math.pow(2,29),-Math.pow(2,30),Math.pow(2,30),-Math.pow(2,31),Math.pow(2,31),-Math.pow(2,32),Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), arr[(((i+j)<<scale)+disp)>>shift]|0);
-    }
-
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i,j) {i=i|0;j=j|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
-        var index = ((i<<scale)+disp)>>shift;
-        var v = arr[index]|0;
-        arr[index] = 0;
-        f(i, v);
-        assertEq(arr[index]|0, v);
-    }
-
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = arr[index]|0;
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(arr[index]|0, v);
-        }
-    }
-}
-
-function testFloat(ctor, shift, scale, disp, coercion) {
-    var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i) {i=i|0; return ' + coercion + '(arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']) } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), +arr[((i<<scale)+disp)>>shift]);
-
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), +arr[(((i+j)<<scale)+disp)>>shift]);
-    }
-
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i,j) {i=i|0;j=+j; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
-        var index = ((i<<scale)+disp)>>shift;
-        var v = +arr[index];
-        arr[index] = 0;
-        f(i, v);
-        assertEq(+arr[index], v);
-    }
-
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = +arr[index];
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(+arr[index], v);
-        }
-    }
-}
-
-function testFloat32(ctor, shift, scale, disp) {
-    testFloat(ctor, shift, scale, disp, "toF");
-}
-function testFloat64(ctor, shift, scale, disp) {
-    testFloat(ctor, shift, scale, disp, "+");
-}
-
-function test(tester, ctor, shift) {
-    for (scale of [0,1,2,3]) {
-        for (disp of [0,1,8,Math.pow(2,31)-1,Math.pow(2,31),Math.pow(2,32)-1])
-            tester(ctor, shift, scale, disp);
-    }
-}
-
-test(testInt, Int8Array, 0);
-test(testInt, Uint8Array, 0);
-test(testInt, Int16Array, 1);
-test(testInt, Uint16Array, 1);
-test(testInt, Int32Array, 2);
-test(testInt, Uint32Array, 2);
-test(testFloat32, Float32Array, 2);
-test(testFloat64, Float64Array, 3);
--- a/js/src/jit-test/tests/asm.js/testZOOB.js
+++ b/js/src/jit-test/tests/asm.js/testZOOB.js
@@ -1,138 +1,244 @@
 // |jit-test| test-also-noasmjs
 load(libdir + "asm.js");
+load(libdir + "asserts.js");
 
 setIonCheckGraphCoherency(false);
 setCachingEnabled(false);
 
-// constants
-var buf = new ArrayBuffer(BUF_MIN);
-
-// An unshifted literal constant byte index in the range 0 to 2^31-1 inclusive should give a link failure.
-assertAsmLinkFail(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x7fffffff]|0 } return f'), this, null, buf);
-assertAsmLinkFail(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x1fffffff]|0 } return f'), this, null, buf);
-
-
-// An unshifted literal constant byte index outside the range 0 to 2^31-1 inclusive should cause an error compiling.
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x20000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x3fffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x40000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x7fffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x80000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x8fffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0xffffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x100000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x80000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x100000000]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int16Array(b); function f() {return arr[-1]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[-2]|0 } return f');
+var ab = new ArrayBuffer(BUF_MIN);
 
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[10-12]|0 } return f');
-
-// An intish shifted literal constant index should not fail to compile or link.
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0x3fffffff>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x3fffffff>>2]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0xffffffff>>2]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[-1>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[-1>>2]|0 } return f'), this, null, buf)(), 0);
-// Unsigned (intish) folded constant index.
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff>>>0]|0 } return f'), this, null, buf)(), 0);
-assertEq(asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {arr[0] = 1; return arr[(0xffffffff+1)>>>0]|0 } return f'), this, null, buf)(), 1);
-
-// A non-intish shifted literal constant index should cause an error compiling.
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b); function f() {return arr[0x100000000>>0]|0 } return f');
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int32Array(b); function f() {return arr[0x100000000>>2]|0 } return f');
-
-// Folded non-intish constant expressions should cause an error compiling.
-assertAsmTypeFail('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.Int8Array(b);  function f() {return arr[0xffffffff+1]|0 } return f');
-
-var ab = new ArrayBuffer(BUF_MIN);
-var arr = new Int32Array(BUF_MIN);
-for (var i = 0; i < arr.length; i++)
-    arr[i] = i;
+// Compute a set of interesting indices.
+indices = [0]
+for (var i of [4,1024,BUF_MIN,Math.pow(2,30),Math.pow(2,31),Math.pow(2,32),Math.pow(2,33)]) {
+    for (var j of [-2,-1,0,1,2]) {
+        for (var k of [1,-1])
+            indices.push((i+j)*k);
+    }
+}
 
 function testInt(ctor, shift, scale, disp) {
     var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i) {i=i|0; return arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']|0 } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), arr[((i<<scale)+disp)>>shift]|0);
 
-    for (var i of [-Math.pow(2,28),Math.pow(2,28),-Math.pow(2,29),Math.pow(2,29),-Math.pow(2,30),Math.pow(2,30),-Math.pow(2,31),Math.pow(2,31),-Math.pow(2,32),Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), arr[(((i+j)<<scale)+disp)>>shift]|0);
-    }
+    var c = asmCompile('glob', 'imp', 'b',
+                       USE_ASM +
+                       'var arr=new glob.' + ctor.name + '(b); ' +
+                       'function load(i) {i=i|0; return arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']|0 } ' +
+                       'function store(i,j) {i=i|0;j=j|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } ' +
+                       'function storeZero(i) {i=i|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = 0 } ' +
+                       'function storeNegOne(i) {i=i|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = -1 } ' +
+                       'return { load: load, store: store, storeZero: storeZero, storeNegOne: storeNegOne }');
+    var f = asmLink(c, this, null, ab);
 
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); function f(i,j) {i=i|0;j=j|0; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
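+    // Record what a stored -1 reads back as for this element type (e.g. 255
+    // for Uint8Array), taking care to restore the original value of arr[0].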
+    var v = arr[0];
+    arr[0] = -1;
+    var negOne = arr[0]|0;
+    arr[0] = v;
+
+    for (var i of indices) {
         var index = ((i<<scale)+disp)>>shift;
-        var v = arr[index]|0;
-        arr[index] = 0;
-        f(i, v);
-        assertEq(arr[index]|0, v);
-    }
+        v = arr[index]|0;
+
+        // Loads
+        assertEq(f.load(i), v);
 
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = arr[index]|0;
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(arr[index]|0, v);
-        }
+        // Stores of immediates
+        arr[index] = 1;
+        f.storeZero(i);
+        assertEq(arr[index]|0, 0);
+        f.storeNegOne(i);
+        assertEq(arr[index]|0, index>>>0 < arr.length ? negOne : 0);
+
+        // Stores
+        arr[index] = ~v;
+        f.store(i, v);
+        assertEq(arr[index]|0, v);
     }
 }
 
 function testFloat(ctor, shift, scale, disp, coercion) {
     var arr = new ctor(ab);
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i) {i=i|0; return ' + coercion + '(arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']) } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,BUF_MIN-2,BUF_MIN-1,BUF_MIN,BUF_MIN+1])
-        assertEq(f(i), +arr[((i<<scale)+disp)>>shift]);
 
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8])
-            assertEq(f(i+j), +arr[(((i+j)<<scale)+disp)>>shift]);
-    }
+    var c = asmCompile('glob', 'imp', 'b',
+                       USE_ASM +
+                       'var arr=new glob.' + ctor.name + '(b); ' +
+                       'var toF = glob.Math.fround; ' +
+                       'function load(i) {i=i|0; return ' + coercion + '(arr[((i<<' + scale + ')+' + disp + ')>>' + shift + ']) } ' +
+                       'function store(i,j) {i=i|0;j=+j; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } ' +
+                       'return { load: load, store: store }');
+    var f = asmLink(c, this, null, ab);
 
-    var f = asmLink(asmCompile('glob', 'imp', 'b', USE_ASM + 'var arr=new glob.' + ctor.name + '(b); var toF = glob.Math.fround; function f(i,j) {i=i|0;j=+j; arr[((i<<' + scale + ')+' + disp + ')>>' + shift + '] = j } return f'), this, null, ab);
-    for (var i of [0,1,2,3,4,1023,1024,1025,4095,4096,4097]) {
+    for (var i of indices) {
         var index = ((i<<scale)+disp)>>shift;
         var v = +arr[index];
-        arr[index] = 0;
-        f(i, v);
-        assertEq(+arr[index], v);
-    }
+
+        // Loads
+        assertEq(f.load(i), v);
 
-    for (var i of [-Math.pow(2,31), Math.pow(2,31)-1, Math.pow(2,32)]) {
-        for (var j of [-8,-4,-1,0,1,4,8]) {
-            var index = (((i+j)<<scale)+disp)>>shift;
-            var v = +arr[index];
-            arr[index] = 0;
-            f(i+j, v);
-            assertEq(+arr[index], v);
-        }
+        // Stores
+        arr[index] = ~v;
+        f.store(i, v);
+        assertEq(+arr[index], v);
     }
 }
 
 function testFloat32(ctor, shift, scale, disp) {
     testFloat(ctor, shift, scale, disp, "toF");
 }
 function testFloat64(ctor, shift, scale, disp) {
     testFloat(ctor, shift, scale, disp, "+");
 }
 
+function assertEqX4(observed, expected) {
+    assertEq(observed.x, expected.x);
+    assertEq(observed.y, expected.y);
+    assertEq(observed.z, expected.z);
+    assertEq(observed.w, expected.w);
+}
+
+function testSimdX4(ctor, shift, scale, disp, simdName, simdCtor) {
+    var arr = new ctor(ab);
+
+    var c = asmCompile('glob', 'imp', 'b',
+                       USE_ASM +
+                       'var arr=new glob.' + ctor.name + '(b); ' +
+                       'var SIMD_' + simdName + ' = glob.SIMD.' + simdName + '; ' +
+                       'var SIMD_' + simdName + '_check = SIMD_' + simdName + '.check; ' +
+                       'var SIMD_' + simdName + '_load = SIMD_' + simdName + '.load; ' +
+                       'var SIMD_' + simdName + '_loadXYZ = SIMD_' + simdName + '.loadXYZ; ' +
+                       'var SIMD_' + simdName + '_loadXY = SIMD_' + simdName + '.loadXY; ' +
+                       'var SIMD_' + simdName + '_loadX = SIMD_' + simdName + '.loadX; ' +
+                       'var SIMD_' + simdName + '_store = SIMD_' + simdName + '.store; ' +
+                       'var SIMD_' + simdName + '_storeXYZ = SIMD_' + simdName + '.storeXYZ; ' +
+                       'var SIMD_' + simdName + '_storeXY = SIMD_' + simdName + '.storeXY; ' +
+                       'var SIMD_' + simdName + '_storeX = SIMD_' + simdName + '.storeX; ' +
+                       'function load(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_load(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function loadXYZ(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_loadXYZ(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function loadXY(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_loadXY(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function loadX(i) {i=i|0; return SIMD_' + simdName + '_check(SIMD_' + simdName + '_loadX(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ')) } ' +
+                       'function store(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_store(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'function storeXYZ(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_storeXYZ(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'function storeXY(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_storeXY(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'function storeX(i,j) {i=i|0;j=SIMD_' + simdName + '_check(j); SIMD_' + simdName + '_storeX(arr, ((i<<' + scale + ')+' + disp + ')>>' + shift + ', j) } ' +
+                       'return { load: load, loadXYZ: loadXYZ, loadXY: loadXY, loadX: loadX, store: store, storeXYZ: storeXYZ, storeXY : storeXY, storeX : storeX }');
+    var f = asmLink(c, this, null, ab);
+
+    for (var i of indices) {
+        var index = ((i<<scale)+disp)>>shift;
+
+        var v, vXYZ, vXY, vX;
+        var t = false, tXYZ = false, tXY = false, tX = false;
+        try { v = simdCtor.load(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            t = true;
+        }
+        try { vXYZ = simdCtor.loadXYZ(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            tXYZ = true;
+        }
+        try { vXY = simdCtor.loadXY(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            tXY = true;
+        }
+        try { vX = simdCtor.loadX(arr, index); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            tX = true;
+        }
+
+        // Loads
+        var l, lXYZ, lXY, lX;
+        var r = false, rXYZ = false, rXY = false, rX = false;
+        try { l = f.load(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            r = true;
+        }
+        try { lXYZ = f.loadXYZ(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            rXYZ = true;
+        }
+        try { lXY = f.loadXY(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            rXY = true;
+        }
+        try { lX = f.loadX(i); }
+        catch (e) {
+            assertEq(e instanceof RangeError, true);
+            rX = true;
+        }
+        assertEq(t, r);
+        assertEq(tXYZ, rXYZ);
+        assertEq(tXY, rXY);
+        assertEq(tX, rX);
+        if (!t) assertEqX4(v, l);
+        if (!tXYZ) assertEqX4(vXYZ, lXYZ);
+        if (!tXY) assertEqX4(vXY, lXY);
+        if (!tX) assertEqX4(vX, lX);
+
+        // Stores
+        if (!t) {
+            simdCtor.store(arr, index, simdCtor.not(v));
+            f.store(i, v);
+            assertEqX4(simdCtor.load(arr, index), v);
+        } else
+            assertThrowsInstanceOf(() => f.store(i, simdCtor()), RangeError);
+        if (!tXYZ) {
+            simdCtor.storeXYZ(arr, index, simdCtor.not(vXYZ));
+            f.storeXYZ(i, vXYZ);
+            assertEqX4(simdCtor.loadXYZ(arr, index), vXYZ);
+        } else
+            assertThrowsInstanceOf(() => f.storeXYZ(i, simdCtor()), RangeError);
+        if (!tXY) {
+            simdCtor.storeXY(arr, index, simdCtor.not(vXY));
+            f.storeXY(i, vXY);
+            assertEqX4(simdCtor.loadXY(arr, index), vXY);
+        } else
+            assertThrowsInstanceOf(() => f.storeXY(i, simdCtor()), RangeError);
+        if (!tX) {
+            simdCtor.storeX(arr, index, simdCtor.not(vX));
+            f.storeX(i, vX);
+            assertEqX4(simdCtor.loadX(arr, index), vX);
+        } else
+            assertThrowsInstanceOf(() => f.storeX(i, simdCtor()), RangeError);
+    }
+}
+
+function testFloat32x4(ctor, shift, scale, disp) {
+    testSimdX4(ctor, shift, scale, disp, 'float32x4', SIMD.float32x4);
+}
+function testInt32x4(ctor, shift, scale, disp) {
+    testSimdX4(ctor, shift, scale, disp, 'int32x4', SIMD.int32x4);
+}
+
 function test(tester, ctor, shift) {
+    var arr = new ctor(ab);
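+    // Fill the heap with a deterministic pattern: element i holds i with an
+    // alternating sign.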
+    for (var i = 0; i < arr.length; i++)
+        arr[i] = Math.imul(i, Math.imul((i & 1), 2) - 1);
     for (scale of [0,1,2,3]) {
-        for (disp of [0,1,8,Math.pow(2,31)-1,Math.pow(2,31),Math.pow(2,32)-1])
+        for (disp of [0,1,2,8,Math.pow(2,31)-1,Math.pow(2,31),Math.pow(2,32)-1])
             tester(ctor, shift, scale, disp);
     }
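+    // Check that the testers put back every value they overwrote: the heap
+    // should still hold the original pattern.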
+    for (var i = 0; i < arr.length; i++) {
+        var v = arr[i];
+        arr[i] = Math.imul(i, Math.imul((i & 1), 2) - 1);
+        assertEq(arr[i], v);
+    }
 }
 
 test(testInt, Int8Array, 0);
 test(testInt, Uint8Array, 0);
 test(testInt, Int16Array, 1);
 test(testInt, Uint16Array, 1);
 test(testInt, Int32Array, 2);
 test(testInt, Uint32Array, 2);
 test(testFloat32, Float32Array, 2);
 test(testFloat64, Float64Array, 3);
+if (typeof SIMD !== 'undefined' && isSimdAvailable()) {
+    test(testInt32x4, Uint8Array, 0);
+    test(testFloat32x4, Uint8Array, 0);
+}
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -648,17 +648,17 @@ CodeGenerator::getJumpLabelForBranch(MBa
     if (!labelForBackedgeWithImplicitCheck(block))
         return block->lir()->label();
 
     // We need to use a patchable jump for this backedge, but want to treat
     // this as a normal label target to simplify codegen. Efficiency isn't so
     // important here as these tests are extremely unlikely to be used in loop
     // backedges, so emit inline code for the patchable jump. Heap allocating
     // the label allows it to be used by out of line blocks.
-    Label *res = GetJitContext()->temp->lifoAlloc()->new_<Label>();
+    Label *res = alloc().lifoAlloc()->new_<Label>();
     Label after;
     masm.jump(&after);
     masm.bind(res);
     jumpToBlock(block);
     masm.bind(&after);
     return res;
 }
 
--- a/js/src/jit/Disassembler.h
+++ b/js/src/jit/Disassembler.h
@@ -252,17 +252,16 @@ void DumpHeapAccess(const HeapAccess &ac
 inline void
 VerifyHeapAccess(uint8_t *begin, uint8_t *end, const HeapAccess &expected)
 {
     HeapAccess disassembled;
     uint8_t *e = DisassembleHeapAccess(begin, &disassembled);
     MOZ_ASSERT(e == end);
     MOZ_ASSERT(disassembled == expected);
 }
-
 #endif
 
 } // namespace Disassembler
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_Disassembler_h */
--- a/js/src/jit/EffectiveAddressAnalysis.cpp
+++ b/js/src/jit/EffectiveAddressAnalysis.cpp
@@ -84,16 +84,78 @@ AnalyzeLsh(TempAllocator &alloc, MLsh *l
         return;
     }
 
     MEffectiveAddress *eaddr = MEffectiveAddress::New(alloc, base, index, scale, displacement);
     last->replaceAllUsesWith(eaddr);
     last->block()->insertAfter(last, eaddr);
 }
 
+static bool
+IsAlignmentMask(uint32_t m)
+{
+    // Test whether m is just leading ones and trailing zeros.
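+    // For example, 0xfffff000 qualifies: -0xfffff000 is 0x00001000 and
+    // ~0xfffff000 is 0x00000fff, which share no bits. 0x0000ff00 does not:
+    // -0x0000ff00 (0xffff0100) and ~0x0000ff00 (0xffff00ff) share bits.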
+    return (-m & ~m) == 0;
+}
+
+template<typename MAsmJSHeapAccessType>
+static void
+AnalyzeAsmHeapAccess(MAsmJSHeapAccessType *ins, MIRGraph &graph)
+{
+    MDefinition *ptr = ins->ptr();
+
+    if (ptr->isConstantValue()) {
+        // Look for heap[i] where i is a constant offset, and fold the offset.
+        // By doing the folding now, we simplify the task of codegen; the offset
+        // is always the address mode immediate. This also avoids a situation
+        // where the sum of a constant pointer value and a non-zero offset
+        // doesn't actually fit into the address mode immediate.
+        int32_t imm = ptr->constantValue().toInt32();
+        if (imm != 0 && ins->tryAddDisplacement(imm)) {
+            MInstruction *zero = MConstant::New(graph.alloc(), Int32Value(0));
+            ins->block()->insertBefore(ins, zero);
+            ins->replacePtr(zero);
+        }
+    } else if (ptr->isAdd()) {
+        // Look for heap[a+i] where i is a constant offset, and fold the offset.
+        MDefinition *op0 = ptr->toAdd()->getOperand(0);
+        MDefinition *op1 = ptr->toAdd()->getOperand(1);
+        if (op0->isConstantValue())
+            mozilla::Swap(op0, op1);
+        if (op1->isConstantValue()) {
+            int32_t imm = op1->constantValue().toInt32();
+            if (ins->tryAddDisplacement(imm))
+                ins->replacePtr(op0);
+        }
+    } else if (ptr->isBitAnd() && ptr->hasOneUse()) {
+        // Transform heap[(a+i)&m] to heap[(a&m)+i] so that we can fold i into
+        // the access. Since we currently just mutate the BitAnd in place, this
+        // requires that we are its only user.
+        MDefinition *lhs = ptr->toBitAnd()->getOperand(0);
+        MDefinition *rhs = ptr->toBitAnd()->getOperand(1);
+        int lhsIndex = 0;
+        if (lhs->isConstantValue()) {
+            mozilla::Swap(lhs, rhs);
+            lhsIndex = 1;
+        }
+        if (lhs->isAdd() && rhs->isConstantValue()) {
+            MDefinition *op0 = lhs->toAdd()->getOperand(0);
+            MDefinition *op1 = lhs->toAdd()->getOperand(1);
+            if (op0->isConstantValue())
+                mozilla::Swap(op0, op1);
+            if (op1->isConstantValue()) {
+                uint32_t i = op1->constantValue().toInt32();
+                uint32_t m = rhs->constantValue().toInt32();
+                if (IsAlignmentMask(m) && ((i & m) == i) && ins->tryAddDisplacement(i))
+                    ptr->toBitAnd()->replaceOperand(lhsIndex, op0);
+            }
+        }
+    }
+}
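+
+// For example, AnalyzeAsmHeapAccess turns asmload(add(x, constant(16))) into
+// asmload(x) with the 16 folded into the access's offset, and
+// asmload(constant(4096)) into asmload(constant(0)) with an offset of 4096,
+// whenever tryAddDisplacement accepts the new offset.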
+
 // This analysis converts patterns of the form:
 //   truncate(x + (y << {0,1,2,3}))
 //   truncate(x + (y << {0,1,2,3}) + imm32)
 // into a single lea instruction, and patterns of the form:
 //   asmload(x + imm32)
 //   asmload(x << {0,1,2,3})
 //   asmload((x << {0,1,2,3}) + imm32)
 //   asmload((x << {0,1,2,3}) & mask)            (where mask is redundant with shift)
@@ -103,14 +165,21 @@ AnalyzeLsh(TempAllocator &alloc, MLsh *l
 // Additionally, we should consider the general forms:
 //   truncate(x + y + imm32)
 //   truncate((y << {0,1,2,3}) + imm32)
 bool
 EffectiveAddressAnalysis::analyze()
 {
     for (ReversePostorderIterator block(graph_.rpoBegin()); block != graph_.rpoEnd(); block++) {
         for (MInstructionIterator i = block->begin(); i != block->end(); i++) {
+            // Note that we don't check for MAsmJSCompareExchangeHeap
+            // or MAsmJSAtomicBinopHeap, because the backend and the OOB
+            // mechanism don't support non-zero offsets for them yet.
             if (i->isLsh())
                 AnalyzeLsh(graph_.alloc(), i->toLsh());
+            else if (i->isAsmJSLoadHeap())
+                AnalyzeAsmHeapAccess(i->toAsmJSLoadHeap(), graph_);
+            else if (i->isAsmJSStoreHeap())
+                AnalyzeAsmHeapAccess(i->toAsmJSStoreHeap(), graph_);
         }
     }
     return true;
 }
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -12176,31 +12176,67 @@ class MAsmJSNeg
     INSTRUCTION_HEADER(AsmJSNeg)
     static MAsmJSNeg *NewAsmJS(TempAllocator &alloc, MDefinition *op, MIRType type) {
         return new(alloc) MAsmJSNeg(op, type);
     }
 };
 
 class MAsmJSHeapAccess
 {
-    Scalar::Type accessType_;
+    int32_t offset_;
+    Scalar::Type accessType_ : 8;
     bool needsBoundsCheck_;
     unsigned numSimdElems_;
 
   public:
     MAsmJSHeapAccess(Scalar::Type accessType, bool needsBoundsCheck, unsigned numSimdElems = 0)
-      : accessType_(accessType), needsBoundsCheck_(needsBoundsCheck), numSimdElems_(numSimdElems)
+      : offset_(0), accessType_(accessType),
+        needsBoundsCheck_(needsBoundsCheck), numSimdElems_(numSimdElems)
     {
         MOZ_ASSERT(numSimdElems <= ScalarTypeToLength(accessType));
     }
 
+    int32_t offset() const { return offset_; }
+    int32_t endOffset() const { return offset() + byteSize(); }
     Scalar::Type accessType() const { return accessType_; }
+    unsigned byteSize() const {
+        return Scalar::isSimdType(accessType())
+               ? Scalar::scalarByteSize(accessType()) * numSimdElems()
+               : TypedArrayElemSize(accessType());
+    }
     bool needsBoundsCheck() const { return needsBoundsCheck_; }
     void removeBoundsCheck() { needsBoundsCheck_ = false; }
     unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(accessType_)); return numSimdElems_; }
+
+    bool tryAddDisplacement(int32_t o) {
+        // Compute the new offset. Check for overflow and a negative result. In theory it
+        // ought to be possible to support negative offsets, but it'd require
+        // more elaborate bounds checking mechanisms than we currently have.
+        MOZ_ASSERT(offset_ >= 0);
+        int32_t newOffset = uint32_t(offset_) + o;
+        if (newOffset < 0)
+            return false;
+
+        // Compute the new offset to the end of the access. Check for overflow
+        // and for a negative result here as well.
+        int32_t newEnd = uint32_t(newOffset) + byteSize();
+        if (newEnd < 0)
+            return false;
+        MOZ_ASSERT(uint32_t(newEnd) >= uint32_t(newOffset));
+
+        // If we need bounds checking, keep it within the more restrictive
+        // AsmJSCheckedImmediateRange. Otherwise, just keep it within what
+        // the instruction set can support.
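+        // For example, starting from an offset of 0 with a 4-byte access that
+        // still needs a bounds check on x64 (where AsmJSCheckedImmediateRange
+        // is 4096), a displacement of 4092 is accepted (newEnd is 4096) while
+        // 4093 is rejected.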
+        size_t range = needsBoundsCheck() ? AsmJSCheckedImmediateRange : AsmJSImmediateRange;
+        if (size_t(newEnd) > range)
+            return false;
+
+        offset_ = newOffset;
+        return true;
+    }
 };
 
 class MAsmJSLoadHeap
   : public MUnaryInstruction,
     public MAsmJSHeapAccess,
     public NoTypePolicy::Data
 {
     MemoryBarrierBits barrierBefore_;
@@ -12254,16 +12290,17 @@ class MAsmJSLoadHeap
                                MemoryBarrierBits barrierBefore = MembarNobits,
                                MemoryBarrierBits barrierAfter = MembarNobits)
     {
         return new(alloc) MAsmJSLoadHeap(accessType, ptr, needsBoundsCheck,
                                          numSimdElems, barrierBefore, barrierAfter);
     }
 
     MDefinition *ptr() const { return getOperand(0); }
+    void replacePtr(MDefinition *newPtr) { replaceOperand(0, newPtr); }
     MemoryBarrierBits barrierBefore() const { return barrierBefore_; }
     MemoryBarrierBits barrierAfter() const { return barrierAfter_; }
 
     bool congruentTo(const MDefinition *ins) const MOZ_OVERRIDE;
     AliasSet getAliasSet() const MOZ_OVERRIDE {
         return AliasSet::Load(AliasSet::AsmJSHeap);
     }
     bool mightAlias(const MDefinition *def) const MOZ_OVERRIDE;
@@ -12297,16 +12334,17 @@ class MAsmJSStoreHeap
                                 MemoryBarrierBits barrierBefore = MembarNobits,
                                 MemoryBarrierBits barrierAfter = MembarNobits)
     {
         return new(alloc) MAsmJSStoreHeap(accessType, ptr, v, needsBoundsCheck,
                                           numSimdElems, barrierBefore, barrierAfter);
     }
 
     MDefinition *ptr() const { return getOperand(0); }
+    void replacePtr(MDefinition *newPtr) { replaceOperand(0, newPtr); }
     MDefinition *value() const { return getOperand(1); }
     MemoryBarrierBits barrierBefore() const { return barrierBefore_; }
     MemoryBarrierBits barrierAfter() const { return barrierAfter_; }
 
     AliasSet getAliasSet() const MOZ_OVERRIDE {
         return AliasSet::Store(AliasSet::AsmJSHeap);
     }
 };
--- a/js/src/jit/MIRGenerator.h
+++ b/js/src/jit/MIRGenerator.h
@@ -34,17 +34,17 @@ class MStart;
 class OptimizationInfo;
 
 class MIRGenerator
 {
   public:
     MIRGenerator(CompileCompartment *compartment, const JitCompileOptions &options,
                  TempAllocator *alloc, MIRGraph *graph,
                  CompileInfo *info, const OptimizationInfo *optimizationInfo,
-                 Label *outOfBoundsLabel = nullptr, bool usesSignalHandlersForOOB = false);
+                 Label *outOfBoundsLabel = nullptr, bool usesSignalHandlersForAsmJSOOB = false);
 
     TempAllocator &alloc() {
         return *alloc_;
     }
     MIRGraph &graph() {
         return *graph_;
     }
     bool ensureBallast() {
@@ -196,17 +196,17 @@ class MIRGenerator
 
     // List of nursery objects used by this compilation. Can be traced by a
     // minor GC while compilation happens off-thread. This Vector should only
     // be accessed on the main thread (IonBuilder, nursery GC or
     // CodeGenerator::link).
     ObjectVector nurseryObjects_;
 
     Label *outOfBoundsLabel_;
-    bool usesSignalHandlersForOOB_;
+    bool usesSignalHandlersForAsmJSOOB_;
 
     void addAbortedNewScriptPropertiesGroup(ObjectGroup *type);
     void setForceAbort() {
         shouldForceAbort_ = true;
     }
     bool shouldForceAbort() {
         return shouldForceAbort_;
     }
@@ -222,20 +222,28 @@ class MIRGenerator
     const JitCompileOptions options;
 
     void traceNurseryObjects(JSTracer *trc);
 
     const ObjectVector &nurseryObjects() const {
         return nurseryObjects_;
     }
 
-    bool usesSignalHandlersForOOB() const {
-        return usesSignalHandlersForOOB_;
-    }
     Label *outOfBoundsLabel() const {
         return outOfBoundsLabel_;
     }
+    bool needsAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access) const {
+        // A heap access needs a bounds-check branch if we're not relying on signal
+        // handlers to catch errors, and if it's not proven to be within bounds.
+        // We use signal handlers on x64, but on x86 there isn't enough address
+        // space for a guard region.
+#ifdef JS_CODEGEN_X64
+        if (usesSignalHandlersForAsmJSOOB_)
+            return false;
+#endif
+        return access->needsBoundsCheck();
+    }
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_MIRGenerator_h */
--- a/js/src/jit/MIRGraph.cpp
+++ b/js/src/jit/MIRGraph.cpp
@@ -15,17 +15,17 @@
 
 using namespace js;
 using namespace js::jit;
 using mozilla::Swap;
 
 MIRGenerator::MIRGenerator(CompileCompartment *compartment, const JitCompileOptions &options,
                            TempAllocator *alloc, MIRGraph *graph, CompileInfo *info,
                            const OptimizationInfo *optimizationInfo,
-                           Label *outOfBoundsLabel, bool usesSignalHandlersForOOB)
+                           Label *outOfBoundsLabel, bool usesSignalHandlersForAsmJSOOB)
   : compartment(compartment),
     info_(info),
     optimizationInfo_(optimizationInfo),
     alloc_(alloc),
     graph_(graph),
     abortReason_(AbortReason_NoAbort),
     shouldForceAbort_(false),
     abortedNewScriptPropertiesGroups_(*alloc_),
@@ -37,17 +37,17 @@ MIRGenerator::MIRGenerator(CompileCompar
     usesSimd_(false),
     usesSimdCached_(false),
     minAsmJSHeapLength_(0),
     modifiesFrameArguments_(false),
     instrumentedProfiling_(false),
     instrumentedProfilingIsCached_(false),
     nurseryObjects_(*alloc),
     outOfBoundsLabel_(outOfBoundsLabel),
-    usesSignalHandlersForOOB_(usesSignalHandlersForOOB),
+    usesSignalHandlersForAsmJSOOB_(usesSignalHandlersForAsmJSOOB),
     options(options)
 { }
 
 bool
 MIRGenerator::usesSimd()
 {
     if (usesSimdCached_)
         return usesSimd_;
--- a/js/src/jit/arm/Architecture-arm.h
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -578,12 +578,18 @@ static inline bool UseHardFpABI()
 #if defined(JS_CODEGEN_ARM_HARDFP)
     return true;
 #else
     return false;
 #endif
 }
 #endif
 
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+// TODO: Implement this for ARM. Note that it requires Codegen to respect the
+// offset field of AsmJSHeapAccess.
+static const size_t AsmJSCheckedImmediateRange = 0;
+static const size_t AsmJSImmediateRange = 0;
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_arm_Architecture_arm_h */
--- a/js/src/jit/mips/Architecture-mips.h
+++ b/js/src/jit/mips/Architecture-mips.h
@@ -496,12 +496,18 @@ hasUnaliasedDouble() {
 // On MIPS, fn-double aliases both fn-float32 and fn+1-float32, so if you need
 // to convert a float32 to a double as a temporary, you need a temporary
 // double register.
 inline bool
 hasMultiAlias() {
     return true;
 }
 
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+// TODO: Implement this for MIPS. Note that it requires Codegen to respect the
+// offset field of AsmJSHeapAccess.
+static const size_t AsmJSCheckedImmediateRange = 0;
+static const size_t AsmJSImmediateRange = 0;
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_mips_Architecture_mips_h */
--- a/js/src/jit/shared/Assembler-shared.h
+++ b/js/src/jit/shared/Assembler-shared.h
@@ -311,16 +311,19 @@ struct PatchedAbsoluteAddress
     void *addr;
 
     explicit PatchedAbsoluteAddress()
       : addr(nullptr)
     { }
     explicit PatchedAbsoluteAddress(const void *addr)
       : addr(const_cast<void*>(addr))
     { }
+    explicit PatchedAbsoluteAddress(uintptr_t addr)
+      : addr(reinterpret_cast<void*>(addr))
+    { }
 };
 
 // Specifies an address computed in the form of a register base and a constant,
 // 32-bit offset.
 struct Address
 {
     Register base;
     int32_t offset;
@@ -759,89 +762,87 @@ static const unsigned AsmJSNaN32GlobalDa
 
 // Summarizes a heap access made by asm.js code that needs to be patched later
 // and/or looked up by the asm.js signal handlers. Different architectures need
 // to know different things (x64: offset and length, ARM: where to patch in
 // heap length, x86: where to patch in heap length and base) hence the massive
 // #ifdefery.
 class AsmJSHeapAccess
 {
+#if defined(JS_CODEGEN_X64)
+  public:
+    enum WhatToDoOnOOB {
+        CarryOn, // loads return undefined, stores do nothing.
+        Throw    // throw a RangeError
+    };
+#endif
+
   private:
-    uint32_t offset_;
+    uint32_t insnOffset_;
+#if defined(JS_CODEGEN_X86)
+    uint8_t opLength_;  // the length of the load/store instruction
+#endif
+#if defined(JS_CODEGEN_X64)
+    uint8_t offsetWithinWholeSimdVector_; // e.g. if this is the Z of an XYZ access
+    bool throwOnOOB_;   // should we throw on OOB?
+#endif
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     uint8_t cmpDelta_;  // the number of bytes from the cmp to the load/store instruction
-    uint8_t opLength_;  // the length of the load/store instruction
-    uint8_t numSimdElems_; // the number of SIMD lanes to load/store at once
-    Scalar::Type type_;
-    AnyRegister::Code loadedReg_ : 8;
 #endif
 
     JS_STATIC_ASSERT(AnyRegister::Total < UINT8_MAX);
 
   public:
     AsmJSHeapAccess() {}
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     static const uint32_t NoLengthCheck = UINT32_MAX;
+#endif
 
-    // If 'cmp' equals 'offset' or if it is not supplied then the
+#if defined(JS_CODEGEN_X86)
+    // If 'cmp' equals 'insnOffset' or if it is not supplied then the
     // cmpDelta_ is zero indicating that there is no length to patch.
-    AsmJSHeapAccess(uint32_t offset, uint32_t after, Scalar::Type type, AnyRegister loadedReg,
-                    uint32_t cmp = NoLengthCheck)
-      : offset_(offset),
-        cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
-        opLength_(after - offset),
-        numSimdElems_(UINT8_MAX),
-        type_(type),
-        loadedReg_(loadedReg.code())
+    AsmJSHeapAccess(uint32_t insnOffset, uint32_t after, uint32_t cmp = NoLengthCheck)
+      : insnOffset_(insnOffset),
+        opLength_(after - insnOffset),
+        cmpDelta_(cmp == NoLengthCheck ? 0 : insnOffset - cmp)
+    {}
+#elif defined(JS_CODEGEN_X64)
+    // If 'cmp' equals 'insnOffset' or if it is not supplied then the
+    // cmpDelta_ is zero indicating that there is no length to patch.
+    AsmJSHeapAccess(uint32_t insnOffset, WhatToDoOnOOB oob,
+                    uint32_t cmp = NoLengthCheck,
+                    uint32_t offsetWithinWholeSimdVector = 0)
+      : insnOffset_(insnOffset),
+        offsetWithinWholeSimdVector_(offsetWithinWholeSimdVector),
+        throwOnOOB_(oob == Throw),
+        cmpDelta_(cmp == NoLengthCheck ? 0 : insnOffset - cmp)
     {
-        MOZ_ASSERT(!Scalar::isSimdType(type));
-    }
-    AsmJSHeapAccess(uint32_t offset, uint8_t after, Scalar::Type type, uint32_t cmp = NoLengthCheck)
-      : offset_(offset),
-        cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
-        opLength_(after - offset),
-        numSimdElems_(UINT8_MAX),
-        type_(type),
-        loadedReg_(UINT8_MAX)
-    {
-        MOZ_ASSERT(!Scalar::isSimdType(type));
-    }
-    // SIMD loads / stores
-    AsmJSHeapAccess(uint32_t offset, uint32_t after, unsigned numSimdElems, Scalar::Type type,
-                    uint32_t cmp = NoLengthCheck)
-      : offset_(offset),
-        cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
-        opLength_(after - offset),
-        numSimdElems_(numSimdElems),
-        type_(type),
-        loadedReg_(UINT8_MAX)
-    {
-        MOZ_ASSERT(Scalar::isSimdType(type));
+        MOZ_ASSERT(offsetWithinWholeSimdVector_ == offsetWithinWholeSimdVector);
     }
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
-    explicit AsmJSHeapAccess(uint32_t offset)
-      : offset_(offset)
+    explicit AsmJSHeapAccess(uint32_t insnOffset)
+      : insnOffset_(insnOffset)
     {}
 #endif
 
-    uint32_t offset() const { return offset_; }
-    void setOffset(uint32_t offset) { offset_ = offset; }
+    uint32_t insnOffset() const { return insnOffset_; }
+    void setInsnOffset(uint32_t insnOffset) { insnOffset_ = insnOffset; }
 #if defined(JS_CODEGEN_X86)
-    void *patchOffsetAt(uint8_t *code) const { return code + (offset_ + opLength_); }
+    void *patchHeapPtrImmAt(uint8_t *code) const { return code + (insnOffset_ + opLength_); }
 #endif
 #if defined(JS_CODEGEN_X64)
-    unsigned opLength() const { MOZ_ASSERT(!Scalar::isSimdType(type_)); return opLength_; }
-    bool isLoad() const { MOZ_ASSERT(!Scalar::isSimdType(type_)); return loadedReg_ != UINT8_MAX; }
+    bool throwOnOOB() const { return throwOnOOB_; }
+    uint32_t offsetWithinWholeSimdVector() const { return offsetWithinWholeSimdVector_; }
 #endif
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     bool hasLengthCheck() const { return cmpDelta_ > 0; }
-    void *patchLengthAt(uint8_t *code) const { return code + (offset_ - cmpDelta_); }
-    unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(type_)); return numSimdElems_; }
-    Scalar::Type type() const { return type_; }
-    AnyRegister loadedReg() const { return AnyRegister::FromCode(loadedReg_); }
+    void *patchLengthAt(uint8_t *code) const {
+        MOZ_ASSERT(hasLengthCheck());
+        return code + (insnOffset_ - cmpDelta_);
+    }
 #endif
 };
 
 typedef Vector<AsmJSHeapAccess, 0, SystemAllocPolicy> AsmJSHeapAccessVector;
 
 struct AsmJSGlobalAccess
 {
     CodeOffsetLabel patchAt;
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -65,16 +65,20 @@ class Operand
       : kind_(MEM_REG_DISP),
         base_(reg.code()),
         disp_(disp)
     { }
     explicit Operand(AbsoluteAddress address)
       : kind_(MEM_ADDRESS32),
         disp_(X86Encoding::AddressImmediate(address.addr))
     { }
+    explicit Operand(PatchedAbsoluteAddress address)
+      : kind_(MEM_ADDRESS32),
+        disp_(X86Encoding::AddressImmediate(address.addr))
+    { }
 
     Address toAddress() const {
         MOZ_ASSERT(kind() == MEM_REG_DISP);
         return Address(Register::FromCode(base()), disp());
     }
 
     BaseIndex toBaseIndex() const {
         MOZ_ASSERT(kind() == MEM_SCALE);
--- a/js/src/jit/shared/CodeGenerator-shared-inl.h
+++ b/js/src/jit/shared/CodeGenerator-shared-inl.h
@@ -181,18 +181,18 @@ CodeGeneratorShared::restoreLiveVolatile
     MOZ_ASSERT(!ins->isCall());
     LSafepoint *safepoint = ins->safepoint();
     RegisterSet regs = RegisterSet::Intersect(safepoint->liveRegs(), RegisterSet::Volatile());
     masm.PopRegsInMask(regs);
 }
 
 void
 CodeGeneratorShared::verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, bool isLoad,
-                                                 Scalar::Type type, const Operand &mem,
-                                                 LAllocation alloc)
+                                                 Scalar::Type type, unsigned numElems,
+                                                 const Operand &mem, LAllocation alloc)
 {
 #ifdef DEBUG
     using namespace Disassembler;
 
     OtherOperand op;
     Disassembler::HeapAccess::Kind kind = isLoad ? HeapAccess::Load : HeapAccess::Store;
     switch (type) {
       case Scalar::Int8:
@@ -224,18 +224,20 @@ CodeGeneratorShared::verifyHeapAccessDis
       case Scalar::Int32x4:
         op = OtherOperand(ToFloatRegister(alloc).code());
         break;
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("Unexpected array type");
     }
 
+    size_t size = Scalar::isSimdType(type)
+                  ? Scalar::scalarByteSize(type) * numElems
+                  : TypedArrayElemSize(type);
     masm.verifyHeapAccessDisassembly(begin, end,
-                                     HeapAccess(kind, TypedArrayElemSize(type),
-                                     ComplexAddress(mem), op));
+                                     HeapAccess(kind, size, ComplexAddress(mem), op));
 #endif
 }
 
 } // ion
 } // js
 
 #endif /* jit_shared_CodeGenerator_shared_inl_h */
--- a/js/src/jit/shared/CodeGenerator-shared.h
+++ b/js/src/jit/shared/CodeGenerator-shared.h
@@ -544,18 +544,18 @@ class CodeGeneratorShared : public LElem
     void emitTracelogIonStop() {
 #ifdef JS_TRACE_LOGGING
         emitTracelogStopEvent(TraceLogger_IonMonkey);
         emitTracelogScriptStop();
 #endif
     }
 
     inline void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, bool isLoad,
-                                            Scalar::Type type, const Operand &mem,
-                                            LAllocation alloc);
+                                            Scalar::Type type, unsigned numElems,
+                                            const Operand &mem, LAllocation alloc);
 };
 
 // An out-of-line path is generated at the end of the function.
 class OutOfLineCode : public TempObject
 {
     Label entry_;
     Label rejoin_;
     uint32_t framePushed_;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -358,16 +358,92 @@ CodeGeneratorX86Shared::visitOutOfLineLo
       case Scalar::Uint8Clamped:
         Register destReg = ool->dest().gpr();
         masm.mov(ImmWord(0), destReg);
         break;
     }
     masm.jmp(ool->rejoin());
 }
 
+void
+CodeGeneratorX86Shared::visitOffsetBoundsCheck(OffsetBoundsCheck *oolCheck)
+{
+    // The access is heap[ptr + offset]. The inline code checks that
+    // ptr < heap.length - offset. We get here when that fails. We need to check
+    // for the case where ptr + offset >= 0, in which case the access is still
+    // in bounds.
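+    // For example, if the offset is 8 and ptr is -4, the inline check fails
+    // (as an unsigned value, -4 is huge), but -4 is not unsigned-below -8, so
+    // we do not branch to outOfBounds; after the sign-extension below (on
+    // x64), the access computes HeapReg + (-4) + 8 = HeapReg + 4, which is in
+    // bounds.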
+    MOZ_ASSERT(oolCheck->offset() != 0,
+               "An access without a constant offset doesn't need a separate OffsetBoundsCheck");
+    masm.cmp32(oolCheck->ptrReg(), Imm32(-uint32_t(oolCheck->offset())));
+    masm.j(Assembler::Below, oolCheck->outOfBounds());
+
+#ifdef JS_CODEGEN_X64
+    // In order to get the offset to wrap properly, we must sign-extend the
+    // pointer to 32-bits. We'll zero out the sign extension immediately
+    // after the access to restore asm.js invariants.
+    masm.movslq(oolCheck->ptrReg(), oolCheck->ptrReg());
+#endif
+
+    masm.jmp(oolCheck->rejoin());
+}
+
+uint32_t
+CodeGeneratorX86Shared::emitAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access,
+                                                   const MInstruction *mir,
+                                                   Register ptr, Label *fail)
+{
+    // Emit a bounds-checking branch for |access|.
+
+    MOZ_ASSERT(gen->needsAsmJSBoundsCheckBranch(access));
+
+    Label *pass = nullptr;
+
+    // If we have a non-zero offset, it's possible that |ptr| itself is out of
+    // bounds, while adding the offset computes an in-bounds address. To catch
+    // this case, we need a second branch, which we emit out of line since it's
+    // unlikely to be needed in normal programs.
+    if (access->offset() != 0) {
+        OffsetBoundsCheck *oolCheck = new(alloc()) OffsetBoundsCheck(fail, ptr, access->offset());
+        fail = oolCheck->entry();
+        pass = oolCheck->rejoin();
+        addOutOfLineCode(oolCheck, mir);
+    }
+
+    // The bounds check is a comparison with an immediate value. The asm.js
+    // module linking process will add the length of the heap to the immediate
+    // field, so -access->endOffset() will turn into
+    // (heapLength - access->endOffset()), allowing us to test whether the end
+    // of the access is beyond the end of the heap.
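+    // For example, for a 4-byte access at offset 16 against a 64 KiB heap,
+    // the immediate becomes 65536 - 20 = 65516, so the branch below fails any
+    // ptr whose access would extend past the end of the heap.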
+    uint32_t maybeCmpOffset = masm.cmp32WithPatch(ptr, Imm32(-access->endOffset())).offset();
+    masm.j(Assembler::Above, fail);
+
+    if (pass)
+        masm.bind(pass);
+
+    return maybeCmpOffset;
+}
+
+void
+CodeGeneratorX86Shared::cleanupAfterAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *access,
+                                                           Register ptr)
+{
+    // Clean up after performing a heap access checked by a branch.
+
+    MOZ_ASSERT(gen->needsAsmJSBoundsCheckBranch(access));
+
+#ifdef JS_CODEGEN_X64
+    // If the offset is 0, we don't use an OffsetBoundsCheck.
+    if (access->offset() != 0) {
+        // Zero out the high 32 bits, in case the OffsetBoundsCheck code had to
+        // sign-extend (movslq) the pointer value to get wraparound to work.
+        masm.movl(ptr, ptr);
+    }
+#endif
+}
+
 bool
 CodeGeneratorX86Shared::generateOutOfLineCode()
 {
     if (!CodeGeneratorShared::generateOutOfLineCode())
         return false;
 
     if (deoptLabel_.used()) {
         // All non-table-based bailouts will go here.
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -27,17 +27,16 @@ class CodeGeneratorX86Shared : public Co
     CodeGeneratorX86Shared *thisFromCtor() {
         return this;
     }
 
     template <typename T>
     void bailout(const T &t, LSnapshot *snapshot);
 
   protected:
-
     // Load a NaN or zero into a register for an out of bounds AsmJS or static
     // typed array load.
     class OutOfLineLoadTypedArrayOutOfBounds : public OutOfLineCodeBase<CodeGeneratorX86Shared>
     {
         AnyRegister dest_;
         Scalar::Type viewType_;
       public:
         OutOfLineLoadTypedArrayOutOfBounds(AnyRegister dest, Scalar::Type viewType)
@@ -46,16 +45,41 @@ class CodeGeneratorX86Shared : public Co
 
         AnyRegister dest() const { return dest_; }
         Scalar::Type viewType() const { return viewType_; }
         void accept(CodeGeneratorX86Shared *codegen) {
             codegen->visitOutOfLineLoadTypedArrayOutOfBounds(this);
         }
     };
 
+    // Additional bounds checking for heap accesses with constant offsets.
+    class OffsetBoundsCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
+    {
+        Label *outOfBounds_;
+        Register ptrReg_;
+        int32_t offset_;
+      public:
+        OffsetBoundsCheck(Label *outOfBounds, Register ptrReg, int32_t offset)
+          : outOfBounds_(outOfBounds), ptrReg_(ptrReg), offset_(offset)
+        {}
+
+        Label *outOfBounds() const { return outOfBounds_; }
+        Register ptrReg() const { return ptrReg_; }
+        int32_t offset() const { return offset_; }
+        void accept(CodeGeneratorX86Shared *codegen) {
+            codegen->visitOffsetBoundsCheck(this);
+        }
+    };
+
+    // Functions for emitting bounds-checking code with branches.
+    MOZ_WARN_UNUSED_RESULT
+    uint32_t emitAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *mir, const MInstruction *ins,
+                                        Register ptr, Label *fail);
+    void cleanupAfterAsmJSBoundsCheckBranch(const MAsmJSHeapAccess *mir, Register ptr);
+
     // Label for the common return path.
     NonAssertingLabel returnLabel_;
     NonAssertingLabel deoptLabel_;
 
     inline Operand ToOperand(const LAllocation &a) {
         if (a.isGeneralReg())
             return Operand(a.toGeneralReg()->reg());
         if (a.isFloatReg())
@@ -209,16 +233,17 @@ class CodeGeneratorX86Shared : public Co
     virtual void visitGuardObjectGroup(LGuardObjectGroup *guard);
     virtual void visitGuardClass(LGuardClass *guard);
     virtual void visitEffectiveAddress(LEffectiveAddress *ins);
     virtual void visitUDivOrMod(LUDivOrMod *ins);
     virtual void visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
     virtual void visitMemoryBarrier(LMemoryBarrier *ins);
 
     void visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds *ool);
+    void visitOffsetBoundsCheck(OffsetBoundsCheck *oolCheck);
 
     void visitNegI(LNegI *lir);
     void visitNegD(LNegD *lir);
     void visitNegF(LNegF *lir);
 
     // SIMD operators
     void visitSimdValueInt32x4(LSimdValueInt32x4 *lir);
     void visitSimdValueFloat32x4(LSimdValueFloat32x4 *lir);
--- a/js/src/jit/shared/Lowering-shared-inl.h
+++ b/js/src/jit/shared/Lowering-shared-inl.h
@@ -271,20 +271,20 @@ LAllocation
 LIRGeneratorShared::useRegisterOrConstantAtStart(MDefinition *mir)
 {
     if (mir->isConstant())
         return LAllocation(mir->toConstant()->vp());
     return useRegisterAtStart(mir);
 }
 
 LAllocation
-LIRGeneratorShared::useRegisterOrNonNegativeConstantAtStart(MDefinition *mir)
+LIRGeneratorShared::useRegisterOrZeroAtStart(MDefinition *mir)
 {
-    if (mir->isConstant() && mir->toConstant()->value().toInt32() >= 0)
-        return LAllocation(mir->toConstant()->vp());
+    if (mir->isConstant() && mir->toConstant()->value().isInt32(0))
+        return LAllocation();
     return useRegisterAtStart(mir);
 }
 
 LAllocation
 LIRGeneratorShared::useRegisterOrNonDoubleConstant(MDefinition *mir)
 {
     if (mir->isConstant() && mir->type() != MIRType_Double && mir->type() != MIRType_Float32)
         return LAllocation(mir->toConstant()->vp());
--- a/js/src/jit/shared/Lowering-shared.h
+++ b/js/src/jit/shared/Lowering-shared.h
@@ -103,17 +103,17 @@ class LIRGeneratorShared : public MDefin
     // "Storable" is architecture dependend, and will include registers and
     // constants on X86 and only registers on ARM.  This is a generic "things
     // we can expect to write into memory in 1 instruction".
     inline LAllocation useStorable(MDefinition *mir);
     inline LAllocation useStorableAtStart(MDefinition *mir);
     inline LAllocation useKeepaliveOrConstant(MDefinition *mir);
     inline LAllocation useRegisterOrConstant(MDefinition *mir);
     inline LAllocation useRegisterOrConstantAtStart(MDefinition *mir);
-    inline LAllocation useRegisterOrNonNegativeConstantAtStart(MDefinition *mir);
+    inline LAllocation useRegisterOrZeroAtStart(MDefinition *mir);
     inline LAllocation useRegisterOrNonDoubleConstant(MDefinition *mir);
 
     inline LUse useRegisterForTypedLoad(MDefinition *mir, MIRType type);
 
 #ifdef JS_NUNBOX32
     inline LUse useType(MDefinition *mir, LUse::Policy policy);
     inline LUse usePayload(MDefinition *mir, LUse::Policy policy);
     inline LUse usePayloadAtStart(MDefinition *mir, LUse::Policy policy);
--- a/js/src/jit/x64/Architecture-x64.h
+++ b/js/src/jit/x64/Architecture-x64.h
@@ -257,12 +257,17 @@ hasUnaliasedDouble()
 // On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32
 // to a double as a temporary, you need a temporary double register.
 inline bool
 hasMultiAlias()
 {
     return false;
 }
 
+// Support some constant-offset addressing.
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+static const size_t AsmJSCheckedImmediateRange = 4096;
+static const size_t AsmJSImmediateRange = UINT32_C(0x80000000);
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_x64_Architecture_x64_h */
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -293,124 +293,118 @@ CodeGeneratorX64::loadSimd(Scalar::Type 
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
 CodeGeneratorX64::emitSimdLoad(LAsmJSLoadHeap *ins)
 {
-    MAsmJSLoadHeap *mir = ins->mir();
+    const MAsmJSLoadHeap *mir = ins->mir();
     Scalar::Type type = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
     FloatRegister out = ToFloatRegister(ins->output());
-    Operand srcAddr(HeapReg);
-
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        srcAddr = Operand(HeapReg, ptrImm);
-    } else {
-        srcAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *ptr = ins->ptr();
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, gen->outOfBoundsLabel()); // Throws RangeError
-    }
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
     unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
-        Operand shiftedOffset(HeapReg);
-        if (ptr->isConstant())
-            shiftedOffset = Operand(HeapReg, ptr->toConstant()->toInt32() + 2 * sizeof(float));
-        else
-            shiftedOffset = Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float));
+        Operand srcAddrZ =
+            ptr->isBogus()
+            ? Operand(HeapReg, 2 * sizeof(float) + mir->offset())
+            : Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float) + mir->offset());
 
         // Load XY
         uint32_t before = masm.size();
         loadSimd(type, 2, srcAddr, out);
         uint32_t after = masm.size();
-        // We're noting a load of 3 elements, so that the bounds check checks
-        // for 3 elements.
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, type, 2, srcAddr,
+                                    *ins->output()->output());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset));
 
         // Load Z (W is zeroed)
+        // This is still in bounds: either the manual bounds check above covers
+        // the full access, or the bounds check was removed because there was
+        // certainly enough space.
         before = after;
-        loadSimd(type, 1, shiftedOffset, ScratchSimdReg);
+        loadSimd(type, 1, srcAddrZ, ScratchSimdReg);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 1, type));
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, type, 1, srcAddrZ, LFloatReg(ScratchSimdReg));
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw,
+                                    AsmJSHeapAccess::NoLengthCheck, 8));
 
         // Move ZW atop XY
         masm.vmovlhps(ScratchSimdReg, out, out);
-        return;
+    } else {
+        uint32_t before = masm.size();
+        loadSimd(type, numElems, srcAddr, out);
+        uint32_t after = masm.size();
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, type, numElems, srcAddr, *ins->output()->output());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset));
     }
 
-    uint32_t before = masm.size();
-    loadSimd(type, numElems, srcAddr, out);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
 {
-    MAsmJSLoadHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
-    const LDefinition *out = ins->output();
-    Operand srcAddr(HeapReg);
+    const MAsmJSLoadHeap *mir = ins->mir();
+    Scalar::Type accessType = mir->accessType();
 
-    if (Scalar::isSimdType(vt))
+    if (Scalar::isSimdType(accessType))
         return emitSimdLoad(ins);
 
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        srcAddr = Operand(HeapReg, ptrImm);
-    } else {
-        srcAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *ptr = ins->ptr();
+    const LDefinition *out = ins->output();
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
-    memoryBarrier(ins->mir()->barrierBefore());
+    memoryBarrier(mir->barrierBefore());
     OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        CodeOffsetLabel cmp = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0));
-        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
-        addOutOfLineCode(ool, ins->mir());
-        masm.j(Assembler::AboveOrEqual, ool->entry());
-        maybeCmpOffset = cmp.offset();
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
+        addOutOfLineCode(ool, mir);
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), ool->entry());
     }
 
     uint32_t before = masm.size();
-    switch (vt) {
+    switch (accessType) {
       case Scalar::Int8:      masm.movsbl(srcAddr, ToRegister(out)); break;
       case Scalar::Uint8:     masm.movzbl(srcAddr, ToRegister(out)); break;
       case Scalar::Int16:     masm.movswl(srcAddr, ToRegister(out)); break;
       case Scalar::Uint16:    masm.movzwl(srcAddr, ToRegister(out)); break;
       case Scalar::Int32:
       case Scalar::Uint32:    masm.movl(srcAddr, ToRegister(out)); break;
       case Scalar::Float32:   masm.loadFloat32(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float64:   masm.loadDouble(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float32x4:
       case Scalar::Int32x4:   MOZ_CRASH("SIMD loads should be handled in emitSimdLoad");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
           MOZ_CRASH("unexpected array type");
     }
     uint32_t after = masm.size();
-    verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, vt, srcAddr, *out->output());
-    if (ool)
+    verifyHeapAccessDisassembly(before, after, /*isLoad=*/true, accessType, 0, srcAddr, *out->output());
+    if (ool) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(ool->rejoin());
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, vt, ToAnyRegister(out), maybeCmpOffset));
+    }
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::CarryOn, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in,
                             const Operand &dstAddr)
 {
     switch (type) {
       case Scalar::Float32x4: {
@@ -447,222 +441,228 @@ CodeGeneratorX64::storeSimd(Scalar::Type
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
 CodeGeneratorX64::emitSimdStore(LAsmJSStoreHeap *ins)
 {
-    MAsmJSStoreHeap *mir = ins->mir();
+    const MAsmJSStoreHeap *mir = ins->mir();
     Scalar::Type type = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
     FloatRegister in = ToFloatRegister(ins->value());
-    Operand dstAddr(HeapReg);
-
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        dstAddr = Operand(HeapReg, ptrImm);
-    } else {
-        dstAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, gen->outOfBoundsLabel()); // Throws RangeError
-    }
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
     unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
-        Operand shiftedOffset(HeapReg);
-        if (ptr->isConstant())
-            shiftedOffset = Operand(HeapReg, ptr->toConstant()->toInt32() + 2 * sizeof(float));
-        else
-            shiftedOffset = Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float));
+        Operand dstAddrZ =
+            ptr->isBogus()
+            ? Operand(HeapReg, 2 * sizeof(float) + mir->offset())
+            : Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float) + mir->offset());
 
-        // Store Z first: it would be observable to store XY first, in the
-        // case XY can be stored in bounds but Z can't (in this case, we'd throw
-        // without restoring the values previously stored before XY).
+        // It's possible that the Z could be out of bounds when the XY is in
+        // bounds. To avoid storing the XY before the exception is thrown, we
+        // store the Z first, and record its offset in the AsmJSHeapAccess so
+        // that the signal handler knows to check the bounds of the full
+        // access, rather than just the Z.
         masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
         uint32_t before = masm.size();
-        storeSimd(type, 1, ScratchSimdReg, shiftedOffset);
+        storeSimd(type, 1, ScratchSimdReg, dstAddrZ);
         uint32_t after = masm.size();
-        // We're noting a store of 3 elements, so that the bounds check checks
-        // for 3 elements.
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, type, 1, dstAddrZ, LFloatReg(ScratchSimdReg));
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset, 8));
 
         // Store XY
         before = after;
         storeSimd(type, 2, in, dstAddr);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 2, type));
-        return;
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, type, 2, dstAddr, *ins->value());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw));
+    } else {
+        uint32_t before = masm.size();
+        storeSimd(type, numElems, in, dstAddr);
+        uint32_t after = masm.size();
+        verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, type, numElems, dstAddr, *ins->value());
+        masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::Throw, maybeCmpOffset));
     }
 
-    uint32_t before = masm.size();
-    storeSimd(type, numElems, in, dstAddr);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
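
The comment in emitSimdStore above explains the store ordering for 3-element
vectors. Below is a small standalone illustration of why Z goes first, with a
thrown exception standing in for the signal-handler fault; the helper names
are hypothetical, not SpiderMonkey code.

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    // Stand-in for a single faulting store: throws instead of raising a signal.
    static void storeElem(std::vector<float> &heap, size_t idx, float v)
    {
        if (idx >= heap.size())
            throw std::out_of_range("asm.js heap access out of bounds");
        heap[idx] = v;
    }

    // Z (base + 2) is the furthest element of a 3-element store. Storing it
    // first means that if the tail of the access is out of bounds, we fault
    // before any of XY has been written, so no partial store is observable.
    static void storeFloat32x3(std::vector<float> &heap, size_t base, const float v[3])
    {
        storeElem(heap, base + 2, v[2]);   // Z first
        storeElem(heap, base + 0, v[0]);   // then XY
        storeElem(heap, base + 1, v[1]);
    }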
 
 void
 CodeGeneratorX64::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
 {
-    MAsmJSStoreHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
-    const LAllocation *ptr = ins->ptr();
-    Operand dstAddr(HeapReg);
+    const MAsmJSStoreHeap *mir = ins->mir();
+    Scalar::Type accessType = mir->accessType();
 
-    if (Scalar::isSimdType(vt))
+    if (Scalar::isSimdType(accessType))
         return emitSimdStore(ins);
 
-    if (ptr->isConstant()) {
-        int32_t ptrImm = ptr->toConstant()->toInt32();
-        MOZ_ASSERT(ptrImm >= 0);
-        dstAddr = Operand(HeapReg, ptrImm);
-    } else {
-        dstAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
-    }
+    const LAllocation *value = ins->value();
+    const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(HeapReg, mir->offset())
+                      : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
-    memoryBarrier(ins->mir()->barrierBefore());
-    Label rejoin;
+    memoryBarrier(mir->barrierBefore());
+    Label *rejoin = nullptr;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (mir->needsBoundsCheck() && !gen->usesSignalHandlersForOOB()) {
-        CodeOffsetLabel cmp = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0));
-        masm.j(Assembler::AboveOrEqual, &rejoin);
-        maybeCmpOffset = cmp.offset();
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        rejoin = alloc().lifoAlloc()->new_<Label>();
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), rejoin);
     }
 
     uint32_t before = masm.size();
-    if (ins->value()->isConstant()) {
-        switch (vt) {
+    if (value->isConstant()) {
+        switch (accessType) {
           case Scalar::Int8:
-          case Scalar::Uint8:        masm.movb(Imm32(ToInt32(ins->value())), dstAddr); break;
+          case Scalar::Uint8:        masm.movb(Imm32(ToInt32(value)), dstAddr); break;
           case Scalar::Int16:
-          case Scalar::Uint16:       masm.movw(Imm32(ToInt32(ins->value())), dstAddr); break;
+          case Scalar::Uint16:       masm.movw(Imm32(ToInt32(value)), dstAddr); break;
           case Scalar::Int32:
-          case Scalar::Uint32:       masm.movl(Imm32(ToInt32(ins->value())), dstAddr); break;
+          case Scalar::Uint32:       masm.movl(Imm32(ToInt32(value)), dstAddr); break;
           case Scalar::Float32:
           case Scalar::Float64:
           case Scalar::Float32x4:
           case Scalar::Int32x4:
           case Scalar::Uint8Clamped:
           case Scalar::MaxTypedArrayViewType:
               MOZ_CRASH("unexpected array type");
         }
     } else {
-        switch (vt) {
+        switch (accessType) {
           case Scalar::Int8:
-          case Scalar::Uint8:        masm.movb(ToRegister(ins->value()), dstAddr); break;
+          case Scalar::Uint8:        masm.movb(ToRegister(value), dstAddr); break;
           case Scalar::Int16:
-          case Scalar::Uint16:       masm.movw(ToRegister(ins->value()), dstAddr); break;
+          case Scalar::Uint16:       masm.movw(ToRegister(value), dstAddr); break;
           case Scalar::Int32:
-          case Scalar::Uint32:       masm.movl(ToRegister(ins->value()), dstAddr); break;
-          case Scalar::Float32:      masm.storeFloat32(ToFloatRegister(ins->value()), dstAddr); break;
-          case Scalar::Float64:      masm.storeDouble(ToFloatRegister(ins->value()), dstAddr); break;
+          case Scalar::Uint32:       masm.movl(ToRegister(value), dstAddr); break;
+          case Scalar::Float32:      masm.storeFloat32(ToFloatRegister(value), dstAddr); break;
+          case Scalar::Float64:      masm.storeDouble(ToFloatRegister(value), dstAddr); break;
           case Scalar::Float32x4:
           case Scalar::Int32x4:      MOZ_CRASH("SIMD stores must be handled in emitSimdStore");
           case Scalar::Uint8Clamped:
           case Scalar::MaxTypedArrayViewType:
               MOZ_CRASH("unexpected array type");
         }
     }
     uint32_t after = masm.size();
-    verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, vt, dstAddr, *ins->value());
-    if (rejoin.used())
-        masm.bind(&rejoin);
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, vt, maybeCmpOffset));
+    verifyHeapAccessDisassembly(before, after, /*isLoad=*/false, accessType, 0, dstAddr, *value);
+    if (rejoin) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
+        masm.bind(rejoin);
+    }
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, AsmJSHeapAccess::CarryOn, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap *ins)
 {
     MAsmJSCompareExchangeHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
 
     MOZ_ASSERT(ptr->isRegister());
-    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne);
+    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     Register oldval = ToRegister(ins->oldValue());
     Register newval = ToRegister(ins->newValue());
 
+    // Note that we can't use
+    // needsAsmJSBoundsCheckBranch/emitAsmJSBoundsCheckBranch/cleanupAfterAsmJSBoundsCheckBranch
+    // since signal-handler bounds checking is not yet implemented for atomic accesses.
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out, out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
-    masm.compareExchangeToTypedIntArray(vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+    masm.compareExchangeToTypedIntArray(accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         srcAddr,
                                         oldval,
                                         newval,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     uint32_t after = masm.size();
     if (rejoin.used())
         masm.bind(&rejoin);
-    masm.append(AsmJSHeapAccess(after, after, mir->accessType(), maybeCmpOffset));
+    MOZ_ASSERT(mir->offset() == 0,
+               "The AsmJS signal handler doesn't yet support emulating "
+               "atomic accesses in the case of a fault from an unwrapped offset");
+    masm.append(AsmJSHeapAccess(after, AsmJSHeapAccess::Throw, maybeCmpOffset));
 }
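
The atomic paths now compare the pointer against Imm32(-mir->endOffset()) and
branch on BelowOrEqual instead of comparing against zero with Below. A
plausible reading, assuming the patched immediate has the heap length added to
it at link time (so the compare is effectively against heapLength - endOffset):
the whole access, constant offset included, is proven in bounds. The helper
below is an illustrative restatement, not code from this patch.

    #include <cstdint>

    // Equivalent of the patched cmp32/j(BelowOrEqual) pair, under the
    // assumption stated above. endOffset = offset + byte size of the access.
    bool accessInBounds(uint32_t ptr, uint32_t endOffset, uint32_t heapLength)
    {
        if (endOffset > heapLength)
            return false;                     // avoid unsigned underflow; always out of bounds
        return ptr <= heapLength - endOffset; // matches j(BelowOrEqual, &goahead)
    }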
 
 void
 CodeGeneratorX64::visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap *ins)
 {
     MAsmJSAtomicBinopHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
     Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
     const LAllocation* value = ins->value();
     AtomicOp op = mir->operation();
 
     MOZ_ASSERT(ptr->isRegister());
-    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne);
+    BaseIndex srcAddr(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
+    // Note that we can't use
+    // needsAsmJSBoundsCheckBranch/emitAsmJSBoundsCheckBranch/cleanupAfterAsmJSBoundsCheckBranch
+    // since signal-handler bounds checking is not yet implemented for atomic accesses.
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out,out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
     if (value->isConstant()) {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         Imm32(ToInt32(value)),
                                         srcAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     } else {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         ToRegister(value),
                                         srcAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     }
     uint32_t after = masm.size();
     if (rejoin.used())
         masm.bind(&rejoin);
-    masm.append(AsmJSHeapAccess(after, after, mir->accessType(), maybeCmpOffset));
+    MOZ_ASSERT(mir->offset() == 0,
+               "The AsmJS signal handler doesn't yet support emulating "
+               "atomic accesses in the case of a fault from an unwrapped offset");
+    masm.append(AsmJSHeapAccess(after, AsmJSHeapAccess::Throw, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX64::visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins)
 {
     MAsmJSLoadGlobalVar *mir = ins->mir();
 
     MIRType type = mir->type();
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@@ -148,42 +148,36 @@ LIRGeneratorX64::visitAsmJSUnsignedToFlo
 }
 
 void
 LIRGeneratorX64::visitAsmJSLoadHeap(MAsmJSLoadHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    // Only a positive index is accepted because a negative offset encoded as an
-    // offset in the addressing mode would not wrap back into the protected area
-    // reserved for the heap. For simplicity (and since we don't care about
-    // getting maximum performance in these cases) only allow constant
-    // operands when skipping bounds checks.
-    LAllocation ptrAlloc = ins->needsBoundsCheck()
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
                            ? useRegisterAtStart(ptr)
-                           : useRegisterOrNonNegativeConstantAtStart(ptr);
+                           : useRegisterOrZeroAtStart(ptr);
 
     define(new(alloc()) LAsmJSLoadHeap(ptrAlloc), ins);
 }
 
 void
 LIRGeneratorX64::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    // Only a positive index is accepted because a negative offset encoded as an
-    // offset in the addressing mode would not wrap back into the protected area
-    // reserved for the heap. For simplicity (and since we don't care about
-    // getting maximum performance in these cases) only allow constant
-    // opererands when skipping bounds checks.
-    LAllocation ptrAlloc = ins->needsBoundsCheck()
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
                            ? useRegisterAtStart(ptr)
-                           : useRegisterOrNonNegativeConstantAtStart(ptr);
+                           : useRegisterOrZeroAtStart(ptr);
 
     LAsmJSStoreHeap *lir = nullptr;  // initialize to silence GCC warning
     switch (ins->accessType()) {
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
       case Scalar::Uint16:
       case Scalar::Int32:
@@ -195,17 +189,16 @@ LIRGeneratorX64::visitAsmJSStoreHeap(MAs
       case Scalar::Float32x4:
       case Scalar::Int32x4:
         lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useRegisterAtStart(ins->value()));
         break;
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
-
     add(lir, ins);
 }
 
 void
 LIRGeneratorX64::visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins)
 {
     define(new(alloc()) LAsmJSLoadFuncPtr(useRegister(ins->index()), temp()), ins);
 }
--- a/js/src/jit/x86/Architecture-x86.h
+++ b/js/src/jit/x86/Architecture-x86.h
@@ -234,12 +234,17 @@ hasUnaliasedDouble()
 // On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32
 // to a double as a temporary, you need a temporary double register.
 inline bool
 hasMultiAlias()
 {
     return false;
 }
 
+// Support some constant-offset addressing.
+// See the comments above AsmJSMappedSize in AsmJSValidate.h for more info.
+static const size_t AsmJSCheckedImmediateRange = 4096;
+static const size_t AsmJSImmediateRange = UINT32_C(0x80000000);
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_x86_Architecture_x86_h */
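
A hedged sketch of how these two constants might be consulted when deciding
whether a constant byte offset can be folded into the addressing mode: offsets
below AsmJSCheckedImmediateRange are assumed to be covered by the explicit
bounds check, while larger offsets (up to AsmJSImmediateRange) are assumed to
rely on the guard region used by signal-handler OOB handling. The predicate is
hypothetical and only reuses the constants defined just above; the
authoritative description is the AsmJSMappedSize comment referenced in the
code.

    // Hypothetical predicate, not part of this patch: can `offset` be folded
    // into the effective address rather than materialized with extra code?
    static bool canFoldConstantOffset(size_t offset, bool usesSignalHandlersForOOB)
    {
        if (offset < AsmJSCheckedImmediateRange)
            return true;                      // covered by the explicit bounds check
        return usesSignalHandlersForOOB && offset < AsmJSImmediateRange; // covered by guard pages
    }
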
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -437,108 +437,288 @@ class Assembler : public AssemblerX86Sha
     // Move a 32-bit immediate into a register where the immediate can be
     // patched.
     CodeOffsetLabel movlWithPatch(Imm32 imm, Register dest) {
         masm.movl_i32r(imm.value, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
 
     // Load from *(base + disp32) where disp32 can be patched.
-    CodeOffsetLabel movsblWithPatch(Address src, Register dest) {
-        masm.movsbl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movsblWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movsbl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movsbl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movzblWithPatch(Address src, Register dest) {
-        masm.movzbl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movzblWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movzbl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movzbl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movswlWithPatch(Address src, Register dest) {
-        masm.movswl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movswlWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movswl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movswl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movzwlWithPatch(Address src, Register dest) {
-        masm.movzwl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movzwlWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movzwl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movzwl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movlWithPatch(Address src, Register dest) {
-        masm.movl_mr_disp32(src.offset, src.base.code(), dest.code());
+    CodeOffsetLabel movlWithPatch(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movl_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movl_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovssWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovssWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovss_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovss_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovss_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovdWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovd_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovd_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovd_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovqWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovqWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovq_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovq_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovq_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovsdWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovsdWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovsd_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovsd_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovsd_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovupsWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovupsWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovups_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovups_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovups_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdquWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovdquWithPatch(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovdqu_mr_disp32(src.offset, src.base.code(), dest.code());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovdqu_mr_disp32(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovdqu_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
 
     // Store to *(base + disp32) where disp32 can be patched.
-    CodeOffsetLabel movbWithPatch(Register src, Address dest) {
-        masm.movb_rm_disp32(src.code(), dest.offset, dest.base.code());
+    CodeOffsetLabel movbWithPatch(Register src, const Operand &dest) {
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movb_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movb_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movwWithPatch(Register src, Address dest) {
-        masm.movw_rm_disp32(src.code(), dest.offset, dest.base.code());
+    CodeOffsetLabel movwWithPatch(Register src, const Operand &dest) {
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movw_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movw_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movlWithPatch(Register src, Address dest) {
-        masm.movl_rm_disp32(src.code(), dest.offset, dest.base.code());
+    CodeOffsetLabel movlWithPatch(Register src, const Operand &dest) {
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movl_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.movl_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovdWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovd_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovd_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovd_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovqWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovqWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovq_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovq_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovq_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovssWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovssWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovss_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovss_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovss_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovsdWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovsdWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovsd_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovsd_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovsd_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovupsWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovupsWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovups_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovups_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovups_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel vmovdquWithPatch(FloatRegister src, Address dest) {
+    CodeOffsetLabel vmovdquWithPatch(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vmovdqu_rm_disp32(src.code(), dest.offset, dest.base.code());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovdqu_rm_disp32(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovdqu_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
         return CodeOffsetLabel(masm.currentOffset());
     }
 
     // Load from *(addr + index*scale) where addr can be patched.
     CodeOffsetLabel movlWithPatch(PatchedAbsoluteAddress addr, Register index, Scale scale,
                                   Register dest)
     {
         masm.movl_mr(addr.addr, index.code(), scale, dest.code());
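
Each *WithPatch helper above returns a CodeOffsetLabel just past the emitted
instruction, and the 32-bit displacement (or absolute address) sits in the
final four bytes of that instruction, so it can be rewritten once the heap
base is known. Below is a rough standalone sketch of that fixup, with
hypothetical names; the real patching lives in AsmJSModule's dynamic-link
step, not in a helper like this.

    #include <cstdint>
    #include <cstring>

    // Illustrative only: add the heap base to the disp32 embedded at the end
    // of the instruction whose end offset was recorded by the CodeOffsetLabel.
    static void addHeapBaseToDisp32(uint8_t *code, uint32_t insnEndOffset, uint32_t heapBase)
    {
        uint32_t disp;
        std::memcpy(&disp, code + insnEndOffset - 4, sizeof(disp)); // current constant offset
        disp += heapBase;                                           // fold in the heap base
        std::memcpy(code + insnEndOffset - 4, &disp, sizeof(disp));
    }
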
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -250,78 +250,66 @@ CodeGeneratorX86::visitAsmJSUInt32ToFloa
 
     if (input != temp)
         masm.mov(input, temp);
 
     // Beware: convertUInt32ToFloat32 clobbers input.
     masm.convertUInt32ToFloat32(temp, output);
 }
 
-template<typename T>
 void
-CodeGeneratorX86::load(Scalar::Type vt, const T &srcAddr, const LDefinition *out)
+CodeGeneratorX86::load(Scalar::Type accessType, const Operand &srcAddr, const LDefinition *out)
 {
-    switch (vt) {
+    switch (accessType) {
       case Scalar::Int8:         masm.movsblWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Uint8Clamped:
       case Scalar::Uint8:        masm.movzblWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Int16:        masm.movswlWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Uint16:       masm.movzwlWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Int32:
       case Scalar::Uint32:       masm.movlWithPatch(srcAddr, ToRegister(out)); break;
       case Scalar::Float32:      masm.vmovssWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float64:      masm.vmovsdWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float32x4:
       case Scalar::Int32x4:      MOZ_CRASH("SIMD load should be handled in their own function");
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
 }
 
-template<typename T>
-void
-CodeGeneratorX86::loadAndNoteViewTypeElement(Scalar::Type vt, const T &srcAddr,
-                                             const LDefinition *out)
-{
-    uint32_t before = masm.size();
-    load(vt, srcAddr, out);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, vt, ToAnyRegister(out)));
-}
-
 void
 CodeGeneratorX86::visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic *ins)
 {
     const MLoadTypedArrayElementStatic *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
-    MOZ_ASSERT_IF(vt == Scalar::Float32, mir->type() == MIRType_Float32);
+    Scalar::Type accessType = mir->accessType();
+    MOZ_ASSERT_IF(accessType == Scalar::Float32, mir->type() == MIRType_Float32);
 
     Register ptr = ToRegister(ins->ptr());
     const LDefinition *out = ins->output();
     OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
     uint32_t offset = mir->offset();
 
     if (mir->needsBoundsCheck()) {
         MOZ_ASSERT(offset == 0);
         if (!mir->fallible()) {
-            ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
+            ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
             addOutOfLineCode(ool, ins->mir());
         }
 
         masm.cmpPtr(ptr, ImmWord(mir->length()));
         if (ool)
             masm.j(Assembler::AboveOrEqual, ool->entry());
         else
             bailoutIf(Assembler::AboveOrEqual, ins->snapshot());
     }
 
-    Address srcAddr(ptr, int32_t(mir->base()) + int32_t(offset));
-    load(vt, srcAddr, out);
-    if (vt == Scalar::Float64)
+    Operand srcAddr(ptr, int32_t(mir->base()) + int32_t(offset));
+    load(accessType, srcAddr, out);
+    if (accessType == Scalar::Float64)
         masm.canonicalizeDouble(ToFloatRegister(out));
-    if (vt == Scalar::Float32)
+    if (accessType == Scalar::Float32)
         masm.canonicalizeFloat(ToFloatRegister(out));
     if (ool)
         masm.bind(ool->rejoin());
 }
 
 void
 CodeGeneratorX86::visitAsmJSCall(LAsmJSCall *ins)
 {
@@ -349,19 +337,19 @@ CodeGeneratorX86::visitAsmJSCall(LAsmJSC
 
 void
 CodeGeneratorX86::memoryBarrier(MemoryBarrierBits barrier)
 {
     if (barrier & MembarStoreLoad)
         masm.storeLoadFence();
 }
 
-template<typename T>
 void
-CodeGeneratorX86::loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, FloatRegister out)
+CodeGeneratorX86::loadSimd(Scalar::Type type, unsigned numElems, const Operand &srcAddr,
+                           FloatRegister out)
 {
     switch (type) {
       case Scalar::Float32x4: {
         switch (numElems) {
           // In memory-to-register mode, movss zeroes out the high lanes.
           case 1: masm.vmovssWithPatch(srcAddr, out); break;
           // See comment above, which also applies to movsd.
           case 2: masm.vmovsdWithPatch(srcAddr, out); break;
@@ -369,17 +357,17 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
           // In memory-to-register mode, movd zeroes out the high lanes.
           case 1: masm.vmovdWithPatch(srcAddr, out); break;
-          // See comment above, which also applies to movsd.
+          // See comment above, which also applies to movq.
           case 2: masm.vmovqWithPatch(srcAddr, out); break;
           case 4: masm.vmovdquWithPatch(srcAddr, out); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
@@ -391,218 +379,169 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
-CodeGeneratorX86::emitSimdLoad(Scalar::Type type, unsigned numElems, const LAllocation *ptr,
-                               FloatRegister out, bool needsBoundsCheck /* = false */,
-                               Label *oobLabel /* = nullptr */)
+CodeGeneratorX86::emitSimdLoad(LAsmJSLoadHeap *ins)
 {
-    if (ptr->isConstant()) {
-        MOZ_ASSERT(!needsBoundsCheck);
-
-        if (numElems == 3) {
-            MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
-
-            // Load XY
-            emitSimdLoad(type, 2, ptr, out);
-
-            // Load Z (W is zeroed)
-            // This add won't overflow, as we've checked that we have at least
-            // room for loading 4 elements during asm.js validation.
-            PatchedAbsoluteAddress srcAddr((void *) (ptr->toConstant()->toInt32() + 2 * sizeof(float)));
-            uint32_t before = masm.size();
-            loadSimd(type, 1, srcAddr, ScratchSimdReg);
-            uint32_t after = masm.size();
-            masm.append(AsmJSHeapAccess(before, after, 1, type));
+    const MAsmJSLoadHeap *mir = ins->mir();
+    Scalar::Type type = mir->accessType();
+    FloatRegister out = ToFloatRegister(ins->output());
+    const LAllocation *ptr = ins->ptr();
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-            // Move ZW atop XY
-            masm.vmovlhps(ScratchSimdReg, out, out);
-            return;
-        }
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
-        PatchedAbsoluteAddress srcAddr((void *) ptr->toConstant()->toInt32());
-        uint32_t before = masm.size();
-        loadSimd(type, numElems, srcAddr, out);
-        uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, numElems, type));
-        return;
-    }
-
-    Register ptrReg = ToRegister(ptr);
-    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (needsBoundsCheck) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, oobLabel); // Throws RangeError
-    }
-
-    uint32_t before = masm.size();
+    unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
+        Operand srcAddrZ =
+            ptr->isBogus()
+            ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
+            : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
+
         // Load XY
-        Address addr(ptrReg, 0);
-        before = masm.size();
-        loadSimd(type, 2, addr, out);
+        uint32_t before = masm.size();
+        loadSimd(type, 2, srcAddr, out);
         uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
         // Load Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
-        Address shiftedAddr(ptrReg, 2 * sizeof(float));
         before = after;
-        loadSimd(type, 1, shiftedAddr, ScratchSimdReg);
+        loadSimd(type, 1, srcAddrZ, ScratchSimdReg);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 1, type));
+        masm.append(AsmJSHeapAccess(before, after));
 
         // Move ZW atop XY
         masm.vmovlhps(ScratchSimdReg, out, out);
-        return;
+    } else {
+        uint32_t before = masm.size();
+        loadSimd(type, numElems, srcAddr, out);
+        uint32_t after = masm.size();
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
     }
 
-    Address addr(ptrReg, 0);
-    loadSimd(type, numElems, addr, out);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
 {
     const MAsmJSLoadHeap *mir = ins->mir();
     Scalar::Type accessType = mir->accessType();
+
+    if (Scalar::isSimdType(accessType))
+        return emitSimdLoad(ins);
+
     const LAllocation *ptr = ins->ptr();
     const LDefinition *out = ins->output();
-
-    if (Scalar::isSimdType(accessType)) {
-        return emitSimdLoad(accessType, mir->numSimdElems(), ptr, ToFloatRegister(out),
-                            mir->needsBoundsCheck(), gen->outOfBoundsLabel());
-    }
-
-    memoryBarrier(ins->mir()->barrierBefore());
+    Operand srcAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-    if (ptr->isConstant()) {
-        // The constant displacement still needs to be added to the as-yet-unknown
-        // base address of the heap. For now, embed the displacement as an
-        // immediate in the instruction. This displacement will fixed up when the
-        // base address is known during dynamic linking (AsmJSModule::initHeap).
-        PatchedAbsoluteAddress srcAddr((void *) ptr->toConstant()->toInt32());
-        loadAndNoteViewTypeElement(accessType, srcAddr, out);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
+    memoryBarrier(mir->barrierBefore());
+    OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
+        addOutOfLineCode(ool, mir);
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), ool->entry());
     }
 
-    Register ptrReg = ToRegister(ptr);
-    Address srcAddr(ptrReg, 0);
-
-    if (!mir->needsBoundsCheck()) {
-        loadAndNoteViewTypeElement(accessType, srcAddr, out);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
-    }
-
-    OutOfLineLoadTypedArrayOutOfBounds *ool =
-        new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
-    CodeOffsetLabel cmp = masm.cmp32WithPatch(ptrReg, Imm32(0));
-    addOutOfLineCode(ool, mir);
-    masm.j(Assembler::AboveOrEqual, ool->entry());
-
     uint32_t before = masm.size();
     load(accessType, srcAddr, out);
     uint32_t after = masm.size();
-    if (ool)
+    if (ool) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(ool->rejoin());
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, accessType, ToAnyRegister(out), cmp.offset()));
+    }
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 }
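
The out-of-line path attached above materializes the asm.js out-of-bounds load
result instead of throwing. A minimal standalone sketch of the value it
produces, assuming the usual asm.js semantics (integer views read 0, float
views read NaN); the enum is illustrative, not the real Scalar::Type.

    #include <cmath>

    // Illustrative only: the default result an out-of-bounds asm.js load yields.
    enum class ViewKind { Int, Float };

    double outOfBoundsLoadValue(ViewKind kind)
    {
        return kind == ViewKind::Float ? std::nan("") : 0.0;
    }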
 
-template<typename T>
 void
-CodeGeneratorX86::store(Scalar::Type vt, const LAllocation *value, const T &dstAddr)
+CodeGeneratorX86::store(Scalar::Type accessType, const LAllocation *value, const Operand &dstAddr)
 {
-    switch (vt) {
+    switch (accessType) {
       case Scalar::Int8:
       case Scalar::Uint8Clamped:
       case Scalar::Uint8:        masm.movbWithPatch(ToRegister(value), dstAddr); break;
       case Scalar::Int16:
       case Scalar::Uint16:       masm.movwWithPatch(ToRegister(value), dstAddr); break;
       case Scalar::Int32:
       case Scalar::Uint32:       masm.movlWithPatch(ToRegister(value), dstAddr); break;
       case Scalar::Float32:      masm.vmovssWithPatch(ToFloatRegister(value), dstAddr); break;
       case Scalar::Float64:      masm.vmovsdWithPatch(ToFloatRegister(value), dstAddr); break;
       case Scalar::Float32x4:
       case Scalar::Int32x4:      MOZ_CRASH("SIMD stores should be handled in emitSimdStore");
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
 }
 
-template<typename T>
-void
-CodeGeneratorX86::storeAndNoteViewTypeElement(Scalar::Type vt, const LAllocation *value,
-                                              const T &dstAddr)
-{
-    uint32_t before = masm.size();
-    store(vt, value, dstAddr);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, vt));
-}
-
 void
 CodeGeneratorX86::visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic *ins)
 {
     MStoreTypedArrayElementStatic *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     Register ptr = ToRegister(ins->ptr());
     const LAllocation *value = ins->value();
     uint32_t offset = mir->offset();
 
     if (!mir->needsBoundsCheck()) {
-        Address dstAddr(ptr, int32_t(mir->base()) + int32_t(offset));
-        store(vt, value, dstAddr);
+        Operand dstAddr(ptr, int32_t(mir->base()) + int32_t(offset));
+        store(accessType, value, dstAddr);
         return;
     }
 
     MOZ_ASSERT(offset == 0);
     masm.cmpPtr(ptr, ImmWord(mir->length()));
     Label rejoin;
     masm.j(Assembler::AboveOrEqual, &rejoin);
 
-    Address dstAddr(ptr, (int32_t) mir->base());
-    store(vt, value, dstAddr);
+    Operand dstAddr(ptr, (int32_t) mir->base());
+    store(accessType, value, dstAddr);
     masm.bind(&rejoin);
 }
 
-template<typename T>
 void
-CodeGeneratorX86::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, T destAddr)
+CodeGeneratorX86::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in,
+                            const Operand &dstAddr)
 {
     switch (type) {
       case Scalar::Float32x4: {
         switch (numElems) {
           // In memory-to-register mode, movss zeroes out the high lanes.
-          case 1: masm.vmovssWithPatch(in, destAddr); break;
+          case 1: masm.vmovssWithPatch(in, dstAddr); break;
           // See comment above, which also applies to movsd.
-          case 2: masm.vmovsdWithPatch(in, destAddr); break;
-          case 4: masm.vmovupsWithPatch(in, destAddr); break;
+          case 2: masm.vmovsdWithPatch(in, dstAddr); break;
+          case 4: masm.vmovupsWithPatch(in, dstAddr); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
-          // In memory-to-register mode, movd zeroes destAddr the high lanes.
-          case 1: masm.vmovdWithPatch(in, destAddr); break;
+          // In memory-to-register mode, movd zeroes out the high lanes.
+          case 1: masm.vmovdWithPatch(in, dstAddr); break;
           // See comment above, which also applies to movsd.
-          case 2: masm.vmovqWithPatch(in, destAddr); break;
-          case 4: masm.vmovdquWithPatch(in, destAddr); break;
+          case 2: masm.vmovqWithPatch(in, dstAddr); break;
+          case 4: masm.vmovdquWithPatch(in, dstAddr); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
       case Scalar::Uint16:
@@ -612,224 +551,190 @@ CodeGeneratorX86::storeSimd(Scalar::Type
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
 }
 
 void
-CodeGeneratorX86::emitSimdStore(Scalar::Type type, unsigned numElems, FloatRegister in,
-                                const LAllocation *ptr, bool needsBoundsCheck /* = false */,
-                                Label *oobLabel /* = nullptr */)
+CodeGeneratorX86::emitSimdStore(LAsmJSStoreHeap *ins)
 {
-    if (ptr->isConstant()) {
-        MOZ_ASSERT(!needsBoundsCheck);
-
-        if (numElems == 3) {
-            MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
-
-            // Store XY
-            emitSimdStore(type, 2, in, ptr);
-
-            masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
+    const MAsmJSStoreHeap *mir = ins->mir();
+    Scalar::Type type = mir->accessType();
+    FloatRegister in = ToFloatRegister(ins->value());
+    const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-            // Store Z
-            // This add won't overflow, as we've checked that we have at least
-            // room for loading 4 elements during asm.js validation.
-            PatchedAbsoluteAddress dstAddr((void *) (ptr->toConstant()->toInt32() + 2 * sizeof(float)));
-            uint32_t before = masm.size();
-            storeSimd(type, 1, ScratchSimdReg, dstAddr);
-            uint32_t after = masm.size();
-            masm.append(AsmJSHeapAccess(before, after, 1, type));
-            return;
-        }
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir))
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr),
+                                                    gen->outOfBoundsLabel());
 
-        PatchedAbsoluteAddress dstAddr((void *) ptr->toConstant()->toInt32());
-        uint32_t before = masm.size();
-        storeSimd(type, numElems, in, dstAddr);
-        uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 3, type));
-        return;
-    }
-
-    Register ptrReg = ToRegister(ptr);
-    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
-    if (needsBoundsCheck) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
-        masm.j(Assembler::AboveOrEqual, oobLabel); // Throws RangeError
-    }
-
-    uint32_t before = masm.size();
+    unsigned numElems = mir->numSimdElems();
     if (numElems == 3) {
         MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
 
+        Operand dstAddrZ =
+            ptr->isBogus()
+            ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
+            : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
+
         // Store XY
-        Address addr(ptrReg, 0);
-        before = masm.size();
-        storeSimd(type, 2, in, addr);
+        uint32_t before = masm.size();
+        storeSimd(type, 2, in, dstAddr);
         uint32_t after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
         masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
 
         // Store Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
-        Address shiftedAddr(ptrReg, 2 * sizeof(float));
         before = masm.size();
-        storeSimd(type, 1, ScratchSimdReg, shiftedAddr);
+        storeSimd(type, 1, ScratchSimdReg, dstAddrZ);
         after = masm.size();
-        masm.append(AsmJSHeapAccess(before, after, 1, type));
-        return;
+        masm.append(AsmJSHeapAccess(before, after));
+    } else {
+        uint32_t before = masm.size();
+        storeSimd(type, numElems, in, dstAddr);
+        uint32_t after = masm.size();
+        masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
     }
 
-    Address addr(ptrReg, 0);
-    storeSimd(type, numElems, in, addr);
-    uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
+    if (maybeCmpOffset != AsmJSHeapAccess::NoLengthCheck)
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
 {
-    MAsmJSStoreHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    const MAsmJSStoreHeap *mir = ins->mir();
+    Scalar::Type accessType = mir->accessType();
+
+    if (Scalar::isSimdType(accessType))
+        return emitSimdStore(ins);
+
     const LAllocation *value = ins->value();
     const LAllocation *ptr = ins->ptr();
+    Operand dstAddr = ptr->isBogus()
+                      ? Operand(PatchedAbsoluteAddress(mir->offset()))
+                      : Operand(ToRegister(ptr), mir->offset());
 
-    if (Scalar::isSimdType(vt)) {
-        return emitSimdStore(vt, mir->numSimdElems(), ToFloatRegister(value), ptr,
-                             mir->needsBoundsCheck(), gen->outOfBoundsLabel());
+    memoryBarrier(mir->barrierBefore());
+    Label *rejoin = nullptr;
+    uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
+    if (gen->needsAsmJSBoundsCheckBranch(mir)) {
+        rejoin = alloc().lifoAlloc()->new_<Label>();
+        maybeCmpOffset = emitAsmJSBoundsCheckBranch(mir, mir, ToRegister(ptr), rejoin);
     }
 
-    memoryBarrier(ins->mir()->barrierBefore());
-
-    if (ptr->isConstant()) {
-        // The constant displacement still needs to be added to the as-yet-unknown
-        // base address of the heap. For now, embed the displacement as an
-        // immediate in the instruction. This displacement will fixed up when the
-        // base address is known during dynamic linking (AsmJSModule::initHeap).
-        PatchedAbsoluteAddress dstAddr((void *) ptr->toConstant()->toInt32());
-        storeAndNoteViewTypeElement(vt, value, dstAddr);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
+    uint32_t before = masm.size();
+    store(accessType, value, dstAddr);
+    uint32_t after = masm.size();
+    if (rejoin) {
+        cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
+        masm.bind(rejoin);
     }
-
-    Register ptrReg = ToRegister(ptr);
-    Address dstAddr(ptrReg, 0);
-
-    if (!mir->needsBoundsCheck()) {
-        storeAndNoteViewTypeElement(vt, value, dstAddr);
-        memoryBarrier(ins->mir()->barrierAfter());
-        return;
-    }
-
-    CodeOffsetLabel cmp = masm.cmp32WithPatch(ptrReg, Imm32(0));
-    Label rejoin;
-    masm.j(Assembler::AboveOrEqual, &rejoin);
-
-    uint32_t before = masm.size();
-    store(vt, value, dstAddr);
-    uint32_t after = masm.size();
-    masm.bind(&rejoin);
-    memoryBarrier(ins->mir()->barrierAfter());
-    masm.append(AsmJSHeapAccess(before, after, vt, cmp.offset()));
+    memoryBarrier(mir->barrierAfter());
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 }
 
 void
 CodeGeneratorX86::visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap *ins)
 {
     MAsmJSCompareExchangeHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
     Register oldval = ToRegister(ins->oldValue());
     Register newval = ToRegister(ins->newValue());
 
     MOZ_ASSERT(ptr->isRegister());
     // Set up the offset within the heap in the pointer reg.
     Register ptrReg = ToRegister(ptr);
 
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
 
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out,out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
 
     // Add in the actual heap pointer explicitly, to avoid opening up
     // the abstraction that is compareExchangeToTypedIntArray at this time.
     uint32_t before = masm.size();
-    masm.addlWithPatch(Imm32(0), ptrReg);
+    masm.addlWithPatch(Imm32(mir->offset()), ptrReg);
     uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, mir->accessType(), maybeCmpOffset));
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
-    Address memAddr(ToRegister(ptr), 0);
-    masm.compareExchangeToTypedIntArray(vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+    Address memAddr(ToRegister(ptr), mir->offset());
+    masm.compareExchangeToTypedIntArray(accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         memAddr,
                                         oldval,
                                         newval,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     if (rejoin.used())
         masm.bind(&rejoin);
 }
 
 void
 CodeGeneratorX86::visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap *ins)
 {
     MAsmJSAtomicBinopHeap *mir = ins->mir();
-    Scalar::Type vt = mir->accessType();
+    Scalar::Type accessType = mir->accessType();
     const LAllocation *ptr = ins->ptr();
     Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
     const LAllocation* value = ins->value();
     AtomicOp op = mir->operation();
 
     MOZ_ASSERT(ptr->isRegister());
     // Set up the offset within the heap in the pointer reg.
     Register ptrReg = ToRegister(ptr);
 
     Label rejoin;
     uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
 
     if (mir->needsBoundsCheck()) {
-        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
+        maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(-mir->endOffset())).offset();
         Label goahead;
-        masm.j(Assembler::Below, &goahead);
+        masm.j(Assembler::BelowOrEqual, &goahead);
         memoryBarrier(MembarFull);
         Register out = ToRegister(ins->output());
         masm.xorl(out,out);
         masm.jmp(&rejoin);
         masm.bind(&goahead);
     }
 
     // Add in the actual heap pointer explicitly, to avoid opening up
     // the abstraction that is atomicBinopToTypedIntArray at this time.
     uint32_t before = masm.size();
-    masm.addlWithPatch(Imm32(0), ptrReg);
+    masm.addlWithPatch(Imm32(mir->offset()), ptrReg);
     uint32_t after = masm.size();
-    masm.append(AsmJSHeapAccess(before, after, mir->accessType(), maybeCmpOffset));
+    masm.append(AsmJSHeapAccess(before, after, maybeCmpOffset));
 
-    Address memAddr(ptrReg, 0);
+    Address memAddr(ptrReg, mir->offset());
     if (value->isConstant()) {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         Imm32(ToInt32(value)),
                                         memAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     } else {
-        masm.atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
+        masm.atomicBinopToTypedIntArray(op, accessType == Scalar::Uint32 ? Scalar::Int32 : accessType,
                                         ToRegister(value),
                                         memAddr,
                                         temp,
                                         InvalidReg,
                                         ToAnyRegister(ins->output()));
     }
     if (rejoin.used())
         masm.bind(&rejoin);
--- a/js/src/jit/x86/CodeGenerator-x86.h
+++ b/js/src/jit/x86/CodeGenerator-x86.h
@@ -23,35 +23,24 @@ class CodeGeneratorX86 : public CodeGene
         return this;
     }
 
   protected:
     ValueOperand ToValue(LInstruction *ins, size_t pos);
     ValueOperand ToOutValue(LInstruction *ins);
     ValueOperand ToTempValue(LInstruction *ins, size_t pos);
 
-    template<typename T>
-    void loadAndNoteViewTypeElement(Scalar::Type vt, const T &srcAddr, const LDefinition *out);
-    template<typename T>
-    void load(Scalar::Type vt, const T &srcAddr, const LDefinition *out);
-    template<typename T>
-    void storeAndNoteViewTypeElement(Scalar::Type vt, const LAllocation *value, const T &dstAddr);
-    template<typename T>
-    void store(Scalar::Type vt, const LAllocation *value, const T &dstAddr);
+    void load(Scalar::Type vt, const Operand &srcAddr, const LDefinition *out);
+    void store(Scalar::Type vt, const LAllocation *value, const Operand &dstAddr);
 
-    template<typename T>
-    void loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, FloatRegister out);
-    void emitSimdLoad(Scalar::Type type, unsigned numElems, const LAllocation *ptr,
-                      FloatRegister out, bool needsBoundsCheck = false, Label *oobLabel = nullptr);
+    void loadSimd(Scalar::Type type, unsigned numElems, const Operand &srcAddr, FloatRegister out);
+    void emitSimdLoad(LAsmJSLoadHeap *ins);
 
-    template<typename T>
-    void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, T destAddr);
-    void emitSimdStore(Scalar::Type type, unsigned numElems, FloatRegister in,
-                       const LAllocation *ptr, bool needsBoundsCheck = false,
-                       Label *oobLabel = nullptr);
+    void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand &dstAddr);
+    void emitSimdStore(LAsmJSStoreHeap *ins);
 
     void memoryBarrier(MemoryBarrierBits barrier);
 
   public:
     CodeGeneratorX86(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm);
 
   public:
     void visitBox(LBox *box);
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -197,79 +197,57 @@ LIRGeneratorX86::visitAsmJSUnsignedToFlo
     LAsmJSUInt32ToFloat32 *lir = new(alloc()) LAsmJSUInt32ToFloat32(useRegisterAtStart(ins->input()), temp());
     define(lir, ins);
 }
 
 void
 LIRGeneratorX86::visitAsmJSLoadHeap(MAsmJSLoadHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
-    LAllocation ptrAlloc;
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    // For the x86 it is best to keep the 'ptr' in a register if a bounds check is needed.
-    if (ptr->isConstant() && !ins->needsBoundsCheck()) {
-        // A bounds check is only skipped for a positive index.
-        MOZ_ASSERT(ptr->toConstant()->value().toInt32() >= 0);
-        ptrAlloc = LAllocation(ptr->toConstant()->vp());
-    } else {
-        ptrAlloc = useRegisterAtStart(ptr);
-    }
-    LAsmJSLoadHeap *lir = new(alloc()) LAsmJSLoadHeap(ptrAlloc);
-    define(lir, ins);
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
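+    // Otherwise, useRegisterOrZeroAtStart lets a constant zero pointer avoid
+    // taking a register at all.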
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
+                           ? useRegisterAtStart(ptr)
+                           : useRegisterOrZeroAtStart(ptr);
+
+    define(new(alloc()) LAsmJSLoadHeap(ptrAlloc), ins);
 }
 
 void
 LIRGeneratorX86::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
 {
     MDefinition *ptr = ins->ptr();
-    LAsmJSStoreHeap *lir;
     MOZ_ASSERT(ptr->type() == MIRType_Int32);
 
-    if (ptr->isConstant() && !ins->needsBoundsCheck()) {
-        MOZ_ASSERT(ptr->toConstant()->value().toInt32() >= 0);
-        LAllocation ptrAlloc = LAllocation(ptr->toConstant()->vp());
-        switch (ins->accessType()) {
-          case Scalar::Int8: case Scalar::Uint8:
-            // See comment below.
-            lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useFixed(ins->value(), eax));
-            break;
-          case Scalar::Int16: case Scalar::Uint16:
-          case Scalar::Int32: case Scalar::Uint32:
-          case Scalar::Float32: case Scalar::Float64:
-          case Scalar::Float32x4: case Scalar::Int32x4:
-            // See comment below.
-            lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useRegisterAtStart(ins->value()));
-            break;
-          case Scalar::Uint8Clamped:
-          case Scalar::MaxTypedArrayViewType:
-            MOZ_CRASH("unexpected array type");
-        }
-        add(lir, ins);
-        return;
-    }
+    // For simplicity, require a register if we're going to emit a bounds-check
+    // branch, so that we don't have special cases for constants.
+    LAllocation ptrAlloc = gen->needsAsmJSBoundsCheckBranch(ins)
+                           ? useRegisterAtStart(ptr)
+                           : useRegisterOrZeroAtStart(ptr);
 
+    LAsmJSStoreHeap *lir = nullptr;
     switch (ins->accessType()) {
       case Scalar::Int8: case Scalar::Uint8:
         // See comment for LIRGeneratorX86::useByteOpRegister.
-        lir = new(alloc()) LAsmJSStoreHeap(useRegister(ins->ptr()), useFixed(ins->value(), eax));
+        lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useFixed(ins->value(), eax));
         break;
       case Scalar::Int16: case Scalar::Uint16:
       case Scalar::Int32: case Scalar::Uint32:
       case Scalar::Float32: case Scalar::Float64:
       case Scalar::Float32x4: case Scalar::Int32x4:
         // For now, don't allow constant values. The immediate operand
         // affects instruction layout which affects patching.
-        lir = new(alloc()) LAsmJSStoreHeap(useRegisterAtStart(ptr), useRegisterAtStart(ins->value()));
+        lir = new(alloc()) LAsmJSStoreHeap(ptrAlloc, useRegisterAtStart(ins->value()));
         break;
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
-
     add(lir, ins);
 }
 
 void
 LIRGeneratorX86::visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins)
 {
     // The code generated for StoreTypedArrayElementStatic is identical to that
     // for AsmJSStoreHeap, and the same concerns apply.