Bug 840285 - ARM support for Ion asm.js (r=luke,mjrosenb also landing some of doug crosher's patches)
author: Douglas Crosher <dtc-moz@scieneer.com>
date: Mon, 25 Mar 2013 18:22:45 +1100
changeset: 128130 8f3f965dc11677593b56c40416660af0078184b0
parent: 128129 0e47bb858304b373330af0be986dd44717faaf1c
child: 128131 a54f620cf84c0a8b2982c43e9b11ac6b67f95b28
push id: 24522
push user: ryanvm@gmail.com
push date: Tue, 09 Apr 2013 23:24:02 +0000
treeherder: mozilla-central@9db46ddfb517
reviewers: luke, mjrosenb
bugs: 840285
milestone: 23.0a1
js/src/configure.in
js/src/ion/AsmJS.cpp
js/src/ion/AsmJS.h
js/src/ion/AsmJSLink.cpp
js/src/ion/AsmJSModule.h
js/src/ion/AsmJSSignalHandlers.cpp
js/src/ion/CodeGenerator.cpp
js/src/ion/CodeGenerator.h
js/src/ion/IonLinker.h
js/src/ion/LIR.cpp
js/src/ion/MIRGenerator.h
js/src/ion/RegisterAllocator.h
js/src/ion/RegisterSets.h
js/src/ion/arm/Architecture-arm.h
js/src/ion/arm/Assembler-arm.cpp
js/src/ion/arm/Assembler-arm.h
js/src/ion/arm/CodeGenerator-arm.cpp
js/src/ion/arm/CodeGenerator-arm.h
js/src/ion/arm/IonFrames-arm.h
js/src/ion/arm/LIR-arm.h
js/src/ion/arm/LOpcodes-arm.h
js/src/ion/arm/Lowering-arm.cpp
js/src/ion/arm/Lowering-arm.h
js/src/ion/arm/MacroAssembler-arm.cpp
js/src/ion/arm/MacroAssembler-arm.h
js/src/ion/arm/MoveEmitter-arm.cpp
js/src/ion/shared/CodeGenerator-shared.cpp
js/src/ion/shared/CodeGenerator-x86-shared.cpp
js/src/ion/shared/CodeGenerator-x86-shared.h
js/src/ion/shared/IonAssemblerBuffer.h
js/src/ion/shared/IonAssemblerBufferWithConstantPools.h
js/src/ion/shared/Lowering-shared-inl.h
js/src/ion/shared/Lowering-shared.h
js/src/ion/shared/MacroAssembler-x86-shared.h
js/src/ion/x64/Assembler-x64.h
js/src/ion/x64/MacroAssembler-x64.h
js/src/ion/x86/Assembler-x86.h
--- a/js/src/configure.in
+++ b/js/src/configure.in
@@ -4365,17 +4365,16 @@ if test "$ACCESSIBILITY" -a "$MOZ_ENABLE
     ATK_MAJOR_VERSION=`echo ${ATK_FULL_VERSION} | $AWK -F\. '{ print $1 }'`
     ATK_MINOR_VERSION=`echo ${ATK_FULL_VERSION} | $AWK -F\. '{ print $2 }'`
     ATK_REV_VERSION=`echo ${ATK_FULL_VERSION} | $AWK -F\. '{ print $3 }'`
     AC_DEFINE_UNQUOTED(ATK_MAJOR_VERSION, $ATK_MAJOR_VERSION)
     AC_DEFINE_UNQUOTED(ATK_MINOR_VERSION, $ATK_MINOR_VERSION)
     AC_DEFINE_UNQUOTED(ATK_REV_VERSION, $ATK_REV_VERSION)
 fi
 
-
 dnl ECMAScript Internationalization API Support (uses ICU)
 dnl ========================================================
 
 dnl Source files that use ICU should have control over which parts of the ICU
 dnl namespace they want to use.
 AC_DEFINE(U_USING_ICU_NAMESPACE,0)
 
 
--- a/js/src/ion/AsmJS.cpp
+++ b/js/src/ion/AsmJS.cpp
@@ -1046,19 +1046,17 @@ class ModuleCompiler
 
     PropertyName *                 moduleFunctionName_;
 
     GlobalMap                      globals_;
     FuncVector                     functions_;
     FuncPtrTableVector             funcPtrTables_;
     ExitMap                        exits_;
     MathNameMap                    standardLibraryMathNames_;
-
     GlobalAccessVector             globalAccesses_;
-
     Label                          stackOverflowLabel_;
     Label                          operationCallbackLabel_;
 
     const char *                   errorString_;
     ParseNode *                    errorNode_;
     TokenStream &                  tokenStream_;
 
     DebugOnly<int>                 currentPass_;
@@ -1268,19 +1266,23 @@ class ModuleCompiler
         JS_ASSERT(currentPass_ == 1);
         if (!module_->addGlobalConstant(constant, fieldName))
             return false;
         Global g(Global::Constant);
         g.u.constant_ = constant;
         return globals_.putNew(varName, g);
     }
     bool collectAccesses(MIRGenerator &gen) {
+#ifdef JS_CPU_ARM
+        if (!module_->addBoundsChecks(gen.asmBoundsChecks()))
+            return false;
+#else
         if (!module_->addHeapAccesses(gen.heapAccesses()))
             return false;
-
+#endif
         for (unsigned i = 0; i < gen.globalAccesses().length(); i++) {
             if (!globalAccesses_.append(gen.globalAccesses()[i]))
                 return false;
         }
         return true;
     }
     bool addGlobalAccess(AsmJSGlobalAccess access) {
         return globalAccesses_.append(access);
@@ -1326,20 +1328,26 @@ class ModuleCompiler
         JS_ASSERT(currentPass_ == 2);
         masm_.align(gc::PageSize);
         module_->setFunctionBytes(masm_.size());
         currentPass_ = 3;
     }
 
     void setExitOffset(unsigned exitIndex) {
         JS_ASSERT(currentPass_ == 3);
+#if defined(JS_CPU_ARM)
+        masm_.flush();
+#endif
         module_->exit(exitIndex).initCodeOffset(masm_.size());
     }
     void setEntryOffset(unsigned exportIndex) {
         JS_ASSERT(currentPass_ == 3);
+#if defined(JS_CPU_ARM)
+        masm_.flush();
+#endif
         module_->exportedFunction(exportIndex).initCodeOffset(masm_.size());
     }
 
     bool finish(ScopedJSDeletePtr<AsmJSModule> *module) {
         // After finishing, the only valid operation on an ModuleCompiler is
         // destruction.
         JS_ASSERT(currentPass_ == 3);
         currentPass_ = -1;
@@ -1401,21 +1409,29 @@ class ModuleCompiler
                 uint8_t *funcPtr = code + masm_.actualOffset(table.elem(j).codeLabel()->offset());
                 module_->funcPtrIndexToGlobalDatum(elemIndex++) = funcPtr;
             }
             JS_ASSERT(elemIndex == table.baseIndex() + table.numElems());
         }
         JS_ASSERT(elemIndex == module_->numFuncPtrTableElems());
 
         // Global accesses in function bodies
+#ifdef JS_CPU_ARM
+        JS_ASSERT(globalAccesses_.length() == 0);
+        // The AsmJSBoundsCheck offsets need to be updated to reflect the
+        // "actualOffset" (an ARM distinction).
+        module_->convertBoundsChecksToActualOffset(masm_);
+
+#else
+
         for (unsigned i = 0; i < globalAccesses_.length(); i++) {
             AsmJSGlobalAccess access = globalAccesses_[i];
             masm_.patchAsmJSGlobalAccess(access.offset, code, codeBytes, access.globalDataOffset);
         }
-
+#endif
         // The AsmJSHeapAccess offsets need to be updated to reflect the
         // "actualOffset" (an ARM distinction).
         for (unsigned i = 0; i < module_->numHeapAccesses(); i++) {
             AsmJSHeapAccess &access = module_->heapAccess(i);
             access.updateOffset(masm_.actualOffset(access.offset()));
         }
 
         *module = module_.forget();
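
A note on actualOffset(): on ARM the assembler buffer interleaves constant pools with the instruction stream, so an offset recorded while emitting code (a buffer offset) can differ from the offset of the same instruction in the finished code. The remapping loops above (and convertBoundsChecksToActualOffset on ARM) translate every recorded offset once layout is final. A minimal sketch of that idea, not part of the patch, with an invented pool size purely for illustration:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Record { uint32_t offset; };            // stands in for AsmJSHeapAccess/AsmJSBoundsCheck

// Hypothetical actualOffset(): pretend a 64-byte constant pool is emitted
// after every 1024 bytes of instructions (numbers invented for this sketch).
static uint32_t actualOffset(uint32_t bufferOffset) {
    return bufferOffset + (bufferOffset / 1024) * 64;
}

int main() {
    std::vector<Record> records = { {100}, {2048}, {5000} };
    for (Record &r : records)
        r.offset = actualOffset(r.offset);     // same shape as the remapping loops above
    for (const Record &r : records)
        std::printf("%u\n", r.offset);
    return 0;
}
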
@@ -4373,18 +4389,16 @@ CheckFunctionBody(ModuleCompiler &m, Mod
         return NULL;
 
     f.returnVoid();
     JS_ASSERT(!tempAlloc->rootList());
 
     return mirGen;
 }
 
-static const unsigned CodeAlignment = 8;
-
 static bool
 GenerateAsmJSCode(ModuleCompiler &m, ModuleCompiler::Func &func,
                   MIRGenerator &mirGen, LIRGraph &lir)
 {
     m.masm().bind(func.codeLabel());
 
     ScopedJSDeletePtr<CodeGenerator> codegen(GenerateCode(&mirGen, &lir, &m.masm()));
     if (!codegen)
@@ -4679,17 +4693,17 @@ StackDecrementForCall(MacroAssembler &ma
     // extraBytes, the stack is aligned for a call instruction.
     unsigned argBytes = StackArgBytes(argTypes);
     unsigned alreadyPushed = AlignmentAtPrologue + masm.framePushed();
     return AlignBytes(alreadyPushed + extraBytes + argBytes, StackAlignment) - alreadyPushed;
 }
 
 static const unsigned FramePushedAfterSave = NonVolatileRegs.gprs().size() * STACK_SLOT_SIZE +
                                              NonVolatileRegs.fpus().size() * sizeof(double);
-
+#ifndef JS_CPU_ARM
 static bool
 GenerateEntry(ModuleCompiler &m, const AsmJSModule::ExportedFunction &exportedFunc)
 {
     MacroAssembler &masm = m.masm();
 
     // In constrast to the system ABI, the Ion convention is that all registers
     // are clobbered by calls. Thus, we must save the caller's non-volatile
     // registers.
@@ -4779,16 +4793,128 @@ GenerateEntry(ModuleCompiler &m, const A
     // Restore clobbered registers.
     masm.PopRegsInMask(NonVolatileRegs);
     JS_ASSERT(masm.framePushed() == 0);
 
     masm.move32(Imm32(true), ReturnReg);
     masm.ret();
     return true;
 }
+#else
+static bool
+GenerateEntry(ModuleCompiler &m, const AsmJSModule::ExportedFunction &exportedFunc)
+{
+    const ModuleCompiler::Func &func = *m.lookupFunction(exportedFunc.name());
+
+    MacroAssembler &masm = m.masm();
+
+    // In contrast to the X64 system ABI, the Ion convention is that all
+    // registers are clobbered by calls. Thus, we must save the caller's
+    // non-volatile registers.
+    //
+    // NB: GenerateExits assumes that masm.framePushed() == 0 before
+    // PushRegsInMask(NonVolatileRegs).
+    masm.setFramePushed(0);
+    masm.PushRegsInMask(NonVolatileRegs);
+    JS_ASSERT(masm.framePushed() == FramePushedAfterSave);
+    JS_ASSERT(masm.framePushed() % 8 == 0);
+
+    // Remember the stack pointer in the current AsmJSActivation. This will be
+    // used by error exit paths to set the stack pointer back to what it was
+    // right after the (C++) caller's non-volatile registers were saved so that
+    // they can be restored.
+
+    LoadAsmJSActivationIntoRegister(masm, r9);
+    masm.ma_str(StackPointer, Address(r9, AsmJSActivation::offsetOfErrorRejoinSP()));
+    //    masm.storeErrorRejoinSp();
+
+    // Move the parameters into non-argument registers since we are about to
+    // clobber these registers with the contents of argv.
+    Register argv = r9;
+    masm.movePtr(IntArgReg1, GlobalReg);  // globalData
+    masm.movePtr(IntArgReg0, argv);       // argv
+
+    masm.ma_ldr(Operand(GlobalReg, Imm32(m.module().heapOffset())), HeapReg);
+    // Remember argv so that we can load argv[0] after the call.
+    JS_ASSERT(masm.framePushed() % 8 == 0);
+    masm.Push(argv);
+    JS_ASSERT(masm.framePushed() % 8 == 4);
+
+    // Determine how many stack slots we need to hold arguments that don't fit
+    // in registers.
+    unsigned numStackArgs = 0;
+    for (ABIArgIter iter(func.argMIRTypes()); !iter.done(); iter++) {
+        if (iter->kind() == ABIArg::Stack)
+            numStackArgs++;
+    }
+
+    // Before calling, we must ensure sp % 16 == 0. Since (sp % 16) = 8 on
+    // entry, we need to push 8 (mod 16) bytes.
+    //JS_ASSERT(AlignmentAtPrologue == 8);
+    JS_ASSERT(masm.framePushed() % 8 == 4);
+    unsigned stackDec = numStackArgs * sizeof(double) + (masm.framePushed() >> 2) % 2 * sizeof(uint32_t);
+    masm.reserveStack(stackDec);
+    //JS_ASSERT(masm.framePushed() % 8 == 0);
+    if(getenv("GDB_BREAK")) {
+        masm.breakpoint(js::ion::Assembler::Always);
+    }
+    // Copy parameters out of argv into the registers/stack-slots specified by
+    // the system ABI.
+    for (ABIArgIter iter(func.argMIRTypes()); !iter.done(); iter++) {
+        unsigned argOffset = iter.index() * sizeof(uint64_t);
+        switch (iter->kind()) {
+          case ABIArg::GPR:
+            masm.ma_ldr(Operand(argv, argOffset), iter->gpr());
+            break;
+          case ABIArg::FPU:
+#if defined(JS_CPU_ARM_HARDFP)
+            masm.ma_vldr(Operand(argv, argOffset), iter->fpu());
+#else
+            // The ABI expects a double value in a pair of GPRs. Figure out which
+            // GPRs they are and use them explicitly.
+            masm.ma_dataTransferN(IsLoad, 64, true, argv, Imm32(argOffset), Register::FromCode(iter->fpu().code()*2));
+#endif
+            break;
+          case ABIArg::Stack:
+            if (iter.mirType() == MIRType_Int32) {
+                masm.memMove32(Address(argv, argOffset), Address(StackPointer, iter->offsetFromArgBase()));
+            } else {
+                masm.memMove64(Address(argv, argOffset), Address(StackPointer, iter->offsetFromArgBase()));
+            }
+            break;
+        }
+    }
+    masm.ma_vimm(js_NaN, NANReg);
+    masm.call(func.codeLabel());
+
+    // Recover argv.
+    masm.freeStack(stackDec);
+    masm.Pop(argv);
+
+    // Store the result in argv[0].
+    switch (func.returnType().which()) {
+      case RetType::Void:
+        break;
+      case RetType::Signed:
+        masm.storeValue(JSVAL_TYPE_INT32, ReturnReg, Address(argv, 0));
+        break;
+      case RetType::Double:
+        masm.ma_vxfer(r0, r1, d0);
+        masm.canonicalizeDouble(ReturnFloatReg);
+        masm.storeDouble(ReturnFloatReg, Address(argv, 0));
+        break;
+    }
+
+    masm.PopRegsInMask(NonVolatileRegs);
+
+    masm.ma_mov(Imm32(true), ReturnReg);
+    masm.abiret();
+    return true;
+}
+#endif
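
The padding arithmetic in the ARM GenerateEntry above can be checked by hand: after PushRegsInMask the frame is a multiple of 8, pushing argv leaves it 4 (mod 8), and (framePushed >> 2) % 2 * sizeof(uint32_t) adds exactly one word of padding in that case, so the call site sees an 8-byte-aligned sp. A standalone sketch of the same expression; the values below are illustrative, not taken from the patch:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Same expression as the ARM GenerateEntry: one 8-byte slot per stack argument,
// plus a single word of padding when framePushed % 8 == 4.
static unsigned stackDecrement(unsigned framePushed, unsigned numStackArgs) {
    return numStackArgs * sizeof(double) +
           (framePushed >> 2) % 2 * sizeof(uint32_t);
}

int main() {
    unsigned framePushed = 11 * sizeof(uint32_t);     // e.g. saved registers plus the pushed argv
    assert(framePushed % 8 == 4);
    unsigned dec = stackDecrement(framePushed, 3);    // three stack arguments
    std::printf("reserve %u bytes\n", dec);           // 3*8 + 4 = 28
    assert((framePushed + dec) % 8 == 0);             // sp is 8-byte aligned at the call
    return 0;
}
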
 
 static bool
 GenerateEntries(ModuleCompiler &m)
 {
     for (unsigned i = 0; i < m.module().numExportedFunctions(); i++) {
         m.setEntryOffset(i);
         if (!GenerateEntry(m, m.module().exportedFunction(i)))
             return false;
@@ -4843,17 +4969,17 @@ InvokeFromAsmJS_ToNumber(JSContext *cx, 
 // See "asm.js FFI calls" comment above.
 static void
 GenerateFFIExit(ModuleCompiler &m, const ModuleCompiler::ExitDescriptor &exit, unsigned exitIndex,
                 Label *throwLabel)
 {
     MacroAssembler &masm = m.masm();
     masm.align(CodeAlignment);
     m.setExitOffset(exitIndex);
-
+#if defined(JS_CPU_X86) || defined(JS_CPU_X64)
     MIRType typeArray[] = { MIRType_Pointer,   // cx
                             MIRType_Pointer,   // exitDatum
                             MIRType_Int32,     // argc
                             MIRType_Pointer }; // argv
     MIRTypeVector invokeArgTypes(m.cx());
     invokeArgTypes.infallibleAppend(typeArray, ArrayLength(typeArray));
 
     // Reserve space for a call to InvokeFromAsmJS_* and an array of values
@@ -4963,16 +5089,100 @@ GenerateFFIExit(ModuleCompiler &m, const
       case Use::AddOrSub:
         JS_NOT_REACHED("Should have been a type error");
     }
 
     // Note: the caller is IonMonkey code which means there are no non-volatile
     // registers to restore.
     masm.freeStack(stackDec);
     masm.ret();
+#else
+    const unsigned arrayLength = Max<size_t>(1, exit.argTypes().length());
+    const unsigned arraySize = arrayLength * sizeof(Value);
+    const unsigned reserveSize = AlignBytes(arraySize, StackAlignment) +
+        ShadowStackSpace;
+    const unsigned callerArgsOffset = reserveSize + NativeFrameSize + sizeof(int32_t);
+    masm.setFramePushed(0);
+    masm.Push(lr);
+    masm.reserveStack(reserveSize + sizeof(int32_t));
+
+    for (ABIArgIter i(exit.argTypes()); !i.done(); i++) {
+        Address dstAddr = Address(StackPointer, ShadowStackSpace + i.index() * sizeof(Value));
+        switch (i->kind()) {
+          case ABIArg::GPR:
+            masm.storeValue(JSVAL_TYPE_INT32, i->gpr(), dstAddr);
+            break;
+          case ABIArg::FPU: {
+#ifndef JS_CPU_ARM_HARDFP
+              FloatRegister fr = i->fpu();
+              int srcId = fr.code() * 2;
+              masm.ma_vxfer(Register::FromCode(srcId), Register::FromCode(srcId+1), fr);
+#endif
+              masm.canonicalizeDouble(i->fpu());
+              masm.storeDouble(i->fpu(), dstAddr);
+              break;
+          }
+          case ABIArg::Stack:
+            if (i.mirType() == MIRType_Int32) {
+                Address src(StackPointer, callerArgsOffset + i->offsetFromArgBase());
+                masm.memIntToValue(src, dstAddr);
+            } else {
+                JS_ASSERT(i.mirType() == MIRType_Double);
+                Address src(StackPointer, callerArgsOffset + i->offsetFromArgBase());
+                masm.loadDouble(src, ScratchFloatReg);
+                masm.canonicalizeDouble(ScratchFloatReg);
+                masm.storeDouble(ScratchFloatReg, dstAddr);
+            }
+            break;
+        }
+    }
+
+    // argument 0: cx
+    Register activation = IntArgReg3;
+    LoadAsmJSActivationIntoRegister(masm, activation);
+
+    LoadJSContextFromActivation(masm, activation, IntArgReg0);
+
+    // argument 1: exitDatum
+    masm.lea(Operand(GlobalReg, m.module().exitIndexToGlobalDataOffset(exitIndex)), IntArgReg1);
+
+    // argument 2: argc
+    masm.mov(Imm32(exit.argTypes().length()), IntArgReg2);
+
+    // argument 3: argv
+    Address argv(StackPointer, ShadowStackSpace);
+    masm.lea(Operand(argv), IntArgReg3);
+
+    AssertStackAlignment(masm);
+    switch (exit.use().which()) {
+      case Use::NoCoercion:
+        masm.call(ImmWord(JS_FUNC_TO_DATA_PTR(void*, &InvokeFromAsmJS_Ignore)));
+        masm.branchTest32(Assembler::Zero, ReturnReg, ReturnReg, throwLabel);
+        break;
+      case Use::ToInt32:
+        masm.call(ImmWord(JS_FUNC_TO_DATA_PTR(void*, &InvokeFromAsmJS_ToInt32)));
+        masm.branchTest32(Assembler::Zero, ReturnReg, ReturnReg, throwLabel);
+        masm.unboxInt32(argv, ReturnReg);
+        break;
+      case Use::ToNumber:
+        masm.call(ImmWord(JS_FUNC_TO_DATA_PTR(void*, &InvokeFromAsmJS_ToNumber)));
+        masm.branchTest32(Assembler::Zero, ReturnReg, ReturnReg, throwLabel);
+#if defined(JS_CPU_ARM) && !defined(JS_CPU_ARM_HARDFP)
+        masm.loadValue(argv, softfpReturnOperand);
+#else
+        masm.loadDouble(argv, ReturnFloatReg);
+#endif
+        break;
+      case Use::AddOrSub:
+        JS_NOT_REACHED("Should have been a type error");
+    }
+
+    masm.freeStack(reserveSize + sizeof(int32_t));
+    masm.ret();
+#endif
 }
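
For the ARM path of GenerateFFIExit above, the frame bookkeeping is: push lr, reserve reserveSize + sizeof(int32_t) bytes for an aligned Value array (plus one word of padding), and compute callerArgsOffset so the caller's stack arguments can still be reached above the new frame. A small sketch of that arithmetic, not part of the patch, using the ARM values of the constants involved (ShadowStackSpace = 0, NativeFrameSize = sizeof(void*) = 4); the argument count is just an example:

#include <cstdint>
#include <cstdio>

static unsigned alignBytes(unsigned bytes, unsigned alignment) {
    return (bytes + alignment - 1) & ~(alignment - 1);
}

int main() {
    const unsigned StackAlignment   = 8;   // ARM stack alignment
    const unsigned ShadowStackSpace = 0;   // Architecture-arm.h (added in this patch)
    const unsigned NativeFrameSize  = 4;   // sizeof(void*) on ARM
    const unsigned SizeOfValue      = 8;   // a JS::Value is 64 bits

    unsigned numArgs     = 3;              // example FFI arity
    unsigned arraySize   = numArgs * SizeOfValue;
    unsigned reserveSize = alignBytes(arraySize, StackAlignment) + ShadowStackSpace;
    unsigned callerArgsOffset = reserveSize + NativeFrameSize + sizeof(int32_t);

    std::printf("Value array: %u bytes at sp; caller stack args at sp+%u\n",
                reserveSize, callerArgsOffset);
    return 0;
}
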
 
 // The stack-overflow exit is called when the stack limit has definitely been
 // exceeded. In this case, we can clobber everything since we are about to pop
 // all the frames.
 static void
 GenerateStackOverflowExit(ModuleCompiler &m, Label *throwLabel)
 {
@@ -4996,22 +5206,26 @@ GenerateStackOverflowExit(ModuleCompiler
 #if defined(JS_CPU_X86)
     LoadAsmJSActivationIntoRegister(masm, eax);
     LoadJSContextFromActivation(masm, eax, eax);
     masm.storePtr(eax, Address(StackPointer, 0));
 #elif defined(JS_CPU_X64)
     LoadAsmJSActivationIntoRegister(masm, IntArgReg0);
     LoadJSContextFromActivation(masm, IntArgReg0, IntArgReg0);
 #else
-# error "ARM here"
+
+    // On ARM the stack should already be aligned here, so just do the context
+    // manipulation and make the call.
+    LoadAsmJSActivationIntoRegister(masm, IntArgReg0);
+    LoadJSContextFromActivation(masm, IntArgReg0, IntArgReg0);
+
 #endif
-
     void (*pf)(JSContext*) = js_ReportOverRecursed;
     masm.call(ImmWord(JS_FUNC_TO_DATA_PTR(void*, pf)));
-    masm.jmp(throwLabel);
+    masm.jump(throwLabel);
 }
 
 // The operation-callback exit is called from arbitrarily-interrupted asm.js
 // code. That means we must first save *all* registers and restore *all*
 // registers when we resume. The address to resume to (assuming that
 // js_HandleExecutionInterrupt doesn't indicate that the execution should be
 // aborted) is stored in AsmJSActivation::resumePC_. Unfortunately, loading
 // this requires a scratch register which we don't have after restoring all
@@ -5019,16 +5233,17 @@ GenerateStackOverflowExit(ModuleCompiler
 // can be popped directly into PC.
 static void
 GenerateOperationCallbackExit(ModuleCompiler &m, Label *throwLabel)
 {
     MacroAssembler &masm = m.masm();
     masm.align(CodeAlignment);
     masm.bind(&m.operationCallbackLabel());
 
+#ifndef JS_CPU_ARM
     // Be very careful here not to perturb the machine state before saving it
     // to the stack. In particular, add/sub instructions may set conditions in
     // the flags register.
     masm.push(Imm32(0));            // space for resumePC
     masm.pushFlags();               // after this we are safe to use sub
     masm.setFramePushed(0);         // set to zero so we can use masm.framePushed() below
     masm.PushRegsInMask(AllRegs);   // save all GP/FP registers
 
@@ -5065,16 +5280,67 @@ GenerateOperationCallbackExit(ModuleComp
 
     // Restore the StackPointer to it's position before the call.
     masm.mov(ABIArgGenerator::NonVolatileReg, StackPointer);
 
     // Restore the machine state to before the interrupt.
     masm.PopRegsInMask(AllRegs);  // restore all GP/FP registers
     masm.popFlags();              // after this, nothing that sets conditions
     masm.ret();                   // pop resumePC into PC
+#else
+    masm.setFramePushed(0);         // set to zero so we can use masm.framePushed() below
+    masm.PushRegsInMask(RegisterSet(GeneralRegisterSet(Registers::AllMask & ~(1<<Registers::sp)), FloatRegisterSet(uint32_t(0))));   // save all GP registers, except sp
+
+    // Save both the APSR and FPSCR in non-volatile registers.
+    masm.as_mrs(r4);
+    masm.as_vmrs(r5);
+    // Save the stack pointer in a non-volatile register.
+    masm.mov(sp,r6);
+    // Align the stack.
+    masm.ma_and(Imm32(~7), sp, sp);
+
+    // Store resumePC into the return PC stack slot.
+    LoadAsmJSActivationIntoRegister(masm, IntArgReg0);
+    masm.loadPtr(Address(IntArgReg0, AsmJSActivation::offsetOfResumePC()), IntArgReg1);
+    masm.storePtr(IntArgReg1, Address(r6, 14 * sizeof(uint32_t*)));
+
+    // argument 0: cx
+    masm.loadPtr(Address(IntArgReg0, AsmJSActivation::offsetOfContext()), IntArgReg0);
+
+    masm.PushRegsInMask(RegisterSet(GeneralRegisterSet(0), FloatRegisterSet(FloatRegisters::AllMask)));   // save all FP registers
+    JSBool (*pf)(JSContext*) = js_HandleExecutionInterrupt;
+    masm.call(ImmWord(JS_FUNC_TO_DATA_PTR(void*, pf)));
+    masm.branchTest32(Assembler::Zero, ReturnReg, ReturnReg, throwLabel);
+
+    // Restore the machine state to before the interrupt. This will set the PC!
+    masm.PopRegsInMask(RegisterSet(GeneralRegisterSet(0), FloatRegisterSet(FloatRegisters::AllMask)));   // restore all FP registers
+    masm.mov(r6,sp);
+    masm.as_vmsr(r5);
+    masm.as_msr(r4);
+    // Restore all GP registers
+    masm.startDataTransferM(IsLoad, sp, IA, WriteBack);
+    masm.transferReg(r0);
+    masm.transferReg(r1);
+    masm.transferReg(r2);
+    masm.transferReg(r3);
+    masm.transferReg(r4);
+    masm.transferReg(r5);
+    masm.transferReg(r6);
+    masm.transferReg(r7);
+    masm.transferReg(r8);
+    masm.transferReg(r9);
+    masm.transferReg(r10);
+    masm.transferReg(r11);
+    masm.transferReg(r12);
+    masm.transferReg(lr);
+    masm.finishDataTransfer();
+    masm.ret();
+
+#endif
+
 }
 
 // If an exception is thrown, simply pop all frames (since asm.js does not
 // contain try/catch). To do this:
 //  1. Restore 'sp' to it's value right after the PushRegsInMask in GenerateEntry.
 //  2. PopRegsInMask to restore the caller's non-volatile registers.
 //  3. Return (to CallAsmJS).
 static void
@@ -5083,22 +5349,24 @@ GenerateThrowExit(ModuleCompiler &m, Lab
     MacroAssembler &masm = m.masm();
     masm.align(CodeAlignment);
     masm.bind(throwLabel);
 
     Register activation = ABIArgGenerator::NonArgReturnVolatileReg1;
     LoadAsmJSActivationIntoRegister(masm, activation);
 
     masm.setFramePushed(FramePushedAfterSave);
-    masm.mov(Operand(activation, AsmJSActivation::offsetOfErrorRejoinSP()), StackPointer);
+    masm.loadPtr(Address(activation, AsmJSActivation::offsetOfErrorRejoinSP()), StackPointer);
+
     masm.PopRegsInMask(NonVolatileRegs);
     JS_ASSERT(masm.framePushed() == 0);
 
     masm.mov(Imm32(0), ReturnReg);
-    masm.ret();
+    masm.abiret();
+
 }
 
 static bool
 GenerateExits(ModuleCompiler &m)
 {
     Label throwLabel;
 
     for (ModuleCompiler::ExitMap::Range r = m.allExits(); !r.empty(); r.popFront()) {
--- a/js/src/ion/AsmJS.h
+++ b/js/src/ion/AsmJS.h
@@ -8,20 +8,17 @@
 #if !defined(jsion_asmjs_h__)
 #define jsion_asmjs_h__
 
 #ifdef XP_MACOSX
 # include <pthread.h>
 # include <mach/mach.h>
 #endif
 
-// asm.js compilation is only available on desktop x86/x64 at the moment.
-// Don't panic, mobile support is coming soon.
-#if defined(JS_ION) && \
-    (defined(JS_CPU_X86) || defined(JS_CPU_X64))
+#if defined(JS_ION)
 # define JS_ASMJS
 #endif
 
 namespace js {
 
 class ScriptSource;
 class SPSProfiler;
 class AsmJSModule;
--- a/js/src/ion/AsmJSLink.cpp
+++ b/js/src/ion/AsmJSLink.cpp
@@ -9,16 +9,18 @@
 #include "jscntxt.h"
 
 #include "jstypedarrayinlines.h"
 
 #include "AsmJS.h"
 #include "AsmJSModule.h"
 #include "frontend/BytecodeCompiler.h"
 
+#include "Ion.h"
+
 using namespace js;
 using namespace js::ion;
 using namespace mozilla;
 
 #ifdef JS_ASMJS
 
 static bool
 LinkFail(JSContext *cx, const char *str)
@@ -195,16 +197,22 @@ DynamicallyLinkModule(JSContext *cx, Cal
         void *heapOffset = (void*)heap->dataPointer();
         void *heapLength = (void*)heap->byteLength();
         uint8_t *code = module.functionCode();
         for (unsigned i = 0; i < module.numHeapAccesses(); i++) {
             const AsmJSHeapAccess &access = module.heapAccess(i);
             JSC::X86Assembler::setPointer(access.patchLengthAt(code), heapLength);
             JSC::X86Assembler::setPointer(access.patchOffsetAt(code), heapOffset);
         }
+#elif defined(JS_CPU_ARM)
+        // Now that the length of the array is known, patch all of the bounds-check
+        // sites with the new length.
+        ion::IonContext ic(cx, NULL);
+        module.patchBoundsChecks(heap->byteLength());
+
 #endif
     }
 
     AutoObjectVector ffis(cx);
     if (!ffis.resize(module.numFFIs()))
         return false;
 
     for (unsigned i = 0; i < module.numGlobals(); i++) {
@@ -318,18 +326,23 @@ CallAsmJS(JSContext *cx, unsigned argc, 
             break;
         }
     }
 
     {
         AsmJSActivation activation(cx, module);
 
         // Call into generated code.
+#ifdef JS_CPU_ARM
+        if (!func.code()(coercedArgs.begin(), module.globalData()))
+            return false;
+#else
         if (!func.code()(coercedArgs.begin()))
             return false;
+#endif
     }
 
     switch (func.returnType()) {
       case AsmJSModule::Return_Void:
         callArgs.rval().set(UndefinedValue());
         break;
       case AsmJSModule::Return_Int32:
         callArgs.rval().set(Int32Value(*(int32_t*)&coercedArgs[0]));
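
The #ifdef in CallAsmJS above matches the CodePtr change in AsmJSModule.h below: on ARM the generated entry receives the module's global-data segment as an explicit second argument (and loads GlobalReg/HeapReg from it in GenerateEntry), whereas on x86/x64 those addresses are patched straight into the code. A minimal sketch of the call-site difference; the Module struct here is a hypothetical stand-in, not the real AsmJSModule:

#include <cstdint>

typedef int32_t (*CodePtrARM)(uint64_t *args, uint8_t *global);   // ARM signature
typedef int32_t (*CodePtrX86)(uint64_t *args);                    // x86/x64 signature

struct Module {            // hypothetical stand-in for AsmJSModule
    void    *entry;
    uint8_t *globalData;
};

static bool callEntry(const Module &m, uint64_t *coercedArgs) {
#ifdef JS_CPU_ARM
    // Pass the global data explicitly; the trampoline loads HeapReg from it.
    return reinterpret_cast<CodePtrARM>(m.entry)(coercedArgs, m.globalData) != 0;
#else
    // x86/x64: the heap and global addresses were patched into the code at link time.
    return reinterpret_cast<CodePtrX86>(m.entry)(coercedArgs) != 0;
#endif
}
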
--- a/js/src/ion/AsmJSModule.h
+++ b/js/src/ion/AsmJSModule.h
@@ -9,16 +9,18 @@
 #define jsion_asmjsmodule_h__
 
 #include "gc/Marking.h"
 #include "ion/RegisterSets.h"
 
 #include "jsscript.h"
 #include "jstypedarrayinlines.h"
 
+#include "IonMacroAssembler.h"
+
 namespace js {
 
 // The basis of the asm.js type system is the EcmaScript-defined coercions
 // ToInt32 and ToNumber.
 enum AsmJSCoercion
 {
     AsmJS_ToInt32,
     AsmJS_ToNumber
@@ -163,18 +165,21 @@ class AsmJSModule
         }
         void patch(uint8_t *baseAddress) {
             u.code_ = baseAddress + u.codeOffset_;
         }
         uint8_t *code() const {
             return u.code_;
         }
     };
-
+#ifdef JS_CPU_ARM
+    typedef int32_t (*CodePtr)(uint64_t *args, uint8_t *global);
+#else
     typedef int32_t (*CodePtr)(uint64_t *args);
+#endif
 
     typedef Vector<AsmJSCoercion, 0, SystemAllocPolicy> ArgCoercionVector;
 
     enum ReturnType { Return_Int32, Return_Double, Return_Void };
 
     class ExportedFunction
     {
       public:
@@ -291,21 +296,27 @@ class AsmJSModule
         }
     };
 
   private:
     typedef Vector<ExportedFunction, 0, SystemAllocPolicy> ExportedFunctionVector;
     typedef Vector<Global, 0, SystemAllocPolicy> GlobalVector;
     typedef Vector<Exit, 0, SystemAllocPolicy> ExitVector;
     typedef Vector<ion::AsmJSHeapAccess, 0, SystemAllocPolicy> HeapAccessVector;
+#if defined(JS_CPU_ARM)
+    typedef Vector<ion::AsmJSBoundsCheck, 0, SystemAllocPolicy> BoundsCheckVector;
+#endif
 
     GlobalVector                          globals_;
     ExitVector                            exits_;
     ExportedFunctionVector                exports_;
     HeapAccessVector                      heapAccesses_;
+#if defined(JS_CPU_ARM)
+    BoundsCheckVector                     boundsChecks_;
+#endif
     uint32_t                              numGlobalVars_;
     uint32_t                              numFFIs_;
     uint32_t                              numFuncPtrTableElems_;
     bool                                  hasArrayView_;
 
     ScopedReleasePtr<JSC::ExecutablePool> codePool_;
     uint8_t *                             code_;
     uint8_t *                             operationCallbackExit_;
@@ -317,21 +328,16 @@ class AsmJSModule
     HeapPtr<ArrayBufferObject>            maybeHeap_;
 
     HeapPtrPropertyName                   globalArgumentName_;
     HeapPtrPropertyName                   importArgumentName_;
     HeapPtrPropertyName                   bufferArgumentName_;
 
     PostLinkFailureInfo                   postLinkFailureInfo_;
 
-    uint8_t *globalData() const {
-        JS_ASSERT(code_);
-        return code_ + codeBytes_;
-    }
-
   public:
     AsmJSModule(JSContext *cx)
       : numGlobalVars_(0),
         numFFIs_(0),
         numFuncPtrTableElems_(0),
         hasArrayView_(false),
         code_(NULL),
         operationCallbackExit_(NULL),
@@ -477,16 +483,21 @@ class AsmJSModule
     // are laid out in this order:
     //   0. a pointer/descriptor for the heap that was linked to the module
     //   1. global variable state (elements are sizeof(uint64_t))
     //   2. function-pointer table elements (elements are sizeof(void*))
     //   3. exits (elements are sizeof(ExitDatum))
     //
     // NB: The list of exits is extended while emitting function bodies and
     // thus exits must be at the end of the list to avoid invalidating indices.
+    uint8_t *globalData() const {
+        JS_ASSERT(code_);
+        return code_ + codeBytes_;
+    }
+
     size_t globalDataBytes() const {
         return sizeof(void*) +
                numGlobalVars_ * sizeof(uint64_t) +
                numFuncPtrTableElems_ * sizeof(void*) +
                exits_.length() * sizeof(ExitDatum);
     }
     unsigned heapOffset() const {
         return 0;
@@ -546,16 +557,51 @@ class AsmJSModule
         return heapAccesses_.length();
     }
     ion::AsmJSHeapAccess &heapAccess(unsigned i) {
         return heapAccesses_[i];
     }
     const ion::AsmJSHeapAccess &heapAccess(unsigned i) const {
         return heapAccesses_[i];
     }
+#if defined(JS_CPU_ARM)
+    bool addBoundsChecks(const ion::AsmJSBoundsCheckVector &checks) {
+        if (!boundsChecks_.reserve(boundsChecks_.length() + checks.length()))
+            return false;
+        for (size_t i = 0; i < checks.length(); i++)
+            boundsChecks_.infallibleAppend(checks[i]);
+        return true;
+    }
+    void convertBoundsChecksToActualOffset(ion::MacroAssembler &masm) {
+        for (unsigned i = 0; i < boundsChecks_.length(); i++)
+            boundsChecks_[i].setOffset(masm.actualOffset(boundsChecks_[i].offset()));
+    }
+
+    void patchBoundsChecks(unsigned heapSize) {
+        ion::AutoFlushCache afc("patchBoundsCheck");
+        int bits = -1;
+        JS_CEILING_LOG2(bits, heapSize);
+        if (bits == -1) {
+            // Tried to size the array to 0; that is bad, but not horrible.
+            return;
+        }
+
+        for (unsigned i = 0; i < boundsChecks_.length(); i++)
+            ion::Assembler::updateBoundsCheck(bits, (ion::Instruction*)(boundsChecks_[i].offset() + code_));
+
+    }
+    unsigned numBoundsChecks() const {
+        return boundsChecks_.length();
+    }
+    const ion::AsmJSBoundsCheck &boundsCheck(unsigned i) const {
+        return boundsChecks_[i];
+    }
+#endif
+
+
 
     void takeOwnership(JSC::ExecutablePool *pool, uint8_t *code, size_t codeBytes, size_t totalBytes) {
         JS_ASSERT(uintptr_t(code) % gc::PageSize == 0);
         codePool_ = pool;
         code_ = code;
         codeBytes_ = codeBytes;
         totalBytes_ = totalBytes;
     }
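
patchBoundsChecks above relies on asm.js heap lengths being powers of two: JS_CEILING_LOG2 computes log2 of the heap length, and each patched check effectively tests index >> log2(length) == 0, which is the same as index < length for a power-of-two length (updateBoundsCheck in Assembler-arm.cpp rewrites the recorded mov to perform exactly that shift). A small sketch of the equivalence, not part of the patch; ceilingLog2 below is a stand-in for the JS_CEILING_LOG2 macro:

#include <cassert>
#include <cstdint>

static uint32_t ceilingLog2(uint32_t n) {      // stand-in for JS_CEILING_LOG2
    uint32_t bits = 0;
    while ((1u << bits) < n)
        bits++;
    return bits;
}

// With a power-of-two heap length, a single logical shift is a bounds test.
static bool inBounds(uint32_t index, uint32_t heapLength) {
    return (index >> ceilingLog2(heapLength)) == 0;
}

int main() {
    assert(inBounds(0, 4096));
    assert(inBounds(4095, 4096));
    assert(!inBounds(4096, 4096));
    assert(!inBounds(1u << 31, 4096));
    return 0;
}
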
--- a/js/src/ion/AsmJSSignalHandlers.cpp
+++ b/js/src/ion/AsmJSSignalHandlers.cpp
@@ -278,18 +278,23 @@ LookupHeapAccess(const AsmJSModule &modu
 
 // Not all versions of the Android NDK define ucontext_t or mcontext_t.
 // Detect this and provide custom but compatible definitions. Note that these
 // follow the GLibc naming convention to access register values from
 // mcontext_t.
 //
 // See: https://chromiumcodereview.appspot.com/10829122/
 // See: http://code.google.com/p/android/issues/detail?id=34784
-# if defined(__ANDROID__) && !defined(__BIONIC_HAVE_UCONTEXT_T)
+# if (defined(ANDROID)) && !defined(__BIONIC_HAVE_UCONTEXT_T)
 #  if defined(__arm__)
+// GLibc on ARM defines mcontext_t as a typedef for 'struct sigcontext'.
+// Old versions of the C library <signal.h> didn't define the type.
+#if !defined(__BIONIC_HAVE_STRUCT_SIGCONTEXT)
+#include <asm/sigcontext.h>
+#endif
 
 typedef struct sigcontext mcontext_t;
 
 typedef struct ucontext {
     uint32_t uc_flags;
     struct ucontext* uc_link;
     stack_t uc_stack;
     mcontext_t uc_mcontext;
--- a/js/src/ion/CodeGenerator.cpp
+++ b/js/src/ion/CodeGenerator.cpp
@@ -2969,36 +2969,16 @@ CodeGenerator::visitPowD(LPowD *ins)
     masm.passABIArg(power);
     masm.callWithABI(JS_FUNC_TO_DATA_PTR(void *, ecmaPow), MacroAssembler::DOUBLE);
 
     JS_ASSERT(ToFloatRegister(ins->output()) == ReturnFloatReg);
     return true;
 }
 
 bool
-CodeGenerator::visitNegI(LNegI *ins)
-{
-    Register input = ToRegister(ins->input());
-    JS_ASSERT(input == ToRegister(ins->output()));
-
-    masm.neg32(input);
-    return true;
-}
-
-bool
-CodeGenerator::visitNegD(LNegD *ins)
-{
-    FloatRegister input = ToFloatRegister(ins->input());
-    JS_ASSERT(input == ToFloatRegister(ins->output()));
-
-    masm.negateDouble(input);
-    return true;
-}
-
-bool
 CodeGenerator::visitRandom(LRandom *ins)
 {
     Register temp = ToRegister(ins->temp());
     Register temp2 = ToRegister(ins->temp2());
 
     masm.loadJSContext(temp);
 
     masm.setupUnalignedABICall(1, temp2);
@@ -5917,20 +5897,31 @@ CodeGenerator::visitOutOfLineParallelAbo
     return true;
 }
 
 bool
 CodeGenerator::visitAsmJSCall(LAsmJSCall *ins)
 {
     MAsmJSCall *mir = ins->mir();
 
-    if (mir->spIncrement())
+#if defined(JS_CPU_ARM) && !defined(JS_CPU_ARM_HARDFP)
+    for (unsigned i = 0; i < ins->numOperands(); i++) {
+        LAllocation *a = ins->getOperand(i);
+        if (a->isFloatReg()) {
+            FloatRegister fr = ToFloatRegister(a);
+            int srcId = fr.code() * 2;
+            masm.ma_vxfer(fr, Register::FromCode(srcId), Register::FromCode(srcId+1));
+        }
+    }
+#endif
+    if (mir->spIncrement())
         masm.freeStack(mir->spIncrement());
 
-    JS_ASSERT((AlignmentAtPrologue + masm.framePushed()) % StackAlignment == 0);
+    JS_ASSERT((AlignmentAtPrologue + masm.framePushed()) % StackAlignment == 0);
+
 #ifdef DEBUG
     Label ok;
     JS_ASSERT(IsPowerOfTwo(StackAlignment));
     masm.branchTestPtr(Assembler::Zero, StackPointer, Imm32(StackAlignment - 1), &ok);
     masm.breakpoint();
     masm.bind(&ok);
 #endif
 
@@ -5952,23 +5943,37 @@ CodeGenerator::visitAsmJSCall(LAsmJSCall
 
     postAsmJSCall(ins);
     return true;
 }
 
 bool
 CodeGenerator::visitAsmJSParameter(LAsmJSParameter *lir)
 {
+#if defined(JS_CPU_ARM) && !defined(JS_CPU_ARM_HARDFP)
+    // The softfp ABI transfers some double values in GPR pairs; undo this here
+    // by moving them back into their float registers.
+    LAllocation *a = lir->getDef(0)->output();
+    if (a->isFloatReg()) {
+        FloatRegister fr = ToFloatRegister(a);
+        int srcId = fr.code() * 2;
+        masm.ma_vxfer(Register::FromCode(srcId), Register::FromCode(srcId+1), fr);
+    }
+#endif
     return true;
 }
 
 bool
 CodeGenerator::visitAsmJSReturn(LAsmJSReturn *lir)
 {
     // Don't emit a jump to the return label if this is the last block.
+#if defined(JS_CPU_ARM) && !defined(JS_CPU_ARM_HARDFP)
+    if (lir->getOperand(0)->isFloatReg())
+        masm.ma_vxfer(d0, r0, r1);
+#endif
     if (current->mir() != *gen->graph().poBegin())
         masm.jump(returnLabel_);
     return true;
 }
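
The vxfer calls added to visitAsmJSCall, visitAsmJSParameter, and visitAsmJSReturn exist because the softfp ABI passes and returns doubles in core-register pairs (d0 travels as r0:r1, d1 as r2:r3): the generated code keeps doubles in VFP registers, so the raw 64 bits must be shuttled across at every ABI boundary, with no numeric conversion involved. A host-side sketch of that bit-for-bit move, not part of the patch, modelling the GPR pair with two uint32_t (little-endian assumed, as on the ARM targets in question):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
    double d = 3.5;                                // value held in a VFP register

    uint32_t lo, hi;                               // stand-ins for r0/r1
    std::memcpy(&lo, &d, sizeof(lo));              // like vmov r0, r1, d0 ...
    std::memcpy(&hi, reinterpret_cast<const uint8_t*>(&d) + 4, sizeof(hi));

    double back;                                   // ... and vmov d0, r0, r1 on the way back
    std::memcpy(&back, &lo, sizeof(lo));
    std::memcpy(reinterpret_cast<uint8_t*>(&back) + 4, &hi, sizeof(hi));

    assert(back == d);                             // the bits are untouched either way
    return 0;
}
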
 
 bool
 CodeGenerator::visitAsmJSVoidReturn(LAsmJSVoidReturn *lir)
 {
--- a/js/src/ion/CodeGenerator.h
+++ b/js/src/ion/CodeGenerator.h
@@ -135,18 +135,16 @@ class CodeGenerator : public CodeGenerat
     bool visitBoundsCheckLower(LBoundsCheckLower *lir);
     bool visitLoadFixedSlotV(LLoadFixedSlotV *ins);
     bool visitLoadFixedSlotT(LLoadFixedSlotT *ins);
     bool visitStoreFixedSlotV(LStoreFixedSlotV *ins);
     bool visitStoreFixedSlotT(LStoreFixedSlotT *ins);
     bool visitAbsI(LAbsI *lir);
     bool visitPowI(LPowI *lir);
     bool visitPowD(LPowD *lir);
-    bool visitNegI(LNegI *lir);
-    bool visitNegD(LNegD *lir);
     bool visitRandom(LRandom *lir);
     bool visitMathFunctionD(LMathFunctionD *ins);
     bool visitModD(LModD *ins);
     bool visitMinMaxI(LMinMaxI *lir);
     bool visitBinaryV(LBinaryV *lir);
     bool emitCompareS(LInstruction *lir, JSOp op, Register left, Register right,
                       Register output, Register temp);
     bool visitCompareS(LCompareS *lir);
--- a/js/src/ion/IonLinker.h
+++ b/js/src/ion/IonLinker.h
@@ -14,17 +14,16 @@
 #include "ion/IonCompartment.h"
 #include "assembler/jit/ExecutableAllocator.h"
 #include "ion/IonMacroAssembler.h"
 #include "jsgcinlines.h"
 
 namespace js {
 namespace ion {
 
-static const int CodeAlignment = 8;
 class Linker
 {
     MacroAssembler &masm;
 
     IonCode *fail(JSContext *cx) {
         js_ReportOutOfMemory(cx);
         return NULL;
     }
--- a/js/src/ion/LIR.cpp
+++ b/js/src/ion/LIR.cpp
@@ -5,17 +5,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "MIR.h"
 #include "MIRGraph.h"
 #include "LIR.h"
 #include "IonSpewer.h"
 #include "LIR-inl.h"
-
+#include "shared/CodeGenerator-shared.h"
 using namespace js;
 using namespace js::ion;
 
 LIRGraph::LIRGraph(MIRGraph *mir)
   : numVirtualRegisters_(0),
     numInstructions_(1), // First id is 1.
     localSlotCount_(0),
     argumentSlotCount_(0),
--- a/js/src/ion/MIRGenerator.h
+++ b/js/src/ion/MIRGenerator.h
@@ -106,22 +106,31 @@ class MIRGenerator
     void setPerformsAsmJSCall() {
         JS_ASSERT(compilingAsmJS());
         performsAsmJSCall_ = true;
     }
     bool performsAsmJSCall() const {
         JS_ASSERT(compilingAsmJS());
         return performsAsmJSCall_;
     }
+#ifndef JS_CPU_ARM
     bool noteHeapAccess(AsmJSHeapAccess heapAccess) {
         return asmJSHeapAccesses_.append(heapAccess);
     }
     const Vector<AsmJSHeapAccess, 0, IonAllocPolicy> &heapAccesses() const {
         return asmJSHeapAccesses_;
     }
+#else
+    bool noteBoundsCheck(uint32_t offsetBefore) {
+        return asmJSBoundsChecks_.append(AsmJSBoundsCheck(offsetBefore));
+    }
+    const Vector<AsmJSBoundsCheck, 0, IonAllocPolicy> &asmBoundsChecks() const {
+        return asmJSBoundsChecks_;
+    }
+#endif
     bool noteGlobalAccess(unsigned offset, unsigned globalDataOffset) {
         return asmJSGlobalAccesses_.append(AsmJSGlobalAccess(offset, globalDataOffset));
     }
     const Vector<AsmJSGlobalAccess, 0, IonAllocPolicy> &globalAccesses() const {
         return asmJSGlobalAccesses_;
     }
 
   public:
@@ -133,17 +142,21 @@ class MIRGenerator
     JSFunction *fun_;
     uint32_t nslots_;
     MIRGraph *graph_;
     bool error_;
     size_t cancelBuild_;
 
     uint32_t maxAsmJSStackArgBytes_;
     bool performsAsmJSCall_;
+#ifdef JS_CPU_ARM
+    AsmJSBoundsCheckVector asmJSBoundsChecks_;
+#else
     AsmJSHeapAccessVector asmJSHeapAccesses_;
+#endif
     AsmJSGlobalAccessVector asmJSGlobalAccesses_;
 };
 
 } // namespace ion
 } // namespace js
 
 #endif // jsion_mirgen_h__
 
--- a/js/src/ion/RegisterAllocator.h
+++ b/js/src/ion/RegisterAllocator.h
@@ -304,19 +304,25 @@ class RegisterAllocator
     RegisterAllocator(MIRGenerator *mir, LIRGenerator *lir, LIRGraph &graph)
       : mir(mir),
         lir(lir),
         graph(graph),
         allRegisters_(RegisterSet::All())
     {
         if (FramePointer != InvalidReg && lir->mir()->instrumentedProfiling())
             allRegisters_.take(AnyRegister(FramePointer));
-#ifdef JS_CPU_X64
+#if defined(JS_CPU_X64)
         if (mir->compilingAsmJS())
             allRegisters_.take(AnyRegister(HeapReg));
+#elif defined(JS_CPU_ARM)
+        if (mir->compilingAsmJS()) {
+            allRegisters_.take(AnyRegister(HeapReg));
+            allRegisters_.take(AnyRegister(GlobalReg));
+            allRegisters_.take(AnyRegister(NANReg));
+        }
 #endif
     }
 
   protected:
     bool init();
 
     CodePosition outputOf(uint32_t pos) {
         return CodePosition(pos, CodePosition::OUTPUT);
--- a/js/src/ion/RegisterSets.h
+++ b/js/src/ion/RegisterSets.h
@@ -797,12 +797,26 @@ class AsmJSHeapAccess
     void *patchLengthAt(uint8_t *code) const { return code + (offset_ - cmpDelta_); }
     void *patchOffsetAt(uint8_t *code) const { return code + (offset_ + opLength_); }
 #endif
     void updateOffset(uint32_t offset) { offset_ = offset; }
 };
 
 typedef Vector<AsmJSHeapAccess, 0, IonAllocPolicy> AsmJSHeapAccessVector;
 
+#ifdef JS_CPU_ARM
+struct AsmJSBoundsCheck
+{
+    unsigned offset_;
+    AsmJSBoundsCheck(unsigned offset)
+    : offset_(offset)
+    {}
+    void setOffset(uint32_t offset) { offset_ = offset; }
+    unsigned offset() {return offset_;}
+};
+
+typedef Vector<AsmJSBoundsCheck, 0, IonAllocPolicy> AsmJSBoundsCheckVector;
+#endif
+
 } // namespace ion
 } // namespace js
 
 #endif // jsion_cpu_registersets_h__
--- a/js/src/ion/arm/Architecture-arm.h
+++ b/js/src/ion/arm/Architecture-arm.h
@@ -28,16 +28,17 @@ static const uint32_t ION_FRAME_SLACK_SI
 // An offset that is illegal for a local variable's stack allocation.
 static const int32_t INVALID_STACK_SLOT      = -1;
 
 // These offsets are specific to nunboxing, and capture offsets into the
 // components of a js::Value.
 static const int32_t NUNBOX32_TYPE_OFFSET    = 4;
 static const int32_t NUNBOX32_PAYLOAD_OFFSET = 0;
 
+static const uint32_t ShadowStackSpace = 0;
 ////
 // These offsets are related to bailouts.
 ////
 
 // Size of each bailout table entry. On arm, this is presently
 // a single call (which is wrong!). the call clobbers lr.
 // For now, I've dealt with this by ensuring that we never allocate to lr.
 // it should probably be 8 bytes, a mov of an immediate into r12 (not
--- a/js/src/ion/arm/Assembler-arm.cpp
+++ b/js/src/ion/arm/Assembler-arm.cpp
@@ -13,16 +13,94 @@
 #include "jsutil.h"
 #include "assembler/jit/ExecutableAllocator.h"
 #include "jscompartment.h"
 #include "ion/IonCompartment.h"
 
 using namespace js;
 using namespace js::ion;
 
+ABIArgGenerator::ABIArgGenerator() :
+#if defined(JS_CPU_ARM_HARDFP)
+    intRegIndex_(0),
+    floatRegIndex_(0),
+#else
+    argRegIndex_(0),
+#endif
+    stackOffset_(0),
+    current_()
+{}
+
+ABIArg
+ABIArgGenerator::next(MIRType type)
+{
+#if defined(JS_CPU_ARM_HARDFP)
+    switch (type) {
+      case MIRType_Int32:
+      case MIRType_Pointer:
+        if (intRegIndex_ == NumIntArgRegs) {
+            current_ = ABIArg(stackOffset_);
+            stackOffset_ += sizeof(uint32_t);
+            break;
+        }
+        current_ = ABIArg(Register::FromCode(intRegIndex_));
+        intRegIndex_++;
+        break;
+      case MIRType_Double:
+        if (floatRegIndex_ == NumFloatArgRegs) {
+            static const int align = sizeof(double) - 1;
+            stackOffset_ = (stackOffset_ + align) & ~align;
+            current_ = ABIArg(stackOffset_);
+            stackOffset_ += sizeof(uint64_t);
+            break;
+        }
+        current_ = ABIArg(FloatRegister::FromCode(floatRegIndex_));
+        floatRegIndex_++;
+        break;
+      default:
+        JS_NOT_REACHED("Unexpected argument type");
+    }
+    return current_;
+#else
+    switch (type) {
+      case MIRType_Int32:
+      case MIRType_Pointer:
+        if (argRegIndex_ == NumIntArgRegs) {
+            current_ = ABIArg(stackOffset_);
+            stackOffset_ += sizeof(uint32_t);
+            break;
+        }
+        current_ = ABIArg(Register::FromCode(argRegIndex_));
+        argRegIndex_++;
+        break;
+      case MIRType_Double: {
+        unsigned alignedArgRegIndex_ = (argRegIndex_ + 1) & ~1;
+        if (alignedArgRegIndex_ + 1 > NumIntArgRegs) {
+            static const int align = sizeof(double) - 1;
+            stackOffset_ = (stackOffset_ + align) & ~align;
+            current_ = ABIArg(stackOffset_);
+            stackOffset_ += sizeof(uint64_t);
+            argRegIndex_ = NumIntArgRegs;
+            break;
+        }
+        argRegIndex_ = alignedArgRegIndex_;
+        current_ = ABIArg(FloatRegister::FromCode(argRegIndex_ >> 1));
+
+        argRegIndex_+=2;
+      }
+        break;
+      default:
+        JS_NOT_REACHED("Unexpected argument type");
+    }
+    return current_;
+#endif
+}
+const Register ABIArgGenerator::NonArgReturnVolatileReg0 = r4;
+const Register ABIArgGenerator::NonArgReturnVolatileReg1 = r5;
+
 // Encode a standard register when it is being used as src1, the dest, and
 // an extra register. These should never be called with an InvalidReg.
 uint32_t
 js::ion::RT(Register r)
 {
     JS_ASSERT((r.code() & ~0xf) == 0);
     return r.code() << 12;
 }
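
In the softfp branch of ABIArgGenerator::next above, a double claims the next even/odd core-register pair: alignedArgRegIndex_ rounds the index up to an even register, FloatRegister::FromCode(index >> 1) names the d-register that aliases that pair, and once no pair is left the argument spills to an 8-byte-aligned stack slot. A simplified stand-alone model of that assignment, not part of the patch (ints and doubles only, four argument registers, soft-float rules); the example signature is invented:

#include <cstdio>

enum Kind { Int32, Double };

int main() {
    const unsigned NumIntArgRegs = 4;              // r0..r3
    Kind args[] = { Int32, Double, Int32 };        // example signature

    unsigned reg = 0, stack = 0;
    for (Kind k : args) {
        if (k == Int32) {
            if (reg < NumIntArgRegs) {
                std::printf("int    -> r%u\n", reg++);
            } else {
                std::printf("int    -> [sp+%u]\n", stack);
                stack += 4;
            }
        } else {
            unsigned aligned = (reg + 1) & ~1u;    // round up to an even register
            if (aligned + 1 > NumIntArgRegs) {     // no even/odd pair left
                stack = (stack + 7) & ~7u;         // 8-byte align the slot
                std::printf("double -> [sp+%u]\n", stack);
                stack += 8;
                reg = NumIntArgRegs;
            } else {
                std::printf("double -> r%u:r%u (aliases d%u)\n",
                            aligned, aligned + 1, aligned >> 1);
                reg = aligned + 2;
            }
        }
    }
    return 0;
}
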
@@ -395,29 +473,61 @@ InstALU::extractOp1(Register *ret)
 {
     *ret = toRN(*this);
 }
 bool
 InstALU::checkOp1(Register rn)
 {
     return rn == toRN(*this);
 }
+Operand2
+InstALU::extractOp2()
+{
+    return Operand2(encode());
+}
 
 InstCMP *
 InstCMP::asTHIS(const Instruction &i)
 {
     if (isTHIS(i))
         return (InstCMP*) (&i);
     return NULL;
 }
 
 bool
 InstCMP::isTHIS(const Instruction &i)
 {
-    return InstALU::isTHIS(i) && InstALU::asTHIS(i)->checkDest(r0);
+    return InstALU::isTHIS(i) && InstALU::asTHIS(i)->checkDest(r0) && InstALU::asTHIS(i)->checkOp(op_cmp);
+}
+
+InstMOV *
+InstMOV::asTHIS(const Instruction &i)
+{
+    if (isTHIS(i))
+        return (InstMOV*) (&i);
+    return NULL;
+}
+
+bool
+InstMOV::isTHIS(const Instruction &i)
+{
+    return InstALU::isTHIS(i) && InstALU::asTHIS(i)->checkOp1(r0) && InstALU::asTHIS(i)->checkOp(op_mov);
+}
+
+Op2Reg
+Operand2::toOp2Reg() {
+    return *(Op2Reg*)this;
+}
+O2RegImmShift
+Op2Reg::toO2RegImmShift() {
+    return *(O2RegImmShift*)this;
+}
+O2RegRegShift
+Op2Reg::toO2RegRegShift() {
+    return *(O2RegRegShift*)this;
 }
 
 Imm16::Imm16(Instruction &inst)
   : lower(inst.encode() & 0xfff),
     upper(inst.encode() >> 16),
     invalid(0xfff)
 { }
 
@@ -1232,42 +1342,54 @@ Assembler::writeInstStatic(uint32_t x, u
     JS_ASSERT(dest != NULL);
     *dest = x;
 }
 
 BufferOffset
 Assembler::align(int alignment)
 {
     BufferOffset ret;
-    while (!m_buffer.isAligned(alignment)) {
-        BufferOffset tmp = as_nop();
-        if (!ret.assigned())
-            ret = tmp;
+    if (alignment == 8) {
+        while (!m_buffer.isAligned(alignment)) {
+            BufferOffset tmp = as_nop();
+            if (!ret.assigned())
+                ret = tmp;
+        }
+    } else {
+        flush();
+        JS_ASSERT((alignment & (alignment - 1)) == 0);
+        while (size() & (alignment-1)) {
+            BufferOffset tmp = as_nop();
+            if (!ret.assigned())
+                ret = tmp;
+        }
     }
     return ret;
 
 }
 BufferOffset
 Assembler::as_nop()
 {
     return writeInst(0xe320f000);
 }
 BufferOffset
 Assembler::as_alu(Register dest, Register src1, Operand2 op2,
-                ALUOp op, SetCond_ sc, Condition c)
+                  ALUOp op, SetCond_ sc, Condition c, Instruction *instdest)
 {
     return writeInst((int)op | (int)sc | (int) c | op2.encode() |
                      ((dest == InvalidReg) ? 0 : RD(dest)) |
-                     ((src1 == InvalidReg) ? 0 : RN(src1)));
+                     ((src1 == InvalidReg) ? 0 : RN(src1)), (uint32_t*)instdest);
 }
+
 BufferOffset
-Assembler::as_mov(Register dest, Operand2 op2, SetCond_ sc, Condition c)
+Assembler::as_mov(Register dest, Operand2 op2, SetCond_ sc, Condition c, Instruction *instdest)
 {
-    return as_alu(dest, InvalidReg, op2, op_mov, sc, c);
+    return as_alu(dest, InvalidReg, op2, op_mov, sc, c, instdest);
 }
+
 BufferOffset
 Assembler::as_mvn(Register dest, Operand2 op2, SetCond_ sc, Condition c)
 {
     return as_alu(dest, InvalidReg, op2, op_mvn, sc, c);
 }
 
 // Logical operations.
 BufferOffset
@@ -1754,22 +1876,29 @@ Assembler::as_blx(Register r, Condition 
     return writeInst(((int) c) | op_blx | r.code());
 }
 
 // bl can only branch to an pc-relative immediate offset
 // It cannot change the processor state.
 BufferOffset
 Assembler::as_bl(BOffImm off, Condition c)
 {
+    m_buffer.markNextAsBranch();
     return writeInst(((int)c) | op_bl | off.encode());
 }
 
 BufferOffset
 Assembler::as_bl(Label *l, Condition c)
 {
+    if (m_buffer.oom()) {
+        BufferOffset ret;
+        return ret;
+    }
+    //as_bkpt();
+    m_buffer.markNextAsBranch();
     if (l->bound()) {
         BufferOffset ret = as_nop();
         as_bl(BufferOffset(l).diffB<BOffImm>(ret), c, ret);
         return ret;
     }
 
     int32_t old;
     BufferOffset ret;
@@ -1790,16 +1919,30 @@ Assembler::as_bl(Label *l, Condition c)
 }
 BufferOffset
 Assembler::as_bl(BOffImm off, Condition c, BufferOffset inst)
 {
     *editSrc(inst) = InstBLImm(off, c);
     return inst;
 }
 
+BufferOffset
+Assembler::as_mrs(Register r, Condition c)
+{
+    return writeInst(0x010f0000 | int(c) | RD(r));
+}
+
+BufferOffset
+Assembler::as_msr(Register r, Condition c)
+{
+    // Hardcode the 'mask' field to 0b11 for now. It is bits 18 and 19, which are the two high bits of the 'c' in this constant.
+    JS_ASSERT((r.code() & ~0xf) == 0);
+    return writeInst(0x012cf000 | int(c) | r.code());
+}
+
 // VFP instructions!
 enum vfp_tags {
     vfp_tag   = 0x0C000A00,
     vfp_arith = 0x02000000
 };
 BufferOffset
 Assembler::writeVFPInst(vfp_size sz, uint32_t blob, uint32_t *dest)
 {
@@ -2055,16 +2198,22 @@ Assembler::as_vimm(VFPRegister vd, VFPIm
 
 }
 BufferOffset
 Assembler::as_vmrs(Register r, Condition c)
 {
     return writeInst(c | 0x0ef10a10 | RT(r));
 }
 
+BufferOffset
+Assembler::as_vmsr(Register r, Condition c)
+{
+    return writeInst(c | 0x0ee10a10 | RT(r));
+}
+
 bool
 Assembler::nextLink(BufferOffset b, BufferOffset *next)
 {
     Instruction branch = *editSrc(b);
     JS_ASSERT(branch.is<InstBranchImm>());
 
     BOffImm destOff;
     branch.as<InstBranchImm>()->extractImm(&destOff);
@@ -2239,18 +2388,21 @@ Assembler::retargetNearBranch(Instructio
     i->extractCond(&c);
     retargetNearBranch(i, offset, c, final);
 }
 
 void
 Assembler::retargetNearBranch(Instruction *i, int offset, Condition cond, bool final)
 {
     // Retargeting calls is totally unsupported!
-    JS_ASSERT_IF(i->is<InstBranchImm>(), i->is<InstBImm>());
-    new (i) InstBImm(BOffImm(offset), cond);
+    JS_ASSERT_IF(i->is<InstBranchImm>(), i->is<InstBImm>() || i->is<InstBLImm>());
+    if (i->is<InstBLImm>())
+        new (i) InstBLImm(BOffImm(offset), cond);
+    else
+        new (i) InstBImm(BOffImm(offset), cond);
 
     // Flush the cache, since an instruction was overwritten
     if (final)
         AutoFlushCache::updateTop(uintptr_t(i), 4);
 }
 
 void
 Assembler::retargetFarBranch(Instruction *i, uint8_t **slot, uint8_t *dest, Condition cond)
@@ -2553,20 +2705,44 @@ Assembler::ToggleCall(CodeLocationLabel 
     if (enabled)
         *inst = InstBLXReg(ScratchRegister, Always);
     else
         *inst = InstNOP();
 
     AutoFlushCache::updateTop(uintptr_t(inst), 4);
 }
 
+void Assembler::updateBoundsCheck(uint32_t logHeapSize, Instruction *inst)
+{
+    JS_ASSERT(inst->is<InstMOV>());
+    InstMOV *mov = inst->as<InstMOV>();
+    JS_ASSERT(mov->checkDest(ScratchRegister));
+
+    Operand2 op = mov->extractOp2();
+    JS_ASSERT(op.isO2Reg());
+
+    Op2Reg reg = op.toOp2Reg();
+    Register index;
+    reg.getRM(&index);
+    JS_ASSERT(reg.isO2RegImmShift());
+    // O2RegImmShift shift = reg.toO2RegImmShift();
+
+    *inst = InstALU(ScratchRegister, InvalidReg, lsr(index, logHeapSize), op_mov, SetCond, Always);
+    AutoFlushCache::updateTop(uintptr_t(inst), 4);
+}
+
 void
 AutoFlushCache::update(uintptr_t newStart, size_t len)
 {
     uintptr_t newStop = newStart + len;
+    if (this == NULL) {
+        // just flush right here and now.
+        JSC::ExecutableAllocator::cacheFlush((void*)newStart, len);
+        return;
+    }
     used_ = true;
     if (!start_) {
         IonSpewCont(IonSpew_CacheFlush,  ".");
         start_ = newStart;
         stop_ = newStop;
         return;
     }
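
updateBoundsCheck above patches a previously emitted movs of the index into ScratchRegister in place: once the heap length is known, the instruction becomes movs ScratchRegister, index, lsr #logHeapSize, so the flags tell the following code whether the index exceeds the (power-of-two) heap length, and the instruction cache is flushed for the rewritten word. A rough sketch, not part of the patch, of the field-level rewrite involved; it models only the imm5 shift amount (bits 11:7) and the LSR shift type (bits 6:5) of an ARM data-processing operand, whereas the real code reconstructs the whole InstALU:

#include <cassert>
#include <cstdint>

struct Instruction { uint32_t raw; };      // one 32-bit ARM instruction word

static void patchBoundsCheckShift(Instruction *inst, uint32_t logHeapSize) {
    assert(logHeapSize < 32);
    const uint32_t shiftAmountMask = 0x1fu << 7;   // imm5 field
    const uint32_t shiftTypeMask   = 0x3u  << 5;   // 00=LSL 01=LSR 10=ASR 11=ROR
    inst->raw &= ~(shiftAmountMask | shiftTypeMask);
    inst->raw |= (logHeapSize << 7) | (0x1u << 5); // index, LSR #logHeapSize
    // The real updateBoundsCheck then flushes the instruction cache for this
    // word (AutoFlushCache::updateTop) so the CPU sees the new encoding.
}

int main() {
    Instruction mov = { 0xe1b0c001 };      // illustrative encoding: movs r12, r1
    patchBoundsCheckShift(&mov, 24);       // heap length 1 << 24
    assert(((mov.raw >> 7) & 0x1f) == 24); // shift amount patched in
    assert(((mov.raw >> 5) & 0x3) == 1);   // shift type is LSR
    return 0;
}
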
 
--- a/js/src/ion/arm/Assembler-arm.h
+++ b/js/src/ion/arm/Assembler-arm.h
@@ -52,33 +52,62 @@ static const Register OsrFrameReg = r3;
 static const Register ArgumentsRectifierReg = r8;
 static const Register CallTempReg0 = r5;
 static const Register CallTempReg1 = r6;
 static const Register CallTempReg2 = r7;
 static const Register CallTempReg3 = r8;
 static const Register CallTempReg4 = r0;
 static const Register CallTempReg5 = r1;
 
+
+static const Register IntArgReg0 = r0;
+static const Register IntArgReg1 = r1;
+static const Register IntArgReg2 = r2;
+static const Register IntArgReg3 = r3;
+static const Register GlobalReg = r10;
+static const Register HeapReg = r11;
 static const Register CallTempNonArgRegs[] = { r5, r6, r7, r8 };
 static const uint32_t NumCallTempNonArgRegs =
     mozilla::ArrayLength(CallTempNonArgRegs);
+class ABIArgGenerator
+{
+#if defined(JS_CPU_ARM_HARDFP)
+    unsigned intRegIndex_;
+    unsigned floatRegIndex_;
+#else
+    unsigned argRegIndex_;
+#endif
+    uint32_t stackOffset_;
+    ABIArg current_;
+
+  public:
+    ABIArgGenerator();
+    ABIArg next(MIRType argType);
+    ABIArg &current() { return current_; }
+    uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
+    static const Register NonArgReturnVolatileReg0;
+    static const Register NonArgReturnVolatileReg1;
+
+};
 
 static const Register PreBarrierReg = r1;
 
 static const Register InvalidReg = { Registers::invalid_reg };
 static const FloatRegister InvalidFloatReg = { FloatRegisters::invalid_freg };
 
 static const Register JSReturnReg_Type = r3;
 static const Register JSReturnReg_Data = r2;
 static const Register StackPointer = sp;
 static const Register FramePointer = InvalidReg;
 static const Register ReturnReg = r0;
 static const FloatRegister ReturnFloatReg = { FloatRegisters::d0 };
 static const FloatRegister ScratchFloatReg = { FloatRegisters::d1 };
 
+static const FloatRegister NANReg = { FloatRegisters::d15 };
+
 static const FloatRegister d0  = {FloatRegisters::d0};
 static const FloatRegister d1  = {FloatRegisters::d1};
 static const FloatRegister d2  = {FloatRegisters::d2};
 static const FloatRegister d3  = {FloatRegisters::d3};
 static const FloatRegister d4  = {FloatRegisters::d4};
 static const FloatRegister d5  = {FloatRegisters::d5};
 static const FloatRegister d6  = {FloatRegisters::d6};
 static const FloatRegister d7  = {FloatRegisters::d7};
@@ -92,19 +121,22 @@ static const FloatRegister d14 = {FloatR
 static const FloatRegister d15 = {FloatRegisters::d15};
 
 // For maximal awesomeness, 8 should be sufficent.
 // ldrd/strd (dual-register load/store) operate in a single cycle
 // when the address they are dealing with is 8 byte aligned.
 // Also, the ARM abi wants the stack to be 8 byte aligned at
 // function boundaries.  I'm trying to make sure this is always true.
 static const uint32_t StackAlignment = 8;
+static const uint32_t CodeAlignment = 8;
 static const bool StackKeptAligned = true;
 static const uint32_t NativeFrameSize = sizeof(void*);
-static const uint32_t AlignmentAtPrologue = sizeof(void*);
+static const uint32_t AlignmentAtPrologue = 0;
+static const uint32_t AlignmentMidPrologue = 4;
+
 
 static const Scale ScalePointer = TimesFour;
 
 class Instruction;
 class InstBranchImm;
 uint32_t RM(Register r);
 uint32_t RS(Register r);
 uint32_t RD(Register r);
@@ -367,17 +399,17 @@ enum VFPOp {
 ALUOp ALUNeg(ALUOp op, Register dest, Imm32 *imm, Register *negDest);
 bool can_dbl(ALUOp op);
 bool condsAreSafe(ALUOp op);
 // If there is a variant of op that has a dest (think cmp/sub)
 // return that variant of it.
 ALUOp getDestVariant(ALUOp op);
 
 static const ValueOperand JSReturnOperand = ValueOperand(JSReturnReg_Type, JSReturnReg_Data);
-
+static const ValueOperand softfpReturnOperand = ValueOperand(r1, r0);
 // All of these classes exist solely to shuffle data into the various operands.
 // For example Operand2 can be an imm8, a register-shifted-by-a-constant or
 // a register-shifted-by-a-register.  I represent this in C++ by having a
 // base class Operand2, which just stores the 32 bits of data as they will be
 // encoded in the instruction.  You cannot directly create an Operand2
 // since it is tricky, and not entirely sane to do so.  Instead, you create
 // one of its child classes, e.g. Imm8.  Imm8's constructor takes a single
 // integer argument.  Imm8 will verify that its argument can be encoded
@@ -386,16 +418,19 @@ static const ValueOperand JSReturnOperan
 // constructor will then call the Imm8data's encode() function to extract
 // the raw bits from it.  In the future, we should be able to extract
 // data from the Operand2 by asking it for its component Imm8data
 // structures.  The reason this is so horribly round-about is I wanted
 // to have Imm8 and RegisterShiftedRegister inherit directly from Operand2
 // but have all of them take up only a single word of storage.
 // I also wanted to avoid passing around raw integers at all
 // since they are error prone.
+class Op2Reg;
+class O2RegImmShift;
+class O2RegRegShift;
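
For illustration only (not part of the patch), the child classes described above get used like this; the lsl() helper is assumed to be the shift-building convenience used elsewhere in this file:

    // Hypothetical snippets showing the three Operand2 flavours:
    Imm8 eight(0xff0);                      // 0xff rotated right by 28 -- a valid <imm8m>
    O2RegImmShift byConstant(r1, LSL, 4);   // r1 LSL #4 (what lsl(r1, 4) builds)
    O2RegRegShift byRegister(r1, LSL, r2);  // r1 LSL r2
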
 namespace datastore {
 struct Reg
 {
     // the "second register"
     uint32_t RM : 4;
     // do we get another register for shifting
     uint32_t RRS : 1;
     ShiftType Type : 2;
@@ -406,16 +441,19 @@ struct Reg
 
     Reg(uint32_t rm, ShiftType type, uint32_t rsr, uint32_t shiftamount)
       : RM(rm), RRS(rsr), Type(type), ShiftAmount(shiftamount), pad(0)
     { }
 
     uint32_t encode() {
         return RM | RRS << 4 | Type << 5 | ShiftAmount << 7;
     }
+    explicit Reg(const Op2Reg &op) {
+        memcpy(this, &op, sizeof(*this));
+    }
 };
 
 // Op2 has a mode labelled "<imm8m>", which is arm's magical
 // immediate encoding.  Some instructions actually get 8 bits of
 // data, which is called Imm8Data below.  These should have edit
 // distance > 1, but this is how it is for now.
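
As a quick reference (not from the patch), <imm8m> means an 8-bit value rotated right by an even amount; a small checker makes the rule concrete:

    // Returns true if v can be encoded as an ARM modified immediate (<imm8m>).
    static bool isImm8mEncodable(uint32_t v) {
        for (unsigned rot = 0; rot < 32; rot += 2) {
            uint32_t imm8 = (v << rot) | (rot ? (v >> (32 - rot)) : 0);  // rotate left to undo "ror rot"
            if (imm8 <= 0xff)
                return true;
        }
        return false;
    }
    // isImm8mEncodable(0x0000ff00) == true   (0xff ror 24)
    // isImm8mEncodable(0x000001fe) == false  (the 8 set bits start at an odd position)
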
 struct Imm8mData
 {
@@ -529,16 +567,17 @@ struct RIS
         return ShiftAmount;
     }
 
     RIS(uint32_t imm)
       : ShiftAmount(imm)
     {
         JS_ASSERT(ShiftAmount == imm);
     }
+    explicit RIS(Reg r) : ShiftAmount(r.ShiftAmount) { }
 };
 
 struct RRS
 {
     uint32_t MustZero : 1;
     // the register that holds the shift amount
     uint32_t RS : 4;
 
@@ -552,25 +591,31 @@ struct RRS
         return RS << 1;
     }
 };
 
 } // namespace datastore
 
 class MacroAssemblerARM;
 class Operand;
-
 class Operand2
 {
     friend class Operand;
     friend class MacroAssemblerARM;
-
+    friend class InstALU;
   public:
     uint32_t oper : 31;
     uint32_t invalid : 1;
+    bool isO2Reg() {
+        return !(oper & IsImmOp2);
+    }
+    Op2Reg toOp2Reg();
+    bool isImm8() {
+        return oper & IsImmOp2;
+    }
 
   protected:
     Operand2(datastore::Imm8mData base)
       : oper(base.invalid ? -1 : (base.encode() | (uint32_t)IsImmOp2)),
         invalid(base.invalid)
     { }
 
     Operand2(datastore::Reg base)
@@ -646,24 +691,54 @@ class Op2Reg : public Operand2
   public:
     Op2Reg(Register rm, ShiftType type, datastore::RIS shiftImm)
       : Operand2(datastore::Reg(rm.code(), type, 0, shiftImm.encode()))
     { }
 
     Op2Reg(Register rm, ShiftType type, datastore::RRS shiftReg)
       : Operand2(datastore::Reg(rm.code(), type, 1, shiftReg.encode()))
     { }
+    bool isO2RegImmShift() {
+        datastore::Reg r(*this);
+        return !r.RRS;
+    }
+    O2RegImmShift toO2RegImmShift();
+    bool isO2RegRegShift() {
+        datastore::Reg r(*this);
+        return r.RRS;
+    }
+    O2RegRegShift toO2RegRegShift();
+
+    bool checkType(ShiftType type) {
+        datastore::Reg r(*this);
+        return r.Type == type;
+    }
+    bool checkRM(Register rm) {
+        datastore::Reg r(*this);
+        return r.RM == rm.code();
+    }
+    bool getRM(Register *rm) {
+        datastore::Reg r(*this);
+        *rm = Register::FromCode(r.RM);
+        return true;
+    }
 };
 
 class O2RegImmShift : public Op2Reg
 {
   public:
     O2RegImmShift(Register rn, ShiftType type, uint32_t shift)
       : Op2Reg(rn, type, datastore::RIS(shift))
     { }
+    int getShift() {
+        datastore::Reg r(*this);
+        datastore::RIS ris(r);
+        return ris.ShiftAmount;
+    }
 };
 
 class O2RegRegShift : public Op2Reg
 {
   public:
     O2RegRegShift(Register rn, ShiftType type, Register rs)
       : Op2Reg(rn, type, datastore::RRS(rs.code()))
     { }
@@ -1174,29 +1249,21 @@ class Assembler
           : offset(offset),
             target(target),
             kind(kind)
         { }
     };
 
     // TODO: this should actually be a pool-like object
     //       It is currently a big hack, and probably shouldn't exist
-    class JumpPool;
     js::Vector<CodeLabel, 0, SystemAllocPolicy> codeLabels_;
     js::Vector<RelativePatch, 8, SystemAllocPolicy> jumps_;
-    js::Vector<JumpPool *, 0, SystemAllocPolicy> jumpPools_;
     js::Vector<BufferOffset, 0, SystemAllocPolicy> tmpJumpRelocations_;
     js::Vector<BufferOffset, 0, SystemAllocPolicy> tmpDataRelocations_;
     js::Vector<BufferOffset, 0, SystemAllocPolicy> tmpPreBarriers_;
-    class JumpPool : TempObject
-    {
-        BufferOffset start;
-        uint32_t size;
-        bool fixup(IonCode *code, uint8_t *data);
-    };
 
     CompactBufferWriter jumpRelocations_;
     CompactBufferWriter dataRelocations_;
     CompactBufferWriter relocations_;
     CompactBufferWriter preBarriers_;
 
     bool enoughMemory_;
 
@@ -1230,23 +1297,23 @@ class Assembler
     }
 
     // We need to wait until an AutoIonContextAlloc is created by the
     // IonMacroAssembler, before allocating any space.
     void initWithAllocator() {
         m_buffer.initWithAllocator();
 
         // Set up the backwards double region
-        new (&pools_[2]) Pool (1024, 8, 4, 8, 8, true);
+        new (&pools_[2]) Pool (1024, 8, 4, 8, 8, m_buffer.LifoAlloc_, true);
         // Set up the backwards 32 bit region
-        new (&pools_[3]) Pool (4096, 4, 4, 8, 4, true, true);
+        new (&pools_[3]) Pool (4096, 4, 4, 8, 4, m_buffer.LifoAlloc_, true, true);
         // Set up the forwards double region
-        new (doublePool) Pool (1024, 8, 4, 8, 8, false, false, &pools_[2]);
+        new (doublePool) Pool (1024, 8, 4, 8, 8, m_buffer.LifoAlloc_, false, false, &pools_[2]);
         // Set up the forwards 32 bit region
-        new (int32Pool) Pool (4096, 4, 4, 8, 4, false, true, &pools_[3]);
+        new (int32Pool) Pool (4096, 4, 4, 8, 4, m_buffer.LifoAlloc_, false, true, &pools_[3]);
         for (int i = 0; i < 4; i++) {
             if (pools_[i].poolData == NULL) {
                 m_buffer.fail_oom();
                 return;
             }
         }
     }
 
@@ -1296,17 +1363,16 @@ class Assembler
     void setPrinter(Sprinter *sp) {
     }
 
   private:
     bool isFinished;
   public:
     void finish();
     void executableCopy(void *buffer);
-    void processCodeLabels(uint8_t *rawCode);
     void copyJumpRelocationTable(uint8_t *dest);
     void copyDataRelocationTable(uint8_t *dest);
     void copyPreBarrierTable(uint8_t *dest);
 
     bool addCodeLabel(CodeLabel label);
 
     // Size of the instruction stream, in bytes.
     size_t size() const;
@@ -1329,20 +1395,20 @@ class Assembler
     static void writeInstStatic(uint32_t x, uint32_t *dest);
 
   public:
     void writeCodePointer(AbsoluteLabel *label);
 
     BufferOffset align(int alignment);
     BufferOffset as_nop();
     BufferOffset as_alu(Register dest, Register src1, Operand2 op2,
-                ALUOp op, SetCond_ sc = NoSetCond, Condition c = Always);
+                ALUOp op, SetCond_ sc = NoSetCond, Condition c = Always, Instruction *instdest = NULL);
 
     BufferOffset as_mov(Register dest,
-                Operand2 op2, SetCond_ sc = NoSetCond, Condition c = Always);
+                Operand2 op2, SetCond_ sc = NoSetCond, Condition c = Always, Instruction *instdest = NULL);
     BufferOffset as_mvn(Register dest, Operand2 op2,
                 SetCond_ sc = NoSetCond, Condition c = Always);
     // logical operations
     BufferOffset as_and(Register dest, Register src1,
                 Operand2 op2, SetCond_ sc = NoSetCond, Condition c = Always);
     BufferOffset as_bic(Register dest, Register src1,
                 Operand2 op2, SetCond_ sc = NoSetCond, Condition c = Always);
     BufferOffset as_eor(Register dest, Register src1,
@@ -1443,16 +1509,18 @@ class Assembler
     // bl can only branch+link to an immediate, never to a register
     // it never changes processor state
     BufferOffset as_bl();
     // bl #imm can have a condition code, blx #imm cannot.
     // blx reg can be conditional.
     BufferOffset as_bl(Label *l, Condition c);
     BufferOffset as_bl(BOffImm off, Condition c, BufferOffset inst);
 
+    BufferOffset as_mrs(Register r, Condition c = Always);
+    BufferOffset as_msr(Register r, Condition c = Always);
     // VFP instructions!
   private:
 
     enum vfp_size {
         isDouble = 1 << 8,
         isSingle = 0 << 8
     };
 
@@ -1534,26 +1602,28 @@ class Assembler
     // You can only transfer a range
 
     BufferOffset as_vdtm(LoadStore st, Register rn, VFPRegister vd, int length,
                  /*also has update conditions*/Condition c = Always);
 
     BufferOffset as_vimm(VFPRegister vd, VFPImm imm, Condition c = Always);
 
     BufferOffset as_vmrs(Register r, Condition c = Always);
+    BufferOffset as_vmsr(Register r, Condition c = Always);
     // label operations
     bool nextLink(BufferOffset b, BufferOffset *next);
     void bind(Label *label, BufferOffset boff = BufferOffset());
     void bind(RepatchLabel *label);
     uint32_t currentOffset() {
         return nextOffset().getOffset();
     }
     void retarget(Label *label, Label *target);
     // I'm going to pretend this doesn't exist for now.
     void retarget(Label *label, void *target, Relocation::Kind reloc);
     void Bind(uint8_t *rawCode, AbsoluteLabel *label, const void *address);
 
     void call(Label *label);
     void call(void *target);
 
     void as_bkpt();
 
   public:
@@ -1713,16 +1783,20 @@ class Assembler
     }
     static uint8_t *nextInstruction(uint8_t *instruction, uint32_t *count = NULL);
     // Toggle a jmp or cmp emitted by toggledJump().
 
     static void ToggleToJmp(CodeLocationLabel inst_);
     static void ToggleToCmp(CodeLocationLabel inst_);
 
     static void ToggleCall(CodeLocationLabel inst_, bool enabled);
+
+    static void updateBoundsCheck(uint32_t logHeapSize, Instruction *inst);
+    void processCodeLabels(uint8_t *rawCode);
+
 }; // Assembler
 
 // An Instruction is a structure for both encoding and decoding any and all ARM instructions.
 // Many classes have not been implemented thus far.
 class Instruction
 {
     uint32_t data;
 
@@ -1951,35 +2025,43 @@ class InstMovT : public InstMovWT
     static InstMovT *asTHIS (const Instruction &i);
 };
 
 class InstALU : public Instruction
 {
     static const int32_t ALUMask = 0xc << 24;
   public:
     InstALU (Register rd, Register rn, Operand2 op2, ALUOp op, SetCond_ sc, Assembler::Condition c)
-        : Instruction(RD(rd) | RN(rn) | op2.encode() | op | sc | c)
+        : Instruction(maybeRD(rd) | maybeRN(rn) | op2.encode() | op | sc, c)
     { }
     static bool isTHIS (const Instruction &i);
     static InstALU *asTHIS (const Instruction &i);
     void extractOp(ALUOp *ret);
     bool checkOp(ALUOp op);
     void extractDest(Register *ret);
     bool checkDest(Register rd);
     void extractOp1(Register *ret);
     bool checkOp1(Register rn);
-    void extractOp2(Operand2 *ret);
+    Operand2 extractOp2();
 };
+
 class InstCMP : public InstALU
 {
   public:
     static bool isTHIS (const Instruction &i);
     static InstCMP *asTHIS (const Instruction &i);
 };
 
+class InstMOV : public InstALU
+{
+  public:
+    static bool isTHIS (const Instruction &i);
+    static InstMOV *asTHIS (const Instruction &i);
+};
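
The Inst* wrappers above follow a common isTHIS/asTHIS pattern for decoding already-emitted code. A minimal usage sketch (the first/boundary pointers are assumed for illustration and are not from the patch):

    void scanForALU(Instruction *first, Instruction *boundary) {
        for (Instruction *iter = first; iter < boundary; iter = iter->next()) {
            if (!InstALU::isTHIS(*iter))
                continue;
            InstALU *alu = InstALU::asTHIS(*iter);
            ALUOp op;
            alu->extractOp(&op);   // e.g. decide whether this is the instruction to patch
        }
    }
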
+
 
 class InstructionIterator {
   private:
     Instruction *i;
   public:
     InstructionIterator(Instruction *i_) : i(i_) {}
     Instruction *next() {
         i = i->next();
--- a/js/src/ion/arm/CodeGenerator-arm.cpp
+++ b/js/src/ion/arm/CodeGenerator-arm.cpp
@@ -6,16 +6,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 
 #include "jscntxt.h"
 #include "jscompartment.h"
 #include "jsnum.h"
 
 #include "CodeGenerator-arm.h"
+#include "ion/CodeGenerator.h"
 #include "ion/IonCompartment.h"
 #include "ion/IonFrames.h"
 #include "ion/MIR.h"
 #include "ion/MIRGraph.h"
 #include "ion/MoveEmitter.h"
 #include "ion/shared/CodeGenerator-shared-inl.h"
 #include "vm/Shape.h"
 
@@ -31,36 +32,49 @@ CodeGeneratorARM::CodeGeneratorARM(MIRGe
   : CodeGeneratorShared(gen, graph, masm),
     deoptLabel_(NULL)
 {
 }
 
 bool
 CodeGeneratorARM::generatePrologue()
 {
-    // Note that this automatically sets MacroAssembler::framePushed().
-    masm.reserveStack(frameSize());
-    masm.checkStackAlignment();
+    if (gen->compilingAsmJS()) {
+        masm.Push(lr);
+        // Note that this automatically sets MacroAssembler::framePushed().
+        masm.reserveStack(frameDepth_);
+    } else {
+        // Note that this automatically sets MacroAssembler::framePushed().
+        masm.reserveStack(frameSize());
+        masm.checkStackAlignment();
+    }
+
     // Allocate returnLabel_ on the heap, so we don't run its destructor and
     // assert-not-bound in debug mode on compilation failure.
     returnLabel_ = new HeapLabel();
 
     return true;
 }
 
 bool
 CodeGeneratorARM::generateEpilogue()
 {
-    masm.bind(returnLabel_);
-
-    // Pop the stack we allocated at the start of the function.
-    masm.freeStack(frameSize());
-    JS_ASSERT(masm.framePushed() == 0);
-
-    masm.ma_pop(pc);
+    masm.bind(returnLabel_);
+    if (gen->compilingAsmJS()) {
+        // Pop the stack we allocated at the start of the function.
+        masm.freeStack(frameDepth_);
+        masm.Pop(pc);
+        JS_ASSERT(masm.framePushed() == 0);
+        //masm.as_bkpt();
+    } else {
+        // Pop the stack we allocated at the start of the function.
+        masm.freeStack(frameSize());
+        JS_ASSERT(masm.framePushed() == 0);
+        masm.ma_pop(pc);
+    }
     masm.dumpPool();
     return true;
 }
 
 void
 CodeGeneratorARM::emitBranch(Assembler::Condition cond, MBasicBlock *mirTrue, MBasicBlock *mirFalse)
 {
     LBlock *ifTrue = mirTrue->lir();
@@ -474,16 +488,17 @@ CodeGeneratorARM::visitMulI(LMulI *ins)
         }
     }
 
     return true;
 }
 
 extern "C" {
     extern int __aeabi_idivmod(int,int);
+    extern int __aeabi_uidivmod(int,int);
 }
 
 bool
 CodeGeneratorARM::visitDivI(LDivI *ins)
 {
     // Extract the registers from this instruction
     Register lhs = ToRegister(ins->lhs());
     Register rhs = ToRegister(ins->rhs());
@@ -838,17 +853,25 @@ typedef MoveResolver::MoveOperand MoveOp
 
 MoveOperand
 CodeGeneratorARM::toMoveOperand(const LAllocation *a) const
 {
     if (a->isGeneralReg())
         return MoveOperand(ToRegister(a));
     if (a->isFloatReg())
         return MoveOperand(ToFloatRegister(a));
-    return MoveOperand(StackPointer, ToStackOffset(a));
+    JS_ASSERT((ToStackOffset(a) & 3) == 0);
+    int32_t offset = ToStackOffset(a);
+
+    // The way the stack slots work, we assume that everything from depth == 0 downwards is
+    // writable. However, since our frame is included in this, ensure that the frame gets skipped.
+    if (gen->compilingAsmJS())
+        offset -= AlignmentMidPrologue;
+
+    return MoveOperand(StackPointer, offset);
 }
 
 bool
 CodeGeneratorARM::visitMoveGroup(LMoveGroup *group)
 {
     if (!group->numMoves())
         return true;
 
@@ -1173,29 +1196,19 @@ CodeGeneratorARM::visitUnbox(LUnbox *unb
     return true;
 }
 
 bool
 CodeGeneratorARM::visitDouble(LDouble *ins)
 {
 
     const LDefinition *out = ins->getDef(0);
-    const LConstantIndex *cindex = ins->getOperand(0)->toConstantIndex();
-    const Value &v = graph.getConstant(cindex->index());
 
-    masm.ma_vimm(v.toDouble(), ToFloatRegister(out));
+    masm.ma_vimm(ins->getDouble(), ToFloatRegister(out));
     return true;
-#if 0
-    DeferredDouble *d = new DeferredDouble(cindex->index());
-    if (!deferredDoubles_.append(d))
-        return false;
-
-    masm.movsd(d->label(), ToFloatRegister(out));
-    return true;
-#endif
 }
 
 Register
 CodeGeneratorARM::splitTagForTest(const ValueOperand &value)
 {
     return value.typeReg();
 }
 
@@ -1611,8 +1624,229 @@ CodeGeneratorARM::generateInvalidateEpil
 
     masm.branch(thunk);
 
     // We should never reach this point in JIT code -- the invalidation thunk should
     // pop the invalidated JS frame and return directly to its caller.
     masm.breakpoint();
     return true;
 }
+
+template <class U>
+Register
+getBase(U *mir)
+{
+    switch (mir->base()) {
+      case U::Heap: return HeapReg;
+      case U::Global: return GlobalReg;
+    }
+    return InvalidReg;
+}
+
+bool
+CodeGeneratorARM::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
+{
+    const MAsmJSLoadHeap *mir = ins->mir();
+    bool isSigned;
+    int size;
+    bool isFloat = false;
+    switch (mir->viewType()) {
+      case ArrayBufferView::TYPE_INT8:    isSigned = true; size = 8; break;
+      case ArrayBufferView::TYPE_UINT8:   isSigned = false; size = 8; break;
+      case ArrayBufferView::TYPE_INT16:   isSigned = true; size = 16; break;
+      case ArrayBufferView::TYPE_UINT16:  isSigned = false; size = 16; break;
+      case ArrayBufferView::TYPE_INT32:
+      case ArrayBufferView::TYPE_UINT32:  isSigned = true;  size = 32; break;
+      case ArrayBufferView::TYPE_FLOAT64: isFloat = true;   size = 64; break;
+      case ArrayBufferView::TYPE_FLOAT32:
+        isFloat = true;
+        size = 32;
+        break;
+      default: JS_NOT_REACHED("unexpected array type");
+    }
+    Register index = ToRegister(ins->ptr());
+    BufferOffset bo = masm.ma_BoundsCheck(index);
+    if (isFloat) {
+        VFPRegister vd(ToFloatRegister(ins->output()));
+        if (size == 32) {
+            masm.ma_vldr(vd.singleOverlay(), HeapReg, index, 0, Assembler::Zero);
+            masm.as_vcvt(vd, vd.singleOverlay(), false, Assembler::Zero);
+        } else {
+            masm.ma_vldr(vd, HeapReg, index, 0, Assembler::Zero);
+        }
+        masm.ma_vmov(NANReg, ToFloatRegister(ins->output()), Assembler::NonZero);
+    }  else {
+        masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, index,
+                              ToRegister(ins->output()), Offset, Assembler::Zero);
+        masm.ma_mov(Imm32(0), ToRegister(ins->output()), NoSetCond, Assembler::NonZero);
+    }
+    return gen->noteBoundsCheck(bo.getOffset());
+}
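
The conditional loads above give asm.js heap accesses their fallback semantics without a signal handler: the (later-patched) bounds check sets the flags, an in-bounds access performs the load, an out-of-bounds double load produces NaN, and an out-of-bounds integer load produces 0. Roughly, in C++ terms (helper names invented for illustration; the real heap length is a power of two patched in via updateBoundsCheck):

    #include <cstdint>
    #include <cstring>
    #include <limits>

    double loadHeapF64(const uint8_t *heap, uint32_t heapLength, uint32_t index) {
        if (index >= heapLength)
            return std::numeric_limits<double>::quiet_NaN();  // OOB double load -> NaN (NANReg)
        double d;
        std::memcpy(&d, heap + index, sizeof(d));
        return d;
    }

    int32_t loadHeapI32(const uint8_t *heap, uint32_t heapLength, uint32_t index) {
        if (index >= heapLength)
            return 0;                                         // OOB integer load -> 0
        int32_t v;
        std::memcpy(&v, heap + index, sizeof(v));
        return v;
    }
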
+
+bool
+CodeGeneratorARM::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
+{
+    const MAsmJSStoreHeap *mir = ins->mir();
+    bool isSigned;
+    int size;
+    bool isFloat = false;
+    switch (mir->viewType()) {
+      case ArrayBufferView::TYPE_INT8:
+      case ArrayBufferView::TYPE_UINT8:   isSigned = false; size = 8; break;
+      case ArrayBufferView::TYPE_INT16:
+      case ArrayBufferView::TYPE_UINT16:  isSigned = false; size = 16; break;
+      case ArrayBufferView::TYPE_INT32:
+      case ArrayBufferView::TYPE_UINT32:  isSigned = true;  size = 32; break;
+      case ArrayBufferView::TYPE_FLOAT64: isFloat = true;   size = 64; break;
+      case ArrayBufferView::TYPE_FLOAT32:
+        isFloat = true;
+        size = 32;
+        break;
+      default: JS_NOT_REACHED("unexpected array type");
+    }
+    Register index = ToRegister(ins->ptr());
+
+    BufferOffset bo = masm.ma_BoundsCheck(index);
+    if (isFloat) {
+        VFPRegister vd(ToFloatRegister(ins->value()));
+        if (size == 32) {
+            masm.storeFloat(vd, HeapReg, index, Assembler::Zero);
+        } else {
+            masm.ma_vstr(vd, HeapReg, index, 0, Assembler::Zero);
+        }
+    }  else {
+        masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, index,
+                              ToRegister(ins->value()), Offset, Assembler::Zero);
+    }
+    return gen->noteBoundsCheck(bo.getOffset());
+}
+
+bool
+CodeGeneratorARM::visitAsmJSPassStackArg(LAsmJSPassStackArg *ins)
+{
+    const MAsmJSPassStackArg *mir = ins->mir();
+    Operand dst(StackPointer, mir->spOffset());
+    if (ins->arg()->isConstant()) {
+        //masm.as_bkpt();
+        masm.ma_storeImm(Imm32(ToInt32(ins->arg())), dst);
+    } else {
+        if (ins->arg()->isGeneralReg())
+            masm.ma_str(ToRegister(ins->arg()), dst);
+        else
+            masm.ma_vstr(ToFloatRegister(ins->arg()), dst);
+    }
+
+    return true;
+}
+
+
+bool
+CodeGeneratorARM::visitAsmJSDivOrMod(LAsmJSDivOrMod *ins)
+{
+    //Register remainder = ToRegister(ins->remainder());
+    Register lhs = ToRegister(ins->lhs());
+    Register rhs = ToRegister(ins->rhs());
+    Register output = ToRegister(ins->output());
+
+    // Lowering pins lhs to r0, rhs to r1, and the output to r0 (quotient) or r1 (remainder).
+    JS_ASSERT(ins->mirRaw()->isAsmJSUDiv() || ins->mirRaw()->isAsmJSUMod());
+
+    Label afterDiv;
+
+    masm.ma_cmp(rhs, Imm32(0));
+    Label notzero;
+    masm.ma_b(&notzero, Assembler::NonZero);
+    masm.ma_mov(Imm32(0), output);
+    masm.ma_b(&afterDiv);
+    masm.bind(&notzero);
+
+    masm.setupAlignedABICall(2);
+    masm.passABIArg(lhs);
+    masm.passABIArg(rhs);
+    masm.callWithABI(JS_FUNC_TO_DATA_PTR(void *, __aeabi_uidivmod));
+
+    masm.bind(&afterDiv);
+    return true;
+}
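
__aeabi_uidivmod is the EABI helper that returns the quotient in r0 and the remainder in r1, which is why the lowering below pins the result of AsmJSUDiv to r0 and of AsmJSUMod to r1. The explicit zero check preserves asm.js semantics, where (x/0)|0 and (x%0)|0 are both 0. Equivalent C++ (a sketch, not from the patch):

    uint32_t asmjsUDivOrMod(uint32_t lhs, uint32_t rhs, bool wantRemainder) {
        if (rhs == 0)
            return 0;                        // division by zero folds to 0 in asm.js
        return wantRemainder ? lhs % rhs     // __aeabi_uidivmod leaves this in r1
                             : lhs / rhs;    // ... and the quotient in r0
    }
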
+
+bool
+CodeGeneratorARM::visitEffectiveAddress(LEffectiveAddress *ins)
+{
+    const MEffectiveAddress *mir = ins->mir();
+    Register base = ToRegister(ins->base());
+    Register index = ToRegister(ins->index());
+    Register output = ToRegister(ins->output());
+    masm.as_add(output, base, lsl(index, mir->scale()));
+    masm.ma_add(Imm32(mir->displacement()), output);
+    return true;
+}
+
+bool
+CodeGeneratorARM::visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins)
+{
+    const MAsmJSLoadGlobalVar *mir = ins->mir();
+    unsigned addr = mir->globalDataOffset();
+    if (mir->type() == MIRType_Int32)
+        masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr), ToRegister(ins->output()));
+    else
+        masm.ma_vldr(Operand(GlobalReg, addr), ToFloatRegister(ins->output()));
+    return true;
+}
+
+bool
+CodeGeneratorARM::visitAsmJSStoreGlobalVar(LAsmJSStoreGlobalVar *ins)
+{
+    const MAsmJSStoreGlobalVar *mir = ins->mir();
+
+    MIRType type = mir->value()->type();
+    JS_ASSERT(type == MIRType_Int32 || type == MIRType_Double);
+    unsigned addr = mir->globalDataOffset();
+    if (mir->value()->type() == MIRType_Int32)
+        masm.ma_dtr(IsStore, GlobalReg, Imm32(addr), ToRegister(ins->value()));
+    else
+        masm.ma_vstr(ToFloatRegister(ins->value()), Operand(GlobalReg, addr));
+    return true;
+}
+
+bool
+CodeGeneratorARM::visitAsmJSLoadFuncPtr(LAsmJSLoadFuncPtr *ins)
+{
+    const MAsmJSLoadFuncPtr *mir = ins->mir();
+
+    Register index = ToRegister(ins->index());
+    Register tmp = ToRegister(ins->temp());
+    Register out = ToRegister(ins->output());
+    unsigned addr = mir->globalDataOffset();
+    masm.ma_mov(Imm32(addr), tmp);
+    masm.as_add(tmp, tmp, lsl(index, 2));
+    masm.ma_ldr(DTRAddr(GlobalReg, DtrRegImmShift(tmp, LSL, 0)), out);
+
+    return true;
+}
+
+bool
+CodeGeneratorARM::visitAsmJSLoadFFIFunc(LAsmJSLoadFFIFunc *ins)
+{
+    const MAsmJSLoadFFIFunc *mir = ins->mir();
+
+    masm.ma_ldr(Operand(GlobalReg, mir->globalDataOffset()), ToRegister(ins->output()));
+
+    return true;
+}
+
+bool
+CodeGeneratorARM::visitNegI(LNegI *ins)
+{
+    Register input = ToRegister(ins->input());
+    masm.ma_neg(input, ToRegister(ins->output()));
+    return true;
+}
+
+bool
+CodeGeneratorARM::visitNegD(LNegD *ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    masm.ma_vneg(input, ToFloatRegister(ins->output()));
+    return true;
+}
--- a/js/src/ion/arm/CodeGenerator-arm.h
+++ b/js/src/ion/arm/CodeGenerator-arm.h
@@ -136,19 +136,41 @@ class CodeGeneratorARM : public CodeGene
     bool visitLoadElementT(LLoadElementT *load);
 
     bool visitGuardShape(LGuardShape *guard);
     bool visitGuardClass(LGuardClass *guard);
     bool visitImplicitThis(LImplicitThis *lir);
 
     bool visitInterruptCheck(LInterruptCheck *lir);
 
-    bool generateInvalidateEpilogue();
+    bool visitNegI(LNegI *lir);
+    bool visitNegD(LNegD *lir);
+    bool visitAsmJSLoadHeap(LAsmJSLoadHeap *ins);
+    bool visitAsmJSStoreHeap(LAsmJSStoreHeap *ins);
+    bool visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins);
+    bool visitAsmJSStoreGlobalVar(LAsmJSStoreGlobalVar *ins);
+    bool visitAsmJSLoadFuncPtr(LAsmJSLoadFuncPtr *ins);
+    bool visitAsmJSLoadFFIFunc(LAsmJSLoadFFIFunc *ins);
+
+    bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
 
-    void postAsmJSCall(LAsmJSCall *lir) {}
+    bool generateInvalidateEpilogue();
+  protected:
+    bool generateAsmJSPrologue(const MIRTypeVector &argTypes, MIRType returnType,
+                             Label *internalEntry);
+    void postAsmJSCall(LAsmJSCall *lir) {
+#if !defined(JS_CPU_ARM_HARDFP)
+        if (lir->mir()->type() == MIRType_Double) {
+            masm.ma_vxfer(r0, r1, d0);
+        }
+#endif
+    }
+
+    bool visitEffectiveAddress(LEffectiveAddress *ins);
+    bool visitAsmJSDivOrMod(LAsmJSDivOrMod *ins);
 };
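
postAsmJSCall above exists because, under the base (softfp) AAPCS used when JS_CPU_ARM_HARDFP is not defined, a double return value arrives in the core pair r0:r1 and has to be moved into d0 before the rest of Ion can use it. A minimal sketch of that fixup, assuming a little-endian target with r0 holding the low word (illustrative, not the emitter itself):

    void fixupSoftFpDoubleReturn(MacroAssemblerARM &masm) {
        // Reassemble the returned double's bits from r0 (low) and r1 (high) into d0;
        // this emits roughly "vmov d0, r0, r1".
        masm.ma_vxfer(r0, r1, d0);
    }
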
 
 typedef CodeGeneratorARM CodeGeneratorSpecific;
 
 // An out-of-line bailout thunk.
 class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM>
 {
     LSnapshot *snapshot_;
--- a/js/src/ion/arm/IonFrames-arm.h
+++ b/js/src/ion/arm/IonFrames-arm.h
@@ -4,16 +4,17 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef jsion_ionframes_arm_h__
 #define jsion_ionframes_arm_h__
 
 #include "ion/shared/IonFrames-shared.h"
+//#include "ion/arm/Assembler-arm.h"
 
 namespace js {
 namespace ion {
 
 class IonFramePrefix;
 // Layout of the frame prefix. This assumes the stack architecture grows down.
 // If this is ever not the case, we'll have to refactor.
 class IonCommonFrameLayout
--- a/js/src/ion/arm/LIR-arm.h
+++ b/js/src/ion/arm/LIR-arm.h
@@ -66,23 +66,26 @@ class LUnboxDouble : public LInstruction
     static const size_t Input = 0;
 
     MUnbox *mir() const {
         return mir_->toUnbox();
     }
 };
 
 // Constant double.
-class LDouble : public LInstructionHelper<1, 1, 0>
+class LDouble : public LInstructionHelper<1, 0, 0>
 {
+    double d_;
   public:
     LIR_HEADER(Double);
 
-    LDouble(const LConstantIndex &cindex) {
-        setOperand(0, cindex);
+    LDouble(double d) : d_(d)
+    { }
+    double getDouble() const {
+        return d_;
     }
 };
 
 // Convert a 32-bit unsigned integer to a double.
 class LUInt32ToDouble : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(UInt32ToDouble)
@@ -289,12 +292,49 @@ class LMulI : public LBinaryMath<0>
   public:
     LIR_HEADER(MulI);
 
     MMul *mir() {
         return mir_->toMul();
     }
 };
 
+// Performs an unsigned integer division via a call to __aeabi_uidivmod, yielding either the
+// quotient or the remainder depending on whether this instruction is defined to output r0
+// (quotient) or r1 (remainder).
+class LAsmJSDivOrMod : public LBinaryMath<2>
+{
+  public:
+    LIR_HEADER(AsmJSDivOrMod);
+
+    LAsmJSDivOrMod(const LAllocation &lhs, const LAllocation &rhs, const LDefinition &temp1, const LDefinition &temp2) {
+        setOperand(0, lhs);
+        setOperand(1, rhs);
+        setTemp(0, temp1);
+        setTemp(1, temp2);
+    }
+    // This is incorrect: the remainder is returned in r1, but getTemp(0) is r2.
+    const LDefinition *remainder() {
+        return getTemp(0);
+    }
+};
+
+class LAsmJSLoadFuncPtr : public LInstructionHelper<1, 1, 1>
+{
+  public:
+    LIR_HEADER(AsmJSLoadFuncPtr);
+    LAsmJSLoadFuncPtr(const LAllocation &index, const LDefinition &temp) {
+        setOperand(0, index);
+        setTemp(0, temp);
+    }
+    const MAsmJSLoadFuncPtr *mir() const {
+        return mir_->toAsmJSLoadFuncPtr();
+    }
+    const LAllocation *index() {
+        return getOperand(0);
+    }
+    const LDefinition *temp() {
+        return getTemp(0);
+    }
+};
+
 } // namespace ion
 } // namespace js
 
 #endif // jsion_lir_arm_h__
--- a/js/src/ion/arm/LOpcodes-arm.h
+++ b/js/src/ion/arm/LOpcodes-arm.h
@@ -13,12 +13,15 @@
     _(UnboxDouble)              \
     _(Box)                      \
     _(BoxDouble)                \
     _(DivI)                     \
     _(ModI)                     \
     _(ModPowTwoI)               \
     _(ModMaskI)                 \
     _(PowHalfD)                 \
-    _(UInt32ToDouble)
+    _(UInt32ToDouble)           \
+    _(AsmJSDivOrMod)            \
+    _(AsmJSLoadFuncPtr)
 
 #endif // jsion_lir_opcodes_arm_h__
 
--- a/js/src/ion/arm/Lowering-arm.cpp
+++ b/js/src/ion/arm/Lowering-arm.cpp
@@ -38,31 +38,25 @@ LIRGeneratorARM::useBoxFixed(LInstructio
     lir->setOperand(n + 1, LUse(reg2, VirtualRegisterOfPayload(mir)));
     return true;
 }
 
 bool
 LIRGeneratorARM::lowerConstantDouble(double d, MInstruction *mir)
 {
-    uint32_t index;
-    if (!lirGraph_.addConstantToPool(DoubleValue(d), &index))
-        return false;
-
-    LDouble *lir = new LDouble(LConstantIndex::FromIndex(index));
+    LDouble *lir = new LDouble(d);
     return define(lir, mir);
 }
 
 bool
 LIRGeneratorARM::visitConstant(MConstant *ins)
 {
     if (ins->type() == MIRType_Double) {
-        uint32_t index;
-        if (!lirGraph_.addConstantToPool(ins->value(), &index))
-            return false;
-        LDouble *lir = new LDouble(LConstantIndex::FromIndex(index));
+        LDouble *lir = new LDouble(ins->value().toDouble());
         return define(lir, ins);
     }
 
     // Emit non-double constants at their uses.
     if (ins->canEmitAtUses())
         return emitAtUses(ins);
 
     return LIRGeneratorShared::visitConstant(ins);
@@ -370,8 +364,73 @@ LIRGeneratorARM::lowerUrshD(MUrsh *mir)
     MDefinition *rhs = mir->rhs();
 
     JS_ASSERT(lhs->type() == MIRType_Int32);
     JS_ASSERT(rhs->type() == MIRType_Int32);
 
     LUrshD *lir = new LUrshD(useRegister(lhs), useRegisterOrConstant(rhs), temp());
     return define(lir, mir);
 }
+
+bool
+LIRGeneratorARM::visitAsmJSNeg(MAsmJSNeg *ins)
+{
+    if (ins->type() == MIRType_Int32)
+        return define(new LNegI(useRegisterAtStart(ins->input())), ins);
+
+    JS_ASSERT(ins->type() == MIRType_Double);
+    return define(new LNegD(useRegisterAtStart(ins->input())), ins);
+}
+bool
+LIRGeneratorARM::visitAsmJSUDiv(MAsmJSUDiv *div)
+{
+    LAsmJSDivOrMod *lir = new LAsmJSDivOrMod(useFixed(div->lhs(), r0),
+                                         useFixed(div->rhs(), r1),
+                                         tempFixed(r2), tempFixed(r3));
+    return defineFixed(lir, div, LAllocation(AnyRegister(r0)));
+}
+
+bool
+LIRGeneratorARM::visitAsmJSUMod(MAsmJSUMod *mod)
+{
+    LAsmJSDivOrMod *lir = new LAsmJSDivOrMod(useFixed(mod->lhs(), r0),
+                                         useFixed(mod->rhs(), r1),
+                                         tempFixed(r2), tempFixed(r3));
+    return defineFixed(lir, mod, LAllocation(AnyRegister(r1)));
+}
+
+bool
+LIRGeneratorARM::visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins)
+{
+    JS_ASSERT(ins->input()->type() == MIRType_Int32);
+    LUInt32ToDouble *lir = new LUInt32ToDouble(useRegisterAtStart(ins->input()));
+    return define(lir, ins);
+}
+
+bool
+LIRGeneratorARM::visitAsmJSStoreHeap(MAsmJSStoreHeap *ins)
+{
+    LAsmJSStoreHeap *lir;
+    switch (ins->viewType()) {
+      case ArrayBufferView::TYPE_INT8: case ArrayBufferView::TYPE_UINT8:
+      case ArrayBufferView::TYPE_INT16: case ArrayBufferView::TYPE_UINT16:
+      case ArrayBufferView::TYPE_INT32: case ArrayBufferView::TYPE_UINT32:
+        lir = new LAsmJSStoreHeap(useRegisterAtStart(ins->ptr()),
+                                  useRegisterAtStart(ins->value()));
+        break;
+      case ArrayBufferView::TYPE_FLOAT32:
+      case ArrayBufferView::TYPE_FLOAT64:
+        lir = new LAsmJSStoreHeap(useRegisterAtStart(ins->ptr()),
+                                  useRegisterAtStart(ins->value()));
+        break;
+      default: JS_NOT_REACHED("unexpected array type");
+    }
+
+    return add(lir, ins);
+}
+
+bool
+LIRGeneratorARM::visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins)
+{
+    return define(new LAsmJSLoadFuncPtr(useRegister(ins->index()), temp()), ins);
+}
+
+//__aeabi_uidiv
--- a/js/src/ion/arm/Lowering-arm.h
+++ b/js/src/ion/arm/Lowering-arm.h
@@ -43,30 +43,36 @@ class LIRGeneratorARM : public LIRGenera
     bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
                      MDefinition *lhs, MDefinition *rhs);
 
     bool lowerConstantDouble(double d, MInstruction *ins);
     bool lowerDivI(MDiv *div);
     bool lowerModI(MMod *mod);
     bool lowerMulI(MMul *mul, MDefinition *lhs, MDefinition *rhs);
     bool visitPowHalf(MPowHalf *ins);
+    bool visitAsmJSNeg(MAsmJSNeg *ins);
+    bool visitAsmJSUDiv(MAsmJSUDiv *ins);
+    bool visitAsmJSUMod(MAsmJSUMod *ins);
 
     LTableSwitch *newLTableSwitch(const LAllocation &in, const LDefinition &inputCopy,
                                   MTableSwitch *ins);
     LTableSwitchV *newLTableSwitchV(MTableSwitch *ins);
 
   public:
     bool visitConstant(MConstant *ins);
     bool visitBox(MBox *box);
     bool visitUnbox(MUnbox *unbox);
     bool visitReturn(MReturn *ret);
     bool lowerPhi(MPhi *phi);
     bool visitGuardShape(MGuardShape *ins);
     bool visitStoreTypedArrayElement(MStoreTypedArrayElement *ins);
     bool visitStoreTypedArrayElementHole(MStoreTypedArrayElementHole *ins);
+    bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
+    bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
+    bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitInterruptCheck(MInterruptCheck *ins);
 };
 
 typedef LIRGeneratorARM LIRGeneratorSpecific;
 
 } // namespace ion
 } // namespace js
 
--- a/js/src/ion/arm/MacroAssembler-arm.cpp
+++ b/js/src/ion/arm/MacroAssembler-arm.cpp
@@ -367,16 +367,23 @@ MacroAssemblerARM::ma_mov(Register src, 
 void
 MacroAssemblerARM::ma_mov(Imm32 imm, Register dest,
                           SetCond_ sc, Assembler::Condition c)
 {
     ma_alu(InvalidReg, imm, dest, op_mov, sc, c);
 }
 
 void
+MacroAssemblerARM::ma_mov(ImmWord imm, Register dest,
+                          SetCond_ sc, Assembler::Condition c)
+{
+    ma_alu(InvalidReg, Imm32(imm.value), dest, op_mov, sc, c);
+}
+
+void
 MacroAssemblerARM::ma_mov(const ImmGCPtr &ptr, Register dest)
 {
     // As opposed to x86/x64 version, the data relocation has to be executed
     // before to recover the pointer, and not after.
     writeDataRelocation(ptr);
     RelocStyle rs;
     if (hasMOVWT()) {
         rs = L_MOVWT;
@@ -968,37 +975,46 @@ MacroAssemblerARM::ma_strh(Register rt, 
 
 void
 MacroAssemblerARM::ma_strb(Register rt, DTRAddr addr, Index mode, Condition cc)
 {
     as_dtr(IsStore, 8, mode, rt, addr, cc);
 }
 
 // Specialty for moving N bits of data, where n == 8,16,32,64.
-void
+BufferOffset
 MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
                           Register rn, Register rm, Register rt,
-                          Index mode, Assembler::Condition cc)
-{
-    JS_NOT_REACHED("Feature NYI");
-}
-
-void
+                                    Index mode, Assembler::Condition cc, unsigned shiftAmount)
+{
+    if (size == 32 || (size == 8 && !IsSigned)) {
+        return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(rm, LSL, shiftAmount)), cc);
+    } else {
+        if (shiftAmount != 0) {
+            JS_ASSERT(rn != ScratchRegister);
+            JS_ASSERT(rt != ScratchRegister);
+            ma_lsl(Imm32(shiftAmount), rm, ScratchRegister);
+            rm = ScratchRegister;
+        }
+        return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)), cc);
+    }
+}
+
+BufferOffset
 MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
                                     Register rn, Imm32 offset, Register rt,
                                     Index mode, Assembler::Condition cc)
 {
     int off = offset.value;
     // we can encode this as a standard ldr... MAKE IT SO
     if (size == 32 || (size == 8 && !IsSigned) ) {
         if (off < 4096 && off > -4096) {
             // This encodes as a single instruction, Emulating mode's behavior
             // in a multi-instruction sequence is not necessary.
-            as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrOffImm(off)), cc);
-            return;
+            return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrOffImm(off)), cc);
         }
 
         // We cannot encode this offset in a single ldr. For mode == index,
         // try to encode it as |add scratch, base, imm; ldr dest, [scratch, +offset]|.
         // This does not work for mode == PreIndex or mode == PostIndex.
         // (A worked example of this offset splitting follows the function.)
         // PreIndex is simple, just do the add into the base register first, then do
         // a PreIndex'ed load. PostIndexed loads can be tricky.  Normally, doing the load with
         // an index of 0, then doing an add would work, but if the destination is the PC,
@@ -1026,18 +1042,17 @@ MacroAssemblerARM::ma_dataTransferN(Load
         //
         // mode == PostIndex, dest != pc
         //  ldr   dest, [base], offset_lo
         //  add   base, base, offset_hi
 
         if (rt == pc && mode == PostIndex && ls == IsLoad) {
             ma_mov(rn, ScratchRegister);
             ma_alu(rn, offset, rn, op_add);
-            as_dtr(IsLoad, size, Offset, pc, DTRAddr(ScratchRegister, DtrOffImm(0)), cc);
-            return;
+            return as_dtr(IsLoad, size, Offset, pc, DTRAddr(ScratchRegister, DtrOffImm(0)), cc);
         }
 
         int bottom = off & 0xfff;
         int neg_bottom = 0x1000 - bottom;
         // For a regular offset, base == ScratchRegister does what we want.  Modify the
         // scratch register, leaving the actual base unscathed.
         Register base = ScratchRegister;
         // For the preindex case, we want to just re-use rn as the base register, so when
@@ -1046,92 +1061,84 @@ MacroAssemblerARM::ma_dataTransferN(Load
             base = rn;
         JS_ASSERT(mode != PostIndex);
         // at this point, both off - bottom and off + neg_bottom will be reasonable-ish
         // quantities.
         if (off < 0) {
             Operand2 sub_off = Imm8(-(off-bottom)); // sub_off = bottom - off
             if (!sub_off.invalid) {
                 as_sub(ScratchRegister, rn, sub_off, NoSetCond, cc); // - sub_off = off - bottom
-                as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(bottom)), cc);
-                return;
+                return as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(bottom)), cc);
             }
             sub_off = Imm8(-(off+neg_bottom));// sub_off = -neg_bottom - off
             if (!sub_off.invalid) {
                 as_sub(ScratchRegister, rn, sub_off, NoSetCond, cc); // - sub_off = neg_bottom + off
-                as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(-neg_bottom)), cc);
-                return;
+                return as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(-neg_bottom)), cc);
             }
         } else {
             Operand2 sub_off = Imm8(off-bottom); // sub_off = off - bottom
             if (!sub_off.invalid) {
                 as_add(ScratchRegister, rn, sub_off, NoSetCond, cc); //  sub_off = off - bottom
-                as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(bottom)), cc);
-                return;
+                return as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(bottom)), cc);
             }
             sub_off = Imm8(off+neg_bottom);// sub_off = neg_bottom + off
             if (!sub_off.invalid) {
                 as_add(ScratchRegister, rn, sub_off, NoSetCond,  cc); // sub_off = neg_bottom + off
-                as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(-neg_bottom)), cc);
-                return;
+                return as_dtr(ls, size, Offset, rt, DTRAddr(ScratchRegister, DtrOffImm(-neg_bottom)), cc);
             }
         }
         ma_mov(offset, ScratchRegister);
-        as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(ScratchRegister, LSL, 0)));
+        return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(ScratchRegister, LSL, 0)));
     } else {
         // should attempt to use the extended load/store instructions
-        if (off < 256 && off > -256) {
-            as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffImm(off)), cc);
-            return;
-        }
+        if (off < 256 && off > -256)
+            return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffImm(off)), cc);
+
         // We cannot encode this offset in a single extldr.  Try to encode it as
         // an add scratch, base, imm; extldr dest, [scratch, +offset].
         int bottom = off & 0xff;
         int neg_bottom = 0x100 - bottom;
         // at this point, both off - bottom and off + neg_bottom will be reasonable-ish
         // quantities.
         if (off < 0) {
             Operand2 sub_off = Imm8(-(off-bottom)); // sub_off = bottom - off
             if (!sub_off.invalid) {
                 as_sub(ScratchRegister, rn, sub_off, NoSetCond, cc); // - sub_off = off - bottom
-                as_extdtr(ls, size, IsSigned, Offset, rt,
-                          EDtrAddr(ScratchRegister, EDtrOffImm(bottom)),
-                          cc);
-                return;
+                return as_extdtr(ls, size, IsSigned, Offset, rt,
+                                 EDtrAddr(ScratchRegister, EDtrOffImm(bottom)),
+                                 cc);
             }
             sub_off = Imm8(-(off+neg_bottom));// sub_off = -neg_bottom - off
             if (!sub_off.invalid) {
                 as_sub(ScratchRegister, rn, sub_off, NoSetCond, cc); // - sub_off = neg_bottom + off
-                as_extdtr(ls, size, IsSigned, Offset, rt,
-                          EDtrAddr(ScratchRegister, EDtrOffImm(-neg_bottom)),
-                          cc);
-                return;
+                return as_extdtr(ls, size, IsSigned, Offset, rt,
+                                 EDtrAddr(ScratchRegister, EDtrOffImm(-neg_bottom)),
+                                 cc);
             }
         } else {
             Operand2 sub_off = Imm8(off-bottom); // sub_off = off - bottom
             if (!sub_off.invalid) {
                 as_add(ScratchRegister, rn, sub_off, NoSetCond, cc); //  sub_off = off - bottom
-                as_extdtr(ls, size, IsSigned, Offset, rt,
-                          EDtrAddr(ScratchRegister, EDtrOffImm(bottom)),
-                          cc);
-                return;
+                return as_extdtr(ls, size, IsSigned, Offset, rt,
+                                 EDtrAddr(ScratchRegister, EDtrOffImm(bottom)),
+                                 cc);
             }
             sub_off = Imm8(off+neg_bottom);// sub_off = neg_bottom + off
             if (!sub_off.invalid) {
                 as_add(ScratchRegister, rn, sub_off, NoSetCond,  cc); // sub_off = neg_bottom + off
-                as_extdtr(ls, size, IsSigned, Offset, rt,
-                          EDtrAddr(ScratchRegister, EDtrOffImm(-neg_bottom)),
-                          cc);
-                return;
+                return as_extdtr(ls, size, IsSigned, Offset, rt,
+                                 EDtrAddr(ScratchRegister, EDtrOffImm(-neg_bottom)),
+                                 cc);
             }
         }
         ma_mov(offset, ScratchRegister);
-        as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(ScratchRegister)), cc);
+        return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(ScratchRegister)), cc);
     }
 }
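
To make the offset-splitting strategy above concrete, here is a worked example (illustrative, not emitted verbatim by the patch): for off == 0x1234 on the word-sized path, bottom == 0x234 and off - bottom == 0x1000, which Imm8 can encode (0x01 ror 20), so the access becomes:

    add  scratch, rn, #0x1000
    ldr  rt, [scratch, #+0x234]
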
+
 void
 MacroAssemblerARM::ma_pop(Register r)
 {
     ma_dtr(IsLoad, sp, Imm32(4), r, PostIndex);
     if (r == pc)
         m_buffer.markGuard();
 }
 void
@@ -1346,102 +1353,108 @@ MacroAssemblerARM::ma_vxfer(FloatRegiste
 
 void
 MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest1, Register dest2, Condition cc)
 {
     as_vxfer(dest1, dest2, VFPRegister(src), FloatToCore, cc);
 }
 
 void
+MacroAssemblerARM::ma_vxfer(Register src1, Register src2, FloatRegister dest, Condition cc)
+{
+    as_vxfer(src1, src2, VFPRegister(dest), CoreToFloat, cc);
+}
+
+void
 MacroAssemblerARM::ma_vxfer(VFPRegister src, Register dest, Condition cc)
 {
     as_vxfer(dest, InvalidReg, src, FloatToCore, cc);
 }
 
 void
 MacroAssemblerARM::ma_vxfer(VFPRegister src, Register dest1, Register dest2, Condition cc)
 {
     as_vxfer(dest1, dest2, src, FloatToCore, cc);
 }
 
-void
+BufferOffset
 MacroAssemblerARM::ma_vdtr(LoadStore ls, const Operand &addr, VFPRegister rt, Condition cc)
 {
     int off = addr.disp();
     JS_ASSERT((off & 3) == 0);
     Register base = Register::FromCode(addr.base());
-    if (off > -1024 && off < 1024) {
-        as_vdtr(ls, rt, addr.toVFPAddr(), cc);
-        return;
-    }
+    if (off > -1024 && off < 1024)
+        return as_vdtr(ls, rt, addr.toVFPAddr(), cc);
 
     // We cannot encode this offset in a single ldr.  Try to encode it as
     // an add scratch, base, imm; ldr dest, [scratch, +offset].
     int bottom = off & (0xff << 2);
     int neg_bottom = (0x100 << 2) - bottom;
     // at this point, both off - bottom and off + neg_bottom will be reasonable-ish
     // quantities.
     if (off < 0) {
         Operand2 sub_off = Imm8(-(off-bottom)); // sub_off = bottom - off
         if (!sub_off.invalid) {
             as_sub(ScratchRegister, base, sub_off, NoSetCond, cc); // - sub_off = off - bottom
-            as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(bottom)), cc);
-            return;
+            return as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(bottom)), cc);
         }
         sub_off = Imm8(-(off+neg_bottom));// sub_off = -neg_bottom - off
         if (!sub_off.invalid) {
             as_sub(ScratchRegister, base, sub_off, NoSetCond, cc); // - sub_off = neg_bottom + off
-            as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(-neg_bottom)), cc);
-            return;
+            return as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(-neg_bottom)), cc);
         }
     } else {
         Operand2 sub_off = Imm8(off-bottom); // sub_off = off - bottom
         if (!sub_off.invalid) {
             as_add(ScratchRegister, base, sub_off, NoSetCond, cc); //  sub_off = off - bottom
-            as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(bottom)), cc);
-            return;
+            return as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(bottom)), cc);
         }
         sub_off = Imm8(off+neg_bottom);// sub_off = neg_bottom + off
         if (!sub_off.invalid) {
             as_add(ScratchRegister, base, sub_off, NoSetCond,  cc); // sub_off = neg_bottom + off
-            as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(-neg_bottom)), cc);
-            return;
+            return as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(-neg_bottom)), cc);
         }
     }
     ma_add(base, Imm32(off), ScratchRegister, NoSetCond, cc);
-    as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(0)), cc);
-}
-
-void
+    return as_vdtr(ls, rt, VFPAddr(ScratchRegister, VFPOffImm(0)), cc);
+}
+
+BufferOffset
 MacroAssemblerARM::ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc)
 {
-    as_vdtr(IsLoad, dest, addr, cc);
-}
-void
+    return as_vdtr(IsLoad, dest, addr, cc);
+}
+BufferOffset
 MacroAssemblerARM::ma_vldr(const Operand &addr, VFPRegister dest, Condition cc)
 {
-    ma_vdtr(IsLoad, addr, dest, cc);
-}
-
-void
+    return ma_vdtr(IsLoad, addr, dest, cc);
+}
+BufferOffset
+MacroAssemblerARM::ma_vldr(VFPRegister src, Register base, Register index, int32_t shift, Condition cc)
+{
+    as_add(ScratchRegister, base, lsl(index, shift), NoSetCond, cc);
+    return ma_vldr(Operand(ScratchRegister, 0), src, cc);
+}
+
+BufferOffset
 MacroAssemblerARM::ma_vstr(VFPRegister src, VFPAddr addr, Condition cc)
 {
-    as_vdtr(IsStore, src, addr, cc);
-}
-
-void
+    return as_vdtr(IsStore, src, addr, cc);
+}
+
+BufferOffset
 MacroAssemblerARM::ma_vstr(VFPRegister src, const Operand &addr, Condition cc)
 {
-    ma_vdtr(IsStore, addr, src, cc);
-}
-void
+    return ma_vdtr(IsStore, addr, src, cc);
+}
+BufferOffset
 MacroAssemblerARM::ma_vstr(VFPRegister src, Register base, Register index, int32_t shift, Condition cc)
 {
     as_add(ScratchRegister, base, lsl(index, shift), NoSetCond, cc);
-    ma_vstr(src, Operand(ScratchRegister, 0), cc);
+    return ma_vstr(src, Operand(ScratchRegister, 0), cc);
 }
 
 bool
 MacroAssemblerARMCompat::buildFakeExitFrame(const Register &scratch, uint32_t *offset)
 {
     DebugOnly<uint32_t> initialDepth = framePushed();
     uint32_t descriptor = MakeFrameDescriptor(framePushed(), IonFrame_OptimizedJS);
 
@@ -3343,8 +3356,9 @@ MacroAssemblerARMCompat::jumpWithPatch(R
     ARMBuffer::PoolEntry pe;
     BufferOffset bo = as_BranchPool(0xdeadbeef, label, &pe, cond);
 
     // Fill in a new CodeOffset with both the load and the
     // pool entry that the instruction loads from.
     CodeOffsetJump ret(bo.getOffset(), pe.encode());
     return ret;
 }
+
--- a/js/src/ion/arm/MacroAssembler-arm.h
+++ b/js/src/ion/arm/MacroAssembler-arm.h
@@ -86,16 +86,18 @@ class MacroAssemblerARM : public Assembl
     // to explicitly have all of this code.
     // ALU based ops
     // mov
     void ma_mov(Register src, Register dest,
                 SetCond_ sc = NoSetCond, Condition c = Always);
 
     void ma_mov(Imm32 imm, Register dest,
                 SetCond_ sc = NoSetCond, Condition c = Always);
+    void ma_mov(ImmWord imm, Register dest,
+                SetCond_ sc = NoSetCond, Condition c = Always);
 
     void ma_mov(const ImmGCPtr &ptr, Register dest);
 
     // Shifts (just a move with a shifting op2)
     void ma_lsl(Imm32 shift, Register src, Register dst);
     void ma_lsr(Imm32 shift, Register src, Register dst);
     void ma_asr(Imm32 shift, Register src, Register dst);
     void ma_ror(Imm32 shift, Register src, Register dst);
@@ -257,21 +259,21 @@ class MacroAssemblerARM : public Assembl
     void ma_ldrh(EDtrAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrsh(EDtrAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrsb(EDtrAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrd(EDtrAddr addr, Register rt, DebugOnly<Register> rt2, Index mode = Offset, Condition cc = Always);
     void ma_strb(Register rt, DTRAddr addr, Index mode = Offset, Condition cc = Always);
     void ma_strh(Register rt, EDtrAddr addr, Index mode = Offset, Condition cc = Always);
     void ma_strd(Register rt, DebugOnly<Register> rt2, EDtrAddr addr, Index mode = Offset, Condition cc = Always);
     // specialty for moving N bits of data, where n == 8,16,32,64
-    void ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+    BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
                           Register rn, Register rm, Register rt,
-                          Index mode = Offset, Condition cc = Always);
+                          Index mode = Offset, Condition cc = Always, unsigned scale = TimesOne);
 
-    void ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+    BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
                           Register rn, Imm32 offset, Register rt,
                           Index mode = Offset, Condition cc = Always);
     void ma_pop(Register r);
     void ma_push(Register r);
 
     void ma_vpop(VFPRegister r);
     void ma_vpush(VFPRegister r);
 
@@ -314,25 +316,29 @@ class MacroAssemblerARM : public Assembl
     void ma_vcvt_U32_F64(FloatRegister src, FloatRegister dest, Condition cc = Always);
 
     void ma_vxfer(FloatRegister src, Register dest, Condition cc = Always);
     void ma_vxfer(FloatRegister src, Register dest1, Register dest2, Condition cc = Always);
 
     void ma_vxfer(VFPRegister src, Register dest, Condition cc = Always);
     void ma_vxfer(VFPRegister src, Register dest1, Register dest2, Condition cc = Always);
 
-    void ma_vdtr(LoadStore ls, const Operand &addr, VFPRegister dest, Condition cc = Always);
+    void ma_vxfer(Register src1, Register src2, FloatRegister dest, Condition cc = Always);
 
-    void ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc = Always);
-    void ma_vldr(const Operand &addr, VFPRegister dest, Condition cc = Always);
+    BufferOffset ma_vdtr(LoadStore ls, const Operand &addr, VFPRegister dest, Condition cc = Always);
+
 
-    void ma_vstr(VFPRegister src, VFPAddr addr, Condition cc = Always);
-    void ma_vstr(VFPRegister src, const Operand &addr, Condition cc = Always);
+    BufferOffset ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc = Always);
+    BufferOffset ma_vldr(const Operand &addr, VFPRegister dest, Condition cc = Always);
+    BufferOffset ma_vldr(VFPRegister src, Register base, Register index, int32_t shift = defaultShift, Condition cc = Always);
 
-    void ma_vstr(VFPRegister src, Register base, Register index, int32_t shift = defaultShift, Condition cc = Always);
+    BufferOffset ma_vstr(VFPRegister src, VFPAddr addr, Condition cc = Always);
+    BufferOffset ma_vstr(VFPRegister src, const Operand &addr, Condition cc = Always);
+
+    BufferOffset ma_vstr(VFPRegister src, Register base, Register index, int32_t shift = defaultShift, Condition cc = Always);
     // Calls an Ion function, assuming that the stack is untouched (8 byte aligned).
     void ma_callIon(const Register reg);
     // Calls an Ion function, assuming that sp has already been decremented.
     void ma_callIonNoPush(const Register reg);
     // Calls an Ion function, assuming that the stack is currently not 8 byte aligned.
     void ma_callIonHalfPush(const Register reg);
 
     void ma_call(void *dest);
@@ -439,17 +445,16 @@ class MacroAssemblerARMCompat : public M
         DOUBLE
     };
 
     MacroAssemblerARMCompat()
       : inCall_(false),
         enoughMemory_(true),
         framePushed_(0)
     { }
-
     bool oom() const {
         return Assembler::oom() || !enoughMemory_;
     }
 
   public:
     using MacroAssemblerARM::call;
 
     // jumps + other functions that should be called from
@@ -480,20 +485,18 @@ class MacroAssemblerARMCompat : public M
         JS_NOT_REACHED("NYI-IC");
     }
 
     void call(const Register reg) {
         as_blx(reg);
     }
 
     void call(Label *label) {
-        JS_NOT_REACHED("Feature NYI");
-        /* we can blx to it if it close by, otherwise, we need to
-         * set up a branch + link node.
-         */
+        // For now, assume that the target is nearby and can be reached with a direct bl.
+        as_bl(label, Always);
     }
     void call(ImmWord word) {
         BufferOffset bo = m_buffer.nextOffset();
         addPendingJump(bo, (void*)word.value, Relocation::HARDCODED);
         ma_call((void *) word.value);
     }
     void call(IonCode *c) {
         BufferOffset bo = m_buffer.nextOffset();
@@ -589,16 +592,19 @@ class MacroAssemblerARMCompat : public M
     }
     void jump(Register reg) {
         ma_bx(reg);
     }
 
     void neg32(Register reg) {
         ma_neg(reg, reg, SetCond);
     }
+    void negl(Register reg) {
+        ma_neg(reg, reg, SetCond);
+    }
     void test32(Register lhs, Register rhs) {
         ma_tst(lhs, rhs);
     }
     void test32(const Address &address, Imm32 imm) {
         ma_ldr(Operand(address.base, address.offset), ScratchRegister);
         ma_tst(ScratchRegister, imm);
     }
     void testPtr(Register lhs, Register rhs) {
@@ -816,18 +822,18 @@ class MacroAssemblerARMCompat : public M
     }
     void branchTest32(Condition cond, const Address &address, Imm32 imm, Label *label) {
         ma_ldr(Operand(address.base, address.offset), ScratchRegister);
         branchTest32(cond, ScratchRegister, imm, label);
     }
     void branchTestPtr(Condition cond, const Register &lhs, const Register &rhs, Label *label) {
         branchTest32(cond, lhs, rhs, label);
     }
-    void branchTestPtr(Condition cond, const Register &lhs, Imm32 imm, Label *label) {
-        branchTest32(cond, lhs, imm, label);
+    void branchTestPtr(Condition cond, const Register &lhs, const Imm32 rhs, Label *label) {
+        branchTest32(cond, lhs, rhs, label);
     }
     void branchPtr(Condition cond, Register lhs, Register rhs, Label *label) {
         branch32(cond, lhs, rhs, label);
     }
     void branchPtr(Condition cond, Register lhs, ImmGCPtr ptr, Label *label) {
         movePtr(ptr, ScratchRegister);
         branchPtr(cond, lhs, ScratchRegister, label);
     }
@@ -905,19 +911,19 @@ class MacroAssemblerARMCompat : public M
         JS_ASSERT(dest.offset == 0);
         ma_alu(dest.base, lsl(dest.index, dest.scale), ScratchRegister, op_add);
         storeValue(type, reg, Address(ScratchRegister, 0));
     }
     void storeValue(ValueOperand val, const Address &dest) {
         storeValue(val, Operand(dest));
     }
     void storeValue(JSValueType type, Register reg, Address dest) {
+        ma_str(reg, dest);
         ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), secondScratchReg_);
         ma_str(secondScratchReg_, Address(dest.base, dest.offset + 4));
-        ma_str(reg, dest);
     }
     void storeValue(const Value &val, Address dest) {
         jsval_layout jv = JSVAL_TO_IMPL(val);
         ma_mov(Imm32(jv.s.tag), secondScratchReg_);
         ma_str(secondScratchReg_, Address(dest.base, dest.offset + 4));
         if (val.isMarkable())
             ma_mov(ImmGCPtr(reinterpret_cast<gc::Cell *>(val.toGCThing())), secondScratchReg_);
         else
@@ -1143,16 +1149,17 @@ class MacroAssemblerARMCompat : public M
         ma_mov(Imm32(0xff), dest, NoSetCond, NotEqual);
         ma_mov(Imm32(0), dest, NoSetCond, Signed);
     }
 
     void cmp32(const Register &lhs, const Imm32 &rhs);
     void cmp32(const Register &lhs, const Register &rhs);
     void cmp32(const Operand &lhs, const Imm32 &rhs);
     void cmp32(const Operand &lhs, const Register &rhs);
+
     void cmpPtr(const Register &lhs, const ImmWord &rhs);
     void cmpPtr(const Register &lhs, const Register &rhs);
     void cmpPtr(const Register &lhs, const ImmGCPtr &rhs);
     void cmpPtr(const Address &lhs, const Register &rhs);
     void cmpPtr(const Address &lhs, const ImmWord &rhs);
 
     void subPtr(Imm32 imm, const Register dest);
     void subPtr(const Address &addr, const Register dest);
@@ -1245,16 +1252,60 @@ class MacroAssemblerARMCompat : public M
         // this is the instruction sequence that gcc generated for this
         // operation.
         ma_sub(r, Imm32(0x80000001), ScratchRegister);
         ma_cmn(ScratchRegister, Imm32(3));
         ma_b(handleNotAnInt, Above);
     }
 
     void enterOsr(Register calleeToken, Register code);
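+    // Memory-to-memory helpers; lr is used as the temporary register.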
+    void memIntToValue(Address Source, Address Dest) {
+        load32(Source, lr);
+        storeValue(JSVAL_TYPE_INT32, lr, Dest);
+    }
+    void memMove32(Address Source, Address Dest) {
+        loadPtr(Source, lr);
+        storePtr(lr, Dest);
+    }
+    void memMove64(Address Source, Address Dest) {
+        loadPtr(Source, lr);
+        storePtr(lr, Dest);
+        loadPtr(Address(Source.base, Source.offset+4), lr);
+        storePtr(lr, Address(Dest.base, Dest.offset+4));
+    }
+
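+    // Compute the effective address (base + displacement) of |addr| into |dest|.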
+    void lea(Operand addr, Register dest) {
+        ma_add(addr.baseReg(), Imm32(addr.disp()), dest);
+    }
+
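+    // Branch to |label| if the limit loaded from |limitAddr| is at or above the
+    // current stack pointer, i.e. the stack has overflowed.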
+    void stackCheck(ImmWord limitAddr, Label *label) {
+        movePtr(limitAddr, ScratchRegister);
+        ma_ldr(Address(ScratchRegister, 0), ScratchRegister);
+        ma_cmp(ScratchRegister, StackPointer);
+        ma_b(label, Assembler::AboveOrEqual);
+    }
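+    // Return to the caller through the link register (ABI return).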
+    void abiret() {
+        as_bx(lr);
+    }
+
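+    // Store the immediate |c| to |dest|, using lr as the temporary register.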
+    void ma_storeImm(Imm32 c, const Operand &dest) {
+        ma_mov(c, lr);
+        ma_str(lr, dest);
+    }
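+    // Emit a flag-setting move of |bounded| and return its offset; presumably the
+    // asm.js heap bounds check is patched over this instruction at link time.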
+    BufferOffset ma_BoundsCheck(Register bounded) {
+        return as_mov(ScratchRegister, lsl(bounded, 0), SetCond);
+    }
+
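+    // Convert the double in |src| to single precision and store it at base + index.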
+    void storeFloat(VFPRegister src, Register base, Register index, Condition cond) {
+        as_vcvt(VFPRegister(ScratchFloatReg).singleOverlay(), src, false, cond);
+        ma_vstr(VFPRegister(ScratchFloatReg).singleOverlay(), base, index, 0, cond);
+    }
 };
 
 typedef MacroAssemblerARMCompat MacroAssemblerSpecific;
 
 } // namespace ion
 } // namespace js
 
 #endif // jsion_macro_assembler_arm_h__
--- a/js/src/ion/arm/MoveEmitter-arm.cpp
+++ b/js/src/ion/arm/MoveEmitter-arm.cpp
@@ -81,17 +81,21 @@ MoveEmitterARM::toOperand(const MoveOper
 Register
 MoveEmitterARM::tempReg()
 {
     if (spilledReg_ != InvalidReg)
         return spilledReg_;
 
     // For now, just pick r12/ip as the eviction point. This is totally
     // random, and if it ends up being bad, we can use actual heuristics later.
-    spilledReg_ = r12;
+    // r12 is actually a bad choice: it is the scratch register, which is frequently
+    // used for address computations, such as those generated when we access values
+    // more than 4096 bytes off of the stack pointer.
+    // Instead, use lr, the link register.
+    spilledReg_ = r14;
     if (pushedAtSpill_ == -1) {
         masm.Push(spilledReg_);
         pushedAtSpill_ = masm.framePushed();
     } else {
         masm.ma_str(spilledReg_, spillSlot());
     }
     return spilledReg_;
 }
--- a/js/src/ion/shared/CodeGenerator-shared.cpp
+++ b/js/src/ion/shared/CodeGenerator-shared.cpp
@@ -60,18 +60,23 @@ CodeGeneratorShared::CodeGeneratorShared
     // argument stack depth separately.
     if (gen->compilingAsmJS()) {
         JS_ASSERT(graph->argumentSlotCount() == 0);
         frameDepth_ += gen->maxAsmJSStackArgBytes();
 
         // An MAsmJSCall does not align the stack pointer at calls sites but instead
         // relies on the a priori stack adjustment (in the prologue) on platforms
         // (like x64) which require the stack to be aligned.
-        if (gen->performsAsmJSCall()) {
-            unsigned alignmentAtCall = AlignmentAtPrologue + frameDepth_;
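+        // On ARM the frame is always padded so that the stack stays aligned, even
+        // when no calls are made (presumably other ARM-specific code relies on this).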
+#ifdef JS_CPU_ARM
+        bool forceAlign = true;
+#else
+        bool forceAlign = false;
+#endif
+        if (gen->performsAsmJSCall() || forceAlign) {
+            unsigned alignmentAtCall = AlignmentMidPrologue + frameDepth_;
             if (unsigned rem = alignmentAtCall % StackAlignment)
                 frameDepth_ += StackAlignment - rem;
         }
 
         // FrameSizeClass is only used for bailing, which cannot happen in
         // asm.js code.
         frameClass_ = FrameSizeClass::None();
     } else {
@@ -439,17 +444,16 @@ CodeGeneratorShared::callVM(const VMFunc
         return false;
 
     // Remove the rest of the frame left on the stack. We remove the return address,
     // which is implicitly popped when returning.
     int framePop = sizeof(IonExitFrameLayout) - sizeof(void*);
 
     // Pop arguments from framePushed.
     masm.implicitPop(fun.explicitStackSlots() * sizeof(void *) + framePop);
-
     // Stack is:
     //    ... frame ...
     return true;
 }
 
 class OutOfLineTruncateSlow : public OutOfLineCodeBase<CodeGeneratorShared>
 {
     FloatRegister src_;
--- a/js/src/ion/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/ion/shared/CodeGenerator-x86-shared.cpp
@@ -1450,10 +1450,31 @@ CodeGeneratorX86Shared::generateInvalida
     masm.call(thunk);
 
     // We should never reach this point in JIT code -- the invalidation thunk should
     // pop the invalidated JS frame and return directly to its caller.
     masm.breakpoint();
     return true;
 }
 
+bool
+CodeGeneratorX86Shared::visitNegI(LNegI *ins)
+{
+    Register input = ToRegister(ins->input());
+    JS_ASSERT(input == ToRegister(ins->output()));
+
+    masm.neg32(input);
+    return true;
+}
+
+bool
+CodeGeneratorX86Shared::visitNegD(LNegD *ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    JS_ASSERT(input == ToFloatRegister(ins->output()));
+
+    masm.negateDouble(input);
+    return true;
+}
+
 } // namespace ion
 } // namespace js
--- a/js/src/ion/shared/CodeGenerator-x86-shared.h
+++ b/js/src/ion/shared/CodeGenerator-x86-shared.h
@@ -106,16 +106,19 @@ class CodeGeneratorX86Shared : public Co
     virtual bool visitRound(LRound *lir);
     virtual bool visitGuardShape(LGuardShape *guard);
     virtual bool visitGuardClass(LGuardClass *guard);
     virtual bool visitTruncateDToInt32(LTruncateDToInt32 *ins);
     virtual bool visitEffectiveAddress(LEffectiveAddress *ins);
     virtual bool visitAsmJSDivOrMod(LAsmJSDivOrMod *ins);
     virtual bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
 
+    bool visitNegI(LNegI *lir);
+    bool visitNegD(LNegD *lir);
+
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
     bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
     bool visitOutOfLineTruncate(OutOfLineTruncate *ool);
     bool visitOutOfLineTableSwitch(OutOfLineTableSwitch *ool);
     bool generateInvalidateEpilogue();
 };
--- a/js/src/ion/shared/IonAssemblerBuffer.h
+++ b/js/src/ion/shared/IonAssemblerBuffer.h
@@ -71,49 +71,48 @@ struct BufferSlice : public InlineForwar
         if (inst != NULL)
             memcpy(&instructions[size()], inst, instSize);
         nodeSize += instSize;
     }
 };
 
 template<int SliceSize, class Inst>
 struct AssemblerBuffer
-  : public IonAllocPolicy
 {
   public:
-    AssemblerBuffer() : head(NULL), tail(NULL), m_oom(false), m_bail(false), bufferSize(0) {}
+    AssemblerBuffer() : head(NULL), tail(NULL), m_oom(false), m_bail(false), bufferSize(0), LifoAlloc_(8192) {}
   protected:
     typedef BufferSlice<SliceSize> Slice;
     typedef AssemblerBuffer<SliceSize, Inst> AssemblerBuffer_;
     Slice *head;
     Slice *tail;
   public:
     bool m_oom;
     bool m_bail;
     // How much data has been added to the buffer thus far.
     uint32_t bufferSize;
     uint32_t lastInstSize;
     bool isAligned(int alignment) const {
         // make sure the requested alignment is a power of two.
         JS_ASSERT((alignment & (alignment-1)) == 0);
         return !(size() & (alignment - 1));
     }
-    virtual Slice *newSlice() {
-        Slice *tmp = static_cast<Slice*>(malloc_(sizeof(Slice)));
+    virtual Slice *newSlice(LifoAlloc &a) {
+        Slice *tmp = static_cast<Slice*>(a.alloc(sizeof(Slice)));
         if (!tmp) {
             m_oom = true;
             return NULL;
         }
         new (tmp) Slice;
         return tmp;
     }
     bool ensureSpace(int size) {
         if (tail != NULL && tail->size()+size <= SliceSize)
             return true;
-        Slice *tmp = newSlice();
+        Slice *tmp = newSlice(LifoAlloc_);
         if (tmp == NULL)
             return false;
         if (tail != NULL) {
             bufferSize += tail->size();
             tail->setNext(tmp);
         }
         tail = tmp;
         if (head == NULL)
@@ -188,17 +187,17 @@ struct AssemblerBuffer
     }
     BufferOffset prevOffset() const {
         JS_NOT_REACHED("Don't current record lastInstSize");
         return BufferOffset(bufferSize + tail->nodeSize - lastInstSize);
     }
 
     // Break the instruction stream so we can go back and edit it at this point
     void perforate() {
-        Slice *tmp = newSlice();
+        Slice *tmp = newSlice(LifoAlloc_);
         if (!tmp)
             m_oom = true;
         bufferSize += tail->size();
         tail->setNext(tmp);
         tail = tmp;
     }
 
     class AssemblerBufferInstIterator {
@@ -211,14 +210,15 @@ struct AssemblerBuffer
             Inst *i = m_buffer->getInst(bo);
             bo = BufferOffset(bo.getOffset()+i->size());
             return cur();
         };
         Inst *cur() {
             return m_buffer->getInst(bo);
         }
     };
-
+  public:
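+    // Backing storage for buffer slices (and, in subclasses, pool data).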
+    LifoAlloc LifoAlloc_;
 };
 
 } // ion
 } // js
 #endif // __ion_assembler_buffer_h
--- a/js/src/ion/shared/IonAssemblerBufferWithConstantPools.h
+++ b/js/src/ion/shared/IonAssemblerBufferWithConstantPools.h
@@ -42,28 +42,28 @@ struct Pool
     // dump time, we may not know the location in a pool (and thus the limiting load)
     // until very late.
     // Lastly, it may be beneficial to interleave the pools.  I have absolutely no idea
     // how that will work, but my suspicions are that it will be difficult.
 
     BufferOffset limitingUser;
     int limitingUsee;
 
-    Pool(int maxOffset_, int immSize_, int instSize_, int bias_, int alignment_,
+    Pool(int maxOffset_, int immSize_, int instSize_, int bias_, int alignment_, LifoAlloc &LifoAlloc_,
          bool isBackref_ = false, bool canDedup_ = false, Pool *other_ = NULL)
         : maxOffset(maxOffset_), immSize(immSize_), instSize(instSize_),
           bias(bias_), alignment(alignment_),
           isBackref(isBackref_), canDedup(canDedup_), other(other_),
-          poolData(static_cast<uint8_t *>(malloc_(8*immSize))), numEntries(0),
+          poolData(static_cast<uint8_t *>(LifoAlloc_.alloc(8*immSize))), numEntries(0),
           buffSize(8), loadOffsets(), limitingUser(), limitingUsee(INT_MIN)
     {
     }
     static const int garbage=0xa5a5a5a5;
     Pool() : maxOffset(garbage), immSize(garbage), instSize(garbage), bias(garbage),
-             alignment(garbage), isBackref(garbage), canDedup(garbage)
+             alignment(garbage), isBackref(garbage), canDedup(garbage), other((Pool*)garbage)
     {
     }
     // Sometimes, when we are adding large values to a pool, the limiting use may change.
     // Handle this case.  nextInst is the offset of the load instruction that is being added.
     void updateLimiter(BufferOffset nextInst) {
         int oldRange, newRange;
         if (isBackref) {
             // common expressions that are not subtracted: the location of the pool, ...
@@ -122,44 +122,45 @@ struct Pool
         // bias + abs(poolOffset + limitingUsee * numEntries - limitingUser) + sizeof(other_pools) >= maxOffset
         if (poolOffset + limitingUsee * immSize - (limitingUser.getOffset() + bias) >= maxOffset) {
             return true;
         }
         return false;
     }
 
     // By the time this function is called, we'd damn well better know that this is going to succeed.
-    uint32_t insertEntry(uint8_t *data, BufferOffset off) {
+    uint32_t insertEntry(uint8_t *data, BufferOffset off, LifoAlloc &LifoAlloc_) {
         if (numEntries == buffSize) {
             buffSize <<= 1;
-            poolData = static_cast<uint8_t*>(realloc_(poolData, immSize * numEntries,
-                                                      immSize * buffSize));
-            if (poolData == NULL) {
+            uint8_t *tmp = static_cast<uint8_t*>(LifoAlloc_.alloc(immSize * buffSize));
+            if (tmp == NULL) {
                 buffSize = 0;
                 return -1;
             }
+            memcpy(tmp, poolData, immSize * numEntries);
+            poolData = tmp;
         }
         memcpy(&poolData[numEntries * immSize], data, immSize);
         loadOffsets.append(off.getOffset());
         return numEntries++;
     }
 
-    bool reset() {
+    bool reset(LifoAlloc &a) {
         numEntries = 0;
         buffSize = 8;
-        poolData = static_cast<uint8_t*>(malloc_(buffSize * immSize));
+        poolData = static_cast<uint8_t*>(a.alloc(buffSize * immSize));
         if (poolData == NULL)
             return false;
 
-        void *otherSpace = malloc_(sizeof(Pool));
+        void *otherSpace = a.alloc(sizeof(Pool));
         if (otherSpace == NULL)
             return false;
 
         other = new (otherSpace) Pool(other->maxOffset, other->immSize, other->instSize,
-                                      other->bias, other->alignment, other->isBackref,
+                                      other->bias, other->alignment, a, other->isBackref,
                                       other->canDedup);
         new (&loadOffsets) LoadOffsets;
 
         limitingUser = BufferOffset();
         limitingUsee = -1;
         return true;
 
     }
@@ -210,16 +211,22 @@ struct BufferSliceTail : public BufferSl
     }
     BufferSliceTail() : data(NULL), isNatural(true) {
         memset(isBranch, 0, sizeof(isBranch));
     }
     void markNextAsBranch() {
         int idx = this->nodeSize / InstBaseSize;
         isBranch[idx >> 3] |= 1 << (idx & 0x7);
     }
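+    // Check whether the slot the next instruction will occupy is already marked as a branch.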
+    bool isNextBranch() {
+        if (this->nodeSize == InstBaseSize)
+            return false;
+        int idx = this->nodeSize / InstBaseSize;
+        return (isBranch[idx >> 3] >> (idx & 0x7)) & 1;
+    }
 };
 
 #if 0
 static int getId() {
     if (MaybeGetIonContext())
         return MaybeGetIonContext()->getNextAssemblerId();
     return NULL_ID;
 }
@@ -346,18 +353,18 @@ struct AssemblerBufferWithConstantPool :
     static const int logBasePoolInfo = 3;
     BufferSlice ** getHead() {
         return (BufferSlice**)&this->head;
     }
     BufferSlice ** getTail() {
         return (BufferSlice**)&this->tail;
     }
 
-    virtual BufferSlice *newSlice() {
-        BufferSlice *tmp = static_cast<BufferSlice*>(this->malloc_(sizeof(BufferSlice)));
+    virtual BufferSlice *newSlice(LifoAlloc &a) {
+        BufferSlice *tmp = static_cast<BufferSlice*>(a.alloc(sizeof(BufferSlice)));
         if (!tmp) {
             this->m_oom = true;
             return NULL;
         }
         new (tmp) BufferSlice;
         return tmp;
     }
   public:
@@ -373,17 +380,17 @@ struct AssemblerBufferWithConstantPool :
         for (int idx = 0; idx < numPoolKinds; idx++) {
             entryCount[idx] = 0;
         }
     }
 
     // We need to wait until an AutoIonContextAlloc is created by the
     // IonMacroAssembler, before allocating any space.
     void initWithAllocator() {
-        poolInfo = static_cast<PoolInfo*>(this->calloc_(sizeof(PoolInfo) * (1 << logBasePoolInfo)));
+        poolInfo = static_cast<PoolInfo*>(this->LifoAlloc_.alloc(sizeof(PoolInfo) * (1 << logBasePoolInfo)));
     }
 
     const PoolInfo & getInfo(int x) const {
         static const PoolInfo nil = {0,0,0};
         if (x < 0 || x >= numDumps)
             return nil;
         return poolInfo[x];
     }
@@ -518,17 +525,17 @@ struct AssemblerBufferWithConstantPool :
             }
             // when moving back to front, calculating the alignment is hard, just be
             // conservative with it.
             poolOffset += tmp->immSize * tmp->numEntries + tmp->getAlignment();
             if (p == tmp) {
                 poolOffset += tmp->immSize;
             }
         }
-        return p->numEntries + p->other->insertEntry(data, this->nextOffset());
+        return p->numEntries + p->other->insertEntry(data, this->nextOffset(), this->LifoAlloc_);
     }
 
     // Simultaneously insert an instSized instruction into the stream,
     // and an entry into the pool.  There are many things that can happen.
     // 1) the insertion goes as planned
     // 2) inserting an instruction pushes a previous pool-reference out of range, forcing a dump
     // 2a) there isn't a reasonable save point in the instruction stream. We need to save room for
     //     a guard instruction to branch over the pool.
@@ -575,17 +582,17 @@ struct AssemblerBufferWithConstantPool :
             if (p == tmp) {
                 nextOffset += tmp->immSize;
             }
             nextOffset += tmp->immSize * tmp->numEntries;
         }
         if (p == NULL) {
             return INT_MIN;
         }
-        return p->insertEntry(data, this->nextOffset());
+        return p->insertEntry(data, this->nextOffset(), this->LifoAlloc_);
     }
     BufferOffset putInt(uint32_t value) {
         return insertEntry(sizeof(uint32_t) / sizeof(uint8_t), (uint8_t*)&value, NULL, NULL);
     }
     // Mark the current section as an area where we can
     // later go to dump a pool
     void perforate() {
         // If we're filling the backreferences, we don't want to start looking for a new dumpsite.
@@ -670,23 +677,24 @@ struct AssemblerBufferWithConstantPool :
             inBackref = false;
             IonSpew(IonSpew_Pools, "[%d] Aborting because the pool is empty", id);
             // Bail out early, since we don't want to even pretend these pools exist.
             return;
         }
         JS_ASSERT(perforatedNode != NULL);
         if (numDumps >= (1<<logBasePoolInfo) && (numDumps & (numDumps-1)) == 0) {
             // need to resize.
-            poolInfo = static_cast<PoolInfo*>(
-                this->realloc_(poolInfo, sizeof(PoolInfo) * numDumps,
-                               sizeof(PoolInfo) * numDumps * 2));
-            if (poolInfo == NULL) {
+            PoolInfo *tmp = static_cast<PoolInfo*>(this->LifoAlloc_.alloc(sizeof(PoolInfo) * numDumps * 2));
+            if (tmp == NULL) {
                 this->fail_oom();
                 return;
             }
+            memcpy(tmp, poolInfo, sizeof(PoolInfo) * numDumps);
+            poolInfo = tmp;
         }
 
         // In order to figure out how to fix up the loads for the second half of the pool
         // we need to find where the bits of the pool that have been implemented end.
         int poolOffset = perforation.getOffset();
         int magicAlign = getInfo(numDumps-1).finalPos - getInfo(numDumps-1).offset;
         poolOffset += magicAlign;
         poolOffset += headerSize;
@@ -769,33 +777,33 @@ struct AssemblerBufferWithConstantPool :
                 }
                 p->numEntries -= numSkips;
             }
             poolOffset += p->numEntries * p->immSize;
             delete[] preservedEntries;
         }
         // bind the current pool to the perforation point.
         Pool **tmp = &perforatedNode->data;
-        *tmp = static_cast<Pool*>(this->malloc_(sizeof(Pool) * numPoolKinds));
+        *tmp = static_cast<Pool*>(this->LifoAlloc_.alloc(sizeof(Pool) * numPoolKinds));
         if (*tmp == NULL) {
             this->fail_oom();
             return;
         }
         // The above operations may have changed the size of pools!
         // recalibrate the size of the pool.
         newPoolInfo = getPoolData();
         poolInfo[numDumps] = newPoolInfo;
         poolSize += poolInfo[numDumps].size;
         numDumps++;
 
         memcpy(*tmp, pools, sizeof(Pool) * numPoolKinds);
 
         // reset everything to the state that it was in when we started
         for (int poolIdx = 0; poolIdx < numPoolKinds; poolIdx++) {
-            if (!pools[poolIdx].reset()) {
+            if (!pools[poolIdx].reset(this->LifoAlloc_)) {
                 this->fail_oom();
                 return;
             }
         }
         new (&perforation) BufferOffset();
         perforatedNode = NULL;
         inBackref = false;
 
@@ -809,17 +817,17 @@ struct AssemblerBufferWithConstantPool :
             // should be front-to-back, this insertion also needs to proceed backwards
             int idx = outcasts[poolIdx].length();
             for (BufferOffset *iter = outcasts[poolIdx].end()-1;
                  iter != outcasts[poolIdx].begin()-1;
                  --iter, --idx) {
                 pools[poolIdx].updateLimiter(*iter);
                 Inst *inst = this->getInst(*iter);
                 Asm::insertTokenIntoTag(pools[poolIdx].instSize, (uint8_t*)inst, outcasts[poolIdx].end()-1-iter);
-                pools[poolIdx].insertEntry(&outcastEntries[poolIdx][idx*pools[poolIdx].immSize], *iter);
+                pools[poolIdx].insertEntry(&outcastEntries[poolIdx][idx*pools[poolIdx].immSize], *iter, this->LifoAlloc_);
             }
             delete[] outcastEntries[poolIdx];
         }
         // this (*2) is not technically kosher, but I want to get this bug fixed.
         // It should actually be guardSize + the size of the instruction that we're attempting
         // to insert. Unfortunately that value is never passed in.  On ARM, these instructions
         // are always 4 bytes, so guardSize is legit to use.
         poolOffset = this->size() + guardSize * 2;
@@ -850,22 +858,25 @@ struct AssemblerBufferWithConstantPool :
             return;
         }
 
         IonSpew(IonSpew_Pools, "[%d] Dumping %d bytes", id, newPoolInfo.size);
         if (!perforation.assigned()) {
             IonSpew(IonSpew_Pools, "[%d] No Perforation point selected, generating a new one", id);
             // There isn't a perforation here, we need to dump the pool with a guard.
             BufferOffset branch = this->nextOffset();
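+            // Remember whether the next instruction was already marked as a branch;
+            // emitting the guard below would otherwise lose that mark.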
+            bool shouldMarkAsBranch = this->isNextBranch();
             this->markNextAsBranch();
             this->putBlob(guardSize, NULL);
             BufferOffset afterPool = this->nextOffset();
             Asm::writePoolGuard(branch, this->getInst(branch), afterPool);
             markGuard();
             perforatedNode->isNatural = false;
+            if (shouldMarkAsBranch)
+                this->markNextAsBranch();
         }
 
         // We have a perforation.  Time to cut the instruction stream, patch in the pool
         // and possibly re-arrange the pool to accommodate its new location.
         int poolOffset = perforation.getOffset();
         int magicAlign =  getInfo(numDumps-1).finalPos - getInfo(numDumps-1).offset;
         poolOffset += magicAlign;
         poolOffset += headerSize;
@@ -877,17 +888,17 @@ struct AssemblerBufferWithConstantPool :
             int idx = 0;
             for (BufferOffset *iter = p->loadOffsets.begin();
                  iter != p->loadOffsets.end(); ++iter, ++idx)
             {
                 if (iter->getOffset() >= perforation.getOffset()) {
                     IonSpew(IonSpew_Pools, "[%d] Pushing entry %d in pool %d into the backwards section.", id, idx, poolIdx);
                     // insert this into the rear part of the pool.
                     int offset = idx * p->immSize;
-                    p->other->insertEntry(&p->poolData[offset], BufferOffset(*iter));
+                    p->other->insertEntry(&p->poolData[offset], BufferOffset(*iter), this->LifoAlloc_);
                     // update the limiting entry for this pool.
                     p->other->updateLimiter(*iter);
 
                     // Update the current pool to report fewer entries.  They are now in the
                     // backwards section.
                     p->numEntries--;
                     beforePool = false;
                 } else {
@@ -938,17 +949,18 @@ struct AssemblerBufferWithConstantPool :
     void patchBranch(Inst *i, int curpool, BufferOffset branch) {
         const Inst *ci = i;
         ptrdiff_t offset = Asm::getBranchOffset(ci);
         // If the offset is 0, then there is nothing to do.
         if (offset == 0)
             return;
         int destOffset = branch.getOffset() + offset;
         if (offset > 0) {
-            while (poolInfo[curpool].offset <= destOffset && curpool < numDumps) {
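+            // Test curpool before indexing poolInfo so we never read past the end of the array.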
+            while (curpool < numDumps && poolInfo[curpool].offset <= destOffset) {
                 offset += poolInfo[curpool].size;
                 curpool++;
             }
         } else {
             // Ignore the pool that comes next, since this is a backwards branch
             curpool--;
             while (curpool >= 0 && poolInfo[curpool].offset > destOffset) {
                 offset -= poolInfo[curpool].size;
@@ -1011,23 +1023,28 @@ struct AssemblerBufferWithConstantPool :
     }
     int size() const {
         return uncheckedSize();
     }
     Pool *getPool(int idx) {
         return &pools[idx];
     }
     void markNextAsBranch() {
-        JS_ASSERT(*this->getTail() != NULL);
         // If the previous thing inserted was the last instruction of
         // the node, then whoops, we want to mark the first instruction of
         // the next node.
         this->ensureSpace(InstBaseSize);
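+        // ensureSpace() may have just allocated the first slice, so the tail can
+        // only be checked after it has run.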
+        JS_ASSERT(*this->getTail() != NULL);
         (*this->getTail())->markNextAsBranch();
     }
+    bool isNextBranch() {
+        JS_ASSERT(*this->getTail() != NULL);
+        return (*this->getTail())->isNextBranch();
+    }
+
     int uncheckedSize() const {
         PoolInfo pi = getPoolData();
         int codeEnd = this->nextOffset().getOffset();
         return (codeEnd - pi.offset) + pi.finalPos;
     }
     ptrdiff_t curDumpsite;
     void resetCounter() {
         curDumpsite = 0;
--- a/js/src/ion/shared/Lowering-shared-inl.h
+++ b/js/src/ion/shared/Lowering-shared-inl.h
@@ -271,16 +271,26 @@ LIRGeneratorShared::useRegisterOrNonDoub
 }
 
 #if defined(JS_CPU_ARM)
 LAllocation
 LIRGeneratorShared::useAnyOrConstant(MDefinition *mir)
 {
     return useRegisterOrConstant(mir);
 }
+LAllocation
+LIRGeneratorShared::useStorable(MDefinition *mir)
+{
+    return useRegister(mir);
+}
+LAllocation
+LIRGeneratorShared::useStorableAtStart(MDefinition *mir)
+{
+    return useRegisterAtStart(mir);
+}
 
 LAllocation
 LIRGeneratorShared::useAny(MDefinition *mir)
 {
     return useRegister(mir);
 }
 #else
 LAllocation
@@ -289,16 +299,27 @@ LIRGeneratorShared::useAnyOrConstant(MDe
     return useOrConstant(mir);
 }
 
 LAllocation
 LIRGeneratorShared::useAny(MDefinition *mir)
 {
     return use(mir);
 }
+LAllocation
+LIRGeneratorShared::useStorable(MDefinition *mir)
+{
+    return useRegisterOrConstant(mir);
+}
+LAllocation
+LIRGeneratorShared::useStorableAtStart(MDefinition *mir)
+{
+    return useRegisterOrConstantAtStart(mir);
+}
+
 #endif
 
 LAllocation
 LIRGeneratorShared::useKeepaliveOrConstant(MDefinition *mir)
 {
     if (mir->isConstant())
         return LAllocation(mir->toConstant()->vp());
     return use(mir, LUse(LUse::KEEPALIVE));
--- a/js/src/ion/shared/Lowering-shared.h
+++ b/js/src/ion/shared/Lowering-shared.h
@@ -74,16 +74,21 @@ class LIRGeneratorShared : public MInstr
     inline LUse useFixed(MDefinition *mir, Register reg);
     inline LUse useFixed(MDefinition *mir, FloatRegister reg);
     inline LUse useFixed(MDefinition *mir, AnyRegister reg);
     inline LAllocation useOrConstant(MDefinition *mir);
     // "Any" is architecture dependent, and will include registers and stack slots on X86,
     // and only registers on ARM.
     inline LAllocation useAny(MDefinition *mir);
     inline LAllocation useAnyOrConstant(MDefinition *mir);
+    // "Storable" is architecture dependend, and will include registers and constants on X86
+    // and only registers on ARM.
+    // this is a generic "things we can expect to write into memory in 1 instruction"
+    inline LAllocation useStorable(MDefinition *mir);
+    inline LAllocation useStorableAtStart(MDefinition *mir);
     inline LAllocation useKeepaliveOrConstant(MDefinition *mir);
     inline LAllocation useRegisterOrConstant(MDefinition *mir);
     inline LAllocation useRegisterOrConstantAtStart(MDefinition *mir);
     inline LAllocation useRegisterOrNonDoubleConstant(MDefinition *mir);
 
 #ifdef JS_NUNBOX32
     inline LUse useType(MDefinition *mir, LUse::Policy policy);
     inline LUse usePayload(MDefinition *mir, LUse::Policy policy);
--- a/js/src/ion/shared/MacroAssembler-x86-shared.h
+++ b/js/src/ion/shared/MacroAssembler-x86-shared.h
@@ -512,15 +512,19 @@ class MacroAssemblerX86Shared : public A
 
     void checkStackAlignment() {
         // Exists for ARM compatibility.
     }
 
     CodeOffsetLabel labelForPatch() {
         return CodeOffsetLabel(size());
     }
+
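+    // On x86 a plain return already follows the ABI; this exists for ARM compatibility.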
+    void abiret() {
+        ret();
+    }
 };
 
 } // namespace ion
 } // namespace js
 
 #endif // jsion_macro_assembler_x86_shared_h__
 
--- a/js/src/ion/x64/Assembler-x64.h
+++ b/js/src/ion/x64/Assembler-x64.h
@@ -155,18 +155,20 @@ class ABIArgGenerator
 static const Register OsrFrameReg = IntArgReg3;
 
 static const Register PreBarrierReg = rdx;
 
 // GCC stack is aligned on 16 bytes, but we don't maintain the invariant in
 // jitted code.
 static const uint32_t StackAlignment = 16;
 static const bool StackKeptAligned = false;
+static const uint32_t CodeAlignment = 8;
 static const uint32_t NativeFrameSize = sizeof(void*);
 static const uint32_t AlignmentAtPrologue = sizeof(void*);
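+// Stack alignment part-way through the prologue, used by the asm.js frame-depth
+// computation; on x64 it is the same as the alignment at entry.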
+static const uint32_t AlignmentMidPrologue = AlignmentAtPrologue;
 
 static const Scale ScalePointer = TimesEight;
 
 class Operand
 {
   public:
     enum Kind {
         REG,
--- a/js/src/ion/x64/MacroAssembler-x64.h
+++ b/js/src/ion/x64/MacroAssembler-x64.h
@@ -1008,16 +1008,21 @@ class MacroAssemblerX64 : public MacroAs
     void patchAsmJSGlobalAccess(unsigned offset, uint8_t *code, unsigned codeBytes,
                                 unsigned globalDataOffset)
     {
         uint8_t *nextInsn = code + offset;
         JS_ASSERT(nextInsn <= code + codeBytes);
         uint8_t *target = code + codeBytes + globalDataOffset;
         ((int32_t *)nextInsn)[-1] = target - nextInsn;
     }
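+    // Load an int32 from |Source| and store it boxed as an Int32 Value at |Dest|,
+    // using ScratchReg as the temporary.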
+    void memIntToValue(Address Source, Address Dest) {
+        load32(Source, ScratchReg);
+        storeValue(JSVAL_TYPE_INT32, ScratchReg, Dest);
+    }
+
 };
 
 typedef MacroAssemblerX64 MacroAssemblerSpecific;
 
 } // namespace ion
 } // namespace js
 
 #endif // jsion_macro_assembler_x64_h__
--- a/js/src/ion/x86/Assembler-x86.h
+++ b/js/src/ion/x86/Assembler-x86.h
@@ -84,19 +84,20 @@ static const Register PreBarrierReg = ed
 // GCC stack is aligned on 16 bytes, but we don't maintain the invariant in
 // jitted code.
 #if defined(__GNUC__)
 static const uint32_t StackAlignment = 16;
 #else
 static const uint32_t StackAlignment = 4;
 #endif
 static const bool StackKeptAligned = false;
+static const uint32_t CodeAlignment = 8;
 static const uint32_t NativeFrameSize = sizeof(void*);
 static const uint32_t AlignmentAtPrologue = sizeof(void*);
-
+static const uint32_t AlignmentMidPrologue = AlignmentAtPrologue;
 struct ImmTag : public Imm32
 {
     ImmTag(JSValueTag mask)
       : Imm32(int32_t(mask))
     { }
 };
 
 struct ImmType : public ImmTag