Bug 1020834 - IonMonkey: (ARM) Correct some poorly handled pool placement cases and improve test coverage for these issues. r=jandem
author Douglas Crosher <dtc-moz@scieneer.com>
Fri, 27 Jun 2014 19:42:58 +1000
changeset 192164 f1bacafe789c9d30ae8e5f49f31822ed942217cc
parent 192163 af2c7776626540dc8eed65c78d7eda056a2c3018
child 192165 b191be106cae30ec2477531fc53531cb53c35a64
push id 1
push user root
push date Mon, 20 Oct 2014 17:29:22 +0000
reviewers jandem
bugs 1020834
milestone 33.0a1
Bug 1020834 - IonMonkey: (ARM) Correct some poorly handled pool placement cases and improve test coverage for these issues. r=jandem
js/src/jit/BaselineJIT.cpp
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm/Bailouts-arm.cpp
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/arm/Trampoline-arm.cpp
js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
js/src/jit/x64/Assembler-x64.h
js/src/jit/x86/Assembler-x86.h
js/src/shell/js.cpp
--- a/js/src/jit/BaselineJIT.cpp
+++ b/js/src/jit/BaselineJIT.cpp
@@ -168,17 +168,17 @@ jit::EnterBaselineAtBranch(JSContext *cx
     BaselineScript *baseline = fp->script()->baselineScript();
 
     EnterJitData data(cx);
     data.jitcode = baseline->nativeCodeForPC(fp->script(), pc);
 
     // Skip the debug breakpoint/trap handler; the interpreter already handled it
     // for the current op.
     if (cx->compartment()->debugMode())
-        data.jitcode += MacroAssembler::ToggledCallSize();
+        data.jitcode += MacroAssembler::ToggledCallSize(data.jitcode);
 
     data.osrFrame = fp;
     data.osrNumStackValues = fp->script()->nfixed() + cx->interpreterRegs().stackDepth();
 
     RootedValue thisv(cx);
 
     if (fp->isNonEvalFunctionFrame()) {
         data.constructing = fp->isConstructing();
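
The change above is the motivating call site: ToggledCallSize() gains a code-pointer argument because, on ARM, the toggled-call sequence is no longer a fixed size; a constant pool (or test nop fill) may be interleaved with it. A minimal sketch of how a caller steps over the sequence, using only the patched static API (SkipToggledCall is a hypothetical helper, not part of the patch):

    // Advance past a toggled call, however long it turned out to be at this site.
    static uint8_t *
    SkipToggledCall(uint8_t *code)
    {
        // On x86/x64 the argument is ignored and the result is the constant 5;
        // on ARM the emitted instructions are decoded, skipping any pool.
        return code + MacroAssembler::ToggledCallSize(code);
    }
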
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -512,16 +512,17 @@ Imm16::Imm16()
 { }
 
 void
 jit::PatchJump(CodeLocationJump &jump_, CodeLocationLabel label)
 {
     // We need to determine if this jump can fit into the standard 24+2 bit address
     // or if we need a larger branch (or just need to use our pool entry)
     Instruction *jump = (Instruction*)jump_.raw();
+    // jumpWithPatch() returns the offset of the jump instruction, never that of a pool or nop.
     Assembler::Condition c;
     jump->extractCond(&c);
     JS_ASSERT(jump->is<InstBranchImm>() || jump->is<InstLDR>());
 
     int jumpOffset = label.raw() - jump_.raw();
     if (BOffImm::isInRange(jumpOffset)) {
         // This instruction started off as a branch, and will remain one
         Assembler::retargetNearBranch(jump, jumpOffset, c);
@@ -727,17 +728,17 @@ Assembler::getPtr32Target(Iter *start, R
         InstMovW *bottom = load1->as<InstMovW>();
         bottom->extractImm(&targ_bot);
         bottom->extractDest(&temp);
 
         // Extract the top part of the immediate.
         InstMovT *top = load2->as<InstMovT>();
         top->extractImm(&targ_top);
 
-        // Make sure they are being loaded intothe same register.
+        // Make sure they are being loaded into the same register.
         JS_ASSERT(top->checkDest(temp));
 
         if (dest)
             *dest = temp;
         if (style)
             *style = L_MOVWT;
 
         uint32_t *value = (uint32_t*) (targ_bot.decode() | (targ_top.decode() << 16));
@@ -780,31 +781,31 @@ Assembler::TraceJumpRelocations(JSTracer
     }
 }
 
 static void
 TraceDataRelocations(JSTracer *trc, uint8_t *buffer, CompactBufferReader &reader)
 {
     while (reader.more()) {
         size_t offset = reader.readUnsigned();
-        InstructionIterator iter((Instruction*)(buffer+offset));
+        InstructionIterator iter((Instruction*)(buffer + offset));
         void *ptr = const_cast<uint32_t *>(Assembler::getPtr32Target(&iter));
         // No barrier needed since these are constants.
         gc::MarkGCThingUnbarriered(trc, reinterpret_cast<void **>(&ptr), "ion-masm-ptr");
     }
 
 }
 static void
 TraceDataRelocations(JSTracer *trc, ARMBuffer *buffer,
                      Vector<BufferOffset, 0, SystemAllocPolicy> *locs)
 {
     for (unsigned int idx = 0; idx < locs->length(); idx++) {
         BufferOffset bo = (*locs)[idx];
         ARMBuffer::AssemblerBufferInstIterator iter(bo, buffer);
-        void *ptr = const_cast<uint32_t *>(jit::Assembler::getPtr32Target(&iter));
+        void *ptr = const_cast<uint32_t *>(Assembler::getPtr32Target(&iter));
 
         // No barrier needed since these are constants.
         gc::MarkGCThingUnbarriered(trc, reinterpret_cast<void **>(&ptr), "ion-masm-ptr");
     }
 
 }
 void
 Assembler::TraceDataRelocations(JSTracer *trc, JitCode *code, CompactBufferReader &reader)
@@ -1302,16 +1303,21 @@ BufferOffset
 Assembler::writeInst(uint32_t x, uint32_t *dest)
 {
     if (dest == nullptr)
         return m_buffer.putInt(x);
 
     writeInstStatic(x, dest);
     return BufferOffset();
 }
+BufferOffset
+Assembler::writeBranchInst(uint32_t x)
+{
+    return m_buffer.putInt(x, /* markAsBranch = */ true);
+}
 void
 Assembler::writeInstStatic(uint32_t x, uint32_t *dest)
 {
     JS_ASSERT(dest != nullptr);
     *dest = x;
 }
 
 BufferOffset
@@ -1693,18 +1699,18 @@ Assembler::as_WritePoolEntry(Instruction
     JS_ASSERT(orig_cond == c);
 }
 
 BufferOffset
 Assembler::as_BranchPool(uint32_t value, RepatchLabel *label, ARMBuffer::PoolEntry *pe, Condition c)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::poolBranch, pc);
-    m_buffer.markNextAsBranch();
-    BufferOffset ret = m_buffer.insertEntry(4, (uint8_t*)&php.raw, int32Pool, (uint8_t*)&value, pe);
+    BufferOffset ret = m_buffer.insertEntry(4, (uint8_t*)&php.raw, int32Pool, (uint8_t*)&value, pe,
+                                            /* markAsBranch = */ true);
     // If this label is already bound, then immediately replace the stub load with
     // a correct branch.
     if (label->bound()) {
         BufferOffset dest(label);
         as_b(dest.diffB<BOffImm>(ret), c, ret);
     } else {
         label->use(ret.getOffset());
     }
@@ -1821,33 +1827,33 @@ Assembler::writePoolGuard(BufferOffset b
     *dest = InstBImm(off, Always);
 }
 // Branch can branch to an immediate *or* to a register.
 // Branches to immediates are pc relative, branches to registers
 // are absolute
 BufferOffset
 Assembler::as_b(BOffImm off, Condition c, bool isPatchable)
 {
-    m_buffer.markNextAsBranch();
-    BufferOffset ret =writeInst(((int)c) | op_b | off.encode());
+    BufferOffset ret = writeBranchInst(((int)c) | op_b | off.encode());
     if (c == Always && !isPatchable)
         m_buffer.markGuard();
     return ret;
 }
 
 BufferOffset
 Assembler::as_b(Label *l, Condition c, bool isPatchable)
 {
     if (m_buffer.oom()) {
         BufferOffset ret;
         return ret;
     }
-    m_buffer.markNextAsBranch();
+
     if (l->bound()) {
-        BufferOffset ret = as_nop();
+        // Note that only one instruction is emitted here; the NOP is overwritten.
+        BufferOffset ret = writeBranchInst(Always | InstNOP::NopInst);
         as_b(BufferOffset(l).diffB<BOffImm>(ret), c, ret);
         return ret;
     }
 
     int32_t old;
     BufferOffset ret;
     if (l->used()) {
         old = l->offset();
@@ -1885,30 +1891,30 @@ Assembler::as_blx(Register r, Condition 
     return writeInst(((int) c) | op_blx | r.code());
 }
 
 // bl can only branch to a pc-relative immediate offset
 // It cannot change the processor state.
 BufferOffset
 Assembler::as_bl(BOffImm off, Condition c)
 {
-    m_buffer.markNextAsBranch();
-    return writeInst(((int)c) | op_bl | off.encode());
+    return writeBranchInst(((int)c) | op_bl | off.encode());
 }
 
 BufferOffset
 Assembler::as_bl(Label *l, Condition c)
 {
     if (m_buffer.oom()) {
         BufferOffset ret;
         return ret;
     }
-    m_buffer.markNextAsBranch();
+
     if (l->bound()) {
-        BufferOffset ret = as_nop();
+        // Note that only one instruction is emitted here; the NOP is overwritten.
+        BufferOffset ret = writeBranchInst(Always | InstNOP::NopInst);
         as_bl(BufferOffset(l).diffB<BOffImm>(ret), c, ret);
         return ret;
     }
 
     int32_t old;
     BufferOffset ret;
     // See if the list was empty :(
     if (l->used()) {
@@ -2605,16 +2611,35 @@ InstIsBNop(Instruction *inst) {
 static bool
 InstIsArtificialGuard(Instruction *inst, const PoolHeader **ph)
 {
     if (!InstIsGuard(inst, ph))
         return false;
     return !(*ph)->isNatural();
 }
 
+// If the instruction points to an artificial pool guard, then skip the pool.
+Instruction *
+Instruction::skipPool()
+{
+    const PoolHeader *ph;
+    // If this is a guard, and the next instruction is a header,
+    // always work around the pool. If it isn't a guard, then start
+    // looking ahead.
+    if (InstIsGuard(this, &ph)) {
+        // Don't skip a natural guard.
+        if (ph->isNatural())
+            return this;
+        return (this + 1 + ph->size())->skipPool();
+    }
+    if (InstIsBNop(this))
+        return (this + 1)->skipPool();
+    return this;
+}
+
 // Cases to be handled:
 // 1) no pools or branches in sight => return this+1
 // 2) branch to next instruction => return this+2, because a nop needed to be inserted into the stream.
 // 3) this+1 is an artificial guard for a pool => return first instruction after the pool
 // 4) this+1 is a natural guard => return the branch
 // 5) this is a branch, right before a pool => return first instruction after the pool
 // in assembly form:
 // 1) add r0, r0, r0 <= this
@@ -2644,22 +2669,20 @@ InstIsArtificialGuard(Instruction *inst,
 Instruction *
 Instruction::next()
 {
     Instruction *ret = this+1;
     const PoolHeader *ph;
     // If this is a guard, and the next instruction is a header, always work around the pool
     // If it isn't a guard, then start looking ahead.
     if (InstIsGuard(this, &ph))
-        return ret + ph->size();
+        return (ret + ph->size())->skipPool();
     if (InstIsArtificialGuard(ret, &ph))
-        return ret + 1 + ph->size();
-    if (InstIsBNop(ret))
-        return ret + 1;
-    return ret;
+        return (ret + 1 + ph->size())->skipPool();
+    return ret->skipPool();
 }
 
 void
 Assembler::ToggleToJmp(CodeLocationLabel inst_)
 {
     uint32_t *ptr = (uint32_t *)inst_.raw();
 
     DebugOnly<Instruction *> inst = (Instruction *)inst_.raw();
@@ -2693,16 +2716,18 @@ Assembler::ToggleToCmp(CodeLocationLabel
 
     AutoFlushICache::flush(uintptr_t(ptr), 4);
 }
 
 void
 Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled)
 {
     Instruction *inst = (Instruction *)inst_.raw();
+    // Skip a pool with an artificial guard.
+    inst = inst->skipPool();
     JS_ASSERT(inst->is<InstMovW>() || inst->is<InstLDR>());
 
     if (inst->is<InstMovW>()) {
         // If it looks like the start of a movw/movt sequence,
         // then make sure we have all of it (and advance the iterator
         // past the full sequence)
         inst = inst->next();
         JS_ASSERT(inst->is<InstMovT>());
@@ -2719,16 +2744,47 @@ Assembler::ToggleCall(CodeLocationLabel 
     if (enabled)
         *inst = InstBLXReg(ScratchRegister, Always);
     else
         *inst = InstNOP();
 
     AutoFlushICache::flush(uintptr_t(inst), 4);
 }
 
+size_t
+Assembler::ToggledCallSize(uint8_t *code)
+{
+    Instruction *inst = (Instruction *)code;
+    // Skip a pool with an artificial guard.
+    inst = inst->skipPool();
+    JS_ASSERT(inst->is<InstMovW>() || inst->is<InstLDR>());
+
+    if (inst->is<InstMovW>()) {
+        // If it looks like the start of a movw/movt sequence,
+        // then make sure we have all of it (and advance the iterator
+        // past the full sequence)
+        inst = inst->next();
+        JS_ASSERT(inst->is<InstMovT>());
+    }
+
+    inst = inst->next();
+    JS_ASSERT(inst->is<InstNOP>() || inst->is<InstBLXReg>());
+    return uintptr_t(inst) + 4 - uintptr_t(code);
+}
+
+uint8_t *
+Assembler::BailoutTableStart(uint8_t *code)
+{
+    Instruction *inst = (Instruction *)code;
+    // Skip a pool with an artificial guard or NOP fill.
+    inst = inst->skipPool();
+    JS_ASSERT(inst->is<InstBLImm>());
+    return (uint8_t *) inst;
+}
+
 void Assembler::updateBoundsCheck(uint32_t heapSize, Instruction *inst)
 {
     JS_ASSERT(inst->is<InstCMP>());
     InstCMP *cmp = inst->as<InstCMP>();
 
     Register index;
     cmp->extractOp1(&index);
 
@@ -2738,17 +2794,30 @@ void Assembler::updateBoundsCheck(uint32
     Imm8 imm8 = Imm8(heapSize);
     JS_ASSERT(!imm8.invalid);
 
     *inst = InstALU(InvalidReg, index, imm8, op_cmp, SetCond, Always);
     // NOTE: we don't update the Auto Flush Cache!  This function is currently only called from
     // within AsmJSModule::patchHeapAccesses, which does that for us.  Don't call this!
 }
 
-InstructionIterator::InstructionIterator(Instruction *i_) : i(i_) {
-    const PoolHeader *ph;
-    // If this is a guard, and the next instruction is a header, always work around the pool
-    // If it isn't a guard, then start looking ahead.
-    if (InstIsArtificialGuard(i, &ph)) {
-        i = i->next();
-    }
+InstructionIterator::InstructionIterator(Instruction *i_) : i(i_)
+{
+    // Work around pools with an artificial pool guard and around nop-fill.
+    i = i->skipPool();
 }
 Assembler *Assembler::dummy = nullptr;
+
+uint32_t Assembler::NopFill = 0;
+
+uint32_t
+Assembler::GetNopFill()
+{
+    static bool isSet = false;
+    if (!isSet) {
+        char *fillStr = getenv("ARM_ASM_NOP_FILL");
+        uint32_t fill;
+        if (fillStr && sscanf(fillStr, "%u", &fill) == 1)
+            NopFill = fill;
+        isSet = true;
+    }
+    return NopFill;
+}
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -1142,16 +1142,18 @@ class Assembler : public AssemblerShared
         return m_buffer.getInst(bo);
     }
   public:
     void resetCounter();
     uint32_t actualOffset(uint32_t) const;
     uint32_t actualIndex(uint32_t) const;
     static uint8_t *PatchableJumpAddress(JitCode *code, uint32_t index);
     BufferOffset actualOffset(BufferOffset) const;
+    static uint32_t NopFill;
+    static uint32_t GetNopFill();
   protected:
 
     // structure for fixing up pc-relative loads/jumps when the machine code
     // gets moved (executable copy, gc, etc.)
     struct RelativePatch
     {
         void *target;
         Relocation::Kind kind;
@@ -1168,17 +1170,16 @@ class Assembler : public AssemblerShared
     js::Vector<BufferOffset, 0, SystemAllocPolicy> tmpDataRelocations_;
     js::Vector<BufferOffset, 0, SystemAllocPolicy> tmpPreBarriers_;
 
     CompactBufferWriter jumpRelocations_;
     CompactBufferWriter dataRelocations_;
     CompactBufferWriter relocations_;
     CompactBufferWriter preBarriers_;
 
-    //typedef JSC::AssemblerBufferWithConstantPool<1024, 4, 4, js::jit::Assembler> ARMBuffer;
     ARMBuffer m_buffer;
 
     // There is now a semi-unified interface for instruction generation.
     // During assembly, there is an active buffer that instructions are
     // being written into, but later, we may wish to modify instructions
     // that have already been created.  In order to do this, we call the
     // same assembly function, but pass it a destination address, which
     // will be overwritten with a new instruction. In order to do this very
@@ -1186,18 +1187,19 @@ class Assembler : public AssemblerShared
     // dest parameter, a this object is still needed.  dummy always happens
     // to be null, but we shouldn't be looking at it in any case.
     static Assembler *dummy;
     mozilla::Array<Pool, 4> pools_;
     Pool *int32Pool;
     Pool *doublePool;
 
   public:
+    // For the nopFill, use a branch to the next instruction: 0xeaffffff.
     Assembler()
-      : m_buffer(4, 4, 0, &pools_[0], 8),
+      : m_buffer(4, 4, 0, &pools_[0], 8, 0xeaffffff, GetNopFill()),
         int32Pool(m_buffer.getPool(1)),
         doublePool(m_buffer.getPool(0)),
         isFinished(false),
         dtmActive(false),
         dtmCond(Always)
     {
     }
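
A short decode of why 0xeaffffff is a branch to the next instruction (standard ARM encoding, stated here for reference):

    // 0xeaffffff = cond 0xe (Always) | op 0xa (B, no link) | imm24 0xffffff
    // imm24 sign-extends to -1 and is scaled by 4, giving an offset of -4 bytes.
    // ARM branch target = PC + 8 + offset = PC + 8 - 4 = PC + 4,
    // i.e. the instruction immediately after the branch itself.
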
 
@@ -1296,16 +1298,20 @@ class Assembler : public AssemblerShared
     size_t bytesNeeded() const;
 
     // Write a blob of binary into the instruction stream *OR*
     // into a destination address. If dest is nullptr (the default), then the
     // instruction gets written into the instruction stream. If dest is not null
     // it is interpreted as a pointer to the location that we want the
     // instruction to be written.
     BufferOffset writeInst(uint32_t x, uint32_t *dest = nullptr);
+
+    // As above, but also mark the instruction as a branch.
+    BufferOffset writeBranchInst(uint32_t x);
+
     // A static variant for the cases where we don't want to have an assembler
     // object at all. Normally, you would use the dummy (nullptr) object.
     static void writeInstStatic(uint32_t x, uint32_t *dest);
 
   public:
     void writeCodePointer(AbsoluteLabel *label);
 
     BufferOffset align(int alignment);
@@ -1712,16 +1718,19 @@ class Assembler : public AssemblerShared
         return (offset+1)&~1;
     }
     static uint8_t *nextInstruction(uint8_t *instruction, uint32_t *count = nullptr);
     // Toggle a jmp or cmp emitted by toggledJump().
 
     static void ToggleToJmp(CodeLocationLabel inst_);
     static void ToggleToCmp(CodeLocationLabel inst_);
 
+    static uint8_t *BailoutTableStart(uint8_t *code);
+
+    static size_t ToggledCallSize(uint8_t *code);
     static void ToggleCall(CodeLocationLabel inst_, bool enabled);
 
     static void updateBoundsCheck(uint32_t logHeapSize, Instruction *inst);
     void processCodeLabels(uint8_t *rawCode);
     static int32_t extractCodeLabelOffset(uint8_t *code) {
         return *(uintptr_t *)code;
     }
 
@@ -1770,16 +1779,19 @@ class Instruction
     void extractCond(Assembler::Condition *c) {
         if (data >> 28 != 0xf )
             *c = (Assembler::Condition)(data & 0xf0000000);
     }
     // Get the next instruction in the instruction stream.
     // This does neat things like ignore constant pools and their guards.
     Instruction *next();
 
+    // Skipping pools with artificial guards.
+    Instruction *skipPool();
+
     // Sometimes, an api wants a uint32_t (or a pointer to it) rather than
     // an instruction.  raw() just coerces this into a pointer to a uint32_t
     const uint32_t *raw() const { return &data; }
     uint32_t size() const { return 4; }
 }; // Instruction
 
 // make sure that it is the right size
 JS_STATIC_ASSERT(sizeof(Instruction) == 4);
@@ -1815,19 +1827,19 @@ class InstLDR : public InstDTR
     static bool isTHIS(const Instruction &i);
     static InstLDR *asTHIS(const Instruction &i);
 
 };
 JS_STATIC_ASSERT(sizeof(InstDTR) == sizeof(InstLDR));
 
 class InstNOP : public Instruction
 {
+  public:
     static const uint32_t NopInst = 0x0320f000;
 
-  public:
     InstNOP()
       : Instruction(NopInst, Assembler::Always)
     { }
 
     static bool isTHIS(const Instruction &i);
     static InstNOP *asTHIS(Instruction &i);
 };
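
The skipPool() declaration above is the uniform entry point for patchers that must land on the first real instruction. A sketch of the pattern this patch applies in ToggleCall() and ToggledCallSize() (codeLabel is a hypothetical CodeLocationLabel):

    Instruction *inst = (Instruction *)codeLabel.raw();
    // Step over any artificial pool guard and any branch-nop fill.
    inst = inst->skipPool();
    JS_ASSERT(inst->is<InstMovW>() || inst->is<InstLDR>());
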
 
--- a/js/src/jit/arm/Bailouts-arm.cpp
+++ b/js/src/jit/arm/Bailouts-arm.cpp
@@ -2,16 +2,17 @@
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "jscntxt.h"
 #include "jscompartment.h"
 
+#include "jit/arm/Assembler-arm.h"
 #include "jit/Bailouts.h"
 #include "jit/JitCompartment.h"
 
 using namespace js;
 using namespace js::jit;
 
 namespace js {
 namespace jit {
@@ -91,17 +92,17 @@ IonBailoutIterator::IonBailoutIterator(c
         return;
     }
 
     // Compute the snapshot offset from the bailout ID.
     JitActivation *activation = activations.activation()->asJit();
     JSRuntime *rt = activation->compartment()->runtimeFromMainThread();
     JitCode *code = rt->jitRuntime()->getBailoutTable(bailout->frameClass());
     uintptr_t tableOffset = bailout->tableOffset();
-    uintptr_t tableStart = reinterpret_cast<uintptr_t>(code->raw());
+    uintptr_t tableStart = reinterpret_cast<uintptr_t>(Assembler::BailoutTableStart(code->raw()));
 
     JS_ASSERT(tableOffset >= tableStart &&
               tableOffset < tableStart + code->instructionsSize());
     JS_ASSERT((tableOffset - tableStart) % BAILOUT_TABLE_ENTRY_SIZE == 0);
 
     uint32_t bailoutId = ((tableOffset - tableStart) / BAILOUT_TABLE_ENTRY_SIZE) - 1;
     JS_ASSERT(bailoutId < BAILOUT_TABLE_SIZE);
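
BailoutTableStart() is needed here because a pool or nop fill may now precede the table, so code->raw() no longer necessarily points at the first entry. The ID recovery, restated as a sketch (the - 1 follows because tableOffset derives from the 'bl' return address, which points one entry past the entry actually taken):

    uintptr_t tableStart = uintptr_t(Assembler::BailoutTableStart(code->raw()));
    // Entries are consecutive 'bl' instructions, BAILOUT_TABLE_ENTRY_SIZE apart.
    uint32_t bailoutId = ((tableOffset - tableStart) / BAILOUT_TABLE_ENTRY_SIZE) - 1;
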
 
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -193,17 +193,17 @@ CodeGeneratorARM::bailoutIf(Assembler::C
 
     // Though the assembler doesn't track all frame pushes, at least make sure
     // the known value makes sense. We can't use bailout tables if the stack
     // isn't properly aligned to the static frame size.
     JS_ASSERT_IF(frameClass_ != FrameSizeClass::None(),
                  frameClass_.frameSize() == masm.framePushed());
 
     if (assignBailoutId(snapshot)) {
-        uint8_t *code = deoptTable_->raw() + snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE;
+        uint8_t *code = Assembler::BailoutTableStart(deoptTable_->raw()) +
+                        snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE;
         masm.ma_b(code, Relocation::HARDCODED, condition);
         return true;
     }
 
     // We could not use a jump table, either because all bailout IDs were
     // reserved, or a jump table is not optimal for this frame size or
     // platform. Whatever, we will generate a lazy bailout.
     OutOfLineBailout *ool = new(alloc()) OutOfLineBailout(snapshot, masm.framePushed());
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -422,20 +422,17 @@ NextInst(Instruction *i)
 void
 MacroAssemblerARM::ma_movPatchable(Imm32 imm_, Register dest, Assembler::Condition c,
                                    RelocStyle rs, Instruction *i)
 {
     int32_t imm = imm_.value;
     if (i) {
         // Make sure the current instruction is not an artificial guard
         // inserted by the assembler buffer.
-        // The InstructionIterator already does this and handles edge cases,
-        // so, just asking an iterator for its current instruction should be
-        // enough to make sure we don't accidentally inspect an artificial guard.
-        i = InstructionIterator(i).cur();
+        i = i->skipPool();
     }
     switch(rs) {
       case L_MOVWT:
         as_movw(dest, Imm16(imm & 0xffff), c, i);
         // i can be nullptr here.  That just means "insert at the next location in sequence."
         // NextInst is special cased to not do anything when it is passed nullptr, so
         // two consecutive instructions will be inserted.
         i = NextInst(i);
@@ -446,18 +443,18 @@ MacroAssemblerARM::ma_movPatchable(Imm32
             as_Imm32Pool(dest, imm, c);
         else
             as_WritePoolEntry(i, c, imm);
         break;
     }
 }
 
 void
-MacroAssemblerARM::ma_movPatchable(ImmPtr imm, Register dest,
-                                   Assembler::Condition c, RelocStyle rs, Instruction *i)
+MacroAssemblerARM::ma_movPatchable(ImmPtr imm, Register dest, Assembler::Condition c,
+                                   RelocStyle rs, Instruction *i)
 {
     return ma_movPatchable(Imm32(int32_t(imm.value)), dest, c, rs, i);
 }
 
 void
 MacroAssemblerARM::ma_mov(Register src, Register dest,
             SetCond_ sc, Assembler::Condition c)
 {
@@ -4359,25 +4356,23 @@ MacroAssemblerARMCompat::toggledJump(Lab
     CodeOffsetLabel ret(b.getOffset());
     return ret;
 }
 
 CodeOffsetLabel
 MacroAssemblerARMCompat::toggledCall(JitCode *target, bool enabled)
 {
     BufferOffset bo = nextOffset();
-    CodeOffsetLabel offset(bo.getOffset());
     addPendingJump(bo, ImmPtr(target->raw()), Relocation::JITCODE);
     ma_movPatchable(ImmPtr(target->raw()), ScratchRegister, Always, HasMOVWT() ? L_MOVWT : L_LDR);
     if (enabled)
         ma_blx(ScratchRegister);
     else
         ma_nop();
-    JS_ASSERT(nextOffset().getOffset() - offset.offset() == ToggledCallSize());
-    return offset;
+    return CodeOffsetLabel(bo.getOffset());
 }
 
 void
 MacroAssemblerARMCompat::round(FloatRegister input, Register output, Label *bail, FloatRegister tmp)
 {
     Label handleZero;
     Label handleNeg;
     Label fin;
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -674,24 +674,16 @@ class MacroAssemblerARMCompat : public M
     }
 
     CodeOffsetLabel toggledJump(Label *label);
 
     // Emit a BLX or NOP instruction. ToggleCall can be used to patch
     // this instruction.
     CodeOffsetLabel toggledCall(JitCode *target, bool enabled);
 
-    static size_t ToggledCallSize() {
-        if (HasMOVWT())
-            // Size of a movw, movt, nop/blx instruction.
-            return 12;
-        // Size of a ldr, nop/blx instruction
-        return 8;
-    }
-
     CodeOffsetLabel pushWithPatch(ImmWord imm) {
         CodeOffsetLabel label = movWithPatch(imm, ScratchRegister);
         ma_push(ScratchRegister);
         return label;
     }
 
     CodeOffsetLabel movWithPatch(ImmWord imm, Register dest) {
         CodeOffsetLabel label = CodeOffsetLabel(currentOffset());
--- a/js/src/jit/arm/Trampoline-arm.cpp
+++ b/js/src/jit/arm/Trampoline-arm.cpp
@@ -644,20 +644,24 @@ GenerateParallelBailoutThunk(MacroAssemb
     masm.as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(4)));
 }
 
 JitCode *
 JitRuntime::generateBailoutTable(JSContext *cx, uint32_t frameClass)
 {
     MacroAssembler masm(cx);
 
-    Label bailout;
-    for (size_t i = 0; i < BAILOUT_TABLE_SIZE; i++)
-        masm.ma_bl(&bailout);
-    masm.bind(&bailout);
+    {
+        // Emit the table without any pools being inserted.
+        Label bailout;
+        AutoForbidPools afp(&masm);
+        for (size_t i = 0; i < BAILOUT_TABLE_SIZE; i++)
+            masm.ma_bl(&bailout);
+        masm.bind(&bailout);
+    }
 
     GenerateBailoutThunk(cx, masm, frameClass);
 
     Linker linker(masm);
     AutoFlushICache afc("BailoutTable");
     JitCode *code = linker.newCode<NoGC>(cx, JSC::OTHER_CODE);
 
 #ifdef JS_ION_PERF
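
The AutoForbidPools scope is what keeps the table arithmetic in Bailouts-arm.cpp valid: if a pool were dumped between two ma_bl() calls, the entries would no longer sit at a fixed BAILOUT_TABLE_ENTRY_SIZE stride. A sketch of the invariant, assuming AutoForbidPools is an RAII guard over the buffer's no-pool region (as its use here suggests):

    {
        AutoForbidPools afp(&masm);  // no pool may be dumped inside this scope
        for (size_t i = 0; i < BAILOUT_TABLE_SIZE; i++)
            masm.ma_bl(&bailout);    // entry i at tableStart + i * BAILOUT_TABLE_ENTRY_SIZE
        masm.bind(&bailout);
    }
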
--- a/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
+++ b/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
@@ -342,16 +342,26 @@ struct AssemblerBufferWithConstantPool :
     // we need to keep track of how large the pools are, so we can allocate
     // enough space for them later.  This should include any amount of padding
     // necessary to keep the pools aligned.
     int poolSize;
     // The Assembler should set this to true if it does not want us to dump a pool here
     int canNotPlacePool;
     // Are we filling up the forwards or backwards pools?
     bool inBackref;
+
+    // Insert a number of NOP instructions between each requested instruction at all
+    // locations at which a pool can potentially spill. This is useful for checking
+    // that instruction locations are correctly referenced and/or followed.
+    const uint32_t nopFillInst;
+    const uint32_t nopFill;
+    // Inhibit the insertion of fill NOPs while the fill itself is being
+    // emitted, to avoid reentrant fill insertion.
+    bool inhibitNops;
+
     // Cache the last place we saw an opportunity to dump the pool
     BufferOffset perforation;
     BufferSlice *perforatedNode;
   public:
     int id;
   private:
     static const int logBasePoolInfo = 3;
     BufferSlice ** getHead() {
@@ -366,23 +376,26 @@ struct AssemblerBufferWithConstantPool :
         if (!tmp) {
             this->m_oom = true;
             return nullptr;
         }
         new (tmp) BufferSlice;
         return tmp;
     }
   public:
-    AssemblerBufferWithConstantPool(int guardSize_, int headerSize_, int footerSize_, Pool *pools_, int instBufferAlign_)
+    AssemblerBufferWithConstantPool(int guardSize_, int headerSize_, int footerSize_,
+                                    Pool *pools_, int instBufferAlign_,
+                                    uint32_t nopFillInst_, uint32_t nopFill_ = 0)
         : guardSize(guardSize_), headerSize(headerSize_),
           footerSize(footerSize_),
           pools(pools_),
           instBufferAlign(instBufferAlign_), numDumps(0),
           poolInfo(nullptr),
           poolSize(0), canNotPlacePool(0), inBackref(false),
+          nopFillInst(nopFillInst_), nopFill(nopFill_), inhibitNops(false),
           perforatedNode(nullptr), id(-1)
     {
         for (int idx = 0; idx < numPoolKinds; idx++) {
             entryCount[idx] = 0;
         }
     }
 
     // We need to wait until an AutoIonContextAlloc is created by the
@@ -449,19 +462,36 @@ struct AssemblerBufferWithConstantPool :
                 Asm::writePoolFooter(poolDest, cur->data, cur->isNatural);
                 poolDest += footerSize;
                 // at this point, poolDest had better still be aligned to a chunk boundary.
                 dest = (Chunk*) poolDest;
             }
         }
     }
 
-    BufferOffset insertEntry(uint32_t instSize, uint8_t *inst, Pool *p, uint8_t *data, PoolEntry *pe = nullptr) {
+    void insertNopFill() {
+        // Insert fill for testing.
+        if (nopFill > 0 && !inhibitNops && !canNotPlacePool) {
+            inhibitNops = true;
+
+            // Fill using a branch-nop rather than a NOP so this can
+            // be distinguished and skipped.
+            for (uint32_t i = 0; i < nopFill; i++)
+                putInt(nopFillInst);
+
+            inhibitNops = false;
+        }
+    }
+
+    BufferOffset insertEntry(uint32_t instSize, uint8_t *inst, Pool *p, uint8_t *data,
+                             PoolEntry *pe = nullptr, bool markAsBranch = false) {
         if (this->oom() && !this->bail())
             return BufferOffset();
+        insertNopFill();
+
         int token;
         if (p != nullptr) {
             int poolId = p - pools;
             const char sigil = inBackref ? 'B' : 'F';
 
             IonSpew(IonSpew_Pools, "[%d]{%c} Inserting entry into pool %d", id, sigil, poolId);
             IonSpewStart(IonSpew_Pools, "[%d] data is: 0x", id);
             spewEntry(data, p->immSize);
@@ -484,16 +514,18 @@ struct AssemblerBufferWithConstantPool :
             JS_ASSERT(poolId >= 0);
             // Figure out the offset within like-kinded pool entries
             retPE = PoolEntry(entryCount[poolId], poolId);
             entryCount[poolId]++;
         }
         // Now inst is a valid thing to insert into the instruction stream
         if (pe != nullptr)
             *pe = retPE;
+        if (markAsBranch)
+            this->markNextAsBranch();
         return this->putBlob(instSize, inst);
     }
 
     uint32_t insertEntryBackwards(uint32_t instSize, uint8_t *inst, Pool *p, uint8_t *data) {
         // Unlike the forward case, if we insert an instruction without inserting
         // anything into a pool after a pool has been placed, we don't affect
         // anything relevant, so we can skip this check entirely!
 
@@ -587,18 +619,19 @@ struct AssemblerBufferWithConstantPool :
             }
             nextOffset += tmp->immSize * tmp->numEntries;
         }
         if (p == nullptr) {
             return INT_MIN;
         }
         return p->insertEntry(data, this->nextOffset(), this->LifoAlloc_);
     }
-    BufferOffset putInt(uint32_t value) {
-        return insertEntry(sizeof(uint32_t) / sizeof(uint8_t), (uint8_t*)&value, nullptr, nullptr);
+    BufferOffset putInt(uint32_t value, bool markAsBranch = false) {
+        return insertEntry(sizeof(uint32_t) / sizeof(uint8_t), (uint8_t*)&value,
+                           nullptr, nullptr, nullptr, markAsBranch);
     }
     // Mark the current section as an area where we can
     // later go to dump a pool
     void perforate() {
         // If we're filling the backreferences, we don't want to start looking for a new dump site.
         if (inBackref)
             return;
         if (canNotPlacePool)
@@ -867,16 +900,17 @@ struct AssemblerBufferWithConstantPool :
             // If there is no data in the pool being dumped, don't dump anything.
             inBackref = true;
             IonSpew(IonSpew_Pools, "[%d]Abort, no pool data", id);
             return;
         }
 
         IonSpew(IonSpew_Pools, "[%d] Dumping %d bytes", id, newPoolInfo.size);
         if (!perforation.assigned()) {
+            JS_ASSERT(!canNotPlacePool);
             IonSpew(IonSpew_Pools, "[%d] No Perforation point selected, generating a new one", id);
             // There isn't a perforation here; we need to dump the pool with a guard.
             BufferOffset branch = this->nextOffset();
             bool shouldMarkAsBranch = this->isNextBranch();
             this->markNextAsBranch();
             this->putBlob(guardSize, nullptr);
             BufferOffset afterPool = this->nextOffset();
             Asm::writePoolGuard(branch, this->getInst(branch), afterPool);
@@ -991,16 +1025,17 @@ struct AssemblerBufferWithConstantPool :
             return;
         // There is no point in trying to grab a new slot if we've already
         // found one and are in the process of filling it in.
         if (inBackref)
             return;
         perforate();
     }
     void enterNoPool() {
+        insertNopFill();
         if (!canNotPlacePool && !perforation.assigned()) {
             // Embarrassing mode: The Assembler requests the start of a no-pool section
             // and there have been no valid places that a pool could be dumped thus far.
             // If a pool were to fill up before this no-pool section ends, we need to go back
             // in the stream and enter a pool guard after the fact.  This is feasible, but
             // for now, it is easier to just allocate a junk instruction, default it to a nop, and
             // finally, if the pool *is* needed, patch the nop to a pool guard.
             // What the assembler requests:
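
The markAsBranch plumbing in this file is an ordering fix: with nop fill enabled, the old two-step pattern could emit fill between the mark and the branch, so the "next is a branch" mark would label a fill instruction instead. Passing the flag through insertEntry()/putInt() lets the buffer fill first and then mark the real branch. A sketch with placeholder arguments (inst, pool, data, pe):

    // Old pattern, wrong once insertNopFill() can run inside insertEntry():
    //   m_buffer.markNextAsBranch();                    // marks the next offset...
    //   m_buffer.insertEntry(4, inst, pool, data, pe);  // ...but fill may land there first
    // New pattern: fill, then mark, then emit, all inside insertEntry():
    m_buffer.insertEntry(4, inst, pool, data, pe, /* markAsBranch = */ true);
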
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -702,21 +702,21 @@ class Assembler : public AssemblerX86Sha
     }
 
     // Emit a CALL or CMP (nop) instruction. ToggleCall can be used to patch
     // this instruction.
     CodeOffsetLabel toggledCall(JitCode *target, bool enabled) {
         CodeOffsetLabel offset(size());
         JmpSrc src = enabled ? masm.call() : masm.cmp_eax();
         addPendingJump(src, ImmPtr(target->raw()), Relocation::JITCODE);
-        JS_ASSERT(size() - offset.offset() == ToggledCallSize());
+        JS_ASSERT(size() - offset.offset() == ToggledCallSize(nullptr));
         return offset;
     }
 
-    static size_t ToggledCallSize() {
+    static size_t ToggledCallSize(uint8_t *code) {
         // Size of a call instruction.
         return 5;
     }
 
     // Do not mask shared implementations.
     using AssemblerX86Shared::call;
 
     void cvttsd2sq(FloatRegister src, Register dest) {
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -391,21 +391,21 @@ class Assembler : public AssemblerX86Sha
     }
 
     // Emit a CALL or CMP (nop) instruction. ToggleCall can be used to patch
     // this instruction.
     CodeOffsetLabel toggledCall(JitCode *target, bool enabled) {
         CodeOffsetLabel offset(size());
         JmpSrc src = enabled ? masm.call() : masm.cmp_eax();
         addPendingJump(src, ImmPtr(target->raw()), Relocation::JITCODE);
-        JS_ASSERT(size() - offset.offset() == ToggledCallSize());
+        JS_ASSERT(size() - offset.offset() == ToggledCallSize(nullptr));
         return offset;
     }
 
-    static size_t ToggledCallSize() {
+    static size_t ToggledCallSize(uint8_t *code) {
         // Size of a call instruction.
         return 5;
     }
 
     // Re-routes pending jumps to an external target, flushing the label in the
     // process.
     void retarget(Label *label, ImmPtr target, Relocation::Kind reloc) {
         if (label->used()) {
--- a/js/src/shell/js.cpp
+++ b/js/src/shell/js.cpp
@@ -6093,16 +6093,20 @@ SetRuntimeOptions(JSRuntime *rt, const O
         return false;
     }
 
 #endif // JS_ION
 
 #if defined(JS_CODEGEN_ARM)
     if (const char *str = op.getStringOption("arm-hwcap"))
         jit::ParseARMHwCapFlags(str);
+
+    int32_t fill = op.getIntOption("arm-asm-nop-fill");
+    if (fill >= 0)
+        jit::Assembler::NopFill = fill;
 #endif
 
 #if defined(JS_ARM_SIMULATOR)
     if (op.getBoolOption("arm-sim-icache-checks"))
         jit::Simulator::ICacheCheckingEnabled = true;
 
     int32_t stopAt = op.getIntOption("arm-sim-stop-at");
     if (stopAt >= 0)
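
The option handling above mirrors the ARM_ASM_NOP_FILL environment variable read by GetNopFill(). A hedged invocation sketch (test.js hypothetical; exact flag syntax assumed to follow the shell's usual --option=value form):

    // Stress pool placement by inserting one fill instruction at every
    // potential pool location:
    //   js --arm-asm-nop-fill=1 --ion-eager test.js
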
@@ -6317,16 +6321,18 @@ main(int argc, char **argv, char **envp)
 #ifdef JSGC_GENERATIONAL
         || !op.addBoolOption('\0', "no-ggc", "Disable Generational GC")
 #endif
         || !op.addIntOption('\0', "available-memory", "SIZE",
                             "Select GC settings based on available memory (MB)", 0)
 #if defined(JS_CODEGEN_ARM)
         || !op.addStringOption('\0', "arm-hwcap", "[features]",
                                "Specify ARM code generation features, or 'help' to list all features.")
+        || !op.addIntOption('\0', "arm-asm-nop-fill", "SIZE",
+                            "Insert the given number of NOP instructions at all possible pool locations.", 0)
 #endif
 #if defined(JS_ARM_SIMULATOR)
         || !op.addBoolOption('\0', "arm-sim-icache-checks", "Enable icache flush checks in the ARM "
                              "simulator.")
         || !op.addIntOption('\0', "arm-sim-stop-at", "NUMBER", "Stop the ARM simulator after the given "
                             "NUMBER of instructions.", -1)
 #elif defined(JS_MIPS_SIMULATOR)
 	|| !op.addBoolOption('\0', "mips-sim-icache-checks", "Enable icache flush checks in the MIPS "