Bug 1333446 - ARM assembler: fast paths for putting simple instructions. r=nbp
☠☠ backed out by 2aac3fdd0038 ☠ ☠
authorLars T Hansen <lhansen@mozilla.com>
Thu, 02 Mar 2017 09:40:38 +0100
changeset 394580 7453899cfe444fbc04fb034b3365abaf5eda6af5
parent 394579 772de2146034c098b70024da6a7c74b941fc2ead
child 394581 757e96ecffc2d17406d7b66afe5368a801a412ec
push id1468
push userasasaki@mozilla.com
push dateMon, 05 Jun 2017 19:31:07 +0000
treeherdermozilla-release@0641fc6ee9d1 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnbp
bugs1333446
milestone54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1333446 - ARM assembler: fast paths for putting simple instructions. r=nbp
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
js/src/jit/shared/IonAssemblerBuffer.h
js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -1668,44 +1668,23 @@ Assembler::SpewNodes::remove(uint32_t ke
             return true;
         }
     }
     return false;
 }
 
 #endif // JS_DISASM_ARM
 
-// Write a blob of binary into the instruction stream.
-BufferOffset
-Assembler::writeInst(uint32_t x)
-{
-    BufferOffset offs = m_buffer.putInt(x);
-#ifdef JS_DISASM_ARM
-    spew(m_buffer.getInstOrNull(offs));
-#endif
-    return offs;
-}
-
-BufferOffset
-Assembler::writeBranchInst(uint32_t x, Label* documentation)
-{
-    BufferOffset offs = m_buffer.putInt(x, /* markAsBranch = */ true);
-#ifdef JS_DISASM_ARM
-    spewBranch(m_buffer.getInstOrNull(offs), documentation);
-#endif
-    return offs;
-}
-
 // Allocate memory for a branch instruction, it will be overwritten
 // subsequently and should not be disassembled.
 
 BufferOffset
 Assembler::allocBranchInst()
 {
-    return m_buffer.putInt(Always | InstNOP::NopInst, /* markAsBranch = */ true);
+    return m_buffer.putInt(Always | InstNOP::NopInst);
 }
 
 void
 Assembler::WriteInstStatic(uint32_t x, uint32_t* dest)
 {
     MOZ_ASSERT(dest != nullptr);
     *dest = x;
 }
@@ -2149,44 +2128,38 @@ Assembler::as_extdtr(LoadStore ls, int s
 
 BufferOffset
 Assembler::as_dtm(LoadStore ls, Register rn, uint32_t mask,
                 DTMMode mode, DTMWriteBack wb, Condition c)
 {
     return writeInst(0x08000000 | RN(rn) | ls | mode | mask | c | wb);
 }
 
-// Note, it's possible for markAsBranch and loadToPC to disagree,
-// because some loads to the PC are not necessarily encoding
-// instructions that should be marked as branches: only patchable
-// near branch instructions should be marked.
-
 BufferOffset
 Assembler::allocEntry(size_t numInst, unsigned numPoolEntries,
                       uint8_t* inst, uint8_t* data, ARMBuffer::PoolEntry* pe,
-                      bool markAsBranch, bool loadToPC)
+                      bool loadToPC)
 {
-    BufferOffset offs = m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe, markAsBranch);
+    BufferOffset offs = m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe);
     propagateOOM(offs.assigned());
 #ifdef JS_DISASM_ARM
     spewData(offs, numInst, loadToPC);
 #endif
     return offs;
 }
 
 // This is also used for instructions that might be resolved into branches,
 // or might not.  If dest==pc then it is effectively a branch.
 
 BufferOffset
 Assembler::as_Imm32Pool(Register dest, uint32_t value, Condition c)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::PoolDTR, dest);
-    BufferOffset offs = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, nullptr, false,
-                                   dest == pc);
+    BufferOffset offs = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, nullptr, dest == pc);
     return offs;
 }
 
 /* static */ void
 Assembler::WritePoolEntry(Instruction* addr, Condition c, uint32_t data)
 {
     MOZ_ASSERT(addr->is<InstLDR>());
     *addr->as<InstLDR>()->dest() = data;
@@ -2195,17 +2168,17 @@ Assembler::WritePoolEntry(Instruction* a
 
 BufferOffset
 Assembler::as_BranchPool(uint32_t value, RepatchLabel* label, ARMBuffer::PoolEntry* pe, Condition c,
                          Label* documentation)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::PoolBranch, pc);
     BufferOffset ret = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, pe,
-                                  /* markAsBranch = */ true, /* loadToPC = */ true);
+                                  /* loadToPC = */ true);
     // If this label is already bound, then immediately replace the stub load
     // with a correct branch.
     if (label->bound()) {
         BufferOffset dest(label);
         BOffImm offset = dest.diffB<BOffImm>(ret);
         if (offset.isInvalid()) {
             m_buffer.fail_bail();
             return ret;
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -1232,17 +1232,17 @@ class Assembler : public AssemblerShared
     BufferOffset nextOffset() {
         return m_buffer.nextOffset();
     }
 
   protected:
     // Shim around AssemblerBufferWithConstantPools::allocEntry.
     BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                             uint8_t* inst, uint8_t* data, ARMBuffer::PoolEntry* pe = nullptr,
-                            bool markAsBranch = false, bool loadToPC = false);
+                            bool loadToPC = false);
 
     Instruction* editSrc(BufferOffset bo) {
         return m_buffer.getInst(bo);
     }
 
 #ifdef JS_DISASM_ARM
     static void spewInst(Instruction* i);
     void spew(Instruction* i);
@@ -1422,22 +1422,35 @@ class Assembler : public AssemblerShared
     // Size of the jump relocation table, in bytes.
     size_t jumpRelocationTableBytes() const;
     size_t dataRelocationTableBytes() const;
     size_t preBarrierTableBytes() const;
 
     // Size of the data table, in bytes.
     size_t bytesNeeded() const;
 
-    // Write a blob of binary into the instruction stream *OR* into a
-    // destination address.
-    BufferOffset writeInst(uint32_t x);
+    // Write a single instruction into the instruction stream.  Very hot,
+    // inlined for performance
+    MOZ_ALWAYS_INLINE BufferOffset writeInst(uint32_t x) {
+        BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+        spew(m_buffer.getInstOrNull(offs));
+#endif
+        return offs;
+    }
 
-    // As above, but also mark the instruction as a branch.
-    BufferOffset writeBranchInst(uint32_t x, Label* documentation = nullptr);
+    // As above, but also mark the instruction as a branch.  Very hot, inlined
+    // for performance
+    MOZ_ALWAYS_INLINE BufferOffset writeBranchInst(uint32_t x, Label* documentation = nullptr) {
+        BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+        spewBranch(m_buffer.getInstOrNull(offs), documentation);
+#endif
+        return offs;
+    }
 
     // Write a placeholder NOP for a branch into the instruction stream
     // (in order to adjust assembler addresses and mark it as a branch), it will
     // be overwritten subsequently.
     BufferOffset allocBranchInst();
 
     // A static variant for the cases where we don't want to have an assembler
     // object.
--- a/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
+++ b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
@@ -99,29 +99,30 @@ class MozBaseAssembler : public js::jit:
   BufferOffset nextOffset() const {
     return armbuffer_.nextOffset();
   }
 
   // Allocate memory in the buffer by forwarding to armbuffer_.
   // Propagate OOM errors.
   BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                           uint8_t* inst, uint8_t* data,
-                          ARMBuffer::PoolEntry* pe = nullptr,
-                          bool markAsBranch = false)
+                          ARMBuffer::PoolEntry* pe = nullptr)
   {
     BufferOffset offset = armbuffer_.allocEntry(numInst, numPoolEntries, inst,
-                                                data, pe, markAsBranch);
+                                                data, pe);
     propagateOOM(offset.assigned());
     return offset;
   }
 
   // Emit the instruction, returning its offset.
   BufferOffset Emit(Instr instruction, bool isBranch = false) {
     JS_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
-    return armbuffer_.putInt(*(uint32_t*)(&instruction), isBranch);
+    // TODO: isBranch is obsolete and should be removed.
+    (void)isBranch;
+    return armbuffer_.putInt(*(uint32_t*)(&instruction));
   }
 
   BufferOffset EmitBranch(Instr instruction) {
     return Emit(instruction, true);
   }
 
  public:
   // Emit the instruction at |at|.
--- a/js/src/jit/shared/IonAssemblerBuffer.h
+++ b/js/src/jit/shared/IonAssemblerBuffer.h
@@ -130,16 +130,25 @@ class BufferSlice
     }
 
     void putBytes(size_t numBytes, const void* source) {
         MOZ_ASSERT(bytelength_ + numBytes <= SliceSize);
         if (source)
             memcpy(&instructions[length()], source, numBytes);
         bytelength_ += numBytes;
     }
+
+    MOZ_ALWAYS_INLINE
+    void putU32Aligned(uint32_t value) {
+        MOZ_ASSERT(bytelength_ + 4 <= SliceSize);
+        MOZ_ASSERT((bytelength_ & 3) == 0);
+        MOZ_ASSERT((uintptr_t(&instructions[0]) & 3) == 0);
+        *reinterpret_cast<uint32_t*>(&instructions[bytelength_]) = value;
+        bytelength_ += 4;
+    }
 };
 
 template<int SliceSize, class Inst>
 class AssemblerBuffer
 {
   protected:
     typedef BufferSlice<SliceSize> Slice;
     typedef AssemblerBuffer<SliceSize, Inst> AssemblerBuffer_;
@@ -229,16 +238,26 @@ class AssemblerBuffer
     BufferOffset putShort(uint16_t value) {
         return putBytes(sizeof(value), &value);
     }
 
     BufferOffset putInt(uint32_t value) {
         return putBytes(sizeof(value), &value);
     }
 
+    MOZ_ALWAYS_INLINE
+    BufferOffset putU32Aligned(uint32_t value) {
+        if (!ensureSpace(sizeof(value)))
+            return BufferOffset();
+
+        BufferOffset ret = nextOffset();
+        tail->putU32Aligned(value);
+        return ret;
+    }
+
     // Add numBytes bytes to this buffer.
     // The data must fit in a single slice.
     BufferOffset putBytes(size_t numBytes, const void* inst) {
         if (!ensureSpace(numBytes))
             return BufferOffset();
 
         BufferOffset ret = nextOffset();
         tail->putBytes(numBytes, inst);
@@ -352,17 +371,18 @@ class AssemblerBuffer
             return nullptr;
         return getInst(off);
     }
 
     // Get a pointer to the instruction at offset |off| which must be within the
     // bounds of the buffer. Use |getInstOrNull()| if |off| may be unassigned.
     Inst* getInst(BufferOffset off) {
         const int offset = off.getOffset();
-        MOZ_RELEASE_ASSERT(off.assigned() && offset >= 0 && (unsigned)offset < size());
+        // This function is hot, do not make the next line a RELEASE_ASSERT.
+        MOZ_ASSERT(off.assigned() && offset >= 0 && unsigned(offset) < size());
 
         // Is the instruction in the last slice?
         if (offset >= int(bufferSize))
             return (Inst*)&tail->instructions[offset - bufferSize];
 
         // How close is this offset to the previous one we looked up?
         // If it is sufficiently far from the start and end of the buffer,
         // use the finger to start midway through the list.
--- a/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
+++ b/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
@@ -824,19 +824,19 @@ struct AssemblerBufferWithConstantPools 
         if (!hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0)) {
             JitSpew(JitSpew_Pools, "[%d] nextInstrOffset @ %d caused a constant pool spill", id,
                     this->nextOffset().getOffset());
             finishPool();
         }
         return this->nextOffset();
     }
 
+    MOZ_NEVER_INLINE
     BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
-                            uint8_t* inst, uint8_t* data, PoolEntry* pe = nullptr,
-                            bool markAsBranch = false)
+                            uint8_t* inst, uint8_t* data, PoolEntry* pe = nullptr)
     {
         // The allocation of pool entries is not supported in a no-pool region,
         // check.
         MOZ_ASSERT_IF(numPoolEntries, !canNotPlacePool_);
 
         if (this->oom() && !this->bail())
             return BufferOffset();
 
@@ -872,18 +872,44 @@ struct AssemblerBufferWithConstantPools 
             poolEntryCount += numPoolEntries;
         }
         // Now inst is a valid thing to insert into the instruction stream.
         if (pe != nullptr)
             *pe = retPE;
         return this->putBytes(numInst * InstSize, inst);
     }
 
-    BufferOffset putInt(uint32_t value, bool markAsBranch = false) {
-        return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr, markAsBranch);
+
+    // putInt is the workhorse for the assembler and higher-level buffer
+    // abstractions: it places one instruction into the instruction stream.
+    // Under normal circumstances putInt should just check that the constant
+    // pool does not need to be flushed, that there is space for the single word
+    // of the instruction, and write that word and update the buffer pointer.
+    //
+    // To do better here we need a status variable that handles both nopFill_
+    // and capacity, so that we can quickly know whether to go the slow path.
+    // That could be a variable that has the remaining number of simple
+    // instructions that can be inserted before a more expensive check,
+    // which is set to zero when nopFill_ is set.
+    //
+    // We assume that we don't have to check this->oom() if there is space to
+    // insert a plain instruction; there will always come a later time when it will be
+    // checked anyway.
+
+    MOZ_ALWAYS_INLINE
+    BufferOffset putInt(uint32_t value) {
+        if (nopFill_ || !hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0))
+            return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr);
+
+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
+    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+        return this->putU32Aligned(value);
+#else
+        return this->AssemblerBuffer<SliceSize, Inst>::putInt(value);
+#endif
     }
 
     // Register a short-range branch deadline.
     //
     // After inserting a short-range forward branch, call this method to
     // register the branch 'deadline' which is the last buffer offset that the
     // branch instruction can reach.
     //