Bug 1333447 - ARM assembler: fast paths for putting simple instructions. r=nbp
authorLars T Hansen <lhansen@mozilla.com>
Thu, 02 Mar 2017 12:50:45 +0100
changeset 394587 bcd5db7ac471cad3370ab332e3425c3cbac9bf15
parent 394586 bb9d82f04cf680ef8155f62128469f83a1a32e22
child 394588 f8097ba36f5ed1ca50ed58d897249866c7958c0b
push id1468
push userasasaki@mozilla.com
push dateMon, 05 Jun 2017 19:31:07 +0000
treeherdermozilla-release@0641fc6ee9d1 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnbp
bugs1333447
milestone54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1333447 - ARM assembler: fast paths for putting simple instructions. r=nbp
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
js/src/jit/shared/IonAssemblerBuffer.h
js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -1668,44 +1668,23 @@ Assembler::SpewNodes::remove(uint32_t ke
             return true;
         }
     }
     return false;
 }
 
 #endif // JS_DISASM_ARM
 
-// Write a blob of binary into the instruction stream.
-BufferOffset
-Assembler::writeInst(uint32_t x)
-{
-    BufferOffset offs = m_buffer.putInt(x);
-#ifdef JS_DISASM_ARM
-    spew(m_buffer.getInstOrNull(offs));
-#endif
-    return offs;
-}
-
-BufferOffset
-Assembler::writeBranchInst(uint32_t x, Label* documentation)
-{
-    BufferOffset offs = m_buffer.putInt(x, /* markAsBranch = */ true);
-#ifdef JS_DISASM_ARM
-    spewBranch(m_buffer.getInstOrNull(offs), documentation);
-#endif
-    return offs;
-}
-
 // Allocate memory for a branch instruction, it will be overwritten
 // subsequently and should not be disassembled.
 
 BufferOffset
 Assembler::allocBranchInst()
 {
-    return m_buffer.putInt(Always | InstNOP::NopInst, /* markAsBranch = */ true);
+    return m_buffer.putInt(Always | InstNOP::NopInst);
 }
 
 void
 Assembler::WriteInstStatic(uint32_t x, uint32_t* dest)
 {
     MOZ_ASSERT(dest != nullptr);
     *dest = x;
 }
@@ -2149,44 +2128,38 @@ Assembler::as_extdtr(LoadStore ls, int s
 
 BufferOffset
 Assembler::as_dtm(LoadStore ls, Register rn, uint32_t mask,
                 DTMMode mode, DTMWriteBack wb, Condition c)
 {
     return writeInst(0x08000000 | RN(rn) | ls | mode | mask | c | wb);
 }
 
-// Note, it's possible for markAsBranch and loadToPC to disagree,
-// because some loads to the PC are not necessarily encoding
-// instructions that should be marked as branches: only patchable
-// near branch instructions should be marked.
-
 BufferOffset
 Assembler::allocEntry(size_t numInst, unsigned numPoolEntries,
                       uint8_t* inst, uint8_t* data, ARMBuffer::PoolEntry* pe,
-                      bool markAsBranch, bool loadToPC)
+                      bool loadToPC)
 {
-    BufferOffset offs = m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe, markAsBranch);
+    BufferOffset offs = m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe);
     propagateOOM(offs.assigned());
 #ifdef JS_DISASM_ARM
     spewData(offs, numInst, loadToPC);
 #endif
     return offs;
 }
 
 // This is also used for instructions that might be resolved into branches,
 // or might not.  If dest==pc then it is effectively a branch.
 
 BufferOffset
 Assembler::as_Imm32Pool(Register dest, uint32_t value, Condition c)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::PoolDTR, dest);
-    BufferOffset offs = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, nullptr, false,
-                                   dest == pc);
+    BufferOffset offs = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, nullptr, dest == pc);
     return offs;
 }
 
 /* static */ void
 Assembler::WritePoolEntry(Instruction* addr, Condition c, uint32_t data)
 {
     MOZ_ASSERT(addr->is<InstLDR>());
     *addr->as<InstLDR>()->dest() = data;
@@ -2195,17 +2168,17 @@ Assembler::WritePoolEntry(Instruction* a
 
 BufferOffset
 Assembler::as_BranchPool(uint32_t value, RepatchLabel* label, ARMBuffer::PoolEntry* pe, Condition c,
                          Label* documentation)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::PoolBranch, pc);
     BufferOffset ret = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, pe,
-                                  /* markAsBranch = */ true, /* loadToPC = */ true);
+                                  /* loadToPC = */ true);
     // If this label is already bound, then immediately replace the stub load
     // with a correct branch.
     if (label->bound()) {
         BufferOffset dest(label);
         BOffImm offset = dest.diffB<BOffImm>(ret);
         if (offset.isInvalid()) {
             m_buffer.fail_bail();
             return ret;
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -1232,17 +1232,17 @@ class Assembler : public AssemblerShared
     BufferOffset nextOffset() {
         return m_buffer.nextOffset();
     }
 
   protected:
     // Shim around AssemblerBufferWithConstantPools::allocEntry.
     BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                             uint8_t* inst, uint8_t* data, ARMBuffer::PoolEntry* pe = nullptr,
-                            bool markAsBranch = false, bool loadToPC = false);
+                            bool loadToPC = false);
 
     Instruction* editSrc(BufferOffset bo) {
         return m_buffer.getInst(bo);
     }
 
 #ifdef JS_DISASM_ARM
     static void spewInst(Instruction* i);
     void spew(Instruction* i);
@@ -1422,22 +1422,35 @@ class Assembler : public AssemblerShared
     // Size of the jump relocation table, in bytes.
     size_t jumpRelocationTableBytes() const;
     size_t dataRelocationTableBytes() const;
     size_t preBarrierTableBytes() const;
 
     // Size of the data table, in bytes.
     size_t bytesNeeded() const;
 
-    // Write a blob of binary into the instruction stream *OR* into a
-    // destination address.
-    BufferOffset writeInst(uint32_t x);
+    // Write a single instruction into the instruction stream.  Very hot,
+    // inlined for performance
+    MOZ_ALWAYS_INLINE BufferOffset writeInst(uint32_t x) {
+        BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+        spew(m_buffer.getInstOrNull(offs));
+#endif
+        return offs;
+    }
 
-    // As above, but also mark the instruction as a branch.
-    BufferOffset writeBranchInst(uint32_t x, Label* documentation = nullptr);
+    // As above, but also mark the instruction as a branch.  Very hot, inlined
+    // for performance
+    MOZ_ALWAYS_INLINE BufferOffset writeBranchInst(uint32_t x, Label* documentation = nullptr) {
+        BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+        spewBranch(m_buffer.getInstOrNull(offs), documentation);
+#endif
+        return offs;
+    }
 
     // Write a placeholder NOP for a branch into the instruction stream
     // (in order to adjust assembler addresses and mark it as a branch), it will
     // be overwritten subsequently.
     BufferOffset allocBranchInst();
 
     // A static variant for the cases where we don't want to have an assembler
     // object.
--- a/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
+++ b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
@@ -99,29 +99,30 @@ class MozBaseAssembler : public js::jit:
   BufferOffset nextOffset() const {
     return armbuffer_.nextOffset();
   }
 
   // Allocate memory in the buffer by forwarding to armbuffer_.
   // Propagate OOM errors.
   BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                           uint8_t* inst, uint8_t* data,
-                          ARMBuffer::PoolEntry* pe = nullptr,
-                          bool markAsBranch = false)
+                          ARMBuffer::PoolEntry* pe = nullptr)
   {
     BufferOffset offset = armbuffer_.allocEntry(numInst, numPoolEntries, inst,
-                                                data, pe, markAsBranch);
+                                                data, pe);
     propagateOOM(offset.assigned());
     return offset;
   }
 
   // Emit the instruction, returning its offset.
   BufferOffset Emit(Instr instruction, bool isBranch = false) {
     JS_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
-    return armbuffer_.putInt(*(uint32_t*)(&instruction), isBranch);
+    // TODO: isBranch is obsolete and should be removed.
+    (void)isBranch;
+    return armbuffer_.putInt(*(uint32_t*)(&instruction));
   }
 
   BufferOffset EmitBranch(Instr instruction) {
     return Emit(instruction, true);
   }
 
  public:
   // Emit the instruction at |at|.
--- a/js/src/jit/shared/IonAssemblerBuffer.h
+++ b/js/src/jit/shared/IonAssemblerBuffer.h
@@ -130,16 +130,25 @@ class BufferSlice
     }
 
     void putBytes(size_t numBytes, const void* source) {
         MOZ_ASSERT(bytelength_ + numBytes <= SliceSize);
         if (source)
             memcpy(&instructions[length()], source, numBytes);
         bytelength_ += numBytes;
     }
+
+    MOZ_ALWAYS_INLINE
+    void putU32Aligned(uint32_t value) {
+        MOZ_ASSERT(bytelength_ + 4 <= SliceSize);
+        MOZ_ASSERT((bytelength_ & 3) == 0);
+        MOZ_ASSERT((uintptr_t(&instructions[0]) & 3) == 0);
+        *reinterpret_cast<uint32_t*>(&instructions[bytelength_]) = value;
+        bytelength_ += 4;
+    }
 };
 
 template<int SliceSize, class Inst>
 class AssemblerBuffer
 {
   protected:
     typedef BufferSlice<SliceSize> Slice;
     typedef AssemblerBuffer<SliceSize, Inst> AssemblerBuffer_;
@@ -229,16 +238,26 @@ class AssemblerBuffer
     BufferOffset putShort(uint16_t value) {
         return putBytes(sizeof(value), &value);
     }
 
     BufferOffset putInt(uint32_t value) {
         return putBytes(sizeof(value), &value);
     }
 
+    MOZ_ALWAYS_INLINE
+    BufferOffset putU32Aligned(uint32_t value) {
+        if (!ensureSpace(sizeof(value)))
+            return BufferOffset();
+
+        BufferOffset ret = nextOffset();
+        tail->putU32Aligned(value);
+        return ret;
+    }
+
     // Add numBytes bytes to this buffer.
     // The data must fit in a single slice.
     BufferOffset putBytes(size_t numBytes, const void* inst) {
         if (!ensureSpace(numBytes))
             return BufferOffset();
 
         BufferOffset ret = nextOffset();
         tail->putBytes(numBytes, inst);
@@ -352,17 +371,18 @@ class AssemblerBuffer
             return nullptr;
         return getInst(off);
     }
 
     // Get a pointer to the instruction at offset |off| which must be within the
     // bounds of the buffer. Use |getInstOrNull()| if |off| may be unassigned.
     Inst* getInst(BufferOffset off) {
         const int offset = off.getOffset();
-        MOZ_RELEASE_ASSERT(off.assigned() && offset >= 0 && (unsigned)offset < size());
+        // This function is hot, do not make the next line a RELEASE_ASSERT.
+        MOZ_ASSERT(off.assigned() && offset >= 0 && unsigned(offset) < size());
 
         // Is the instruction in the last slice?
         if (offset >= int(bufferSize))
             return (Inst*)&tail->instructions[offset - bufferSize];
 
         // How close is this offset to the previous one we looked up?
         // If it is sufficiently far from the start and end of the buffer,
         // use the finger to start midway through the list.
--- a/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
+++ b/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
@@ -824,19 +824,19 @@ struct AssemblerBufferWithConstantPools 
         if (!hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0)) {
             JitSpew(JitSpew_Pools, "[%d] nextInstrOffset @ %d caused a constant pool spill", id,
                     this->nextOffset().getOffset());
             finishPool();
         }
         return this->nextOffset();
     }
 
+    MOZ_NEVER_INLINE
     BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
-                            uint8_t* inst, uint8_t* data, PoolEntry* pe = nullptr,
-                            bool markAsBranch = false)
+                            uint8_t* inst, uint8_t* data, PoolEntry* pe = nullptr)
     {
         // The allocation of pool entries is not supported in a no-pool region,
         // check.
         MOZ_ASSERT_IF(numPoolEntries, !canNotPlacePool_);
 
         if (this->oom() && !this->bail())
             return BufferOffset();
 
@@ -872,18 +872,44 @@ struct AssemblerBufferWithConstantPools 
             poolEntryCount += numPoolEntries;
         }
         // Now inst is a valid thing to insert into the instruction stream.
         if (pe != nullptr)
             *pe = retPE;
         return this->putBytes(numInst * InstSize, inst);
     }
 
-    BufferOffset putInt(uint32_t value, bool markAsBranch = false) {
-        return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr, markAsBranch);
+
+    // putInt is the workhorse for the assembler and higher-level buffer
+    // abstractions: it places one instruction into the instruction stream.
+    // Under normal circumstances putInt should just check that the constant
+    // pool does not need to be flushed, that there is space for the single word
+    // of the instruction, and write that word and update the buffer pointer.
+    //
+    // To do better here we need a status variable that handles both nopFill_
+    // and capacity, so that we can quickly know whether to go the slow path.
+    // That could be a variable that has the remaining number of simple
+    // instructions that can be inserted before a more expensive check,
+    // which is set to zero when nopFill_ is set.
+    //
+    // We assume that we don't have to check this->oom() if there is space to
+    // insert a plain instruction; there will always come a later time when it will be
+    // checked anyway.
+
+    MOZ_ALWAYS_INLINE
+    BufferOffset putInt(uint32_t value) {
+        if (nopFill_ || !hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0))
+            return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr);
+
+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
+    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+        return this->putU32Aligned(value);
+#else
+        return this->AssemblerBuffer<SliceSize, Inst>::putInt(value);
+#endif
     }
 
     // Register a short-range branch deadline.
     //
     // After inserting a short-range forward branch, call this method to
     // register the branch 'deadline' which is the last buffer offset that the
     // branch instruction can reach.
     //