Bug 1333447 - ARM assembler: fast paths for putting simple instructions. r=nbp
authorLars T Hansen <lhansen@mozilla.com>
Thu, 02 Mar 2017 12:50:45 +0100
changeset 374637 bcd5db7ac471cad3370ab332e3425c3cbac9bf15
parent 374636 bb9d82f04cf680ef8155f62128469f83a1a32e22
child 374638 f8097ba36f5ed1ca50ed58d897249866c7958c0b
push id10863
push userjlorenzo@mozilla.com
push dateMon, 06 Mar 2017 23:02:23 +0000
treeherdermozilla-aurora@0931190cd725 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnbp
bugs1333447
milestone54.0a1
Bug 1333447 - ARM assembler: fast paths for putting simple instructions. r=nbp
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
js/src/jit/shared/IonAssemblerBuffer.h
js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -1668,44 +1668,23 @@ Assembler::SpewNodes::remove(uint32_t ke
             return true;
         }
     }
     return false;
 }
 
 #endif // JS_DISASM_ARM
 
-// Write a blob of binary into the instruction stream.
-BufferOffset
-Assembler::writeInst(uint32_t x)
-{
-    BufferOffset offs = m_buffer.putInt(x);
-#ifdef JS_DISASM_ARM
-    spew(m_buffer.getInstOrNull(offs));
-#endif
-    return offs;
-}
-
-BufferOffset
-Assembler::writeBranchInst(uint32_t x, Label* documentation)
-{
-    BufferOffset offs = m_buffer.putInt(x, /* markAsBranch = */ true);
-#ifdef JS_DISASM_ARM
-    spewBranch(m_buffer.getInstOrNull(offs), documentation);
-#endif
-    return offs;
-}
-
 // Allocate memory for a branch instruction, it will be overwritten
 // subsequently and should not be disassembled.
 
 BufferOffset
 Assembler::allocBranchInst()
 {
-    return m_buffer.putInt(Always | InstNOP::NopInst, /* markAsBranch = */ true);
+    return m_buffer.putInt(Always | InstNOP::NopInst);
 }
 
 void
 Assembler::WriteInstStatic(uint32_t x, uint32_t* dest)
 {
     MOZ_ASSERT(dest != nullptr);
     *dest = x;
 }
@@ -2149,44 +2128,38 @@ Assembler::as_extdtr(LoadStore ls, int s
 
 BufferOffset
 Assembler::as_dtm(LoadStore ls, Register rn, uint32_t mask,
                 DTMMode mode, DTMWriteBack wb, Condition c)
 {
     return writeInst(0x08000000 | RN(rn) | ls | mode | mask | c | wb);
 }
 
-// Note, it's possible for markAsBranch and loadToPC to disagree,
-// because some loads to the PC are not necessarily encoding
-// instructions that should be marked as branches: only patchable
-// near branch instructions should be marked.
-
 BufferOffset
 Assembler::allocEntry(size_t numInst, unsigned numPoolEntries,
                       uint8_t* inst, uint8_t* data, ARMBuffer::PoolEntry* pe,
-                      bool markAsBranch, bool loadToPC)
+                      bool loadToPC)
 {
-    BufferOffset offs = m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe, markAsBranch);
+    BufferOffset offs = m_buffer.allocEntry(numInst, numPoolEntries, inst, data, pe);
     propagateOOM(offs.assigned());
 #ifdef JS_DISASM_ARM
     spewData(offs, numInst, loadToPC);
 #endif
     return offs;
 }
 
 // This is also used for instructions that might be resolved into branches,
 // or might not.  If dest==pc then it is effectively a branch.
 
 BufferOffset
 Assembler::as_Imm32Pool(Register dest, uint32_t value, Condition c)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::PoolDTR, dest);
-    BufferOffset offs = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, nullptr, false,
-                                   dest == pc);
+    BufferOffset offs = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, nullptr, dest == pc);
     return offs;
 }
 
 /* static */ void
 Assembler::WritePoolEntry(Instruction* addr, Condition c, uint32_t data)
 {
     MOZ_ASSERT(addr->is<InstLDR>());
     *addr->as<InstLDR>()->dest() = data;
@@ -2195,17 +2168,17 @@ Assembler::WritePoolEntry(Instruction* a
 
 BufferOffset
 Assembler::as_BranchPool(uint32_t value, RepatchLabel* label, ARMBuffer::PoolEntry* pe, Condition c,
                          Label* documentation)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::PoolBranch, pc);
     BufferOffset ret = allocEntry(1, 1, (uint8_t*)&php.raw, (uint8_t*)&value, pe,
-                                  /* markAsBranch = */ true, /* loadToPC = */ true);
+                                  /* loadToPC = */ true);
     // If this label is already bound, then immediately replace the stub load
     // with a correct branch.
     if (label->bound()) {
         BufferOffset dest(label);
         BOffImm offset = dest.diffB<BOffImm>(ret);
         if (offset.isInvalid()) {
             m_buffer.fail_bail();
             return ret;
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -1232,17 +1232,17 @@ class Assembler : public AssemblerShared
     BufferOffset nextOffset() {
         return m_buffer.nextOffset();
     }
 
   protected:
     // Shim around AssemblerBufferWithConstantPools::allocEntry.
     BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                             uint8_t* inst, uint8_t* data, ARMBuffer::PoolEntry* pe = nullptr,
-                            bool markAsBranch = false, bool loadToPC = false);
+                            bool loadToPC = false);
 
     Instruction* editSrc(BufferOffset bo) {
         return m_buffer.getInst(bo);
     }
 
 #ifdef JS_DISASM_ARM
     static void spewInst(Instruction* i);
     void spew(Instruction* i);
@@ -1422,22 +1422,35 @@ class Assembler : public AssemblerShared
     // Size of the jump relocation table, in bytes.
     size_t jumpRelocationTableBytes() const;
     size_t dataRelocationTableBytes() const;
     size_t preBarrierTableBytes() const;
 
     // Size of the data table, in bytes.
     size_t bytesNeeded() const;
 
-    // Write a blob of binary into the instruction stream *OR* into a
-    // destination address.
-    BufferOffset writeInst(uint32_t x);
+    // Write a single instruction into the instruction stream.  Very hot,
+    // inlined for performance
+    MOZ_ALWAYS_INLINE BufferOffset writeInst(uint32_t x) {
+        BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+        spew(m_buffer.getInstOrNull(offs));
+#endif
+        return offs;
+    }
 
-    // As above, but also mark the instruction as a branch.
-    BufferOffset writeBranchInst(uint32_t x, Label* documentation = nullptr);
+    // As above, but also mark the instruction as a branch.  Very hot, inlined
+    // for performance
+    MOZ_ALWAYS_INLINE BufferOffset writeBranchInst(uint32_t x, Label* documentation = nullptr) {
+        BufferOffset offs = m_buffer.putInt(x);
+#ifdef JS_DISASM_ARM
+        spewBranch(m_buffer.getInstOrNull(offs), documentation);
+#endif
+        return offs;
+    }
 
     // Write a placeholder NOP for a branch into the instruction stream
     // (in order to adjust assembler addresses and mark it as a branch), it will
     // be overwritten subsequently.
     BufferOffset allocBranchInst();
 
     // A static variant for the cases where we don't want to have an assembler
     // object.
--- a/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
+++ b/js/src/jit/arm64/vixl/MozBaseAssembler-vixl.h
@@ -99,29 +99,30 @@ class MozBaseAssembler : public js::jit:
   BufferOffset nextOffset() const {
     return armbuffer_.nextOffset();
   }
 
   // Allocate memory in the buffer by forwarding to armbuffer_.
   // Propagate OOM errors.
   BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
                           uint8_t* inst, uint8_t* data,
-                          ARMBuffer::PoolEntry* pe = nullptr,
-                          bool markAsBranch = false)
+                          ARMBuffer::PoolEntry* pe = nullptr)
   {
     BufferOffset offset = armbuffer_.allocEntry(numInst, numPoolEntries, inst,
-                                                data, pe, markAsBranch);
+                                                data, pe);
     propagateOOM(offset.assigned());
     return offset;
   }
 
   // Emit the instruction, returning its offset.
   BufferOffset Emit(Instr instruction, bool isBranch = false) {
     JS_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
-    return armbuffer_.putInt(*(uint32_t*)(&instruction), isBranch);
+    // TODO: isBranch is obsolete and should be removed.
+    (void)isBranch;
+    return armbuffer_.putInt(*(uint32_t*)(&instruction));
   }
 
   BufferOffset EmitBranch(Instr instruction) {
     return Emit(instruction, true);
   }
 
  public:
   // Emit the instruction at |at|.
--- a/js/src/jit/shared/IonAssemblerBuffer.h
+++ b/js/src/jit/shared/IonAssemblerBuffer.h
@@ -130,16 +130,25 @@ class BufferSlice
     }
 
     void putBytes(size_t numBytes, const void* source) {
         MOZ_ASSERT(bytelength_ + numBytes <= SliceSize);
         if (source)
             memcpy(&instructions[length()], source, numBytes);
         bytelength_ += numBytes;
     }
+
+    MOZ_ALWAYS_INLINE
+    void putU32Aligned(uint32_t value) {
+        MOZ_ASSERT(bytelength_ + 4 <= SliceSize);
+        MOZ_ASSERT((bytelength_ & 3) == 0);
+        MOZ_ASSERT((uintptr_t(&instructions[0]) & 3) == 0);
+        *reinterpret_cast<uint32_t*>(&instructions[bytelength_]) = value;
+        bytelength_ += 4;
+    }
 };
 
 template<int SliceSize, class Inst>
 class AssemblerBuffer
 {
   protected:
     typedef BufferSlice<SliceSize> Slice;
     typedef AssemblerBuffer<SliceSize, Inst> AssemblerBuffer_;
@@ -229,16 +238,26 @@ class AssemblerBuffer
     BufferOffset putShort(uint16_t value) {
         return putBytes(sizeof(value), &value);
     }
 
     BufferOffset putInt(uint32_t value) {
         return putBytes(sizeof(value), &value);
     }
 
+    MOZ_ALWAYS_INLINE
+    BufferOffset putU32Aligned(uint32_t value) {
+        if (!ensureSpace(sizeof(value)))
+            return BufferOffset();
+
+        BufferOffset ret = nextOffset();
+        tail->putU32Aligned(value);
+        return ret;
+    }
+
     // Add numBytes bytes to this buffer.
     // The data must fit in a single slice.
     BufferOffset putBytes(size_t numBytes, const void* inst) {
         if (!ensureSpace(numBytes))
             return BufferOffset();
 
         BufferOffset ret = nextOffset();
         tail->putBytes(numBytes, inst);
@@ -352,17 +371,18 @@ class AssemblerBuffer
             return nullptr;
         return getInst(off);
     }
 
     // Get a pointer to the instruction at offset |off| which must be within the
     // bounds of the buffer. Use |getInstOrNull()| if |off| may be unassigned.
     Inst* getInst(BufferOffset off) {
         const int offset = off.getOffset();
-        MOZ_RELEASE_ASSERT(off.assigned() && offset >= 0 && (unsigned)offset < size());
+        // This function is hot, do not make the next line a RELEASE_ASSERT.
+        MOZ_ASSERT(off.assigned() && offset >= 0 && unsigned(offset) < size());
 
         // Is the instruction in the last slice?
         if (offset >= int(bufferSize))
             return (Inst*)&tail->instructions[offset - bufferSize];
 
         // How close is this offset to the previous one we looked up?
         // If it is sufficiently far from the start and end of the buffer,
         // use the finger to start midway through the list.
--- a/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
+++ b/js/src/jit/shared/IonAssemblerBufferWithConstantPools.h
@@ -824,19 +824,19 @@ struct AssemblerBufferWithConstantPools 
         if (!hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0)) {
             JitSpew(JitSpew_Pools, "[%d] nextInstrOffset @ %d caused a constant pool spill", id,
                     this->nextOffset().getOffset());
             finishPool();
         }
         return this->nextOffset();
     }
 
+    MOZ_NEVER_INLINE
     BufferOffset allocEntry(size_t numInst, unsigned numPoolEntries,
-                            uint8_t* inst, uint8_t* data, PoolEntry* pe = nullptr,
-                            bool markAsBranch = false)
+                            uint8_t* inst, uint8_t* data, PoolEntry* pe = nullptr)
     {
         // The allocation of pool entries is not supported in a no-pool region,
         // check.
         MOZ_ASSERT_IF(numPoolEntries, !canNotPlacePool_);
 
         if (this->oom() && !this->bail())
             return BufferOffset();
 
@@ -872,18 +872,44 @@ struct AssemblerBufferWithConstantPools 
             poolEntryCount += numPoolEntries;
         }
         // Now inst is a valid thing to insert into the instruction stream.
         if (pe != nullptr)
             *pe = retPE;
         return this->putBytes(numInst * InstSize, inst);
     }
 
-    BufferOffset putInt(uint32_t value, bool markAsBranch = false) {
-        return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr, markAsBranch);
+
+    // putInt is the workhorse for the assembler and higher-level buffer
+    // abstractions: it places one instruction into the instruction stream.
+    // Under normal circumstances putInt should just check that the constant
+    // pool does not need to be flushed, that there is space for the single word
+    // of the instruction, and write that word and update the buffer pointer.
+    //
+    // To do better here we need a status variable that handles both nopFill_
+    // and capacity, so that we can quickly know whether to go the slow path.
+    // That could be a variable that has the remaining number of simple
+    // instructions that can be inserted before a more expensive check,
+    // which is set to zero when nopFill_ is set.
+    //
+    // We assume that we don't have to check this->oom() if there is space to
+    // insert a plain instruction; there will always come a later time when it will be
+    // checked anyway.
+
+    MOZ_ALWAYS_INLINE
+    BufferOffset putInt(uint32_t value) {
+        if (nopFill_ || !hasSpaceForInsts(/* numInsts= */ 1, /* numPoolEntries= */ 0))
+            return allocEntry(1, 0, (uint8_t*)&value, nullptr, nullptr);
+
+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
+    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+        return this->putU32Aligned(value);
+#else
+        return this->AssemblerBuffer<SliceSize, Inst>::putInt(value);
+#endif
     }
 
     // Register a short-range branch deadline.
     //
     // After inserting a short-range forward branch, call this method to
     // register the branch 'deadline' which is the last buffer offset that the
     // branch instruction can reach.
     //