Bug 1479360 - Share and optimize bump-pointer allocation code r=sfink
author Paul Bone <pbone@mozilla.com>
Wed, 01 Aug 2018 14:51:40 +1000
changeset 429555 a914cedebde5
parent 429554 42983355094d
child 429556 8ded8bf9b94c
push id 34366
push user dluca@mozilla.com
push date 2018-08-01 09:52 +0000
treeherder mozilla-central@af6a7edf0069 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sfink
bugs 1479360
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1479360 - Share and optimize bump-pointer allocation code r=sfink
js/src/jit/MacroAssembler.cpp
js/src/jit/MacroAssembler.h
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -775,23 +775,20 @@ MacroAssembler::nurseryAllocateObject(Re
 
     // No explicit check for nursery.isEnabled() is needed, as the comparison
     // with the nursery's end will always fail in such cases.
     CompileZone* zone = GetJitContext()->realm->zone();
     size_t thingSize = gc::Arena::thingSize(allocKind);
     size_t totalSize = thingSize + nDynamicSlots * sizeof(HeapSlot);
     MOZ_ASSERT(totalSize < INT32_MAX);
     MOZ_ASSERT(totalSize % gc::CellAlignBytes == 0);
-    void *ptrNurseryPosition = zone->addressOfNurseryPosition();
-    loadPtr(AbsoluteAddress(ptrNurseryPosition), result);
-    computeEffectiveAddress(Address(result, totalSize), temp);
-    const void *ptrNurseryCurrentEnd = zone->addressOfNurseryCurrentEnd();
-    branchPtr(Assembler::Below, AbsoluteAddress(ptrNurseryCurrentEnd), temp,
-        fail);
-    storePtr(temp, AbsoluteAddress(ptrNurseryPosition));
+
+    bumpPointerAllocate(result, temp, fail,
+        zone->addressOfNurseryPosition(),
+        zone->addressOfNurseryCurrentEnd(), totalSize, totalSize);
 
     if (nDynamicSlots) {
         computeEffectiveAddress(Address(result, thingSize), temp);
         storePtr(temp, Address(result, NativeObject::offsetOfSlots()));
     }
 }
 
 // Inlined version of FreeSpan::allocate. This does not fill in slots_.
@@ -964,33 +961,43 @@ MacroAssembler::nurseryAllocateString(Re
 
     CompileZone* zone = GetJitContext()->realm->zone();
     size_t thingSize = gc::Arena::thingSize(allocKind);
     size_t totalSize = js::Nursery::stringHeaderSize() + thingSize;
     MOZ_ASSERT(totalSize < INT32_MAX,
         "Nursery allocation too large");
     MOZ_ASSERT(totalSize % gc::CellAlignBytes == 0);
 
-    // The nursery position (allocation pointer) and the nursery end are stored
+    bumpPointerAllocate(result, temp, fail,
+        zone->addressOfStringNurseryPosition(),
+        zone->addressOfStringNurseryCurrentEnd(), totalSize, thingSize);
+    storePtr(ImmPtr(zone), Address(result, -js::Nursery::stringHeaderSize()));
+}
+
+void
+MacroAssembler::bumpPointerAllocate(Register result, Register temp, Label* fail,
+    void* posAddr, const void* curEndAddr, uint32_t totalSize, uint32_t size)
+{
+    // The position (allocation pointer) and the end pointer are stored
     // very close to each other -- specifically, easily within a 32 bit offset.
     // Use relative offsets between them, to avoid 64-bit immediate loads.
-    void* nurseryPosAddr = zone->addressOfStringNurseryPosition();
-    const void* nurseryEndAddr = zone->addressOfStringNurseryCurrentEnd();
-
-    movePtr(ImmPtr(nurseryPosAddr), temp);
+    //
+    // I tried to optimise this further by using an extra register to avoid
+    // the final subtraction and hopefully get some more instruction
+    // parallelism, but it made no difference.
+    movePtr(ImmPtr(posAddr), temp);
     loadPtr(Address(temp, 0), result);
     addPtr(Imm32(totalSize), result);
-    CheckedInt<int32_t> endOffset = (CheckedInt<uintptr_t>(uintptr_t(nurseryEndAddr)) -
-        CheckedInt<uintptr_t>(uintptr_t(nurseryPosAddr))).toChecked<int32_t>();
+    CheckedInt<int32_t> endOffset = (CheckedInt<uintptr_t>(uintptr_t(curEndAddr)) -
+        CheckedInt<uintptr_t>(uintptr_t(posAddr))).toChecked<int32_t>();
     MOZ_ASSERT(endOffset.isValid(),
         "Position and end pointers must be nearby");
     branchPtr(Assembler::Below, Address(temp, endOffset.value()), result, fail);
     storePtr(result, Address(temp, 0));
-    subPtr(Imm32(thingSize), result);
-    storePtr(ImmPtr(zone), Address(result, -js::Nursery::stringHeaderSize()));
+    subPtr(Imm32(size), result);
 }
 
 // Inlined equivalent of gc::AllocateString, jumping to fail if nursery
 // allocation requested but unsuccessful.
 void
 MacroAssembler::allocateString(Register result, Register temp, gc::AllocKind allocKind,
                                gc::InitialHeap initialHeap, Label* fail)
 {
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -2274,16 +2274,20 @@ class MacroAssembler : public MacroAssem
     }
 
     // Inline allocation.
   private:
     void checkAllocatorState(Label* fail);
     bool shouldNurseryAllocate(gc::AllocKind allocKind, gc::InitialHeap initialHeap);
     void nurseryAllocateObject(Register result, Register temp, gc::AllocKind allocKind,
                                size_t nDynamicSlots, Label* fail);
+    void bumpPointerAllocate(Register result, Register temp, Label* fail,
+        void* posAddr, const void* curEndAddr,
+        uint32_t totalSize, uint32_t size);
+
     void freeListAllocate(Register result, Register temp, gc::AllocKind allocKind, Label* fail);
     void allocateObject(Register result, Register temp, gc::AllocKind allocKind,
                         uint32_t nDynamicSlots, gc::InitialHeap initialHeap, Label* fail);
     void nurseryAllocateString(Register result, Register temp, gc::AllocKind allocKind,
                                Label* fail);
     void allocateString(Register result, Register temp, gc::AllocKind allocKind,
                         gc::InitialHeap initialHeap, Label* fail);
     void allocateNonObject(Register result, Register temp, gc::AllocKind allocKind, Label* fail);