Bug 1317678 - don't unconditionally unroll stack probe loop. r=luke
author: Lars T Hansen <lhansen@mozilla.com>
Wed, 16 Nov 2016 12:28:52 +0100
changeset 322950 951fc49fe05e7d3c1e077b92c15367277d8eba43
parent 322949 45f214b73596d7d1b194822e7e5a85ee606c372d
child 322984 0f59393d3669fb192fb9178651fdf301983ab4de
push id: 84018
push user: lhansen@mozilla.com
push date: Thu, 17 Nov 2016 13:32:42 +0000
treeherder: mozilla-inbound@951fc49fe05e [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: luke
bugs: 1317678
milestone: 53.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1317678 - don't unconditionally unroll stack probe loop. r=luke
js/src/jit/x64/MacroAssembler-x64.cpp
js/src/jit/x86/MacroAssembler-x86.cpp
js/src/wasm/WasmCode.cpp
--- a/js/src/jit/x64/MacroAssembler-x64.cpp
+++ b/js/src/jit/x64/MacroAssembler-x64.cpp
@@ -421,23 +421,42 @@ void
 MacroAssembler::subFromStackPtr(Imm32 imm32)
 {
     if (imm32.value) {
         // On windows, we cannot skip very far down the stack without touching the
         // memory pages in-between.  This is a corner-case code for situations where the
         // Ion frame data for a piece of code is very large.  To handle this special case,
         // for frames over 1k in size we allocate memory on the stack incrementally, touching
         // it as we go.
+        //
+        // When the amount is quite large, which it can be, we emit an actual loop, in order
+        // to keep the function prologue compact.  Compactness is a requirement for eg
+        // Wasm's CodeRange data structure, which can encode only 8-bit offsets.
         uint32_t amountLeft = imm32.value;
-        while (amountLeft > 4096) {
+        uint32_t fullPages = amountLeft / 4096;
+        if (fullPages <= 8) {
+            while (amountLeft > 4096) {
+                subq(Imm32(4096), StackPointer);
+                store32(Imm32(0), Address(StackPointer, 0));
+                amountLeft -= 4096;
+            }
+            subq(Imm32(amountLeft), StackPointer);
+        } else {
+            ScratchRegisterScope scratch(*this);
+            Label top;
+            move32(Imm32(fullPages), scratch);
+            bind(&top);
             subq(Imm32(4096), StackPointer);
             store32(Imm32(0), Address(StackPointer, 0));
-            amountLeft -= 4096;
+            subl(Imm32(1), scratch);
+            j(Assembler::NonZero, &top);
+            amountLeft -= fullPages * 4096;
+            if (amountLeft)
+                subq(Imm32(amountLeft), StackPointer);
         }
-        subq(Imm32(amountLeft), StackPointer);
     }
 }
 
 //{{{ check_macroassembler_style
 // ===============================================================
 // ABI function calls.
 
 void
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -328,23 +328,49 @@ void
 MacroAssembler::subFromStackPtr(Imm32 imm32)
 {
     if (imm32.value) {
         // On windows, we cannot skip very far down the stack without touching the
         // memory pages in-between.  This is a corner-case code for situations where the
         // Ion frame data for a piece of code is very large.  To handle this special case,
         // for frames over 1k in size we allocate memory on the stack incrementally, touching
         // it as we go.
+        //
+        // When the amount is quite large, which it can be, we emit an actual loop, in order
+        // to keep the function prologue compact.  Compactness is a requirement for eg
+        // Wasm's CodeRange data structure, which can encode only 8-bit offsets.
         uint32_t amountLeft = imm32.value;
-        while (amountLeft > 4096) {
+        uint32_t fullPages = amountLeft / 4096;
+        if (fullPages <= 8) {
+            while (amountLeft > 4096) {
+                subl(Imm32(4096), StackPointer);
+                store32(Imm32(0), Address(StackPointer, 0));
+                amountLeft -= 4096;
+            }
+            subl(Imm32(amountLeft), StackPointer);
+        } else {
+            // Save scratch register.
+            push(eax);
+            amountLeft -= 4;
+            fullPages = amountLeft / 4096;
+
+            Label top;
+            move32(Imm32(fullPages), eax);
+            bind(&top);
             subl(Imm32(4096), StackPointer);
             store32(Imm32(0), Address(StackPointer, 0));
-            amountLeft -= 4096;
+            subl(Imm32(1), eax);
+            j(Assembler::NonZero, &top);
+            amountLeft -= fullPages * 4096;
+            if (amountLeft)
+                subl(Imm32(amountLeft), StackPointer);
+
+            // Restore scratch register.
+            movl(Operand(StackPointer, uint32_t(imm32.value) - 4), eax);
         }
-        subl(Imm32(amountLeft), StackPointer);
     }
 }
 
 //{{{ check_macroassembler_style
 // ===============================================================
 // ABI function calls.
 
 void
--- a/js/src/wasm/WasmCode.cpp
+++ b/js/src/wasm/WasmCode.cpp
@@ -382,21 +382,21 @@ CodeRange::CodeRange(uint32_t funcIndex,
     funcBeginToTableProfilingJump_(offsets.tableProfilingJump - begin_),
     funcBeginToNonProfilingEntry_(offsets.nonProfilingEntry - begin_),
     funcProfilingJumpToProfilingReturn_(profilingReturn_ - offsets.profilingJump),
     funcProfilingEpilogueToProfilingReturn_(profilingReturn_ - offsets.profilingEpilogue),
     kind_(Function)
 {
     MOZ_ASSERT(begin_ < profilingReturn_);
     MOZ_ASSERT(profilingReturn_ < end_);
-    MOZ_ASSERT(funcBeginToTableEntry_ == offsets.tableEntry - begin_);
-    MOZ_ASSERT(funcBeginToTableProfilingJump_ == offsets.tableProfilingJump - begin_);
-    MOZ_ASSERT(funcBeginToNonProfilingEntry_ == offsets.nonProfilingEntry - begin_);
-    MOZ_ASSERT(funcProfilingJumpToProfilingReturn_ == profilingReturn_ - offsets.profilingJump);
-    MOZ_ASSERT(funcProfilingEpilogueToProfilingReturn_ == profilingReturn_ - offsets.profilingEpilogue);
+    MOZ_ASSERT(offsets.tableEntry - begin_ <= UINT8_MAX);
+    MOZ_ASSERT(offsets.tableProfilingJump - begin_ <= UINT8_MAX);
+    MOZ_ASSERT(offsets.nonProfilingEntry - begin_ <= UINT8_MAX);
+    MOZ_ASSERT(profilingReturn_ - offsets.profilingJump <= UINT8_MAX);
+    MOZ_ASSERT(profilingReturn_ - offsets.profilingEpilogue <= UINT8_MAX);
 }
 
 static size_t
 StringLengthWithNullChar(const char* chars)
 {
     return chars ? strlen(chars) + 1 : 0;
 }