Bug 1574415 - Part 14: Don't allocate Spectre bounds check scratch register when it's unused. r=jandem
author: André Bargull <andre.bargull@gmail.com>
Mon, 07 Oct 2019 12:01:13 +0000
changeset 496555 144ebbca6844259fe509194059447dc6fcbdff28
parent 496554 3dc778bc0ce0ccb1abc83d46f55f202b95a7e771
child 496556 91bec74c770a54e68352173c42ee78dfc2427f2b
push id: 36661
push user: ccoroiu@mozilla.com
push date: Mon, 07 Oct 2019 21:50:01 +0000
treeherder: mozilla-central@2b4c8b7a255c [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jandem
bugs: 1574415
milestone: 71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1574415 - Part 14: Don't allocate Spectre bounds check scratch register when it's unused. r=jandem `ta[0] = 0;` emits the following assembly for Ion CacheIR without this patch: ``` [Codegen] emitStoreTypedElement [Codegen] push %rbx [Codegen] push %rbp [Codegen] movl 0x28(%rax), %ebx [Codegen] xorl %r11d, %r11d [Codegen] cmpl %ebx, %edx [Codegen] jae .Lfrom47 [Codegen] cmovae %r11d, %edx [Codegen] movq 0x38(%rax), %rbx [Codegen] movl %ecx, 0x0(%rbx,%rdx,4) ``` When not allocating the unused scratch register for Spectre bounds checks, this assembly is emitted: ``` [Codegen] emitStoreTypedElement [Codegen] push %rdx [Codegen] movl 0x28(%rax), %edx [Codegen] xorl %r11d, %r11d [Codegen] cmpl %edx, %ebx [Codegen] jae .Lfrom46 [Codegen] cmovae %r11d, %ebx [Codegen] movq 0x38(%rax), %rdx [Codegen] movl %ecx, 0x0(%rdx,%rbx,4) ``` Which shows `%rbp` is no longer spilled on the stack, resulting in a minor performance improvement (~3%) on µ-benchmarks and also avoiding a performance regression (performance is now again on par with the state before this patch series). For comparison this assembly was generated before this patch series: ``` [Codegen] emitStoreTypedElement [Codegen] push %rbx [Codegen] movl 0x28(%rax), %ecx [Codegen] xorl %r11d, %r11d [Codegen] cmpl %ecx, %edx [Codegen] jae .Lfrom44 [Codegen] cmovae %r11d, %edx [Codegen] movq 0x38(%rax), %rcx [Codegen] xorl %ebx, %ebx [Codegen] movl %ebx, 0x0(%rcx,%rdx,4) ``` Differential Revision: https://phabricator.services.mozilla.com/D47762
js/src/jit/CacheIRCompiler.cpp
js/src/jit/CacheIRCompiler.h
js/src/jit/IonCacheIRCompiler.cpp
--- a/js/src/jit/CacheIRCompiler.cpp
+++ b/js/src/jit/CacheIRCompiler.cpp
@@ -2941,91 +2941,91 @@ bool CacheIRCompiler::emitGuardIndexIsNo
   return true;
 }
 
 bool CacheIRCompiler::emitGuardIndexGreaterThanDenseInitLength() {
   JitSpew(JitSpew_Codegen, __FUNCTION__);
   Register obj = allocator.useRegister(masm, reader.objOperandId());
   Register index = allocator.useRegister(masm, reader.int32OperandId());
   AutoScratchRegister scratch(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   // Load obj->elements.
   masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch);
 
   // Ensure index >= initLength.
   Label outOfBounds;
   Address capacity(scratch, ObjectElements::offsetOfInitializedLength());
-  masm.spectreBoundsCheck32(index, capacity, scratch2, &outOfBounds);
+  masm.spectreBoundsCheck32(index, capacity, spectreScratch, &outOfBounds);
   masm.jump(failure->label());
   masm.bind(&outOfBounds);
 
   return true;
 }
 
 bool CacheIRCompiler::emitGuardIndexGreaterThanDenseCapacity() {
   JitSpew(JitSpew_Codegen, __FUNCTION__);
   Register obj = allocator.useRegister(masm, reader.objOperandId());
   Register index = allocator.useRegister(masm, reader.int32OperandId());
   AutoScratchRegister scratch(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   // Load obj->elements.
   masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch);
 
   // Ensure index >= capacity.
   Label outOfBounds;
   Address capacity(scratch, ObjectElements::offsetOfCapacity());
-  masm.spectreBoundsCheck32(index, capacity, scratch2, &outOfBounds);
+  masm.spectreBoundsCheck32(index, capacity, spectreScratch, &outOfBounds);
   masm.jump(failure->label());
   masm.bind(&outOfBounds);
 
   return true;
 }
 
 bool CacheIRCompiler::emitGuardIndexGreaterThanArrayLength() {
   JitSpew(JitSpew_Codegen, __FUNCTION__);
   Register obj = allocator.useRegister(masm, reader.objOperandId());
   Register index = allocator.useRegister(masm, reader.int32OperandId());
   AutoScratchRegister scratch(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   // Load obj->elements.
   masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch);
 
   // Ensure index >= length;
   Label outOfBounds;
   Address length(scratch, ObjectElements::offsetOfLength());
-  masm.spectreBoundsCheck32(index, length, scratch2, &outOfBounds);
+  masm.spectreBoundsCheck32(index, length, spectreScratch, &outOfBounds);
   masm.jump(failure->label());
   masm.bind(&outOfBounds);
   return true;
 }
 
 bool CacheIRCompiler::emitGuardIndexIsValidUpdateOrAdd() {
   JitSpew(JitSpew_Codegen, __FUNCTION__);
   Register obj = allocator.useRegister(masm, reader.objOperandId());
   Register index = allocator.useRegister(masm, reader.int32OperandId());
   AutoScratchRegister scratch(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   // Load obj->elements.
   masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch);
@@ -3035,17 +3035,17 @@ bool CacheIRCompiler::emitGuardIndexIsVa
   // If length is writable, branch to &success.  All indices are writable.
   Address flags(scratch, ObjectElements::offsetOfFlags());
   masm.branchTest32(Assembler::Zero, flags,
                     Imm32(ObjectElements::Flags::NONWRITABLE_ARRAY_LENGTH),
                     &success);
 
   // Otherwise, ensure index is in bounds.
   Address length(scratch, ObjectElements::offsetOfLength());
-  masm.spectreBoundsCheck32(index, length, scratch2,
+  masm.spectreBoundsCheck32(index, length, spectreScratch,
                             /* failure = */ failure->label());
   masm.bind(&success);
   return true;
 }
 
 bool CacheIRCompiler::emitGuardTagNotEqual() {
   JitSpew(JitSpew_Codegen, __FUNCTION__);
   Register lhs = allocator.useRegister(masm, reader.valueTagOperandId());
@@ -3428,27 +3428,27 @@ bool CacheIRCompiler::emitStoreTypedElem
     case Scalar::MaxTypedArrayViewType:
     case Scalar::Int64:
       MOZ_CRASH("Unsupported TypedArray type");
   }
 
   bool handleOOB = reader.readBool();
 
   AutoScratchRegister scratch1(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   // Bounds check.
   Label done;
   LoadTypedThingLength(masm, layout, obj, scratch1);
-  masm.spectreBoundsCheck32(index, scratch1, scratch2,
+  masm.spectreBoundsCheck32(index, scratch1, spectreScratch,
                             handleOOB ? &done : failure->label());
 
   // Load the elements vector.
   LoadTypedThingData(masm, layout, obj, scratch1);
 
   BaseIndex dest(scratch1, index, ScaleFromElemWidth(Scalar::byteSize(type)));
 
   if (type == Scalar::Float32) {
--- a/js/src/jit/CacheIRCompiler.h
+++ b/js/src/jit/CacheIRCompiler.h
@@ -5,16 +5,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef jit_CacheIRCompiler_h
 #define jit_CacheIRCompiler_h
 
 #include "mozilla/Maybe.h"
 
 #include "jit/CacheIR.h"
+#include "jit/JitOptions.h"
 #include "jit/SharedICRegisters.h"
 
 namespace js {
 namespace jit {
 
 class BaselineCacheIRCompiler;
 class IonCacheIRCompiler;
 
@@ -650,16 +651,41 @@ class MOZ_RAII AutoScratchRegister {
     MOZ_ASSERT(alloc_.currentOpRegs_.has(reg_));
   }
   ~AutoScratchRegister() { alloc_.releaseRegister(reg_); }
 
   Register get() const { return reg_; }
   operator Register() const { return reg_; }
 };
 
+// On x86, spectreBoundsCheck32 can emit better code if it has a scratch
+// register and index masking is enabled.
+class MOZ_RAII AutoSpectreBoundsScratchRegister {
+  mozilla::Maybe<AutoScratchRegister> scratch_;
+  Register reg_ = InvalidReg;
+
+  AutoSpectreBoundsScratchRegister(const AutoSpectreBoundsScratchRegister&) =
+      delete;
+  void operator=(const AutoSpectreBoundsScratchRegister&) = delete;
+
+ public:
+  AutoSpectreBoundsScratchRegister(CacheRegisterAllocator& alloc,
+                                   MacroAssembler& masm) {
+#ifdef JS_CODEGEN_X86
+    if (JitOptions.spectreIndexMasking) {
+      scratch_.emplace(alloc, masm);
+      reg_ = scratch_->get();
+    }
+#endif
+  }
+
+  Register get() const { return reg_; }
+  operator Register() const { return reg_; }
+};
+
 // The FailurePath class stores everything we need to generate a failure path
 // at the end of the IC code. The failure path restores the input registers, if
 // needed, and jumps to the next stub.
 class FailurePath {
   Vector<OperandLocation, 4, SystemAllocPolicy> inputs_;
   SpilledRegisterVector spilledRegs_;
   NonAssertingLabel label_;
   uint32_t stackPushed_;
--- a/js/src/jit/IonCacheIRCompiler.cpp
+++ b/js/src/jit/IonCacheIRCompiler.cpp
@@ -1638,34 +1638,35 @@ static void EmitAssertNoCopyOnWriteEleme
 bool IonCacheIRCompiler::emitStoreDenseElement() {
   JitSpew(JitSpew_Codegen, __FUNCTION__);
   Register obj = allocator.useRegister(masm, reader.objOperandId());
   Register index = allocator.useRegister(masm, reader.int32OperandId());
   ConstantOrRegister val =
       allocator.useConstantOrRegister(masm, reader.valOperandId());
 
   AutoScratchRegister scratch1(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   EmitCheckPropertyTypes(masm, typeCheckInfo_, obj, val, *liveRegs_,
                          failure->label());
 
   // Load obj->elements in scratch1.
   masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch1);
 
   EmitAssertNoCopyOnWriteElements(masm, scratch1);
 
   // Bounds check.
   Address initLength(scratch1, ObjectElements::offsetOfInitializedLength());
-  masm.spectreBoundsCheck32(index, initLength, scratch2, failure->label());
+  masm.spectreBoundsCheck32(index, initLength, spectreScratch,
+                            failure->label());
 
   // Hole check.
   BaseObjectElementIndex element(scratch1, index);
   masm.branchTestMagic(Assembler::Equal, element, failure->label());
 
   // Check for frozen elements. We have to check this here because we attach
   // this stub also for non-extensible objects, and these can become frozen
   // without triggering a Shape change.
@@ -1689,17 +1690,17 @@ bool IonCacheIRCompiler::emitStoreDenseE
       allocator.useConstantOrRegister(masm, reader.valOperandId());
 
   // handleAdd boolean is only relevant for Baseline. Ion ICs can always
   // handle adds as we don't have to set any flags on the fallback stub to
   // track this.
   reader.readBool();
 
   AutoScratchRegister scratch1(allocator, masm);
-  AutoScratchRegister scratch2(allocator, masm);
+  AutoSpectreBoundsScratchRegister spectreScratch(allocator, masm);
 
   FailurePath* failure;
   if (!addFailurePath(&failure)) {
     return false;
   }
 
   EmitCheckPropertyTypes(masm, typeCheckInfo_, obj, val, *liveRegs_,
                          failure->label());
@@ -1708,28 +1709,27 @@ bool IonCacheIRCompiler::emitStoreDenseE
   masm.loadPtr(Address(obj, NativeObject::offsetOfElements()), scratch1);
 
   EmitAssertNoCopyOnWriteElements(masm, scratch1);
 
   Address initLength(scratch1, ObjectElements::offsetOfInitializedLength());
   BaseObjectElementIndex element(scratch1, index);
 
   Label inBounds, outOfBounds;
-  Register spectreTemp = scratch2;
-  masm.spectreBoundsCheck32(index, initLength, spectreTemp, &outOfBounds);
+  masm.spectreBoundsCheck32(index, initLength, spectreScratch, &outOfBounds);
   masm.jump(&inBounds);
 
   masm.bind(&outOfBounds);
   masm.branch32(Assembler::NotEqual, initLength, index, failure->label());
 
   // If index < capacity, we can add a dense element inline. If not we
   // need to allocate more elements.
   Label capacityOk, allocElement;
   Address capacity(scratch1, ObjectElements::offsetOfCapacity());
-  masm.spectreBoundsCheck32(index, capacity, spectreTemp, &allocElement);
+  masm.spectreBoundsCheck32(index, capacity, spectreScratch, &allocElement);
   masm.jump(&capacityOk);
 
   // Check for non-writable array length. We only have to do this if
   // index >= capacity.
   masm.bind(&allocElement);
   Address elementsFlags(scratch1, ObjectElements::offsetOfFlags());
   masm.branchTest32(Assembler::NonZero, elementsFlags,
                     Imm32(ObjectElements::NONWRITABLE_ARRAY_LENGTH),