Bug 1435209 - Use CMOVcc instead of index masking. r=luke
author Jan de Mooij <jdemooij@mozilla.com>
Wed, 07 Feb 2018 13:49:06 +0100
changeset 402774 0bc556c6e060f0e29a31f28d69b54179edc32990
parent 402773 7d098669f8cb02be84451118e9c93bc0b9b3d461
child 402775 1157f9e6cce081e0f05f0e62e5d16b816f726456
reviewers luke
bugs 1435209
milestone 60.0a1
js/src/jit/CodeGenerator.cpp
js/src/jit/Lowering.cpp
js/src/jit/MacroAssembler.cpp
js/src/jit/MacroAssembler.h
js/src/jit/arm/MacroAssembler-arm-inl.h
js/src/jit/arm64/MacroAssembler-arm64-inl.h
js/src/jit/arm64/MacroAssembler-arm64.h
js/src/jit/x86-shared/Assembler-x86-shared.h
js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
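
At a high level, the patch replaces the arithmetic Spectre index mask with a conditional move. The following is a minimal C-level sketch of the two strategies for non-negative 32-bit indices; the helper names are hypothetical, and the patch emits machine code, not C.

    #include <stdint.h>

    // Old approach (see the code removed from MacroAssembler.cpp below):
    // build an all-ones/all-zeroes mask from the sign bits of (index - length)
    // and ~index, then AND it into the index, so a misspeculated out-of-bounds
    // index collapses to 0. (Arithmetic right shift of a negative value is
    // implementation-defined in C, but it is what the emitted code relies on.)
    static inline uint32_t clampViaMask(uint32_t index, uint32_t length) {
        int32_t mask = ((int32_t)(index - length) & ~(int32_t)index) >> 31;
        return index & (uint32_t)mask;
    }

    // New approach: start from 0 and conditionally move the index in only
    // when index < length -- CMOVcc on x86, CSEL on ARM64, a predicated MOV
    // on ARM -- so no extra branch is introduced.
    static inline uint32_t clampViaCmov(uint32_t index, uint32_t length) {
        uint32_t output = 0;
        if (index < length) // intended to compile to a conditional move
            output = index;
        return output;
    }
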
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -8648,34 +8648,24 @@ CodeGenerator::visitBoundsCheckLower(LBo
                  lir->snapshot());
 }
 
 void
 CodeGenerator::visitSpectreMaskIndex(LSpectreMaskIndex* lir)
 {
     MOZ_ASSERT(JitOptions.spectreIndexMasking);
 
-    const LAllocation* index = lir->index();
     const LAllocation* length = lir->length();
+    Register index = ToRegister(lir->index());
     Register output = ToRegister(lir->output());
 
-    if (index->isConstant()) {
-        int32_t idx = ToInt32(index);
-        if (length->isRegister())
-            masm.spectreMaskIndex(idx, ToRegister(length), output);
-        else
-            masm.spectreMaskIndex(idx, ToAddress(length), output);
-        return;
-    }
-
-    Register indexReg = ToRegister(index);
     if (length->isRegister())
-        masm.spectreMaskIndex(indexReg, ToRegister(length), output);
+        masm.spectreMaskIndex(index, ToRegister(length), output);
     else
-        masm.spectreMaskIndex(indexReg, ToAddress(length), output);
+        masm.spectreMaskIndex(index, ToAddress(length), output);
 }
 
 class OutOfLineStoreElementHole : public OutOfLineCodeBase<CodeGenerator>
 {
     LInstruction* ins_;
     Label rejoinStore_;
     bool strict_;
 
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3158,26 +3158,18 @@ LIRGenerator::visitBoundsCheck(MBoundsCh
 
 void
 LIRGenerator::visitSpectreMaskIndex(MSpectreMaskIndex* ins)
 {
     MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
     MOZ_ASSERT(ins->length()->type() == MIRType::Int32);
     MOZ_ASSERT(ins->type() == MIRType::Int32);
 
-    // On 64-bit platforms, the length must be in a register, so
-    // MacroAssembler::maskIndex can emit more efficient code.
-#if JS_BITS_PER_WORD == 64
-    LAllocation lengthUse = useRegister(ins->length());
-#else
-    LAllocation lengthUse = useAny(ins->length());
-#endif
-
     LSpectreMaskIndex* lir =
-        new(alloc()) LSpectreMaskIndex(useRegisterOrConstant(ins->index()), lengthUse);
+        new(alloc()) LSpectreMaskIndex(useRegister(ins->index()), useAny(ins->length()));
     define(lir, ins);
 }
 
 void
 LIRGenerator::visitBoundsCheckLower(MBoundsCheckLower* ins)
 {
     MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
 
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -3317,155 +3317,51 @@ MacroAssembler::debugAssertIsObject(cons
 #ifdef DEBUG
     Label ok;
     branchTestObject(Assembler::Equal, val, &ok);
     assumeUnreachable("Expected an object!");
     bind(&ok);
 #endif
 }
 
-template <typename T>
 void
-MacroAssembler::computeSpectreIndexMaskGeneric(Register index, const T& length, Register output)
-{
-    MOZ_ASSERT(JitOptions.spectreIndexMasking);
-    MOZ_ASSERT(index != output);
-
-    // mask := ((index - length) & ~index) >> 31
-    mov(index, output);
-    sub32(length, output);
-    not32(index);
-    and32(index, output);
-    not32(index); // Restore index register to its original value.
-    rshift32Arithmetic(Imm32(31), output);
-}
-
-template <typename T>
-void
-MacroAssembler::computeSpectreIndexMask(int32_t index, const T& length, Register output)
-{
-    MOZ_ASSERT(JitOptions.spectreIndexMasking);
-
-    // mask := ((index - length) & ~index) >> 31
-    move32(Imm32(index), output);
-    sub32(length, output);
-    and32(Imm32(~index), output);
-    rshift32Arithmetic(Imm32(31), output);
-}
-
-void
-MacroAssembler::computeSpectreIndexMask(Register index, Register length, Register output)
+MacroAssembler::spectreMaskIndex(Register index, Register length, Register output)
 {
     MOZ_ASSERT(JitOptions.spectreIndexMasking);
     MOZ_ASSERT(length != output);
     MOZ_ASSERT(index != output);
 
-#if JS_BITS_PER_WORD == 64
-    // On 64-bit platforms, we can use a faster algorithm:
-    //
-    //   mask := (uint64_t(index) - uint64_t(length)) >> 32
-    //
-    // mask is 0x11…11 if index < length, 0 otherwise.
-    move32(index, output);
-    subPtr(length, output);
-    rshiftPtr(Imm32(32), output);
-#else
-    computeSpectreIndexMaskGeneric(index, length, output);
-#endif
-}
-
-void
-MacroAssembler::spectreMaskIndex(int32_t index, Register length, Register output)
-{
-    MOZ_ASSERT(length != output);
-    if (index == 0) {
-        move32(Imm32(index), output);
-    } else {
-        computeSpectreIndexMask(index, length, output);
-        and32(Imm32(index), output);
-    }
-}
-
-void
-MacroAssembler::spectreMaskIndex(int32_t index, const Address& length, Register output)
-{
-    MOZ_ASSERT(length.base != output);
-    if (index == 0) {
-        move32(Imm32(index), output);
-    } else {
-        computeSpectreIndexMask(index, length, output);
-        and32(Imm32(index), output);
-    }
-}
-
-void
-MacroAssembler::spectreMaskIndex(Register index, Register length, Register output)
-{
-    MOZ_ASSERT(length != output);
-    MOZ_ASSERT(index != output);
-
-    computeSpectreIndexMask(index, length, output);
-    and32(index, output);
+    move32(Imm32(0), output);
+    cmp32Move32(Assembler::Below, index, length, index, output);
 }
 
 void
 MacroAssembler::spectreMaskIndex(Register index, const Address& length, Register output)
 {
+    MOZ_ASSERT(JitOptions.spectreIndexMasking);
     MOZ_ASSERT(index != length.base);
     MOZ_ASSERT(length.base != output);
     MOZ_ASSERT(index != output);
 
-    computeSpectreIndexMaskGeneric(index, length, output);
-    and32(index, output);
+    move32(Imm32(0), output);
+    cmp32Move32(Assembler::Below, index, length, index, output);
 }
 
 void
 MacroAssembler::boundsCheck32PowerOfTwo(Register index, uint32_t length, Label* failure)
 {
     MOZ_ASSERT(mozilla::IsPowerOfTwo(length));
     branch32(Assembler::AboveOrEqual, index, Imm32(length), failure);
 
     // Note: it's fine to clobber the input register, as this is a no-op: it
     // only affects speculative execution.
     if (JitOptions.spectreIndexMasking)
         and32(Imm32(length - 1), index);
 }
 
-void
-MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
-                                     Label* failure)
-{
-    MOZ_ASSERT(index != length);
-    MOZ_ASSERT(length != scratch);
-    MOZ_ASSERT(index != scratch);
-
-    branch32(Assembler::AboveOrEqual, index, length, failure);
-
-    if (JitOptions.spectreIndexMasking) {
-        computeSpectreIndexMask(index, length, scratch);
-        and32(scratch, index);
-    }
-}
-
-void
-MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
-                                     Label* failure)
-{
-    MOZ_ASSERT(index != length.base);
-    MOZ_ASSERT(length.base != scratch);
-    MOZ_ASSERT(index != scratch);
-
-    branch32(Assembler::BelowOrEqual, length, index, failure);
-
-    if (JitOptions.spectreIndexMasking) {
-        computeSpectreIndexMaskGeneric(index, length, scratch);
-        and32(scratch, index);
-    }
-}
-
 namespace js {
 namespace jit {
 
 #ifdef DEBUG
 template <class RegisterType>
 AutoGenericRegisterScope<RegisterType>::AutoGenericRegisterScope(MacroAssembler& masm, RegisterType reg)
   : RegisterType(reg), masm_(masm)
 {
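
The new spectreMaskIndex is a two-step composition of the cmp32Move32 primitive declared in MacroAssembler.h below. A hedged sketch of the contract in plain C, with hypothetical helper names and the condition fixed to Assembler::Below as both overloads use it:

    #include <stdint.h>

    // cmp32Move32(Below, lhs, rhs, src, dest): dest = src only when
    // lhs < rhs (unsigned); no branch is emitted.
    static inline void cmp32Move32Below(uint32_t lhs, uint32_t rhs,
                                        uint32_t src, uint32_t* dest) {
        if (lhs < rhs) // cmp + cmovb / csel / predicated mov
            *dest = src;
    }

    // spectreMaskIndex(index, length, output) then reduces to:
    static inline uint32_t spectreMaskIndexSketch(uint32_t index,
                                                  uint32_t length) {
        uint32_t output = 0;                             // move32(Imm32(0), output)
        cmp32Move32Below(index, length, index, &output); // index < length ? index : 0
        return output;
    }
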
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -1354,16 +1354,34 @@ class MacroAssembler : public MacroAssem
     template <typename T>
     inline void branchTestPrimitiveImpl(Condition cond, const T& t, Label* label)
         DEFINED_ON(arm, arm64, x86_shared);
     template <typename T, class L>
     inline void branchTestMagicImpl(Condition cond, const T& t, L label)
         DEFINED_ON(arm, arm64, x86_shared);
 
   public:
+
+    inline void cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+        DEFINED_ON(arm, arm64, x86_shared);
+
+    inline void cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+        DEFINED_ON(arm, arm64, x86_shared);
+
+    // Performs a bounds check and zeroes the index register if out-of-bounds
+    // (to mitigate Spectre).
+    inline void boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+        DEFINED_ON(arm, arm64, x86_shared);
+    inline void boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+        DEFINED_ON(arm, arm64, x86_shared);
+
     // ========================================================================
     // Canonicalization primitives.
     inline void canonicalizeDouble(FloatRegister reg);
     inline void canonicalizeDoubleIfDeterministic(FloatRegister reg);
 
     inline void canonicalizeFloat(FloatRegister reg);
     inline void canonicalizeFloatIfDeterministic(FloatRegister reg);
 
@@ -2050,39 +2068,23 @@ class MacroAssembler : public MacroAssem
     using MacroAssemblerSpecific::store32;
     void store32(const RegisterOrInt32Constant& key, const Address& dest) {
         if (key.isRegister())
             store32(key.reg(), dest);
         else
             store32(Imm32(key.constant()), dest);
     }
 
-  private:
-    template <typename T>
-    void computeSpectreIndexMaskGeneric(Register index, const T& length, Register output);
-
-    void computeSpectreIndexMask(Register index, Register length, Register output);
-
-    template <typename T>
-    void computeSpectreIndexMask(int32_t index, const T& length, Register output);
-
-  public:
-    void spectreMaskIndex(int32_t index, Register length, Register output);
-    void spectreMaskIndex(int32_t index, const Address& length, Register output);
     void spectreMaskIndex(Register index, Register length, Register output);
     void spectreMaskIndex(Register index, const Address& length, Register output);
 
     // The length must be a power of two. Performs a bounds check and Spectre index
     // masking.
     void boundsCheck32PowerOfTwo(Register index, uint32_t length, Label* failure);
 
-    // Performs a bounds check and Spectre index masking.
-    void boundsCheck32ForLoad(Register index, Register length, Register scratch, Label* failure);
-    void boundsCheck32ForLoad(Register index, const Address& length, Register scratch, Label* failure);
-
     template <typename T>
     void guardedCallPreBarrier(const T& address, MIRType type) {
         Label done;
 
         branchTestNeedsIncrementalBarrier(Assembler::Zero, &done);
 
         if (type == MIRType::Value)
             branchTestGCThing(Assembler::NotEqual, address, &done);
--- a/js/src/jit/arm/MacroAssembler-arm-inl.h
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -2131,16 +2131,68 @@ MacroAssembler::branchToComputedAddress(
     uint32_t scale = Imm32::ShiftOf(addr.scale).value;
 
     ma_ldr(DTRAddr(base, DtrRegImmShift(addr.index, LSL, scale)), pc);
     // When loading from pc, the pc is shifted to the next instruction, we
     // add one extra instruction to accommodate for this shifted offset.
     breakpoint();
 }
 
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    ma_mov(src, dest, LeaveCC, cond);
+}
+
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+{
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+    ma_ldr(rhs, scratch, scratch2);
+    cmp32Move32(cond, lhs, scratch, src, dest);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length);
+    MOZ_ASSERT(length != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        ma_mov(scratch, index, LeaveCC, Assembler::BelowOrEqual);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length.base);
+    MOZ_ASSERT(length.base != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        ma_mov(scratch, index, LeaveCC, Assembler::BelowOrEqual);
+}
+
 // ========================================================================
 // Memory access primitives.
 void
 MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& addr)
 {
     ScratchRegisterScope scratch(*this);
     ma_vstr(src, addr, scratch);
 }
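
On ARM, the scratch register is zeroed before the compare; since both the zeroing move and the predicated ma_mov use LeaveCC, the flags set by the bounds-check compare survive to predicate the move. A hedged C model of what boundsCheck32ForLoad guarantees (hypothetical helper name):

    #include <stdbool.h>
    #include <stdint.h>

    // Architecturally, the out-of-bounds case branches to |failure| and the
    // clamp is unobservable; the write to *index models only what a
    // misspeculating CPU sees on the fall-through path.
    static inline bool boundsCheck32ForLoadSketch(uint32_t* index,
                                                  uint32_t length) {
        if (*index >= length) {
            *index = 0;   // predicated MOV / CSEL / CMOVcc fires here
            return false; // corresponds to the branch to |failure|
        }
        return true;      // in bounds; index left unchanged
    }
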
--- a/js/src/jit/arm64/MacroAssembler-arm64-inl.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
@@ -1705,16 +1705,60 @@ MacroAssembler::branchTestMagic(Conditio
 }
 
 void
 MacroAssembler::branchToComputedAddress(const BaseIndex& addr)
 {
     MOZ_CRASH("branchToComputedAddress");
 }
 
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32), cond);
+}
+
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32), cond);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length);
+    MOZ_ASSERT(length != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr, Assembler::Above);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length.base);
+    MOZ_ASSERT(length.base != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr, Assembler::Above);
+}
+
 // ========================================================================
 // Memory access primitives.
 void
 MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& dest)
 {
     Str(ARMFPRegister(src, 64), toMemOperand(dest));
 }
 void
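
The ARM64 variants need no scratch zeroing: CSEL can read the zero register wzr directly, and branch32(BelowOrEqual, length, index, ...) leaves the flags from comparing length against index, so on the fall-through path Assembler::Above means length > index. As plain C (hypothetical name):

    #include <stdint.h>

    static inline uint32_t cselClampSketch(uint32_t index, uint32_t length) {
        return (length > index) ? index : 0; // csel Windex, Windex, wzr, hi
    }
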
--- a/js/src/jit/arm64/MacroAssembler-arm64.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64.h
@@ -1017,28 +1017,37 @@ class MacroAssemblerCompat : public vixl
         Cmp(ARMRegister(a, 32), Operand(ARMRegister(b, 32)));
     }
     void cmp32(const Address& lhs, Imm32 rhs) {
         cmp32(Operand(lhs.base, lhs.offset), rhs);
     }
     void cmp32(const Address& lhs, Register rhs) {
         cmp32(Operand(lhs.base, lhs.offset), rhs);
     }
+    void cmp32(Register lhs, const Address& rhs) {
+        cmp32(lhs, Operand(rhs.base, rhs.offset));
+    }
     void cmp32(const Operand& lhs, Imm32 rhs) {
         vixl::UseScratchRegisterScope temps(this);
         const ARMRegister scratch32 = temps.AcquireW();
         Mov(scratch32, lhs);
         Cmp(scratch32, Operand(rhs.value));
     }
     void cmp32(const Operand& lhs, Register rhs) {
         vixl::UseScratchRegisterScope temps(this);
         const ARMRegister scratch32 = temps.AcquireW();
         Mov(scratch32, lhs);
         Cmp(scratch32, Operand(ARMRegister(rhs, 32)));
     }
+    void cmp32(Register lhs, const Operand& rhs) {
+        vixl::UseScratchRegisterScope temps(this);
+        const ARMRegister scratch32 = temps.AcquireW();
+        Mov(scratch32, rhs);
+        Cmp(ARMRegister(lhs, 32), Operand(scratch32));
+    }
 
     void cmpPtr(Register lhs, Imm32 rhs) {
         Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
     }
     void cmpPtr(Register lhs, ImmWord rhs) {
         Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
     }
     void cmpPtr(Register lhs, ImmPtr rhs) {
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@@ -464,16 +464,20 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_SCALE:
             masm.cmovCCl_mr(cc, src.disp(), src.base(), src.index(), src.scale(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void cmovCCl(Condition cond, Register src, Register dest) {
+        X86Encoding::Condition cc = static_cast<X86Encoding::Condition>(cond);
+        masm.cmovCCl_rr(cc, src.encoding(), dest.encoding());
+    }
     void cmovzl(const Operand& src, Register dest) {
         cmovCCl(Condition::Zero, src, dest);
     }
     void cmovnzl(const Operand& src, Register dest) {
         cmovCCl(Condition::NonZero, src, dest);
     }
     void movl(Imm32 imm32, Register dest) {
         masm.movl_i32r(imm32.value, dest.encoding());
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -1089,16 +1089,68 @@ MacroAssembler::branchTestMagic(Conditio
 template <typename T, class L>
 void
 MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label)
 {
     cond = testMagic(cond, t);
     j(cond, label);
 }
 
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    cmovCCl(cond, src, dest);
+}
+
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, Operand(rhs));
+    cmovCCl(cond, src, dest);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length);
+    MOZ_ASSERT(length != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    cmp32(index, length);
+    j(Assembler::AboveOrEqual, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        cmovCCl(Assembler::AboveOrEqual, scratch, index);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length.base);
+    MOZ_ASSERT(length.base != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    cmp32(index, Operand(length));
+    j(Assembler::AboveOrEqual, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        cmovCCl(Assembler::AboveOrEqual, scratch, index);
+}
+
 // ========================================================================
 // Canonicalization primitives.
 void
 MacroAssembler::canonicalizeFloat32x4(FloatRegister reg, FloatRegister scratch)
 {
     ScratchSimd128Scope scratch2(*this);
 
     MOZ_ASSERT(scratch.asSimd128() != scratch2.asSimd128());
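
Finally, a hypothetical caller-side sketch, not part of this patch, of how the new primitive is meant to be used; the helper, register assignments, and include are assumptions for illustration:

    #include "jit/MacroAssembler.h"

    using namespace js::jit;

    // Hypothetical helper: emit a Spectre-hardened 32-bit element load.
    static void emitGuardedLoad32(MacroAssembler& masm, Register elements,
                                  Register index, Register length,
                                  Register scratch, Register output,
                                  Label* failure)
    {
        // Branches to |failure| when index >= length; on the fall-through
        // path |index| is architecturally unchanged and speculatively
        // clamped to 0, so the dependent load below cannot read
        // attacker-chosen out-of-bounds memory under misspeculation.
        masm.boundsCheck32ForLoad(index, length, scratch, failure);
        masm.load32(BaseIndex(elements, index, TimesFour), output);
    }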