Bug 1435209 - Use CMOVcc instead of index masking. r=luke a=RyanVM
author: Jan de Mooij <jdemooij@mozilla.com>
date: Wed, 07 Feb 2018 13:49:06 +0100
changeset 454739 41310e95042844c74c58adc4650770540c6cc06c
parent 454738 2764b3a41f36c32b48fb669795efc92e389a810a
child 454740 aa1153f2621f4e896b92796f572f6d9bd42413a6
push id: 1648
push user: mtabara@mozilla.com
push date: Thu, 01 Mar 2018 12:45:47 +0000
treeherder: mozilla-release@cbb9688c2eeb
reviewers: luke, RyanVM
bugs: 1435209
milestone: 59.0
js/src/jit/CodeGenerator.cpp
js/src/jit/Lowering.cpp
js/src/jit/MacroAssembler.cpp
js/src/jit/MacroAssembler.h
js/src/jit/arm/MacroAssembler-arm-inl.h
js/src/jit/arm64/MacroAssembler-arm64-inl.h
js/src/jit/arm64/MacroAssembler-arm64.h
js/src/jit/x86-shared/Assembler-x86-shared.h
js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -8535,34 +8535,24 @@ CodeGenerator::visitBoundsCheckLower(LBo
                  lir->snapshot());
 }
 
 void
 CodeGenerator::visitSpectreMaskIndex(LSpectreMaskIndex* lir)
 {
     MOZ_ASSERT(JitOptions.spectreIndexMasking);
 
-    const LAllocation* index = lir->index();
     const LAllocation* length = lir->length();
+    Register index = ToRegister(lir->index());
     Register output = ToRegister(lir->output());
 
-    if (index->isConstant()) {
-        int32_t idx = ToInt32(index);
-        if (length->isRegister())
-            masm.spectreMaskIndex(idx, ToRegister(length), output);
-        else
-            masm.spectreMaskIndex(idx, ToAddress(length), output);
-        return;
-    }
-
-    Register indexReg = ToRegister(index);
     if (length->isRegister())
-        masm.spectreMaskIndex(indexReg, ToRegister(length), output);
+        masm.spectreMaskIndex(index, ToRegister(length), output);
     else
-        masm.spectreMaskIndex(indexReg, ToAddress(length), output);
+        masm.spectreMaskIndex(index, ToAddress(length), output);
 }
 
 class OutOfLineStoreElementHole : public OutOfLineCodeBase<CodeGenerator>
 {
     LInstruction* ins_;
     Label rejoinStore_;
     bool strict_;
 
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3131,26 +3131,18 @@ LIRGenerator::visitBoundsCheck(MBoundsCh
 
 void
 LIRGenerator::visitSpectreMaskIndex(MSpectreMaskIndex* ins)
 {
     MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
     MOZ_ASSERT(ins->length()->type() == MIRType::Int32);
     MOZ_ASSERT(ins->type() == MIRType::Int32);
 
-    // On 64-bit platforms, the length must be in a register, so
-    // MacroAssembler::maskIndex can emit more efficient code.
-#if JS_BITS_PER_WORD == 64
-    LAllocation lengthUse = useRegister(ins->length());
-#else
-    LAllocation lengthUse = useAny(ins->length());
-#endif
-
     LSpectreMaskIndex* lir =
-        new(alloc()) LSpectreMaskIndex(useRegisterOrConstant(ins->index()), lengthUse);
+        new(alloc()) LSpectreMaskIndex(useRegister(ins->index()), useAny(ins->length()));
     define(lir, ins);
 }
 
 void
 LIRGenerator::visitBoundsCheckLower(MBoundsCheckLower* ins)
 {
     MOZ_ASSERT(ins->index()->type() == MIRType::Int32);
 
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -3419,157 +3419,52 @@ MacroAssembler::debugAssertIsObject(cons
 #ifdef DEBUG
     Label ok;
     branchTestObject(Assembler::Equal, val, &ok);
     assumeUnreachable("Expected an object!");
     bind(&ok);
 #endif
 }
 
-template <typename T>
 void
-MacroAssembler::computeSpectreIndexMaskGeneric(Register index, const T& length, Register output)
-{
-    MOZ_ASSERT(JitOptions.spectreIndexMasking);
-    MOZ_ASSERT(index != output);
-
-    // mask := ((index - length) & ~index) >> 31
-    mov(index, output);
-    sub32(length, output);
-    not32(index);
-    and32(index, output);
-    not32(index); // Restore index register to its original value.
-    rshift32Arithmetic(Imm32(31), output);
-}
-
-template <typename T>
-void
-MacroAssembler::computeSpectreIndexMask(int32_t index, const T& length, Register output)
-{
-    MOZ_ASSERT(JitOptions.spectreIndexMasking);
-
-    // mask := ((index - length) & ~index) >> 31
-    move32(Imm32(index), output);
-    sub32(length, output);
-    and32(Imm32(~index), output);
-    rshift32Arithmetic(Imm32(31), output);
-}
-
-void
-MacroAssembler::computeSpectreIndexMask(Register index, Register length, Register output)
+MacroAssembler::spectreMaskIndex(Register index, Register length, Register output)
 {
     MOZ_ASSERT(JitOptions.spectreIndexMasking);
     MOZ_ASSERT(index != length);
     MOZ_ASSERT(length != output);
     MOZ_ASSERT(index != output);
 
-#if JS_BITS_PER_WORD == 64
-    // On 64-bit platforms, we can use a faster algorithm:
-    //
-    //   mask := (uint64_t(index) - uint64_t(length)) >> 32
-    //
-    // mask is 0xFFFFFFFF if index < length, 0 otherwise.
-    move32(index, output);
-    subPtr(length, output);
-    rshiftPtr(Imm32(32), output);
-#else
-    computeSpectreIndexMaskGeneric(index, length, output);
-#endif
-}
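(For reference, the arithmetic the removed code relied on can be checked in portable C++. This is an illustrative sketch, not part of the patch; it assumes a non-negative index and arithmetic right shift of negative signed values, which all supported targets provide.)

    #include <cassert>
    #include <cstdint>

    // Generic mask from the removed computeSpectreIndexMaskGeneric:
    //   mask := ((index - length) & ~index) >> 31   (arithmetic shift)
    // All-ones when 0 <= index < length, zero when index >= length.
    static uint32_t maskGeneric(int32_t index, int32_t length) {
        return uint32_t(((index - length) & ~index) >> 31);
    }

    // 64-bit variant from the removed computeSpectreIndexMask:
    //   mask := (uint64_t(index) - uint64_t(length)) >> 32
    static uint32_t mask64(uint32_t index, uint32_t length) {
        return uint32_t((uint64_t(index) - uint64_t(length)) >> 32);
    }

    int main() {
        assert(maskGeneric(0, 10) == 0xFFFFFFFF);  // in bounds: index & mask == index
        assert(maskGeneric(10, 10) == 0);          // out of bounds: index & mask == 0
        assert(mask64(9, 10) == 0xFFFFFFFF);
        assert(mask64(10, 10) == 0);
        return 0;
    }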
-
-void
-MacroAssembler::spectreMaskIndex(int32_t index, Register length, Register output)
-{
-    MOZ_ASSERT(length != output);
-    if (index == 0) {
-        move32(Imm32(index), output);
-    } else {
-        computeSpectreIndexMask(index, length, output);
-        and32(Imm32(index), output);
-    }
-}
-
-void
-MacroAssembler::spectreMaskIndex(int32_t index, const Address& length, Register output)
-{
-    MOZ_ASSERT(length.base != output);
-    if (index == 0) {
-        move32(Imm32(index), output);
-    } else {
-        computeSpectreIndexMask(index, length, output);
-        and32(Imm32(index), output);
-    }
-}
-
-void
-MacroAssembler::spectreMaskIndex(Register index, Register length, Register output)
-{
-    MOZ_ASSERT(index != length);
-    MOZ_ASSERT(length != output);
-    MOZ_ASSERT(index != output);
-
-    computeSpectreIndexMask(index, length, output);
-    and32(index, output);
+    move32(Imm32(0), output);
+    cmp32Move32(Assembler::Below, index, length, index, output);
 }
 
 void
 MacroAssembler::spectreMaskIndex(Register index, const Address& length, Register output)
 {
+    MOZ_ASSERT(JitOptions.spectreIndexMasking);
     MOZ_ASSERT(index != length.base);
     MOZ_ASSERT(length.base != output);
     MOZ_ASSERT(index != output);
 
-    computeSpectreIndexMaskGeneric(index, length, output);
-    and32(index, output);
+    move32(Imm32(0), output);
+    cmp32Move32(Assembler::Below, index, length, index, output);
 }
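(The effect of the replacement sequence, modeled in portable C++; an illustrative sketch, not part of the patch. The key point is that the select is emitted as CMOVcc/CSEL/predicated MOV, not as a branch the CPU could speculate past.)

    #include <cstdint>

    // Model of the new spectreMaskIndex: output = index if it is
    // unsigned-below length, else 0. In the JIT the conditional below is a
    // conditional *move*, so there is no branch to mispredict.
    static uint32_t spectreMaskIndexModel(uint32_t index, uint32_t length) {
        uint32_t output = 0;          // move32(Imm32(0), output)
        if (index < length)           // cmp32Move32(Assembler::Below, ...)
            output = index;
        return output;
    }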
 
 void
 MacroAssembler::boundsCheck32PowerOfTwo(Register index, uint32_t length, Label* failure)
 {
     MOZ_ASSERT(mozilla::IsPowerOfTwo(length));
     branch32(Assembler::AboveOrEqual, index, Imm32(length), failure);
 
     // Note: it's fine to clobber the input register, as this is a no-op: it
     // only affects speculative execution.
     if (JitOptions.spectreIndexMasking)
         and32(Imm32(length - 1), index);
 }
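(The power-of-two path above is unchanged by this patch; for context, the identity it depends on, as a minimal standalone C++ check:)

    #include <cassert>
    #include <cstdint>

    // When length is a power of two, (index & (length - 1)) always lies in
    // [0, length), so even a speculatively executed load stays in bounds.
    int main() {
        const uint32_t length = 16;                  // power of two
        assert((uint32_t(5)  & (length - 1)) == 5);  // in-bounds index kept
        assert((uint32_t(21) & (length - 1)) == 5);  // OOB index wrapped in range
        return 0;
    }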
 
-void
-MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
-                                     Label* failure)
-{
-    MOZ_ASSERT(index != length);
-    MOZ_ASSERT(length != scratch);
-    MOZ_ASSERT(index != scratch);
-
-    branch32(Assembler::AboveOrEqual, index, length, failure);
-
-    if (JitOptions.spectreIndexMasking) {
-        computeSpectreIndexMask(index, length, scratch);
-        and32(scratch, index);
-    }
-}
-
-void
-MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
-                                     Label* failure)
-{
-    MOZ_ASSERT(index != length.base);
-    MOZ_ASSERT(length.base != scratch);
-    MOZ_ASSERT(index != scratch);
-
-    branch32(Assembler::BelowOrEqual, length, index, failure);
-
-    if (JitOptions.spectreIndexMasking) {
-        computeSpectreIndexMaskGeneric(index, length, scratch);
-        and32(scratch, index);
-    }
-}
-
 namespace js {
 namespace jit {
 
 #ifdef DEBUG
 template <class RegisterType>
 AutoGenericRegisterScope<RegisterType>::AutoGenericRegisterScope(MacroAssembler& masm, RegisterType reg)
   : RegisterType(reg), masm_(masm)
 {
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -1330,16 +1330,34 @@ class MacroAssembler : public MacroAssem
     template <typename T>
     inline void branchTestPrimitiveImpl(Condition cond, const T& t, Label* label)
         DEFINED_ON(arm, arm64, x86_shared);
     template <typename T, class L>
     inline void branchTestMagicImpl(Condition cond, const T& t, L label)
         DEFINED_ON(arm, arm64, x86_shared);
 
   public:
+
+    inline void cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+        DEFINED_ON(arm, arm64, x86_shared);
+
+    inline void cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+        DEFINED_ON(arm, arm64, x86_shared);
+
+    // Performs a bounds check and zeroes the index register if out-of-bounds
+    // (to mitigate Spectre).
+    inline void boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+        DEFINED_ON(arm, arm64, x86_shared);
+    inline void boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+        DEFINED_ON(arm, arm64, x86_shared);
+
     // ========================================================================
     // Canonicalization primitives.
     inline void canonicalizeDouble(FloatRegister reg);
     inline void canonicalizeDoubleIfDeterministic(FloatRegister reg);
 
     inline void canonicalizeFloat(FloatRegister reg);
     inline void canonicalizeFloatIfDeterministic(FloatRegister reg);
 
@@ -1889,39 +1907,23 @@ class MacroAssembler : public MacroAssem
     using MacroAssemblerSpecific::store32;
     void store32(const RegisterOrInt32Constant& key, const Address& dest) {
         if (key.isRegister())
             store32(key.reg(), dest);
         else
             store32(Imm32(key.constant()), dest);
     }
 
-  private:
-    template <typename T>
-    void computeSpectreIndexMaskGeneric(Register index, const T& length, Register output);
-
-    void computeSpectreIndexMask(Register index, Register length, Register output);
-
-    template <typename T>
-    void computeSpectreIndexMask(int32_t index, const T& length, Register output);
-
-  public:
-    void spectreMaskIndex(int32_t index, Register length, Register output);
-    void spectreMaskIndex(int32_t index, const Address& length, Register output);
     void spectreMaskIndex(Register index, Register length, Register output);
     void spectreMaskIndex(Register index, const Address& length, Register output);
 
     // The length must be a power of two. Performs a bounds check and Spectre index
     // masking.
     void boundsCheck32PowerOfTwo(Register index, uint32_t length, Label* failure);
 
-    // Performs a bounds check and Spectre index masking.
-    void boundsCheck32ForLoad(Register index, Register length, Register scratch, Label* failure);
-    void boundsCheck32ForLoad(Register index, const Address& length, Register scratch, Label* failure);
-
     template <typename T>
     void guardedCallPreBarrier(const T& address, MIRType type) {
         Label done;
 
         branchTestNeedsIncrementalBarrier(Assembler::Zero, &done);
 
         if (type == MIRType::Value)
             branchTestGCThing(Assembler::NotEqual, address, &done);
--- a/js/src/jit/arm/MacroAssembler-arm-inl.h
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -2131,16 +2131,68 @@ MacroAssembler::branchToComputedAddress(
     uint32_t scale = Imm32::ShiftOf(addr.scale).value;
 
     ma_ldr(DTRAddr(base, DtrRegImmShift(addr.index, LSL, scale)), pc);
    // When loading from pc, the pc is shifted to the next instruction, so we
    // add one extra instruction to accommodate this shifted offset.
     breakpoint();
 }
 
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    ma_mov(src, dest, LeaveCC, cond);
+}
+
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+{
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+    ma_ldr(rhs, scratch, scratch2);
+    cmp32Move32(cond, lhs, scratch, src, dest);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length);
+    MOZ_ASSERT(length != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        ma_mov(scratch, index, LeaveCC, Assembler::BelowOrEqual);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length.base);
+    MOZ_ASSERT(length.base != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        ma_mov(scratch, index, LeaveCC, Assembler::BelowOrEqual);
+}
+
 // ========================================================================
 // Memory access primitives.
 void
 MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& addr)
 {
     ScratchRegisterScope scratch(*this);
     ma_vstr(src, addr, scratch);
 }
--- a/js/src/jit/arm64/MacroAssembler-arm64-inl.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
@@ -1705,16 +1705,60 @@ MacroAssembler::branchTestMagic(Conditio
 }
 
 void
 MacroAssembler::branchToComputedAddress(const BaseIndex& addr)
 {
     MOZ_CRASH("branchToComputedAddress");
 }
 
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32), cond);
+}
+
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    Csel(ARMRegister(dest, 32), ARMRegister(src, 32), ARMRegister(dest, 32), cond);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length);
+    MOZ_ASSERT(length != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr, Assembler::Above);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length.base);
+    MOZ_ASSERT(length.base != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    branch32(Assembler::BelowOrEqual, length, index, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        Csel(ARMRegister(index, 32), ARMRegister(index, 32), vixl::wzr, Assembler::Above);
+}
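(On AArch64 no scratch register is needed: CSEL against the zero register wzr performs the clamp. A behavioral model, illustrative only:)

    #include <cstdint>

    // Csel(index, index, wzr, Above) after Cmp(length, index): keep index
    // while length is unsigned-above index, otherwise select wzr, i.e. zero.
    // Architecturally a no-op here, since the preceding branch already went
    // to |failure| when length <= index; it only matters under speculation.
    static uint32_t cselClamp(uint32_t index, uint32_t length) {
        return length > index ? index : 0;
    }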
+
 // ========================================================================
 // Memory access primitives.
 void
 MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& dest)
 {
     Str(ARMFPRegister(src, 64), MemOperand(ARMRegister(dest.base, 64), dest.offset));
 }
 void
--- a/js/src/jit/arm64/MacroAssembler-arm64.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64.h
@@ -998,28 +998,37 @@ class MacroAssemblerCompat : public vixl
         Cmp(ARMRegister(a, 32), Operand(ARMRegister(b, 32)));
     }
     void cmp32(const Address& lhs, Imm32 rhs) {
         cmp32(Operand(lhs.base, lhs.offset), rhs);
     }
     void cmp32(const Address& lhs, Register rhs) {
         cmp32(Operand(lhs.base, lhs.offset), rhs);
     }
+    void cmp32(Register lhs, const Address& rhs) {
+        cmp32(lhs, Operand(rhs.base, rhs.offset));
+    }
     void cmp32(const Operand& lhs, Imm32 rhs) {
         vixl::UseScratchRegisterScope temps(this);
         const ARMRegister scratch32 = temps.AcquireW();
         Mov(scratch32, lhs);
         Cmp(scratch32, Operand(rhs.value));
     }
     void cmp32(const Operand& lhs, Register rhs) {
         vixl::UseScratchRegisterScope temps(this);
         const ARMRegister scratch32 = temps.AcquireW();
         Mov(scratch32, lhs);
         Cmp(scratch32, Operand(ARMRegister(rhs, 32)));
     }
+    void cmp32(Register lhs, const Operand& rhs) {
+        vixl::UseScratchRegisterScope temps(this);
+        const ARMRegister scratch32 = temps.AcquireW();
+        Mov(scratch32, rhs);
+        Cmp(ARMRegister(lhs, 32), Operand(scratch32));
+    }
 
     void cmpPtr(Register lhs, Imm32 rhs) {
         Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
     }
     void cmpPtr(Register lhs, ImmWord rhs) {
         Cmp(ARMRegister(lhs, 64), Operand(rhs.value));
     }
     void cmpPtr(Register lhs, ImmPtr rhs) {
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@@ -464,16 +464,20 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_SCALE:
             masm.cmovCCl_mr(cc, src.disp(), src.base(), src.index(), src.scale(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void cmovCCl(Condition cond, Register src, Register dest) {
+        X86Encoding::Condition cc = static_cast<X86Encoding::Condition>(cond);
+        masm.cmovCCl_rr(cc, src.encoding(), dest.encoding());
+    }
     void cmovzl(const Operand& src, Register dest) {
         cmovCCl(Condition::Zero, src, dest);
     }
     void cmovnzl(const Operand& src, Register dest) {
         cmovCCl(Condition::NonZero, src, dest);
     }
     void movl(Imm32 imm32, Register dest) {
         masm.movl_i32r(imm32.value, dest.encoding());
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -1089,16 +1089,68 @@ MacroAssembler::branchTestMagic(Conditio
 template <typename T, class L>
 void
 MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label)
 {
     cond = testMagic(cond, t);
     j(cond, label);
 }
 
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, rhs);
+    cmovCCl(cond, src, dest);
+}
+
+void
+MacroAssembler::cmp32Move32(Condition cond, Register lhs, const Address& rhs, Register src,
+                            Register dest)
+{
+    cmp32(lhs, Operand(rhs));
+    cmovCCl(cond, src, dest);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, Register length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length);
+    MOZ_ASSERT(length != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    cmp32(index, length);
+    j(Assembler::AboveOrEqual, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        cmovCCl(Assembler::AboveOrEqual, scratch, index);
+}
+
+void
+MacroAssembler::boundsCheck32ForLoad(Register index, const Address& length, Register scratch,
+                                     Label* failure)
+{
+    MOZ_ASSERT(index != length.base);
+    MOZ_ASSERT(length.base != scratch);
+    MOZ_ASSERT(index != scratch);
+
+    if (JitOptions.spectreIndexMasking)
+        move32(Imm32(0), scratch);
+
+    cmp32(index, Operand(length));
+    j(Assembler::AboveOrEqual, failure);
+
+    if (JitOptions.spectreIndexMasking)
+        cmovCCl(Assembler::AboveOrEqual, scratch, index);
+}
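(Behavioral model of the x86 sequence above, illustrative only. On the architectural fall-through path the CMOV condition is always false; it only takes effect when the jump was mispredicted and execution continued speculatively with an out-of-bounds index.)

    #include <cstdint>

    // Returns false where the real code jumps to |failure|.
    static bool boundsCheck32ForLoadModel(uint32_t& index, uint32_t length) {
        uint32_t scratch = 0;        // move32(Imm32(0), scratch)
        if (index >= length)         // cmp32 + j(AboveOrEqual, failure)
            return false;
        if (index >= length)         // cmovCCl(AboveOrEqual, scratch, index):
            index = scratch;         // dead architecturally, live speculatively
        return true;
    }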
+
 // ========================================================================
 // Canonicalization primitives.
 void
 MacroAssembler::canonicalizeFloat32x4(FloatRegister reg, FloatRegister scratch)
 {
     ScratchSimd128Scope scratch2(*this);
 
     MOZ_ASSERT(scratch.asSimd128() != scratch2.asSimd128());