Bug 991153: Make the types for sets of registers a per-type quantity (r=jandem)
author: Marty Rosenberg <mrosenberg@mozilla.com>
Wed, 25 Jun 2014 12:54:34 -0400
changeset 190735 4dbf6c8109d45f9f9316a5a4d159e15a866391ee
parent 190734 c35df65e8a75208c7f0ba0ee06a758de3423a488
child 190736 a7a5966a9672c12902c5401ee0d01cad1e4b8e84
push id: 45386
push user: mrosenberg@mozilla.com
push date: Wed, 25 Jun 2014 16:57:52 +0000
treeherder: mozilla-inbound@a7a5966a9672 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jandem
bugs: 991153
milestone: 33.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 991153: Make the types for sets of registers a per-type quantity (r=jandem)
js/src/jit/RegisterSets.h
js/src/jit/arm/Architecture-arm.cpp
js/src/jit/arm/Architecture-arm.h
js/src/jit/x64/Architecture-x64.h
js/src/jit/x64/Assembler-x64.cpp
js/src/jit/x86/Architecture-x86.h
js/src/jit/x86/Assembler-x86.cpp
mfbt/MathAlgorithms.h
--- a/js/src/jit/RegisterSets.h
+++ b/js/src/jit/RegisterSets.h
@@ -495,16 +495,25 @@ class TypedRegisterSet
         return bits_;
     }
     uint32_t size() const {
         return mozilla::CountPopulation32(bits_);
     }
     bool operator ==(const TypedRegisterSet<T> &other) const {
         return other.bits_ == bits_;
     }
+    TypedRegisterSet<T> reduceSetForPush() const {
+        return T::ReduceSetForPush(*this);
+    }
+    uint32_t getSizeInBytes() const {
+        return T::GetSizeInBytes(*this);
+    }
+    uint32_t getPushSizeInBytes() const {
+        return T::GetPushSizeInBytes(*this);
+    }
 };
 
 typedef TypedRegisterSet<Register> GeneralRegisterSet;
 typedef TypedRegisterSet<FloatRegister> FloatRegisterSet;
 
 class AnyRegisterIterator;
 
 class RegisterSet {
--- a/js/src/jit/arm/Architecture-arm.cpp
+++ b/js/src/jit/arm/Architecture-arm.cpp
@@ -303,10 +303,57 @@ FloatRegisters::FromName(const char *nam
     for (size_t i = 0; i < Total; i++) {
         if (strcmp(GetName(i), name) == 0)
             return Code(i);
     }
 
     return Invalid;
 }
 
+FloatRegisterSet
+VFPRegister::ReduceSetForPush(const FloatRegisterSet &s)
+{
+    FloatRegisterSet mod;
+    for (TypedRegisterIterator<FloatRegister> iter(s); iter.more(); iter++) {
+        if ((*iter).isSingle()) {
+            // add in just this float
+            mod.addUnchecked(*iter);
+        } else if ((*iter).id() < 16) {
+            // a double with an overlay, add in both floats
+            mod.addUnchecked((*iter).singleOverlay(0));
+            mod.addUnchecked((*iter).singleOverlay(1));
+        } else {
+            // add in the lone double in the range 16-31
+            mod.addUnchecked(*iter);
+        }
+    }
+    return mod;
+}
+
+uint32_t
+VFPRegister::GetSizeInBytes(const FloatRegisterSet &s)
+{
+    uint64_t bits = s.bits();
+    uint32_t ret = mozilla::CountPopulation32(bits&0xffffffff) * sizeof(float);
+    ret +=  mozilla::CountPopulation32(bits >> 32) * sizeof(double);
+    return ret;
+}
+uint32_t
+VFPRegister::GetPushSizeInBytes(const FloatRegisterSet &s)
+{
+    FloatRegisterSet ss = s.reduceSetForPush();
+    uint64_t bits = ss.bits();
+    uint32_t ret = mozilla::CountPopulation32(bits&0xffffffff) * sizeof(float);
+    ret +=  mozilla::CountPopulation32(bits >> 32) * sizeof(double);
+    return ret;
+}
+uint32_t
+VFPRegister::getRegisterDumpOffsetInBytes()
+{
+    if (isSingle())
+        return id() * sizeof(float);
+    if (isDouble())
+        return id() * sizeof(double);
+    MOZ_ASSUME_UNREACHABLE();
+}
+
 } // namespace jit
 } // namespace js
--- a/js/src/jit/arm/Architecture-arm.h
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -2,16 +2,18 @@
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef jit_arm_Architecture_arm_h
 #define jit_arm_Architecture_arm_h
 
+#include "mozilla/MathAlgorithms.h"
+
 #include <limits.h>
 #include <stdint.h>
 
 #include "js/Utility.h"
 
 // gcc appears to use __ARM_PCS_VFP to denote that the target is a hard-float target.
 #if defined(__ARM_PCS_VFP)
 #define JS_CODEGEN_ARM_HARDFP
@@ -135,16 +137,21 @@ class Registers
         (1 << Registers::r3);
 
     // Registers returned from a JS -> C call.
     static const uint32_t CallMask =
         (1 << Registers::r0) |
         (1 << Registers::r1);  // used for double-size returns
 
     static const uint32_t AllocatableMask = AllMask & ~NonAllocatableMask;
+    typedef uint32_t SetType;
+    static uint32_t SetSize(SetType x) {
+        static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+        return mozilla::CountPopulation32(x);
+    }
 };
 
 // Smallest integer type that can hold a register bitmask.
 typedef uint16_t PackedRegisterMask;
 
 class FloatRegisters
 {
   public:
@@ -219,18 +226,22 @@ class FloatRegisters
 
     // d15 is the ARM scratch float register.
     static const uint32_t NonAllocatableMask = (1 << d15) | (1 << invalid_freg);
 
     // Registers that can be allocated without being saved, generally.
     static const uint32_t TempMask = VolatileMask & ~NonAllocatableMask;
 
     static const uint32_t AllocatableMask = AllMask & ~NonAllocatableMask;
+    typedef uint32_t SetType;
 };
 
+template <typename T>
+class TypedRegisterSet;
+
 class VFPRegister
 {
   public:
     // What type of data is being stored in this register?
     // UInt / Int are specifically for vcvt, where we need
     // to know how the data is supposed to be converted.
     enum RegType {
         Single = 0x0,
@@ -390,19 +401,29 @@ class VFPRegister
             *ret = singleOverlay(aliasIdx - 1);
             return;
         }
         JS_ASSERT(aliasIdx == 1);
         JS_ASSERT((code_ & 1) == 0);
         *ret = doubleOverlay(aliasIdx - 1);
         return;
     }
+    typedef FloatRegisters::SetType SetType;
+    static uint32_t SetSize(SetType x) {
+        static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+        return mozilla::CountPopulation32(x);
+    }
     static Code FromName(const char *name) {
         return FloatRegisters::FromName(name);
     }
+    static TypedRegisterSet<VFPRegister> ReduceSetForPush(const TypedRegisterSet<VFPRegister> &s);
+    static uint32_t GetSizeInBytes(const TypedRegisterSet<VFPRegister> &s);
+    static uint32_t GetPushSizeInBytes(const TypedRegisterSet<VFPRegister> &s);
+    uint32_t getRegisterDumpOffsetInBytes();
+
 };
 
 // The only floating point register set that we work with
 // are the VFP Registers
 typedef VFPRegister FloatRegister;
 
 uint32_t GetARMFlags();
 bool HasMOVWT();
--- a/js/src/jit/x64/Architecture-x64.h
+++ b/js/src/jit/x64/Architecture-x64.h
@@ -22,17 +22,21 @@ static const uint32_t ION_FRAME_SLACK_SI
 static const uint32_t ShadowStackSpace = 32;
 #else
 static const uint32_t ShadowStackSpace = 0;
 #endif
 
 class Registers {
   public:
     typedef JSC::X86Registers::RegisterID Code;
-
+    typedef uint32_t SetType;
+    static uint32_t SetSize(SetType x) {
+        static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+        return mozilla::CountPopulation32(x);
+    }
     static const char *GetName(Code code) {
         static const char * const Names[] = { "rax", "rcx", "rdx", "rbx",
                                               "rsp", "rbp", "rsi", "rdi",
                                               "r8",  "r9",  "r10", "r11",
                                               "r12", "r13", "r14", "r15" };
         return Names[code];
     }
 
@@ -43,16 +47,17 @@ class Registers {
         }
         return Invalid;
     }
 
     static const Code StackPointer = JSC::X86Registers::esp;
     static const Code Invalid = JSC::X86Registers::invalid_reg;
 
     static const uint32_t Total = 16;
+    static const uint32_t TotalPhys = 16;
     static const uint32_t Allocatable = 14;
 
     static const uint32_t AllMask = (1 << Total) - 1;
 
     static const uint32_t ArgRegMask =
 # if !defined(_WIN64)
         (1 << JSC::X86Registers::edi) |
         (1 << JSC::X86Registers::esi) |
@@ -110,17 +115,17 @@ class Registers {
 };
 
 // Smallest integer type that can hold a register bitmask.
 typedef uint16_t PackedRegisterMask;
 
 class FloatRegisters {
   public:
     typedef JSC::X86Registers::XMMRegisterID Code;
-
+    typedef uint32_t SetType;
     static const char *GetName(Code code) {
         static const char * const Names[] = { "xmm0",  "xmm1",  "xmm2",  "xmm3",
                                               "xmm4",  "xmm5",  "xmm6",  "xmm7",
                                               "xmm8",  "xmm9",  "xmm10", "xmm11",
                                               "xmm12", "xmm13", "xmm14", "xmm15" };
         return Names[code];
     }
 
@@ -130,20 +135,22 @@ class FloatRegisters {
                 return Code(i);
         }
         return Invalid;
     }
 
     static const Code Invalid = JSC::X86Registers::invalid_xmm;
 
     static const uint32_t Total = 16;
+    static const uint32_t TotalPhys = 16;
+
     static const uint32_t Allocatable = 15;
 
     static const uint32_t AllMask = (1 << Total) - 1;
-
+    static const uint32_t AllDoubleMask = AllMask;
     static const uint32_t VolatileMask =
 #if defined(_WIN64)
         (1 << JSC::X86Registers::xmm0) |
         (1 << JSC::X86Registers::xmm1) |
         (1 << JSC::X86Registers::xmm2) |
         (1 << JSC::X86Registers::xmm3) |
         (1 << JSC::X86Registers::xmm4) |
         (1 << JSC::X86Registers::xmm5);
@@ -154,21 +161,30 @@ class FloatRegisters {
     static const uint32_t NonVolatileMask = AllMask & ~VolatileMask;
 
     static const uint32_t WrapperMask = VolatileMask;
 
     static const uint32_t NonAllocatableMask =
         (1 << JSC::X86Registers::xmm15);    // This is ScratchFloatReg.
 
     static const uint32_t AllocatableMask = AllMask & ~NonAllocatableMask;
+
 };
 
+template <typename T>
+class TypedRegisterSet;
+
 struct FloatRegister {
     typedef FloatRegisters Codes;
     typedef Codes::Code Code;
+    typedef Codes::SetType SetType;
+    static uint32_t SetSize(SetType x) {
+        static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+        return mozilla::CountPopulation32(x);
+    }
 
     Code code_;
 
     static FloatRegister FromCode(uint32_t i) {
         JS_ASSERT(i < FloatRegisters::Total);
         FloatRegister r = { (FloatRegisters::Code)i };
         return r;
     }
@@ -210,16 +226,21 @@ struct FloatRegister {
     }
     uint32_t numAlignedAliased() {
         return 1;
     }
     void alignedAliased(uint32_t aliasIdx, FloatRegister *ret) {
         JS_ASSERT(aliasIdx == 0);
         *ret = *this;
     }
+    static TypedRegisterSet<FloatRegister> ReduceSetForPush(const TypedRegisterSet<FloatRegister> &s);
+    static uint32_t GetSizeInBytes(const TypedRegisterSet<FloatRegister> &s);
+    static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister> &s);
+    uint32_t getRegisterDumpOffsetInBytes();
+
 };
 
 // Arm/D32 has double registers that can NOT be treated as float32
 // and this requires some dances in lowering.
 static bool hasUnaliasedDouble() {
     return false;
 }
 // On ARM, Dn aliases both S2n and S2n+1, so if you need to convert a float32
--- a/js/src/jit/x64/Assembler-x64.cpp
+++ b/js/src/jit/x64/Assembler-x64.cpp
@@ -258,8 +258,29 @@ Assembler::TraceJumpRelocations(JSTracer
     RelocationIterator iter(reader);
     while (iter.read()) {
         JitCode *child = CodeFromJump(code, code->raw() + iter.offset());
         MarkJitCodeUnbarriered(trc, &child, "rel32");
         JS_ASSERT(child == CodeFromJump(code, code->raw() + iter.offset()));
     }
 }
 
+FloatRegisterSet
+FloatRegister::ReduceSetForPush(const FloatRegisterSet &s)
+{
+    return s;
+}
+uint32_t
+FloatRegister::GetSizeInBytes(const FloatRegisterSet &s)
+{
+    uint32_t ret = s.size() * sizeof(double);
+    return ret;
+}
+uint32_t
+FloatRegister::GetPushSizeInBytes(const FloatRegisterSet &s)
+{
+    return s.size() * sizeof(double);
+}
+uint32_t
+FloatRegister::getRegisterDumpOffsetInBytes()
+{
+    return code() * sizeof(double);
+}
--- a/js/src/jit/x86/Architecture-x86.h
+++ b/js/src/jit/x86/Architecture-x86.h
@@ -31,17 +31,21 @@ static const int32_t NUNBOX32_PAYLOAD_OF
 ////
 
 // Size of each bailout table entry. On x86 this is a 5-byte relative call.
 static const uint32_t BAILOUT_TABLE_ENTRY_SIZE    = 5;
 
 class Registers {
   public:
     typedef JSC::X86Registers::RegisterID Code;
-
+    typedef uint8_t SetType;
+    static uint32_t SetSize(SetType x) {
+        static_assert(sizeof(SetType) == 1, "SetType must be 8 bits");
+        return mozilla::CountPopulation32(x);
+    }
     static const char *GetName(Code code) {
         static const char * const Names[] = { "eax", "ecx", "edx", "ebx",
                                               "esp", "ebp", "esi", "edi" };
         return Names[code];
     }
 
     static Code FromName(const char *name) {
         for (size_t i = 0; i < Total; i++) {
@@ -50,16 +54,17 @@ class Registers {
         }
         return Invalid;
     }
 
     static const Code StackPointer = JSC::X86Registers::esp;
     static const Code Invalid = JSC::X86Registers::invalid_reg;
 
     static const uint32_t Total = 8;
+    static const uint32_t TotalPhys = 8;
     static const uint32_t Allocatable = 7;
 
     static const uint32_t AllMask = (1 << Total) - 1;
 
     static const uint32_t ArgRegMask = 0;
 
     static const uint32_t VolatileMask =
         (1 << JSC::X86Registers::eax) |
@@ -101,17 +106,17 @@ class Registers {
 };
 
 // Smallest integer type that can hold a register bitmask.
 typedef uint8_t PackedRegisterMask;
 
 class FloatRegisters {
   public:
     typedef JSC::X86Registers::XMMRegisterID Code;
-
+    typedef uint32_t SetType;
     static const char *GetName(Code code) {
         static const char * const Names[] = { "xmm0", "xmm1", "xmm2", "xmm3",
                                               "xmm4", "xmm5", "xmm6", "xmm7" };
         return Names[code];
     }
 
     static Code FromName(const char *name) {
         for (size_t i = 0; i < Total; i++) {
@@ -119,34 +124,43 @@ class FloatRegisters {
                 return Code(i);
         }
         return Invalid;
     }
 
     static const Code Invalid = JSC::X86Registers::invalid_xmm;
 
     static const uint32_t Total = 8;
+    static const uint32_t TotalPhys = 8;
     static const uint32_t Allocatable = 7;
 
     static const uint32_t AllMask = (1 << Total) - 1;
-
+    static const uint32_t AllDoubleMask = AllMask;
     static const uint32_t VolatileMask = AllMask;
     static const uint32_t NonVolatileMask = 0;
 
     static const uint32_t WrapperMask = VolatileMask;
 
     static const uint32_t NonAllocatableMask =
         (1 << JSC::X86Registers::xmm7);
 
     static const uint32_t AllocatableMask = AllMask & ~NonAllocatableMask;
 };
 
+template <typename T>
+class TypedRegisterSet;
+
 struct FloatRegister {
     typedef FloatRegisters Codes;
     typedef Codes::Code Code;
+    typedef Codes::SetType SetType;
+    static uint32_t SetSize(SetType x) {
+        static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
+        return mozilla::CountPopulation32(x);
+    }
 
     Code code_;
 
     static FloatRegister FromCode(uint32_t i) {
         JS_ASSERT(i < FloatRegisters::Total);
         FloatRegister r = { (FloatRegisters::Code)i };
         return r;
     }
@@ -188,16 +202,21 @@ struct FloatRegister {
     }
     uint32_t numAlignedAliased() {
         return 1;
     }
     void alignedAliased(uint32_t aliasIdx, FloatRegister *ret) {
         JS_ASSERT(aliasIdx == 0);
         *ret = *this;
     }
+    static TypedRegisterSet<FloatRegister> ReduceSetForPush(const TypedRegisterSet<FloatRegister> &s);
+    static uint32_t GetSizeInBytes(const TypedRegisterSet<FloatRegister> &s);
+    static uint32_t GetPushSizeInBytes(const TypedRegisterSet<FloatRegister> &s);
+    uint32_t getRegisterDumpOffsetInBytes();
+
 
 };
 
 // Arm/D32 has double registers that can NOT be treated as float32
 // and this requires some dances in lowering.
 static bool hasUnaliasedDouble() {
     return false;
 }
--- a/js/src/jit/x86/Assembler-x86.cpp
+++ b/js/src/jit/x86/Assembler-x86.cpp
@@ -85,8 +85,30 @@ Assembler::TraceJumpRelocations(JSTracer
     RelocationIterator iter(reader);
     while (iter.read()) {
         JitCode *child = CodeFromJump(code->raw() + iter.offset());
         MarkJitCodeUnbarriered(trc, &child, "rel32");
         JS_ASSERT(child == CodeFromJump(code->raw() + iter.offset()));
     }
 }
 
+uint32_t
+FloatRegister::GetSizeInBytes(const FloatRegisterSet &s)
+{
+    uint32_t ret = s.size() * sizeof(double);
+    return ret;
+}
+
+FloatRegisterSet
+FloatRegister::ReduceSetForPush(const FloatRegisterSet &s)
+{
+    return s;
+}
+uint32_t
+FloatRegister::GetPushSizeInBytes(const FloatRegisterSet &s)
+{
+    return s.size() * sizeof(double);
+}
+uint32_t
+FloatRegister::getRegisterDumpOffsetInBytes()
+{
+    return code() * sizeof(double);
+}
--- a/mfbt/MathAlgorithms.h
+++ b/mfbt/MathAlgorithms.h
@@ -183,16 +183,22 @@ CountTrailingZeroes32(uint32_t aValue)
 
 inline uint_fast8_t
 CountPopulation32(uint32_t aValue)
 {
   uint32_t x = aValue - ((aValue >> 1) & 0x55555555);
   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
   return (((x + (x >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
 }
+inline uint_fast8_t
+CountPopulation64(uint64_t aValue)
+{
+  return uint_fast8_t(CountPopulation32(aValue & 0xffffffff) +
+                      CountPopulation32(aValue >> 32));
+}
 
 inline uint_fast8_t
 CountLeadingZeroes64(uint64_t aValue)
 {
 #if defined(MOZ_BITSCAN_WINDOWS64)
   unsigned long index;
   _BitScanReverse64(&index, static_cast<unsigned __int64>(aValue));
   return uint_fast8_t(63 - index);
@@ -249,32 +255,39 @@ CountTrailingZeroes32(uint32_t aValue)
 
 inline uint_fast8_t
 CountPopulation32(uint32_t aValue)
 {
   return __builtin_popcount(aValue);
 }
 
 inline uint_fast8_t
+CountPopulation64(uint64_t aValue)
+{
+  return __builtin_popcountll(aValue);
+}
+
+inline uint_fast8_t
 CountLeadingZeroes64(uint64_t aValue)
 {
   return __builtin_clzll(aValue);
 }
 
 inline uint_fast8_t
 CountTrailingZeroes64(uint64_t aValue)
 {
   return __builtin_ctzll(aValue);
 }
 
 #else
 #  error "Implement these!"
 inline uint_fast8_t CountLeadingZeroes32(uint32_t aValue) MOZ_DELETE;
 inline uint_fast8_t CountTrailingZeroes32(uint32_t aValue) MOZ_DELETE;
 inline uint_fast8_t CountPopulation32(uint32_t aValue) MOZ_DELETE;
+inline uint_fast8_t CountPopulation64(uint64_t aValue) MOZ_DELETE;
 inline uint_fast8_t CountLeadingZeroes64(uint64_t aValue) MOZ_DELETE;
 inline uint_fast8_t CountTrailingZeroes64(uint64_t aValue) MOZ_DELETE;
 #endif
 
 } // namespace detail
 
 /**
  * Compute the number of high-order zero bits in the NON-ZERO number |aValue|.
@@ -316,16 +329,23 @@ CountTrailingZeroes32(uint32_t aValue)
  * Compute the number of one bits in the number |aValue|,
  */
 inline uint_fast8_t
 CountPopulation32(uint32_t aValue)
 {
   return detail::CountPopulation32(aValue);
 }
 
+/** Analogous to CountPopulation32, but for 64-bit numbers. */
+inline uint_fast8_t
+CountPopulation64(uint64_t aValue)
+{
+  return detail::CountPopulation64(aValue);
+}
+
 /** Analogous to CountLeadingZeroes32, but for 64-bit numbers. */
 inline uint_fast8_t
 CountLeadingZeroes64(uint64_t aValue)
 {
   MOZ_ASSERT(aValue != 0);
   return detail::CountLeadingZeroes64(aValue);
 }