Bug 1419025 - wasm baseline, refactor registers and register allocation, r=bbouvier
author: Lars T Hansen <lhansen@mozilla.com>
date: Thu, 12 Oct 2017 15:44:46 +0200
changeset: 393267:ad749012c057f562414ddac2b8d24cb1066dfd6f
parent: 393266:fedc2d408840d81fdb3550c83121b400a2e63a0b
child: 393268:2079792e5c71451b2dfac559d72e7f6f97a5526b
push id: 97628
push user: lhansen@mozilla.com
push date: Thu, 23 Nov 2017 07:40:41 +0000
treeherder: mozilla-inbound@32b00c586da2
reviewers: bbouvier
bugs: 1419025
milestone: 59.0a1
Bug 1419025 - wasm baseline, refactor registers and register allocation, r=bbouvier

MozReview-Commit-ID: J7NawzsOJ1x
js/src/jit/RegisterAllocator.h
js/src/wasm/WasmBaselineCompile.cpp
--- a/js/src/jit/RegisterAllocator.h
+++ b/js/src/jit/RegisterAllocator.h
@@ -276,24 +276,17 @@ class RegisterAllocator
 
     RegisterAllocator(MIRGenerator* mir, LIRGenerator* lir, LIRGraph& graph)
       : mir(mir),
         lir(lir),
         graph(graph),
         allRegisters_(RegisterSet::All())
     {
         if (mir->compilingWasm()) {
-#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
-    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
-            allRegisters_.take(AnyRegister(HeapReg));
-#elif defined(JS_CODEGEN_ARM64)
-            allRegisters_.take(AnyRegister(HeapReg));
-            allRegisters_.take(AnyRegister(HeapLenReg));
-#endif
-            allRegisters_.take(FramePointer);
+            takeWasmRegisters(allRegisters_);
         } else {
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM64)
             if (mir->instrumentedProfiling())
                 allRegisters_.take(AnyRegister(FramePointer));
 #endif
         }
     }
 
@@ -354,16 +347,29 @@ class RegisterAllocator
                 break;
             ins = next;
         }
 
         return outputOf(ins);
     }
 
     void dumpInstructions();
+
+  public:
+    template<typename TakeableSet>
+    static void takeWasmRegisters(TakeableSet& regs) {
+#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) || \
+    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
+        regs.take(HeapReg);
+#elif defined(JS_CODEGEN_ARM64)
+        regs.take(HeapReg);
+        regs.take(HeapLenReg);
+#endif
+        regs.take(FramePointer);
+    }
 };
 
 static inline AnyRegister
 GetFixedRegister(const LDefinition* def, const LUse* use)
 {
     return def->isFloatReg()
            ? AnyRegister(FloatRegister::FromCode(use->registerCode()))
            : AnyRegister(Register::FromCode(use->registerCode()));
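The takeWasmRegisters hook above is a template because its two callers hand it
unrelated set types: Ion's RegisterAllocator passes a RegisterSet, while the
wasm baseline compiler (below) passes an AllocatableGeneralRegisterSet, and the
two share only a take() method by convention.  A minimal sketch of that duck
typing, using toy stand-ins (the set types and register codes here are
hypothetical, not SpiderMonkey code):

    #include <cassert>
    #include <cstdint>

    struct Reg { uint32_t code; };

    struct ToyRegisterSet {                  // stands in for RegisterSet
        uint32_t bits = ~0u;
        void take(Reg r) { bits &= ~(1u << r.code); }
    };

    struct ToyAllocatableSet {               // stands in for AllocatableGeneralRegisterSet
        uint32_t bits = ~0u;
        void take(Reg r) {
            assert(bits & (1u << r.code));   // must still be free
            bits &= ~(1u << r.code);
        }
    };

    template <typename TakeableSet>
    void takeReservedRegisters(TakeableSet& regs) {
        regs.take(Reg{14});                  // stands in for HeapReg
        regs.take(Reg{5});                   // stands in for FramePointer
    }

    int main() {
        ToyRegisterSet a;
        ToyAllocatableSet b;
        takeReservedRegisters(a);            // one template body serves both
        takeReservedRegisters(b);
    }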
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -70,23 +70,25 @@
  *   that should be assigned to registers.  Or something like that.  Wasm makes
  *   this simple.  Static assignments are desirable because they are not flushed
  *   to memory by the pre-block sync() call.)
  */
 
 #include "wasm/WasmBaselineCompile.h"
 
 #include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
 
 #include "jit/AtomicOp.h"
 #include "jit/IonTypes.h"
 #include "jit/JitAllocPolicy.h"
 #include "jit/Label.h"
 #include "jit/MacroAssembler.h"
 #include "jit/MIR.h"
+#include "jit/RegisterAllocator.h"
 #include "jit/Registers.h"
 #include "jit/RegisterSets.h"
 #if defined(JS_CODEGEN_ARM)
 # include "jit/arm/Assembler-arm.h"
 #endif
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
 # include "jit/x86-shared/Architecture-x86-shared.h"
 # include "jit/x86-shared/Assembler-x86-shared.h"
@@ -98,16 +100,17 @@
 #include "wasm/WasmValidate.h"
 
 #include "jit/MacroAssembler-inl.h"
 
 using mozilla::DebugOnly;
 using mozilla::FloatingPoint;
 using mozilla::FloorLog2;
 using mozilla::IsPowerOfTwo;
+using mozilla::Maybe;
 using mozilla::SpecificNaN;
 
 namespace js {
 namespace wasm {
 
 using namespace js::jit;
 using JS::GenericNaN;
 
@@ -293,180 +296,529 @@ BaseLocalIter::operator++(int)
 {
     MOZ_ASSERT(!done_);
     index_++;
     if (!argsIter_.done())
         argsIter_++;
     settle();
 }
 
-class BaseCompiler
-{
-    // We define our own ScratchRegister abstractions, deferring to
-    // the platform's when possible.
+// The strongly typed register wrappers are especially useful to distinguish
+// float registers from double registers.
+
+struct RegI32 : public Register
+{
+    RegI32() : Register(Register::Invalid()) {}
+    explicit RegI32(Register reg) : Register(reg) {}
+};
+
+struct RegI64 : public Register64
+{
+    RegI64() : Register64(Register64::Invalid()) {}
+    explicit RegI64(Register64 reg) : Register64(reg) {}
+};
+
+struct RegF32 : public FloatRegister
+{
+    RegF32() : FloatRegister() {}
+    explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
+};
+
+struct RegF64 : public FloatRegister
+{
+    RegF64() : FloatRegister() {}
+    explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
+};
+
+struct AnyReg
+{
+    explicit AnyReg(RegI32 r) { tag = I32; i32_ = r; }
+    explicit AnyReg(RegI64 r) { tag = I64; i64_ = r; }
+    explicit AnyReg(RegF32 r) { tag = F32; f32_ = r; }
+    explicit AnyReg(RegF64 r) { tag = F64; f64_ = r; }
+
+    RegI32 i32() const {
+        MOZ_ASSERT(tag == I32);
+        return i32_;
+    }
+    RegI64 i64() const {
+        MOZ_ASSERT(tag == I64);
+        return i64_;
+    }
+    RegF32 f32() const {
+        MOZ_ASSERT(tag == F32);
+        return f32_;
+    }
+    RegF64 f64() const {
+        MOZ_ASSERT(tag == F64);
+        return f64_;
+    }
+    AnyRegister any() const {
+        switch (tag) {
+          case F32: return AnyRegister(f32_);
+          case F64: return AnyRegister(f64_);
+          case I32: return AnyRegister(i32_);
+          case I64:
+#ifdef JS_PUNBOX64
+            return AnyRegister(i64_.reg);
+#else
+            // The compiler is written so that this is never needed: any() is
+            // called on arbitrary registers for asm.js but asm.js does not have
+            // 64-bit ints.  For wasm, any() is called on arbitrary registers
+            // only on 64-bit platforms.
+            MOZ_CRASH("AnyReg::any() on 32-bit platform");
+#endif
+          default:
+            MOZ_CRASH();
+        }
+        // Work around GCC 5 analysis/warning bug.
+        MOZ_CRASH("AnyReg::any(): impossible case");
+    }
+
+    union {
+        RegI32 i32_;
+        RegI64 i64_;
+        RegF32 f32_;
+        RegF64 f64_;
+    };
+    enum { I32, I64, F32, F64 } tag;
+};
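A reduced model of what the explicit wrapper constructors buy, with
hypothetical stand-in types: RegF32 and RegF64 both derive from FloatRegister,
so neither converts implicitly to the other, and a float/double width mixup
fails to compile instead of silently picking the wrong instruction width.

    struct FloatRegister { int code = -1; };

    struct RegF32 : FloatRegister {
        RegF32() = default;
        explicit RegF32(FloatRegister r) : FloatRegister(r) {}
    };

    struct RegF64 : FloatRegister {
        RegF64() = default;
        explicit RegF64(FloatRegister r) : FloatRegister(r) {}
    };

    void useF32(RegF32) {}
    void useF64(RegF64) {}

    void example(RegF32 f, RegF64 d) {
        useF32(f);                   // OK
        useF64(d);                   // OK
        // useF64(f);                // error: no implicit RegF32 -> RegF64
        // useF32(FloatRegister{});  // error: the constructor is explicit
        useF64(RegF64(f));           // a reinterpretation must be spelled out
    }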
+
+class BaseCompilerInterface
+{
+  public:
+    // Spill all spillable registers.
+    //
+    // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
+    // spilling only enough registers to satisfy current needs.
+    virtual void sync() = 0;
+};
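BaseCompilerInterface exists so that BaseRegAlloc, which is defined first, can
ask the compiler to spill without naming the concrete BaseCompiler class.  A
sketch of that layering under illustrative names (SpillSink, ToyRegAlloc):

    #include <cassert>

    struct SpillSink {                    // plays the role of BaseCompilerInterface
        virtual void sync() = 0;
      protected:
        ~SpillSink() = default;
    };

    class ToyRegAlloc {
        SpillSink& bc;
        unsigned avail = 0;               // count of free registers
      public:
        explicit ToyRegAlloc(SpillSink& bc) : bc(bc) {}
        void noteFreed(unsigned n) { avail += n; }
        unsigned alloc() {
            if (avail == 0)
                bc.sync();                // compiler flushes its value stack,
                                          // freeing registers via noteFreed()
            assert(avail > 0);
            return --avail;
        }
    };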
+
+// Register allocator.
+
+class BaseRegAlloc
+{
+    // Notes on float register allocation.
+    //
+    // The general rule in SpiderMonkey is that float registers can alias double
+    // registers, but there are predicates to handle exceptions to that rule:
+    // hasUnaliasedDouble() and hasMultiAlias().  The way aliasing actually
+    // works is platform dependent and exposed through the aliased(n, &r)
+    // predicate, etc.
+    //
+    //  - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
+    //    cannot be treated as float.
+    //  - hasMultiAlias(): on ARM and MIPS a double register aliases two float
+    //    registers.
+    //
+    // On some platforms (x86, x64, ARM64) but not all (ARM)
+    // ScratchFloat32Register is the same as ScratchDoubleRegister.
+    //
+    // It's a basic invariant of the AllocatableRegisterSet that it deals
+    // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
+    // not allocatable; if s0 and s1 are freed individually then d0 becomes
+    // allocatable.
+
+    BaseCompilerInterface&        bc;
+    AllocatableGeneralRegisterSet availGPR;
+    AllocatableFloatRegisterSet   availFPU;
+#ifdef DEBUG
+    AllocatableGeneralRegisterSet allGPR;       // The registers available to the compiler
+    AllocatableFloatRegisterSet   allFPU;       //   after removing ScratchReg, HeapReg, etc
+    bool                          scratchTaken;
+#endif
+#ifdef JS_CODEGEN_X86
+    AllocatableGeneralRegisterSet singleByteRegs;
+#endif
+
+    bool hasGPR() {
+        return !availGPR.empty();
+    }
+
+    bool hasGPR64() {
+#ifdef JS_PUNBOX64
+        return !availGPR.empty();
+#else
+        if (availGPR.empty())
+            return false;
+        Register r = allocGPR();
+        bool available = !availGPR.empty();
+        freeGPR(r);
+        return available;
+#endif
+    }
+
+    template<MIRType t>
+    bool hasFPU() {
+        return availFPU.hasAny<RegTypeOf<t>::value>();
+    }
+
+    bool isAvailableGPR(Register r) {
+        return availGPR.has(r);
+    }
+
+    bool isAvailableFPU(FloatRegister r) {
+        return availFPU.has(r);
+    }
+
+    void allocGPR(Register r) {
+        MOZ_ASSERT(isAvailableGPR(r));
+        availGPR.take(r);
+    }
+
+    Register allocGPR() {
+        MOZ_ASSERT(hasGPR());
+        return availGPR.takeAny();
+    }
+
+    void allocInt64(Register64 r) {
+#ifdef JS_PUNBOX64
+        allocGPR(r.reg);
+#else
+        allocGPR(r.low);
+        allocGPR(r.high);
+#endif
+    }
+
+    Register64 allocInt64() {
+        MOZ_ASSERT(hasGPR64());
+#ifdef JS_PUNBOX64
+        return Register64(availGPR.takeAny());
+#else
+        Register high = availGPR.takeAny();
+        Register low = availGPR.takeAny();
+        return Register64(high, low);
+#endif
+    }
+
+#ifdef JS_CODEGEN_ARM
+    // r12 is normally the ScratchRegister and r13 is always the stack pointer,
+    // so the highest possible pair has r10 as the even-numbered register.
+
+    static const uint32_t pairLimit = 10;
+
+    bool hasGPRPair() {
+        for (uint32_t i = 0; i <= pairLimit; i += 2) {
+            if (isAvailableGPR(Register::FromCode(i)) && isAvailableGPR(Register::FromCode(i + 1)))
+                return true;
+        }
+        return false;
+    }
+
+    void allocGPRPair(Register* low, Register* high) {
+        MOZ_ASSERT(hasGPRPair());
+        for (uint32_t i = 0; i <= pairLimit; i += 2) {
+            if (isAvailableGPR(Register::FromCode(i)) &&
+                isAvailableGPR(Register::FromCode(i + 1)))
+            {
+                *low = Register::FromCode(i);
+                *high = Register::FromCode(i + 1);
+                allocGPR(*low);
+                allocGPR(*high);
+                return;
+            }
+        }
+        MOZ_CRASH("No pair");
+    }
+#endif
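A reduced model of the pair scan above, assuming a 16-bit availability mask:
candidate pairs are (r0,r1), (r2,r3), ..., (r10,r11), and the pairLimit of 10
keeps the loop clear of r12 (the scratch register) and r13 (sp).

    #include <bitset>

    bool hasPairSketch(const std::bitset<16>& avail) {
        for (unsigned i = 0; i <= 10; i += 2) {
            if (avail[i] && avail[i + 1])
                return true;             // an aligned even/odd pair is free
        }
        return false;
    }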
+
+    void allocFPU(FloatRegister r) {
+        MOZ_ASSERT(isAvailableFPU(r));
+        availFPU.take(r);
+    }
+
+    template<MIRType t>
+    FloatRegister allocFPU() {
+        return availFPU.takeAny<RegTypeOf<t>::value>();
+    }
+
+    void freeGPR(Register r) {
+        availGPR.add(r);
+    }
+
+    void freeInt64(Register64 r) {
+#ifdef JS_PUNBOX64
+        freeGPR(r.reg);
+#else
+        freeGPR(r.low);
+        freeGPR(r.high);
+#endif
+    }
+
+    void freeFPU(FloatRegister r) {
+        availFPU.add(r);
+    }
+
+  public:
+    explicit BaseRegAlloc(BaseCompilerInterface& bc)
+      : bc(bc)
+      , availGPR(GeneralRegisterSet::All())
+      , availFPU(FloatRegisterSet::All())
+#ifdef DEBUG
+      , scratchTaken(false)
+#endif
+#ifdef JS_CODEGEN_X86
+      , singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
+#endif
+    {
+        RegisterAllocator::takeWasmRegisters(availGPR);
+
+#if defined(JS_CODEGEN_ARM)
+        availGPR.take(ScratchRegARM);
+#elif defined(JS_CODEGEN_X86)
+        availGPR.take(ScratchRegX86);
+#endif
+
+#ifdef DEBUG
+        allGPR = availGPR;
+        allFPU = availFPU;
+#endif
+    }
+
+#ifdef DEBUG
+    bool scratchRegisterTaken() const {
+        return scratchTaken;
+    }
+
+    void setScratchRegisterTaken(bool state) {
+        scratchTaken = state;
+    }
+#endif
+
+#ifdef JS_CODEGEN_X86
+    bool isSingleByteI32(Register r) {
+        return singleByteRegs.has(r);
+    }
+#endif
+
+    bool isAvailableI32(RegI32 r) {
+        return isAvailableGPR(r);
+    }
+
+    bool isAvailableI64(RegI64 r) {
+#ifdef JS_PUNBOX64
+        return isAvailableGPR(r.reg);
+#else
+        return isAvailableGPR(r.low) && isAvailableGPR(r.high);
+#endif
+    }
+
+    bool isAvailableF32(RegF32 r) {
+        return isAvailableFPU(r);
+    }
+
+    bool isAvailableF64(RegF64 r) {
+        return isAvailableFPU(r);
+    }
+
+    // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
+    // failure, only as much as we need.
+
+    MOZ_MUST_USE RegI32 needI32() {
+        if (!hasGPR())
+            bc.sync();
+        return RegI32(allocGPR());
+    }
+
+    void needI32(RegI32 specific) {
+        if (!isAvailableI32(specific))
+            bc.sync();
+        allocGPR(specific);
+    }
+
+    MOZ_MUST_USE RegI64 needI64() {
+        if (!hasGPR64())
+            bc.sync();
+        return RegI64(allocInt64());
+    }
+
+    void needI64(RegI64 specific) {
+        if (!isAvailableI64(specific))
+            bc.sync();
+        allocInt64(specific);
+    }
+
+    MOZ_MUST_USE RegF32 needF32() {
+        if (!hasFPU<MIRType::Float32>())
+            bc.sync();
+        return RegF32(allocFPU<MIRType::Float32>());
+    }
+
+    void needF32(RegF32 specific) {
+        if (!isAvailableF32(specific))
+            bc.sync();
+        allocFPU(specific);
+    }
+
+    MOZ_MUST_USE RegF64 needF64() {
+        if (!hasFPU<MIRType::Double>())
+            bc.sync();
+        return RegF64(allocFPU<MIRType::Double>());
+    }
+
+    void needF64(RegF64 specific) {
+        if (!isAvailableF64(specific))
+            bc.sync();
+        allocFPU(specific);
+    }
+
+    void freeI32(RegI32 r) {
+        freeGPR(r);
+    }
+
+    void freeI64(RegI64 r) {
+        freeInt64(r);
+    }
+
+    void freeF64(RegF64 r) {
+        freeFPU(r);
+    }
+
+    void freeF32(RegF32 r) {
+        freeFPU(r);
+    }
+
+#ifdef JS_CODEGEN_ARM
+    MOZ_MUST_USE RegI64 needI64Pair() {
+        if (!hasGPRPair())
+            bc.sync();
+        Register low, high;
+        allocGPRPair(&low, &high);
+        return RegI64(Register64(high, low));
+    }
+#endif
+
+#ifdef DEBUG
+    friend class LeakCheck;
+
+    class MOZ_RAII LeakCheck
+    {
+      private:
+        const BaseRegAlloc&           ra;
+        AllocatableGeneralRegisterSet knownGPR;
+        AllocatableFloatRegisterSet   knownFPU;
+
+      public:
+        explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
+            knownGPR = ra.availGPR;
+            knownFPU = ra.availFPU;
+        }
+
+        ~LeakCheck() {
+            MOZ_ASSERT(knownGPR.bits() == ra.allGPR.bits());
+            MOZ_ASSERT(knownFPU.bits() == ra.allFPU.bits());
+        }
+
+        void addKnownI32(RegI32 r) {
+            knownGPR.add(r);
+        }
+
+        void addKnownI64(RegI64 r) {
+# ifdef JS_PUNBOX64
+            knownGPR.add(r.reg);
+# else
+            knownGPR.add(r.high);
+            knownGPR.add(r.low);
+# endif
+        }
+
+        void addKnownF32(RegF32 r) {
+            knownFPU.add(r);
+        }
+
+        void addKnownF64(RegF64 r) {
+            knownFPU.add(r);
+        }
+    };
+#endif
+};
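LeakCheck in miniature, with toy bitmask stand-ins (ToyRA and ToyLeakCheck are
hypothetical): snapshot the free set on construction, add back every register
the value stack is known to hold, and on destruction the union must equal the
full allocatable set, or a register has leaked.

    #include <cassert>
    #include <cstdint>

    struct ToyRA {
        uint32_t all;     // every register the compiler may allocate
        uint32_t avail;   // the currently free subset
    };

    class ToyLeakCheck {
        const ToyRA& ra;
        uint32_t known;
      public:
        explicit ToyLeakCheck(const ToyRA& ra) : ra(ra), known(ra.avail) {}
        void addKnown(uint32_t regBit) { known |= regBit; }
        ~ToyLeakCheck() {
            assert(known == ra.all);     // anything unaccounted for leaked
        }
    };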
+
+// ScratchRegister abstractions.  We define our own, deferring to the platform's
+// when possible.
 
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
-    typedef ScratchDoubleScope ScratchF64;
+typedef ScratchDoubleScope ScratchF64;
 #else
-    class ScratchF64
-    {
-      public:
-        ScratchF64(BaseCompiler& b) {}
-        operator FloatRegister() const {
-            MOZ_CRASH("BaseCompiler platform hook - ScratchF64");
-        }
-    };
+class ScratchF64
+{
+  public:
+    ScratchF64(BaseRegAlloc&) {}
+    operator FloatRegister() const {
+        MOZ_CRASH("BaseCompiler platform hook - ScratchF64");
+    }
+};
 #endif
 
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
-    typedef ScratchFloat32Scope ScratchF32;
+typedef ScratchFloat32Scope ScratchF32;
 #else
-    class ScratchF32
-    {
-      public:
-        ScratchF32(BaseCompiler& b) {}
-        operator FloatRegister() const {
-            MOZ_CRASH("BaseCompiler platform hook - ScratchF32");
-        }
-    };
+class ScratchF32
+{
+  public:
+    ScratchF32(BaseRegAlloc&) {}
+    operator FloatRegister() const {
+        MOZ_CRASH("BaseCompiler platform hook - ScratchF32");
+    }
+};
 #endif
 
 #if defined(JS_CODEGEN_X64)
-    typedef ScratchRegisterScope ScratchI32;
+typedef ScratchRegisterScope ScratchI32;
 #elif defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_ARM)
-    class ScratchI32
-    {
+class ScratchI32
+{
 # ifdef DEBUG
-        BaseCompiler& bc;
-      public:
-        explicit ScratchI32(BaseCompiler& bc) : bc(bc) {
-            MOZ_ASSERT(!bc.scratchRegisterTaken());
-            bc.setScratchRegisterTaken(true);
-        }
-        ~ScratchI32() {
-            MOZ_ASSERT(bc.scratchRegisterTaken());
-            bc.setScratchRegisterTaken(false);
-        }
+    BaseRegAlloc& ra;
+  public:
+    explicit ScratchI32(BaseRegAlloc& ra) : ra(ra) {
+        MOZ_ASSERT(!ra.scratchRegisterTaken());
+        ra.setScratchRegisterTaken(true);
+    }
+    ~ScratchI32() {
+        MOZ_ASSERT(ra.scratchRegisterTaken());
+        ra.setScratchRegisterTaken(false);
+    }
 # else
-      public:
-        explicit ScratchI32(BaseCompiler& bc) {}
+  public:
+    explicit ScratchI32(BaseRegAlloc&) {}
 # endif
-        operator Register() const {
+    operator Register() const {
 # ifdef JS_CODEGEN_X86
-            return ScratchRegX86;
+        return ScratchRegX86;
 # else
-            return ScratchRegARM;
+        return ScratchRegARM;
 # endif
-        }
-    };
+    }
+};
 #else
-    class ScratchI32
-    {
-      public:
-        ScratchI32(BaseCompiler& bc) {}
-        operator Register() const {
-            MOZ_CRASH("BaseCompiler platform hook - ScratchI32");
-        }
-    };
+class ScratchI32
+{
+  public:
+    ScratchI32(BaseRegAlloc&) {}
+    operator Register() const {
+        MOZ_CRASH("BaseCompiler platform hook - ScratchI32");
+    }
+};
 #endif
 
 #if defined(JS_CODEGEN_X86)
-    // ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
-    // no other register will do.  And we would normally have to allocate that
-    // register using ScratchI32 since normally the scratch register is EBX.
-    // But the whole point of ScratchI32 is to hide that relationship.  By using
-    // the ScratchEBX alias, we document that at that point we require the
-    // scratch register to be EBX.
-    typedef ScratchI32 ScratchEBX;
-#endif
-
+// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
+// no other register will do.  And we would normally have to allocate that
+// register using ScratchI32 since normally the scratch register is EBX.
+// But the whole point of ScratchI32 is to hide that relationship.  By using
+// the ScratchEBX alias, we document that at that point we require the
+// scratch register to be EBX.
+typedef ScratchI32 ScratchEBX;
+#endif
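The DEBUG arm of ScratchI32 above enforces a single-owner discipline: on x86
and ARM the scratch register is one fixed machine register, so two live
scratch scopes would silently clobber each other.  The check in miniature
(illustrative stand-ins, not the real classes):

    #include <cassert>

    struct ToyAlloc { bool scratchTaken = false; };

    class ToyScratchScope {
        ToyAlloc& ra;
      public:
        explicit ToyScratchScope(ToyAlloc& ra) : ra(ra) {
            assert(!ra.scratchTaken);    // nested scratch use is a compiler bug
            ra.scratchTaken = true;
        }
        ~ToyScratchScope() { ra.scratchTaken = false; }
    };

    void example(ToyAlloc& ra) {
        ToyScratchScope outer(ra);
        // ToyScratchScope inner(ra);    // would assert: scratch already taken
    }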
+
+class BaseCompiler final : public BaseCompilerInterface
+{
     typedef Vector<NonAssertingLabel, 8, SystemAllocPolicy> LabelVector;
     typedef Vector<MIRType, 8, SystemAllocPolicy> MIRTypeVector;
 
-    // The strongly typed register wrappers have saved my bacon a few
-    // times; though they are largely redundant they stay, for now.
-
-    struct RegI32 : public Register
-    {
-        RegI32() : Register(Register::Invalid()) {}
-        explicit RegI32(Register reg) : Register(reg) {}
-    };
-
-    struct RegI64 : public Register64
-    {
-        RegI64() : Register64(Register64::Invalid()) {}
-        explicit RegI64(Register64 reg) : Register64(reg) {}
-    };
-
-    struct RegF32 : public FloatRegister
-    {
-        RegF32() : FloatRegister() {}
-        explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
-    };
-
-    struct RegF64 : public FloatRegister
-    {
-        RegF64() : FloatRegister() {}
-        explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
-    };
-
-    struct AnyReg
-    {
-        AnyReg() { tag = NONE; }
-        explicit AnyReg(RegI32 r) { tag = I32; i32_ = r; }
-        explicit AnyReg(RegI64 r) { tag = I64; i64_ = r; }
-        explicit AnyReg(RegF32 r) { tag = F32; f32_ = r; }
-        explicit AnyReg(RegF64 r) { tag = F64; f64_ = r; }
-
-        RegI32 i32() {
-            MOZ_ASSERT(tag == I32);
-            return i32_;
-        }
-        RegI64 i64() {
-            MOZ_ASSERT(tag == I64);
-            return i64_;
-        }
-        RegF32 f32() {
-            MOZ_ASSERT(tag == F32);
-            return f32_;
-        }
-        RegF64 f64() {
-            MOZ_ASSERT(tag == F64);
-            return f64_;
-        }
-        AnyRegister any() {
-            switch (tag) {
-              case F32: return AnyRegister(f32_);
-              case F64: return AnyRegister(f64_);
-              case I32: return AnyRegister(i32_);
-              case I64:
-#ifdef JS_PUNBOX64
-                return AnyRegister(i64_.reg);
-#else
-                // The compiler is written so that this is never needed: any() is called
-                // on arbitrary registers for asm.js but asm.js does not have 64-bit ints.
-                // For wasm, any() is called on arbitrary registers only on 64-bit platforms.
-                MOZ_CRASH("AnyReg::any() on 32-bit platform");
-#endif
-              case NONE:
-                MOZ_CRASH("AnyReg::any() on NONE");
-            }
-            // Work around GCC 5 analysis/warning bug.
-            MOZ_CRASH("AnyReg::any(): impossible case");
-        }
-
-        union {
-            RegI32 i32_;
-            RegI64 i64_;
-            RegF32 f32_;
-            RegF64 f64_;
-        };
-        enum { NONE, I32, I64, F32, F64 } tag;
-    };
-
     struct Local
     {
         Local() : type_(MIRType::None), offs_(UINT32_MAX) {}
         Local(MIRType type, uint32_t offs) : type_(type), offs_(offs) {}
 
         void init(MIRType type_, uint32_t offs_) {
             this->type_ = type_;
             this->offs_ = offs_;
@@ -519,20 +871,16 @@ class BaseCompiler
 
         // The baseline compiler uses the iterator's control stack, attaching
         // its own control information.
         typedef Control ControlItem;
     };
 
     typedef OpIter<BaseCompilePolicy> BaseOpIter;
 
-    // Volatile registers except ReturnReg.
-
-    static LiveRegisterSet VolatileReturnGPR;
-
     // The baseline compiler will use OOL code more sparingly than
     // Baldr since our code is not high performance and frills like
     // code density and branch prediction friendliness will be less
     // important.
 
     class OutOfLineCode : public TempObject
     {
       private:
@@ -627,50 +975,43 @@ class BaseCompiler
 
     LatentOp                    latentOp_;       // Latent operation for branch (seen next)
     ValType                     latentType_;     // Operand type, if latentOp_ is true
     Assembler::Condition        latentIntCmp_;   // Comparison operator, if latentOp_ == Compare, int types
     Assembler::DoubleCondition  latentDoubleCmp_;// Comparison operator, if latentOp_ == Compare, float types
 
     FuncOffsets                 offsets_;
     MacroAssembler&             masm;            // No '_' suffix - too tedious...
-
-    AllocatableGeneralRegisterSet availGPR_;
-    AllocatableFloatRegisterSet   availFPU_;
-#ifdef DEBUG
-    bool                          scratchRegisterTaken_;
-    AllocatableGeneralRegisterSet allGPR_;       // The registers available to the compiler
-    AllocatableFloatRegisterSet   allFPU_;       //   after removing ScratchReg, HeapReg, etc
-#endif
+    BaseRegAlloc                ra;              // Ditto
 
     Vector<Local, 8, SystemAllocPolicy> localInfo_;
     Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_;
 
     // On specific platforms we sometimes need to use specific registers.
 
 #ifdef JS_CODEGEN_X64
     RegI64 specific_rax;
     RegI64 specific_rcx;
     RegI64 specific_rdx;
 #endif
 
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
     RegI32 specific_eax;
     RegI32 specific_ecx;
     RegI32 specific_edx;
+    RegI32 specific_edi;
+    RegI32 specific_esi;
 #endif
 
 #if defined(JS_CODEGEN_X86)
     RegI64 specific_ecx_ebx;
     RegI64 specific_edx_eax;
-
-    AllocatableGeneralRegisterSet singleByteRegs_;
-#endif
-
-#if defined(JS_NUNBOX32)
+#endif
+
+#if !defined(JS_PUNBOX64)
     RegI64 abiReturnRegI64;
 #endif
 
     // The join registers are used to carry values out of blocks.
     // joinRegI32 and joinRegI64 must overlap: emitBrIf and
     // emitBrTable assume that.
 
     RegI32 joinRegI32;
@@ -696,25 +1037,17 @@ class BaseCompiler
 
     MOZ_MUST_USE bool emitFunction();
     void emitInitStackLocals();
 
     const SigWithId& sig() const { return *env_.funcSigs[func_.index]; }
 
     // Used by some of the ScratchRegister implementations.
     operator MacroAssembler&() const { return masm; }
-
-#ifdef DEBUG
-    bool scratchRegisterTaken() const {
-        return scratchRegisterTaken_;
-    }
-    void setScratchRegisterTaken(bool state) {
-        scratchRegisterTaken_ = state;
-    }
-#endif
+    operator BaseRegAlloc&() { return ra; }
 
   private:
 
     ////////////////////////////////////////////////////////////
     //
     // Out of line code management.
 
     MOZ_MUST_USE OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) {
@@ -786,176 +1119,16 @@ class BaseCompiler
 
     int32_t frameOffsetFromSlot(uint32_t slot, MIRType type) {
         MOZ_ASSERT(localInfo_[slot].type() == type);
         return localInfo_[slot].offs();
     }
 
     ////////////////////////////////////////////////////////////
     //
-    // Low-level register allocation.
-
-    bool isAvailable(Register r) {
-        return availGPR_.has(r);
-    }
-
-    bool hasGPR() {
-        return !availGPR_.empty();
-    }
-
-    void allocGPR(Register r) {
-        MOZ_ASSERT(isAvailable(r));
-        availGPR_.take(r);
-    }
-
-    Register allocGPR() {
-        MOZ_ASSERT(hasGPR());
-        return availGPR_.takeAny();
-    }
-
-    void freeGPR(Register r) {
-        availGPR_.add(r);
-    }
-
-    bool isAvailable(Register64 r) {
-#ifdef JS_PUNBOX64
-        return isAvailable(r.reg);
-#else
-        return isAvailable(r.low) && isAvailable(r.high);
-#endif
-    }
-
-    bool hasInt64() {
-#ifdef JS_PUNBOX64
-        return !availGPR_.empty();
-#else
-        if (availGPR_.empty())
-            return false;
-        Register r = allocGPR();
-        bool available = !availGPR_.empty();
-        freeGPR(r);
-        return available;
-#endif
-    }
-
-    void allocInt64(Register64 r) {
-        MOZ_ASSERT(isAvailable(r));
-#ifdef JS_PUNBOX64
-        availGPR_.take(r.reg);
-#else
-        availGPR_.take(r.low);
-        availGPR_.take(r.high);
-#endif
-    }
-
-    Register64 allocInt64() {
-        MOZ_ASSERT(hasInt64());
-#ifdef JS_PUNBOX64
-        return Register64(availGPR_.takeAny());
-#else
-        Register high = availGPR_.takeAny();
-        Register low = availGPR_.takeAny();
-        return Register64(high, low);
-#endif
-    }
-
-    void freeInt64(Register64 r) {
-#ifdef JS_PUNBOX64
-        availGPR_.add(r.reg);
-#else
-        availGPR_.add(r.low);
-        availGPR_.add(r.high);
-#endif
-    }
-
-#ifdef JS_CODEGEN_ARM
-    // r12 is normally the ScratchRegister and r13 is always the stack pointer,
-    // so the highest possible pair has r10 as the even-numbered register.
-
-    static const uint32_t pairLimit = 10;
-
-    bool hasGPRPair() {
-        for (uint32_t i = 0; i <= pairLimit; i += 2) {
-            if (isAvailable(Register::FromCode(i)) && isAvailable(Register::FromCode(i + 1)))
-                return true;
-        }
-        return false;
-    }
-
-    void allocGPRPair(Register* low, Register* high) {
-        for (uint32_t i = 0; i <= pairLimit; i += 2) {
-            if (isAvailable(Register::FromCode(i)) && isAvailable(Register::FromCode(i + 1))) {
-                *low = Register::FromCode(i);
-                *high = Register::FromCode(i + 1);
-                allocGPR(*low);
-                allocGPR(*high);
-                return;
-            }
-        }
-        MOZ_CRASH("No pair");
-    }
-#endif
-
-    // Notes on float register allocation.
-    //
-    // The general rule in SpiderMonkey is that float registers can
-    // alias double registers, but there are predicates to handle
-    // exceptions to that rule: hasUnaliasedDouble() and
-    // hasMultiAlias().  The way aliasing actually works is platform
-    // dependent and exposed through the aliased(n, &r) predicate,
-    // etc.
-    //
-    //  - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double
-    //    registers that cannot be treated as float.
-    //  - hasMultiAlias(): on ARM and MIPS a double register aliases
-    //    two float registers.
-    //  - notes in Architecture-arm.h indicate that when we use a
-    //    float register that aliases a double register we only use
-    //    the low float register, never the high float register.  I
-    //    think those notes lie, or at least are confusing.
-    //  - notes in Architecture-mips32.h suggest that the MIPS port
-    //    will use both low and high float registers except on the
-    //    Longsoon, which may be the only MIPS that's being tested, so
-    //    who knows what's working.
-    //  - SIMD is not yet implemented on ARM or MIPS so constraints
-    //    may change there.
-    //
-    // On some platforms (x86, x64, ARM64) but not all (ARM)
-    // ScratchFloat32Register is the same as ScratchDoubleRegister.
-    //
-    // It's a basic invariant of the AllocatableRegisterSet that it
-    // deals properly with aliasing of registers: if s0 or s1 are
-    // allocated then d0 is not allocatable; if s0 and s1 are freed
-    // individually then d0 becomes allocatable.
-
-    template<MIRType t>
-    bool hasFPU() {
-        return availFPU_.hasAny<RegTypeOf<t>::value>();
-    }
-
-    bool isAvailable(FloatRegister r) {
-        return availFPU_.has(r);
-    }
-
-    void allocFPU(FloatRegister r) {
-        MOZ_ASSERT(isAvailable(r));
-        availFPU_.take(r);
-    }
-
-    template<MIRType t>
-    FloatRegister allocFPU() {
-        return availFPU_.takeAny<RegTypeOf<t>::value>();
-    }
-
-    void freeFPU(FloatRegister r) {
-        availFPU_.add(r);
-    }
-
-    ////////////////////////////////////////////////////////////
-    //
     // Value stack and high-level register allocation.
     //
     // The value stack facilitates some on-the-fly register allocation
     // and immediate-constant use.  It tracks constants, latent
     // references to locals, register contents, and values on the CPU
     // stack.
     //
     // The stack can be flushed to memory using sync().  This is handy
@@ -1044,48 +1217,44 @@ class BaseCompiler
 
     Vector<Stk, 8, SystemAllocPolicy> stk_;
 
     Stk& push() {
         stk_.infallibleEmplaceBack(Stk());
         return stk_.back();
     }
 
-    Register64 invalidRegister64() {
-        return Register64::Invalid();
-    }
-
     RegI32 invalidI32() {
         return RegI32(Register::Invalid());
     }
 
     RegI64 invalidI64() {
-        return RegI64(invalidRegister64());
+        return RegI64(Register64::Invalid());
     }
 
     RegF64 invalidF64() {
         return RegF64(InvalidFloatReg);
     }
 
     RegI32 fromI64(RegI64 r) {
         return RegI32(lowPart(r));
     }
 
     RegI64 widenI32(RegI32 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableI32(r));
 #ifdef JS_PUNBOX64
         return RegI64(Register64(r));
 #else
         RegI32 high = needI32();
         return RegI64(Register64(high, r));
 #endif
     }
 
     RegI32 narrowI64(RegI64 r) {
-#if defined(JS_64BIT)
+#if defined(JS_PUNBOX64)
         return RegI32(r.reg);
 #else
         freeI32(RegI32(r.high));
         return RegI32(r.low);
 #endif
     }
 
     Register lowPart(RegI64 r) {
@@ -1100,118 +1269,83 @@ class BaseCompiler
 #ifdef JS_PUNBOX64
         return Register::Invalid();
 #else
         return r.high;
 #endif
     }
 
     void maybeClearHighPart(RegI64 r) {
-#ifdef JS_NUNBOX32
+#if !defined(JS_PUNBOX64)
         masm.move32(Imm32(0), r.high);
 #endif
     }
 
-    void freeI32(RegI32 r) {
-        freeGPR(r);
-    }
-
-    void freeI64(RegI64 r) {
-        freeInt64(r);
-    }
+    bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); }
+    bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); }
+    bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); }
+    bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); }
+
+    MOZ_MUST_USE RegI32 needI32() { return ra.needI32(); }
+    MOZ_MUST_USE RegI64 needI64() { return ra.needI64(); }
+    MOZ_MUST_USE RegF32 needF32() { return ra.needF32(); }
+    MOZ_MUST_USE RegF64 needF64() { return ra.needF64(); }
+
+    void needI32(RegI32 specific) { ra.needI32(specific); }
+    void needI64(RegI64 specific) { ra.needI64(specific); }
+    void needF32(RegF32 specific) { ra.needF32(specific); }
+    void needF64(RegF64 specific) { ra.needF64(specific); }
+
+#if defined(JS_CODEGEN_ARM)
+    MOZ_MUST_USE RegI64 needI64Pair() { return ra.needI64Pair(); }
+#endif
+
+    void freeI32(RegI32 r) { ra.freeI32(r); }
+    void freeI64(RegI64 r) { ra.freeI64(r); }
+    void freeF32(RegF32 r) { ra.freeF32(r); }
+    void freeF64(RegF64 r) { ra.freeF64(r); }
 
     void freeI64Except(RegI64 r, RegI32 except) {
 #ifdef JS_PUNBOX64
         MOZ_ASSERT(r.reg == except);
 #else
         MOZ_ASSERT(r.high == except || r.low == except);
         freeI64(r);
         needI32(except);
 #endif
     }
 
-    void freeF64(RegF64 r) {
-        freeFPU(r);
-    }
-
-    void freeF32(RegF32 r) {
-        freeFPU(r);
-    }
-
-    MOZ_MUST_USE RegI32 needI32() {
-        if (!hasGPR())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegI32(allocGPR());
-    }
-
-    void needI32(RegI32 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocGPR(specific);
+    void maybeFreeI32(RegI32 r) {
+        if (r != invalidI32())
+            freeI32(r);
+    }
+
+    void maybeFreeI64(RegI64 r) {
+        if (r != invalidI64())
+            freeI64(r);
+    }
+
+    void needI32NoSync(RegI32 r) {
+        MOZ_ASSERT(isAvailableI32(r));
+        needI32(r);
     }
 
     // TODO / OPTIMIZE: need2xI32() can be optimized along with needI32()
     // to avoid sync(). (Bug 1316802)
 
     void need2xI32(RegI32 r0, RegI32 r1) {
         needI32(r0);
         needI32(r1);
     }
 
-    MOZ_MUST_USE RegI64 needI64() {
-        if (!hasInt64())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegI64(allocInt64());
-    }
-
-    void needI64(RegI64 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocInt64(specific);
-    }
-
     void need2xI64(RegI64 r0, RegI64 r1) {
         needI64(r0);
         needI64(r1);
     }
 
-#ifdef JS_CODEGEN_ARM
-    MOZ_MUST_USE RegI64 needI64Pair() {
-        if (!hasGPRPair())
-            sync();
-        Register low, high;
-        allocGPRPair(&low, &high);
-        return RegI64(Register64(high, low));
-    }
-#endif
-
-    MOZ_MUST_USE RegF32 needF32() {
-        if (!hasFPU<MIRType::Float32>())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegF32(allocFPU<MIRType::Float32>());
-    }
-
-    void needF32(RegF32 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocFPU(specific);
-    }
-
-    MOZ_MUST_USE RegF64 needF64() {
-        if (!hasFPU<MIRType::Double>())
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        return RegF64(allocFPU<MIRType::Double>());
-    }
-
-    void needF64(RegF64 specific) {
-        if (!isAvailable(specific))
-            sync();            // TODO / OPTIMIZE: improve this (Bug 1316802)
-        allocFPU(specific);
-    }
-
     void moveI32(RegI32 src, RegI32 dest) {
         if (src != dest)
             masm.move32(src, dest);
     }
 
     void moveI64(RegI64 src, RegI64 dest) {
         if (src != dest)
             masm.move64(src, dest);
@@ -1342,17 +1476,17 @@ class BaseCompiler
             loadRegisterI64(r, src);
             break;
           case Stk::None:
           default:
             MOZ_CRASH("Compiler bug: Expected I64 on stack");
         }
     }
 
-#ifdef JS_NUNBOX32
+#if !defined(JS_PUNBOX64)
     void loadI64Low(Register r, Stk& src) {
         switch (src.kind()) {
           case Stk::ConstI64:
             masm.move32(Imm64(src.i64val()).low(), r);
             break;
           case Stk::MemI64:
             loadFromFrameI32(r, src.offs() - INT64LOW_OFFSET);
             break;
@@ -1448,17 +1582,17 @@ class BaseCompiler
     //  - Operations that need specific registers: multiply, quotient,
     //    remainder, will tend to sync because the registers we need
     //    will tend to be allocated.  We may be able to avoid that by
     //    prioritizing registers differently (takeLast instead of
     //    takeFirst) but we may also be able to allocate an unused
     //    register on demand to free up one we need, thus avoiding the
     //    sync.  That type of fix would go into needI32().
 
-    void sync() {
+    void sync() final {
         size_t start = 0;
         size_t lim = stk_.length();
 
         for (size_t i = lim; i > 0; i--) {
             // Memory opcodes are first in the enum, single check against MemLast is fine.
             if (stk_[i - 1].kind() <= Stk::MemLast) {
                 start = i;
                 break;
@@ -1565,35 +1699,35 @@ class BaseCompiler
     void syncLocal(uint32_t slot) {
         if (hasLocal(slot))
             sync();            // TODO / OPTIMIZE: Improve this?  (Bug 1316817)
     }
 
     // Push the register r onto the stack.
 
     void pushI32(RegI32 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableI32(r));
         Stk& x = push();
         x.setI32Reg(r);
     }
 
     void pushI64(RegI64 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableI64(r));
         Stk& x = push();
         x.setI64Reg(r);
     }
 
     void pushF64(RegF64 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableF64(r));
         Stk& x = push();
         x.setF64Reg(r);
     }
 
     void pushF32(RegF32 r) {
-        MOZ_ASSERT(!isAvailable(r));
+        MOZ_ASSERT(!isAvailableF32(r));
         Stk& x = push();
         x.setF32Reg(r);
     }
 
     // Push the value onto the stack.
 
     void pushI32(int32_t v) {
         Stk& x = push();
@@ -1927,112 +2061,116 @@ class BaseCompiler
     // On the other hand, we sync() before every block and only the
     // JoinReg is live out of the block.  But on the way out, we
     // currently pop the JoinReg before freeing regs to be discarded,
     // so there is a real risk of some pointless shuffling there.  If
     // we instead integrate the popping of the join reg into the
     // popping of the stack we can just use the JoinReg as it will
     // become available in that process.
 
-    MOZ_MUST_USE AnyReg popJoinRegUnlessVoid(ExprType type) {
+    MOZ_MUST_USE Maybe<AnyReg> popJoinRegUnlessVoid(ExprType type) {
         switch (type) {
           case ExprType::Void: {
-            return AnyReg();
+            return Nothing();
           }
           case ExprType::I32: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterI32 || k == Stk::ConstI32 || k == Stk::MemI32 ||
                        k == Stk::LocalI32);
-            return AnyReg(popI32(joinRegI32));
+            return Some(AnyReg(popI32(joinRegI32)));
           }
           case ExprType::I64: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterI64 || k == Stk::ConstI64 || k == Stk::MemI64 ||
                        k == Stk::LocalI64);
-            return AnyReg(popI64(joinRegI64));
+            return Some(AnyReg(popI64(joinRegI64)));
           }
           case ExprType::F64: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterF64 || k == Stk::ConstF64 || k == Stk::MemF64 ||
                        k == Stk::LocalF64);
-            return AnyReg(popF64(joinRegF64));
+            return Some(AnyReg(popF64(joinRegF64)));
           }
           case ExprType::F32: {
             DebugOnly<Stk::Kind> k(stk_.back().kind());
             MOZ_ASSERT(k == Stk::RegisterF32 || k == Stk::ConstF32 || k == Stk::MemF32 ||
                        k == Stk::LocalF32);
-            return AnyReg(popF32(joinRegF32));
+            return Some(AnyReg(popF32(joinRegF32)));
           }
           default: {
             MOZ_CRASH("Compiler bug: unexpected expression type");
           }
         }
     }
 
     // If we ever start not sync-ing on entry to Block (but instead try to sync
     // lazily) then this may start asserting because it does not spill the
     // joinreg if the joinreg is already allocated.  Note, it *can't* spill the
     // joinreg in the contexts it's being used, so some other solution will need
     // to be found.
 
-    MOZ_MUST_USE AnyReg captureJoinRegUnlessVoid(ExprType type) {
+    MOZ_MUST_USE Maybe<AnyReg> captureJoinRegUnlessVoid(ExprType type) {
         switch (type) {
           case ExprType::I32:
-            allocGPR(joinRegI32);
-            return AnyReg(joinRegI32);
+            MOZ_ASSERT(isAvailableI32(joinRegI32));
+            needI32(joinRegI32);
+            return Some(AnyReg(joinRegI32));
           case ExprType::I64:
-            allocInt64(joinRegI64);
-            return AnyReg(joinRegI64);
+            MOZ_ASSERT(isAvailableI64(joinRegI64));
+            needI64(joinRegI64);
+            return Some(AnyReg(joinRegI64));
           case ExprType::F32:
-            allocFPU(joinRegF32);
-            return AnyReg(joinRegF32);
+            MOZ_ASSERT(isAvailableF32(joinRegF32));
+            needF32(joinRegF32);
+            return Some(AnyReg(joinRegF32));
           case ExprType::F64:
-            allocFPU(joinRegF64);
-            return AnyReg(joinRegF64);
+            MOZ_ASSERT(isAvailableF64(joinRegF64));
+            needF64(joinRegF64);
+            return Some(AnyReg(joinRegF64));
           case ExprType::Void:
-            return AnyReg();
+            return Nothing();
           default:
             MOZ_CRASH("Compiler bug: unexpected type");
         }
     }
 
-    void pushJoinRegUnlessVoid(AnyReg r) {
-        switch (r.tag) {
-          case AnyReg::NONE:
-            break;
+    void pushJoinRegUnlessVoid(const Maybe<AnyReg>& r) {
+        if (!r)
+            return;
+        switch (r->tag) {
           case AnyReg::I32:
-            pushI32(r.i32());
+            pushI32(r->i32());
             break;
           case AnyReg::I64:
-            pushI64(r.i64());
+            pushI64(r->i64());
             break;
           case AnyReg::F64:
-            pushF64(r.f64());
+            pushF64(r->f64());
             break;
           case AnyReg::F32:
-            pushF32(r.f32());
+            pushF32(r->f32());
             break;
         }
     }
 
-    void freeJoinRegUnlessVoid(AnyReg r) {
-        switch (r.tag) {
-          case AnyReg::NONE:
-            break;
+    void freeJoinRegUnlessVoid(const Maybe<AnyReg>& r) {
+        if (!r)
+            return;
+        switch (r->tag) {
           case AnyReg::I32:
-            freeI32(r.i32());
+            freeI32(r->i32());
             break;
           case AnyReg::I64:
-            freeI64(r.i64());
+            freeI64(r->i64());
             break;
           case AnyReg::F64:
-            freeF64(r.f64());
+            freeF64(r->f64());
             break;
           case AnyReg::F32:
-            freeF32(r.f32());
+            freeF32(r->f32());
             break;
         }
     }
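The shape of the Maybe<AnyReg> refactor, reduced to a plain int payload
(popUnlessVoid and pushUnlessVoid are illustrative names): mozilla::Maybe
replaces AnyReg's old NONE tag, so the void case is tested once at the call
site instead of being an arm of every switch.

    #include "mozilla/Maybe.h"

    using mozilla::Maybe;
    using mozilla::Nothing;
    using mozilla::Some;

    Maybe<int> popUnlessVoid(bool isVoid) {
        if (isVoid)
            return Nothing();            // previously: a register tagged NONE
        return Some(42);
    }

    void pushUnlessVoid(const Maybe<int>& r) {
        if (!r)
            return;                      // the old AnyReg::NONE arm, hoisted
        int v = *r;                      // dereference only when engaged
        (void)v;
    }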
 
     void maybeReserveJoinRegI(ExprType type) {
         if (type == ExprType::I32)
             needI32(joinRegI32);
         else if (type == ExprType::I64)
@@ -2203,52 +2341,38 @@ class BaseCompiler
         return stk_[stk_.length()-1-relativeDepth];
     }
 
 #ifdef DEBUG
     // Check that we're not leaking registers by comparing the
     // state of the stack + available registers with the set of
     // all available registers.
 
-    // Call this before compiling any code.
-    void setupRegisterLeakCheck() {
-        allGPR_ = availGPR_;
-        allFPU_ = availFPU_;
-    }
-
     // Call this between opcodes.
     void performRegisterLeakCheck() {
-        AllocatableGeneralRegisterSet knownGPR_ = availGPR_;
-        AllocatableFloatRegisterSet knownFPU_ = availFPU_;
+        BaseRegAlloc::LeakCheck check(ra);
         for (size_t i = 0 ; i < stk_.length() ; i++) {
             Stk& item = stk_[i];
             switch (item.kind_) {
               case Stk::RegisterI32:
-                knownGPR_.add(item.i32reg());
+                check.addKnownI32(item.i32reg());
                 break;
               case Stk::RegisterI64:
-#ifdef JS_PUNBOX64
-                knownGPR_.add(item.i64reg().reg);
-#else
-                knownGPR_.add(item.i64reg().high);
-                knownGPR_.add(item.i64reg().low);
-#endif
+                check.addKnownI64(item.i64reg());
                 break;
               case Stk::RegisterF32:
-                knownFPU_.add(item.f32reg());
+                check.addKnownF32(item.f32reg());
                 break;
               case Stk::RegisterF64:
-                knownFPU_.add(item.f64reg());
+                check.addKnownF64(item.f64reg());
                 break;
               default:
                 break;
             }
         }
-        MOZ_ASSERT(knownGPR_.bits() == allGPR_.bits());
-        MOZ_ASSERT(knownFPU_.bits() == allFPU_.bits());
     }
 #endif
 
     ////////////////////////////////////////////////////////////
     //
     // Control stack
 
     void initControl(Control& item)
@@ -2806,42 +2930,42 @@ class BaseCompiler
                     Assembler::Always);
 #else
         MOZ_CRASH("BaseCompiler platform hook: tableSwitch");
 #endif
     }
 
     RegI32 captureReturnedI32() {
         RegI32 rv = RegI32(ReturnReg);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableI32(rv));
         needI32(rv);
         return rv;
     }
 
     RegI64 captureReturnedI64() {
         RegI64 rv = RegI64(ReturnReg64);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableI64(rv));
         needI64(rv);
         return rv;
     }
 
     RegF32 captureReturnedF32(const FunctionCall& call) {
         RegF32 rv = RegF32(ReturnFloat32Reg);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableF32(rv));
         needF32(rv);
 #if defined(JS_CODEGEN_ARM)
         if (call.usesSystemAbi && !call.hardFP)
             masm.ma_vxfer(r0, rv);
 #endif
         return rv;
     }
 
     RegF64 captureReturnedF64(const FunctionCall& call) {
         RegF64 rv = RegF64(ReturnDoubleReg);
-        MOZ_ASSERT(isAvailable(rv));
+        MOZ_ASSERT(isAvailableF64(rv));
         needF64(rv);
 #if defined(JS_CODEGEN_ARM)
         if (call.usesSystemAbi && !call.hardFP)
             masm.ma_vxfer(r0, r1, rv);
 #endif
         return rv;
     }
 
@@ -2920,17 +3044,17 @@ class BaseCompiler
             checkDivideByZeroI64(rhs);
 
         if (!isUnsigned && (!isConst || c == -1))
             checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false));
 
 # if defined(JS_CODEGEN_X64)
         // The caller must set up the following situation.
         MOZ_ASSERT(srcDest.reg == rax);
-        MOZ_ASSERT(isAvailable(rdx));
+        MOZ_ASSERT(isAvailableI64(specific_rdx));
         if (isUnsigned) {
             masm.xorq(rdx, rdx);
             masm.udivq(rhs.reg);
         } else {
             masm.cqo();
             masm.idivq(rhs.reg);
         }
 # else
@@ -2948,17 +3072,17 @@ class BaseCompiler
             checkDivideByZeroI64(rhs);
 
         if (!isUnsigned && (!isConst || c == -1))
             checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true));
 
 # if defined(JS_CODEGEN_X64)
         // The caller must set up the following situation.
         MOZ_ASSERT(srcDest.reg == rax);
-        MOZ_ASSERT(isAvailable(rdx));
+        MOZ_ASSERT(isAvailableI64(specific_rdx));
 
         if (isUnsigned) {
             masm.xorq(rdx, rdx);
             masm.udivq(rhs.reg);
         } else {
             masm.cqo();
             masm.idivq(rhs.reg);
         }
@@ -3439,32 +3563,31 @@ class BaseCompiler
         if (!check->omitBoundsCheck) {
             masm.wasmBoundsCheck(Assembler::AboveOrEqual, ptr,
                                  Address(tls, offsetof(TlsData, boundsCheckLimit)),
                                  trap(Trap::OutOfBounds));
         }
 #endif
     }
 
-    // This is the temp register passed as the last argument to load()
-    MOZ_MUST_USE size_t loadTemps(const MemoryAccessDesc& access) {
+    void needLoadTemps(const MemoryAccessDesc& access, RegI32* tmp1, RegI32* tmp2, RegI32* tmp3) {
 #if defined(JS_CODEGEN_ARM)
         if (IsUnaligned(access)) {
             switch (access.type()) {
+              case Scalar::Float64:
+                *tmp3 = needI32();
+                MOZ_FALLTHROUGH;
               case Scalar::Float32:
-                return 2;
-              case Scalar::Float64:
-                return 3;
+                *tmp2 = needI32();
+                MOZ_FALLTHROUGH;
               default:
-                return 1;
+                *tmp1 = needI32();
+                break;
             }
         }
-        return 0;
-#else
-        return 0;
 #endif
     }
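The MOZ_FALLTHROUGH ladder in needLoadTemps allocates a cumulative number of
temps, matching the 1/2/3 counts the deleted loadTemps used to return.  A
reduced model of the same control flow (the integer 'kind' encoding is
hypothetical):

    int tempCount(int kind) {       // 2 = Float64, 1 = Float32, 0 = others
        int temps = 0;
        switch (kind) {
          case 2:
            temps++;                // Float64 additionally takes tmp3
            [[fallthrough]];
          case 1:
            temps++;                // Float32 additionally takes tmp2
            [[fallthrough]];
          default:
            temps++;                // every unaligned access takes tmp1
        }
        return temps;               // yields 3, 2, or 1 respectively
    }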
 
     MOZ_MUST_USE bool needTlsForAccess(const AccessCheck& check) {
 #if defined(JS_CODEGEN_ARM)
         return !check.omitBoundsCheck;
 #elif defined(JS_CODEGEN_X86)
         return true;
@@ -3490,17 +3613,17 @@ class BaseCompiler
 #elif defined(JS_CODEGEN_X86)
         masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
         Operand srcAddr(ptr, access->offset());
 
         if (dest.tag == AnyReg::I64) {
             MOZ_ASSERT(dest.i64() == abiReturnRegI64);
             masm.wasmLoadI64(*access, srcAddr, dest.i64());
         } else {
-            bool byteRegConflict = access->byteSize() == 1 && !singleByteRegs_.has(dest.i32());
+            bool byteRegConflict = access->byteSize() == 1 && !ra.isSingleByteI32(dest.i32());
             AnyRegister out = byteRegConflict ? AnyRegister(ScratchRegX86) : dest.any();
 
             masm.wasmLoad(*access, srcAddr, out);
 
             if (byteRegConflict)
                 masm.mov(ScratchRegX86, dest.i32());
         }
 #elif defined(JS_CODEGEN_ARM)
@@ -3528,54 +3651,53 @@ class BaseCompiler
         }
 #else
         MOZ_CRASH("BaseCompiler platform hook: load");
 #endif
 
         return true;
     }
 
-    MOZ_MUST_USE size_t storeTemps(const MemoryAccessDesc& access, ValType srcType) {
+    void needStoreTemps(const MemoryAccessDesc& access, ValType srcType, RegI32* tmp) {
 #if defined(JS_CODEGEN_ARM)
         if (IsUnaligned(access) && srcType != ValType::I32)
-            return 1;
-#endif
-        return 0;
+            *tmp = needI32();
+#endif
     }
 
     // ptr and src must not be the same register.
     // This may destroy ptr and src.
     MOZ_MUST_USE bool store(MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr,
                             AnyReg src, RegI32 tmp)
     {
         prepareMemoryAccess(access, check, tls, ptr);
 
         // Emit the store
 #if defined(JS_CODEGEN_X64)
-        MOZ_ASSERT(tmp == Register::Invalid());
+        MOZ_ASSERT(tmp == invalidI32());
         Operand dstAddr(HeapReg, ptr, TimesOne, access->offset());
 
         masm.wasmStore(*access, src.any(), dstAddr);
 #elif defined(JS_CODEGEN_X86)
-        MOZ_ASSERT(tmp == Register::Invalid());
+        MOZ_ASSERT(tmp == invalidI32());
         masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
         Operand dstAddr(ptr, access->offset());
 
         if (access->type() == Scalar::Int64) {
             masm.wasmStoreI64(*access, src.i64(), dstAddr);
         } else {
             AnyRegister value;
             if (src.tag == AnyReg::I64) {
-                if (access->byteSize() == 1 && !singleByteRegs_.has(src.i64().low)) {
+                if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) {
                     masm.mov(src.i64().low, ScratchRegX86);
                     value = AnyRegister(ScratchRegX86);
                 } else {
                     value = AnyRegister(src.i64().low);
                 }
-            } else if (access->byteSize() == 1 && !singleByteRegs_.has(src.i32())) {
+            } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) {
                 masm.mov(src.i32(), ScratchRegX86);
                 value = AnyRegister(ScratchRegX86);
             } else {
                 value = src.any();
             }
 
             masm.wasmStore(*access, value, dstAddr);
         }
@@ -3587,22 +3709,22 @@ class BaseCompiler
                 break;
               case AnyReg::F32:
                 masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr, tmp);
                 break;
               case AnyReg::F64:
                 masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr, tmp);
                 break;
               default:
-                MOZ_ASSERT(tmp == Register::Invalid());
+                MOZ_ASSERT(tmp == invalidI32());
                 masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr);
                 break;
             }
         } else {
-            MOZ_ASSERT(tmp == Register::Invalid());
+            MOZ_ASSERT(tmp == invalidI32());
             if (access->type() == Scalar::Int64)
                 masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr);
             else if (src.tag == AnyReg::I64)
                 masm.wasmStore(*access, AnyRegister(src.i64().low), HeapReg, ptr, ptr);
             else
                 masm.wasmStore(*access, src.any(), HeapReg, ptr, ptr);
         }
 #else
@@ -3660,40 +3782,39 @@ class BaseCompiler
 
         masm.atomicExchange64(srcAddr, rv, rd);
 
         if (wantResult)
             pushI64(rd);
         else
             freeI64(rd);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
         freeI32(rp);
 
 #if defined(JS_CODEGEN_X86)
         freeI32(specific_ecx);
 #elif defined(JS_CODEGEN_ARM)
         freeI64(rv);
 #else
         MOZ_CRASH("BaseCompiler porting interface: xchg64");
 #endif
     }
 
-    MOZ_MUST_USE uint32_t
-    atomicRMWTemps(AtomicOp op, MemoryAccessDesc* access) {
+    void needAtomicRMWTemps(AtomicOp op, MemoryAccessDesc* access, RegI32* tmp) {
 #if defined(JS_CODEGEN_X86)
         // Handled specially in atomicRMW
         if (access->byteSize() == 1)
-            return 0;
+            return;
 #endif
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
-        return op == AtomicFetchAddOp || op == AtomicFetchSubOp ? 0 : 1;
+        if (op != AtomicFetchAddOp && op != AtomicFetchSubOp)
+            *tmp = needI32();
 #elif defined(JS_CODEGEN_ARM)
-        return 1;
+        *tmp = needI32();
 #else
         MOZ_CRASH("BaseCompiler platform hook: atomicRMWTemps");
 #endif
     }
 
     void
     atomicRMW(AtomicOp op, MemoryAccessDesc* access, AccessCheck* check, RegI32 tls, RegI32 ptr,
               RegI32 rv, RegI32 rd, RegI32 tmp)
@@ -3746,24 +3867,24 @@ class BaseCompiler
             break;
           }
           default: {
             MOZ_CRASH("Bad type for atomic operation");
           }
         }
     }
 
-    MOZ_MUST_USE uint32_t
-    atomicRMW64Temps(AtomicOp op) {
+    void needAtomicRMW64Temps(AtomicOp op, RegI64* tmp) {
 #if defined(JS_CODEGEN_X86)
         MOZ_CRASH("Do not call on x86");
 #elif defined(JS_CODEGEN_X64)
-        return (op == AtomicFetchAddOp || op == AtomicFetchSubOp) ? 0 : 1;
+        if (op != AtomicFetchAddOp && op != AtomicFetchSubOp)
+            *tmp = needI64();
 #elif defined(JS_CODEGEN_ARM)
-        return 1;
+        *tmp = needI64Pair();
 #else
         MOZ_CRASH("BaseCompiler platform hook: atomicRMW64Temps");
 #endif
     }
 
     // On x86, T is Address.  On other platforms, it is Register64.
     // U is BaseIndex or Address.
     template <typename T, typename U>
@@ -3786,17 +3907,17 @@ class BaseCompiler
         prepareMemoryAccess(access, check, tls, ptr);
         ATOMIC_PTR(srcAddr, access, tls, ptr);
 
         switch (access->type()) {
           case Scalar::Uint8: {
 #if defined(JS_CODEGEN_X86)
             ScratchEBX scratch(*this);
             MOZ_ASSERT(rd == specific_eax);
-            if (!singleByteRegs_.has(rnew)) {
+            if (!ra.isSingleByteI32(rnew)) {
                 // The replacement value must have a byte persona.
                 masm.movl(rnew, scratch);
                 rnew = RegI32(scratch);
             }
 #endif
             masm.compareExchange8ZeroExtend(srcAddr, rexpect, rnew, rd);
             break;
           }
@@ -3817,17 +3938,17 @@ class BaseCompiler
                    RegI32 rv, RegI32 rd)
     {
         prepareMemoryAccess(access, check, tls, ptr);
         ATOMIC_PTR(srcAddr, access, tls, ptr);
 
         switch (access->type()) {
           case Scalar::Uint8: {
 #if defined(JS_CODEGEN_X86)
-            if (!singleByteRegs_.has(rd)) {
+            if (!ra.isSingleByteI32(rd)) {
                 ScratchEBX scratch(*this);
                 // The output register must have a byte persona.
                 masm.atomicExchange8ZeroExtend(srcAddr, rv, scratch);
                 masm.movl(scratch, rd);
             } else {
                 masm.atomicExchange8ZeroExtend(srcAddr, rv, rd);
             }
 #else
@@ -4039,17 +4160,17 @@ class BaseCompiler
     // Lhs is Register, Register64, or FloatRegister.
     //
     // Rhs is either the same as Lhs, or an immediate expression compatible with
     // Lhs "when applicable".
 
     template<typename Cond, typename Lhs, typename Rhs>
     void jumpConditionalWithJoinReg(BranchState* b, Cond cond, Lhs lhs, Rhs rhs)
     {
-        AnyReg r = popJoinRegUnlessVoid(b->resultType);
+        Maybe<AnyReg> r = popJoinRegUnlessVoid(b->resultType);
 
         if (b->framePushed != BranchState::NoPop && willPopStackBeforeBranch(b->framePushed)) {
             Label notTaken;
             branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs, rhs, &notTaken);
             popStackBeforeBranch(b->framePushed);
             masm.jump(b->label);
             masm.bind(&notTaken);
         } else {
@@ -4364,18 +4485,17 @@ BaseCompiler::emitMultiplyI64()
     r1 = popI64();
     r0 = popI64ToSpecific(RegI64(Register64(specific_edx, specific_eax)));
     temp = needI32();
 #else
     pop2xI64(&r0, &r1);
     temp = needI32();
 #endif
     masm.mul64(r1, r0, temp);
-    if (temp != Register::Invalid())
-        freeI32(temp);
+    maybeFreeI32(temp);
     freeI64(r1);
     pushI64(r0);
 }
 
 void
 BaseCompiler::emitMultiplyF32()
 {
     RegF32 r0, r1;
@@ -4993,18 +5113,17 @@ BaseCompiler::emitRotrI64()
 {
     int64_t c;
     if (popConstI64(&c)) {
         RegI64 r = popI64();
         RegI32 temp;
         if (rotate64NeedsTemp())
             temp = needI32();
         masm.rotateRight64(Imm32(c & 63), r, r, temp);
-        if (temp != Register::Invalid())
-            freeI32(temp);
+        maybeFreeI32(temp);
         pushI64(r);
     } else {
         RegI64 r0, r1;
         pop2xI64ForShiftOrRotate(&r0, &r1);
         masm.rotateRight64(lowPart(r1), r0, r0, maybeHighPart(r1));
         freeI64(r1);
         pushI64(r0);
     }
@@ -5032,18 +5151,17 @@ BaseCompiler::emitRotlI64()
 {
     int64_t c;
     if (popConstI64(&c)) {
         RegI64 r = popI64();
         RegI32 temp;
         if (rotate64NeedsTemp())
             temp = needI32();
         masm.rotateLeft64(Imm32(c & 63), r, r, temp);
-        if (temp != Register::Invalid())
-            freeI32(temp);
+        maybeFreeI32(temp);
         pushI64(r);
     } else {
         RegI64 r0, r1;
         pop2xI64ForShiftOrRotate(&r0, &r1);
         masm.rotateLeft64(lowPart(r1), r0, r0, maybeHighPart(r1));
         freeI64(r1);
         pushI64(r0);
     }
@@ -5383,18 +5501,17 @@ void
 BaseCompiler::emitConvertU64ToF32()
 {
     RegI64 r0 = popI64();
     RegF32 f0 = needF32();
     RegI32 temp;
     if (convertI64ToFloatNeedsTemp(ValType::F32, IsUnsigned(true)))
         temp = needI32();
     convertI64ToF32(r0, IsUnsigned(true), f0, temp);
-    if (temp != Register::Invalid())
-        freeI32(temp);
+    maybeFreeI32(temp);
     freeI64(r0);
     pushF32(f0);
 }
 #endif
 
 void
 BaseCompiler::emitConvertF32ToF64()
 {
@@ -5440,18 +5557,17 @@ void
 BaseCompiler::emitConvertU64ToF64()
 {
     RegI64 r0 = popI64();
     RegF64 d0 = needF64();
     RegI32 temp;
     if (convertI64ToFloatNeedsTemp(ValType::F64, IsUnsigned(true)))
         temp = needI32();
     convertI64ToF64(r0, IsUnsigned(true), d0, temp);
-    if (temp != Register::Invalid())
-        freeI32(temp);
+    maybeFreeI32(temp);
     freeI64(r0);
     pushF64(d0);
 }
 #endif // I64_TO_FLOAT_CALLOUT
 
 void
 BaseCompiler::emitReinterpretI32AsF32()
 {
@@ -5662,17 +5778,17 @@ BaseCompiler::emitBlock()
 }
 
 void
 BaseCompiler::endBlock(ExprType type)
 {
     Control& block = controlItem();
 
     // Save the value.
-    AnyReg r;
+    Maybe<AnyReg> r;
     if (!deadCode_) {
         r = popJoinRegUnlessVoid(type);
         block.bceSafeOnExit &= bceSafe_;
     }
 
     // Leave the block.
     popStackOnBlockExit(block.framePushed);
     popValueStackTo(block.stackSize);
@@ -5715,17 +5831,17 @@ BaseCompiler::emitLoop()
     return true;
 }
 
 void
 BaseCompiler::endLoop(ExprType type)
 {
     Control& block = controlItem();
 
-    AnyReg r;
+    Maybe<AnyReg> r;
     if (!deadCode_) {
         r = popJoinRegUnlessVoid(type);
         // block.bceSafeOnExit need not be updated because it won't be used for
         // the fallthrough path.
     }
 
     popStackOnBlockExit(block.framePushed);
     popValueStackTo(block.stackSize);
@@ -5809,17 +5925,17 @@ BaseCompiler::emitElse()
     Control& ifThenElse = controlItem(0);
 
     // See comment in endIfThenElse, below.
 
     // Exit the "then" branch.
 
     ifThenElse.deadThenBranch = deadCode_;
 
-    AnyReg r;
+    Maybe<AnyReg> r;
     if (!deadCode_)
         r = popJoinRegUnlessVoid(thenType);
 
     popStackOnBlockExit(ifThenElse.framePushed);
     popValueStackTo(ifThenElse.stackSize);
 
     if (!deadCode_)
         masm.jump(&ifThenElse.label);
@@ -5846,18 +5962,17 @@ BaseCompiler::endIfThenElse(ExprType typ
     Control& ifThenElse = controlItem();
 
     // The expression type is not a reliable guide to what we'll find
-    // on the stack, we could have (if E (i32.const 1) (unreachable))
+    // on the stack; we could have (if E (i32.const 1) (unreachable))
     // in which case the "else" arm is AnyType but the type of the
     // full expression is I32.  So restore whatever's there, not what
     // we want to find there.  The "then" arm has the same constraint.
 
-    AnyReg r;
-
+    Maybe<AnyReg> r;
     if (!deadCode_) {
         r = popJoinRegUnlessVoid(type);
         ifThenElse.bceSafeOnExit &= bceSafe_;
     }
 
     popStackOnBlockExit(ifThenElse.framePushed);
     popValueStackTo(ifThenElse.stackSize);
 
@@ -5915,17 +6030,17 @@ BaseCompiler::emitBr()
         return true;
 
     Control& target = controlItem(relativeDepth);
     target.bceSafeOnExit &= bceSafe_;
 
     // Save any value in the designated join register, where the
     // normal block exit code will also leave it.
 
-    AnyReg r = popJoinRegUnlessVoid(type);
+    Maybe<AnyReg> r = popJoinRegUnlessVoid(type);
 
     popStackBeforeBranch(target.framePushed);
     masm.jump(&target.label);
 
     // The register holding the join value is free for the remainder
     // of this block.
 
     freeJoinRegUnlessVoid(r);
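
Here and in the block/loop/if paths above, popJoinRegUnlessVoid now returns Maybe<AnyReg> instead of an AnyReg sentinel, so "no join value" is expressed in the type. A hedged sketch of the shape this implies, built on the joinRegI32/joinRegI64/joinRegF32/joinRegF64 members initialized in the constructor near the end of this patch (the real body lives in a part of the file not shown here, and the pop helper used is illustrative):

    Maybe<AnyReg> popJoinRegUnlessVoid(ExprType type) {
        switch (type) {
          case ExprType::Void:
            return Nothing();
          case ExprType::I32:
            popI32ToSpecific(joinRegI32);   // pop into the designated join register
            return Some(AnyReg(joinRegI32));
          // ... I64, F32, and F64 analogously, via their join registers ...
          default:
            MOZ_CRASH("unexpected join type");
        }
    }
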
@@ -5975,17 +6090,17 @@ BaseCompiler::emitBrTable()
     // Don't use joinReg for rc
     maybeReserveJoinRegI(branchValueType);
 
     // Table switch value always on top.
     RegI32 rc = popI32();
 
     maybeUnreserveJoinRegI(branchValueType);
 
-    AnyReg r = popJoinRegUnlessVoid(branchValueType);
+    Maybe<AnyReg> r = popJoinRegUnlessVoid(branchValueType);
 
     Label dispatchCode;
     masm.branch32(Assembler::Below, rc, Imm32(depths.length()), &dispatchCode);
 
     // This is the out-of-range stub.  rc is dead here but we don't need it.
 
     popStackBeforeBranch(controlItem(defaultDepth).framePushed);
     controlItem(defaultDepth).bceSafeOnExit &= bceSafe_;
@@ -6349,21 +6464,21 @@ BaseCompiler::emitConvertInt64ToFloating
 {
     sync();
 
     RegI64 input = popI64();
 
     FunctionCall call(0);
 
     masm.setupWasmABICall();
-# ifdef JS_NUNBOX32
+# if defined(JS_PUNBOX64)
+    MOZ_CRASH("BaseCompiler platform hook: emitConvertInt64ToFloatingCallout");
+# else
     masm.passABIArg(input.high);
     masm.passABIArg(input.low);
-# else
-    MOZ_CRASH("BaseCompiler platform hook: emitConvertInt64ToFloatingCallout");
 # endif
     masm.callWithABI(bytecodeOffset(), callee,
                      resultType == ValType::F32 ? MoveOp::FLOAT32 : MoveOp::DOUBLE);
 
     freeI64(input);
 
     if (resultType == ValType::F32)
         pushF32(captureReturnedF32(call));
@@ -6710,17 +6825,17 @@ BaseCompiler::emitSetGlobal()
 // is aligned.
 //
 // (In addition, alignment checking of the pointer can be omitted if the pointer
 // has been checked in dominating code, but we don't do that yet.)
 
 // TODO / OPTIMIZE (bug 1329576): There are opportunities to generate better
 // code by not moving a constant address with a zero offset into a register.
 
-BaseCompiler::RegI32
+RegI32
 BaseCompiler::popMemoryAccess(MemoryAccessDesc* access, AccessCheck* check)
 {
     check->onlyPointerAlignment = (access->offset() & (access->byteSize() - 1)) == 0;
 
     int32_t addrTmp;
     if (popConstI32(&addrTmp)) {
         uint32_t addr = addrTmp;
 
@@ -6745,38 +6860,34 @@ BaseCompiler::popMemoryAccess(MemoryAcce
 
     uint32_t local;
     if (peekLocalI32(&local))
         bceCheckLocal(access, check, local);
 
     return popI32();
 }
 
-BaseCompiler::RegI32
+RegI32
 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check)
 {
-    RegI32 tls = invalidI32();
+    RegI32 tls;
     if (needTlsForAccess(check)) {
         tls = needI32();
         masm.loadWasmTlsRegFromFrame(tls);
     }
     return tls;
 }
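
With RegI32 defaulting to invalid, maybeLoadTlsForAccess can simply leave tls untouched when the access needs no Tls pointer, and every caller pairs it with maybeFreeI32. The lifecycle, exactly as it appears at the atomic sites below:

    AccessCheck check;
    RegI32 rp  = popMemoryAccess(&access, &check);
    RegI32 tls = maybeLoadTlsForAccess(check);   // stays invalid if not needed
    // ... emit the access ...
    maybeFreeI32(tls);                           // no-op on an invalid register
    freeI32(rp);
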
 
 bool
 BaseCompiler::loadCommon(MemoryAccessDesc* access, ValType type)
 {
     AccessCheck check;
 
-    size_t temps = loadTemps(*access);
-    MOZ_ASSERT(temps <= 3);
-    RegI32 tmp1 = temps >= 1 ? needI32() : invalidI32();
-    RegI32 tmp2 = temps >= 2 ? needI32() : invalidI32();
-    RegI32 tmp3 = temps >= 3 ? needI32() : invalidI32();
-    RegI32 tls = invalidI32();
+    RegI32 tls, tmp1, tmp2, tmp3;
+    needLoadTemps(*access, &tmp1, &tmp2, &tmp3);
 
     switch (type) {
       case ValType::I32: {
         RegI32 rp = popMemoryAccess(access, &check);
 #ifdef JS_CODEGEN_ARM
         RegI32 rv = IsUnaligned(*access) ? needI32() : rp;
 #else
         RegI32 rv = rp;
@@ -6827,26 +6938,20 @@ BaseCompiler::loadCommon(MemoryAccessDes
         freeI32(rp);
         break;
       }
       default:
         MOZ_CRASH("load type");
         break;
     }
 
-    if (tls != invalidI32())
-        freeI32(tls);
-
-    MOZ_ASSERT(temps <= 3);
-    if (temps >= 1)
-        freeI32(tmp1);
-    if (temps >= 2)
-        freeI32(tmp2);
-    if (temps >= 3)
-        freeI32(tmp3);
+    maybeFreeI32(tls);
+    maybeFreeI32(tmp1);
+    maybeFreeI32(tmp2);
+    maybeFreeI32(tmp3);
 
     return true;
 }
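
needLoadTemps replaces the old count-returning loadTemps (the removed assert allowed up to three temps; its store counterpart below allows at most one). A sketch of the out-parameter form, under the assumption that only ARM unaligned accesses need temps; the actual per-platform counts live in a part of the patch not shown here:

    void needLoadTemps(const MemoryAccessDesc& access,
                       RegI32* tmp1, RegI32* tmp2, RegI32* tmp3)
    {
    #if defined(JS_CODEGEN_ARM)
        if (IsUnaligned(access)) {
            // Illustrative counts only: one temp for byte assembly and up
            // to two more for wide loads.
            *tmp1 = needI32();
            if (access.byteSize() > 4) {
                *tmp2 = needI32();
                *tmp3 = needI32();
            }
        }
    #endif
    }
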
 
 bool
 BaseCompiler::emitLoad(ValType type, Scalar::Type viewType)
 {
     LinearMemoryAddress<Nothing> addr;
@@ -6859,21 +6964,19 @@ BaseCompiler::emitLoad(ValType type, Sca
     MemoryAccessDesc access(viewType, addr.align, addr.offset, Some(bytecodeOffset()));
     return loadCommon(&access, type);
 }
 
 bool
 BaseCompiler::storeCommon(MemoryAccessDesc* access, ValType resultType)
 {
     AccessCheck check;
-    size_t temps = storeTemps(*access, resultType);
-
-    MOZ_ASSERT(temps <= 1);
-    RegI32 tmp = temps >= 1 ? needI32() : invalidI32();
-    RegI32 tls = invalidI32();
+
+    RegI32 tls, tmp;
+    needStoreTemps(*access, resultType, &tmp);
 
     switch (resultType) {
       case ValType::I32: {
         RegI32 rv = popI32();
         RegI32 rp = popMemoryAccess(access, &check);
         tls = maybeLoadTlsForAccess(check);
         if (!store(access, &check, tls, rp, AnyReg(rv), tmp))
             return false;
@@ -6911,22 +7014,18 @@ BaseCompiler::storeCommon(MemoryAccessDe
         freeF64(rv);
         break;
       }
       default:
         MOZ_CRASH("store type");
         break;
     }
 
-    if (tls != invalidI32())
-        freeI32(tls);
-
-    MOZ_ASSERT(temps <= 1);
-    if (temps >= 1)
-        freeI32(tmp);
+    maybeFreeI32(tls);
+    maybeFreeI32(tmp);
 
     return true;
 }
 
 bool
 BaseCompiler::emitStore(ValType resultType, Scalar::Type viewType)
 {
     LinearMemoryAddress<Nothing> addr;
@@ -7216,18 +7315,17 @@ BaseCompiler::emitAtomicCmpXchg(ValType 
         MOZ_CRASH("BaseCompiler porting interface: compareExchange");
 #endif
         AccessCheck check;
         RegI32 rp = popMemoryAccess(&access, &check);
         RegI32 tls = maybeLoadTlsForAccess(check);
 
         atomicCompareExchange(&access, &check, tls, rp, rexpect, rnew, rd);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
         freeI32(rp);
         freeI32(rnew);
         if (rexpect != rd)
             freeI32(rexpect);
 
         if (narrowing)
             pushU32AsI64(rd);
         else
@@ -7264,18 +7362,17 @@ BaseCompiler::emitAtomicCmpXchg(ValType 
     RegI32 rp = popMemoryAccess(&access, &check);
     RegI32 tls = maybeLoadTlsForAccess(check);
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
     masm.compareExchange64(srcAddr, rexpect, rreplace, rd);
 
     pushI64(rd);
 
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
     freeI32(rp);
 #if defined(JS_CODEGEN_X64)
     freeI64(rreplace);
 #elif defined(JS_CODEGEN_X86)
     freeI32(specific_ecx);
 #elif defined(JS_CODEGEN_ARM)
     freeI64(rexpect);
     freeI64(rreplace);
@@ -7311,35 +7408,34 @@ BaseCompiler::emitAtomicLoad(ValType typ
 # if defined(JS_CODEGEN_X86)
     needI32(specific_ecx);
     needI64(specific_edx_eax);
     // Claim scratch after the need() calls because they may need it to sync.
     ScratchEBX scratch(*this);
     RegI64 tmp = specific_ecx_ebx;
     RegI64 output = specific_edx_eax;
 # elif defined(JS_CODEGEN_ARM)
-    RegI64 tmp = invalidI64();
+    RegI64 tmp;
     RegI64 output = needI64Pair();
 # else
     RegI64 tmp, output;
     MOZ_CRASH("BaseCompiler porting interface: atomic load 64-bit");
 # endif
 
     AccessCheck check;
     RegI32 rp = popMemoryAccess(&access, &check);
     RegI32 tls = maybeLoadTlsForAccess(check);
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
 
     masm.atomicLoad64(srcAddr, tmp, output);
     pushI64(output);
 
     freeI32(rp);
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
 # if defined(JS_CODEGEN_X86)
     freeI32(specific_ecx);
 # elif defined(JS_CODEGEN_ARM)
     // Nothing
 # else
     MOZ_CRASH("BaseCompiler porting interface: atomic load 64-bit");
 # endif
 
@@ -7377,61 +7473,58 @@ BaseCompiler::emitAtomicRMW(ValType type
         RegI32 rv = narrowing ? popI64ToI32() : popI32();
         RegI32 rp = popMemoryAccess(&access, &check);
         RegI32 output = needI32();
 #else
         RegI32 rv, rp, output;
         MOZ_CRASH("BaseCompiler porting interface: atomic rmw");
 #endif
         RegI32 tls = maybeLoadTlsForAccess(check);
-        size_t temps = atomicRMWTemps(op, &access);
-        MOZ_ASSERT(temps <= 1);
-        RegI32 tmp = temps >= 1 ? needI32() : invalidI32();
+        RegI32 tmp;
+        needAtomicRMWTemps(op, &access, &tmp);
 
         atomicRMW(op, &access, &check, tls, rp, rv, output, tmp);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
+        maybeFreeI32(tmp);
         freeI32(rp);
         if (rv != output)
             freeI32(rv);
-        if (temps >= 1)
-            freeI32(tmp);
 
         if (narrowing)
             pushU32AsI64(output);
         else
             pushI32(output);
         return true;
     }
 
     MOZ_ASSERT(type == ValType::I64 && Scalar::byteSize(viewType) == 8);
 
 #if defined(JS_CODEGEN_X86)
 
     sync();
 
-    allocGPR(eax);
+    needI32NoSync(specific_eax);
     ScratchEBX scratch(*this);           // Already allocated
-    allocGPR(ecx);
-    allocGPR(edx);
-    allocGPR(edi);
-    allocGPR(esi);
+    needI32NoSync(specific_ecx);
+    needI32NoSync(specific_edx);
+    needI32NoSync(specific_edi);
+    needI32NoSync(specific_esi);
 
     AccessCheck check;
     MOZ_ASSERT(needTlsForAccess(check));
 
     RegI64 tmp = specific_ecx_ebx;
     popI64ToSpecific(tmp);
 
-    RegI32 ptr = RegI32(esi);
+    RegI32 ptr = specific_esi;
     popI32ToSpecific(ptr);
 
-    RegI32 tls = RegI32(edi);
-    RegI32 memoryBase = RegI32(edi);     // Yes, same
+    RegI32 tls = specific_edi;
+    RegI32 memoryBase = specific_edi;    // Deliberately aliases tls, which is dead after the load below
     masm.loadWasmTlsRegFromFrame(tls);
 
     prepareMemoryAccess(&access, &check, tls, ptr);
     masm.movl(Operand(Address(tls, offsetof(TlsData, memoryBase))), memoryBase);
 
     masm.Push(ecx);
     masm.Push(ebx);
 
@@ -7439,19 +7532,19 @@ BaseCompiler::emitAtomicRMW(ValType type
 
     BaseIndex srcAddr(memoryBase, ptr, TimesOne, access.offset());
     Address value(esp, 0);
     atomicRMW64(op, value, srcAddr, tmp, rd);
 
     masm.freeStack(8);
 
     pushI64(rd);
-    freeGPR(ecx);
-    freeGPR(edi);
-    freeGPR(esi);
+    freeI32(specific_ecx);
+    freeI32(specific_edi);
+    freeI32(specific_esi);
 
 #else // !JS_CODEGEN_X86
 
     AccessCheck check;
 # if defined(JS_CODEGEN_X64)
     bool isAddSub = op == AtomicFetchAddOp || op == AtomicFetchSubOp;
     needI64(specific_rax);
     RegI64 rv = isAddSub ? popI64ToSpecific(specific_rax) : popI64();
@@ -7463,39 +7556,31 @@ BaseCompiler::emitAtomicRMW(ValType type
     RegI64 rd = needI64Pair();
 #  else
     RegI64 rv, rd;
     RegI32 rp;
     MOZ_CRASH("BaseCompiler porting interface: 64-bit atomic RMW");
 # endif
 
     RegI32 tls = maybeLoadTlsForAccess(check);
-    size_t temps = atomicRMW64Temps(op);
-    MOZ_ASSERT(temps <= 1);
-    RegI64 tmp = invalidI64();
-# ifdef JS_CODEGEN_ARM
-    if (temps >= 1) tmp = needI64Pair();
-# else
-    if (temps >= 1) tmp = needI64();
-# endif
+    RegI64 tmp;
+    needAtomicRMW64Temps(op, &tmp);
 
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
 
     atomicRMW64(op, rv, srcAddr, tmp, rd);
 
     pushI64(rd);
 
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
     freeI32(rp);
     if (rv != rd)
         freeI64(rv);
-    if (temps >= 1)
-        freeI64(tmp);
+    maybeFreeI64(tmp);
 
 #endif // !JS_CODEGEN_X86
 
     return true;
 }
 
 bool
 BaseCompiler::emitAtomicStore(ValType type, Scalar::Type viewType)
@@ -7548,18 +7633,17 @@ BaseCompiler::emitAtomicXchg(ValType typ
         RegI32 rd = rv;
 #else
         RegI32 rd = needI32();
 #endif
         RegI32 tls = maybeLoadTlsForAccess(check);
 
         atomicExchange(&access, &check, tls, rp, rv, rd);
 
-        if (tls != invalidI32())
-            freeI32(tls);
+        maybeFreeI32(tls);
         freeI32(rp);
         if (rv != rd)
             freeI32(rv);
 
         if (narrowing)
             pushU32AsI64(rd);
         else
             pushI32(rd);
@@ -7579,18 +7663,17 @@ BaseCompiler::emitAtomicXchg(ValType typ
     RegI32 tls = maybeLoadTlsForAccess(check);
 
     prepareMemoryAccess(&access, &check, tls, rp);
     ATOMIC_PTR(srcAddr, &access, tls, rp);
 
     masm.atomicExchange64(srcAddr, rv, rd);
     pushI64(rd);
 
-    if (tls != invalidI32())
-        freeI32(tls);
+    maybeFreeI32(tls);
     freeI32(rp);
     if (rv != rd)
         freeI64(rv);
 #else
     xchg64(&access, type, WantResult(true));
 #endif
 
     return true;
@@ -8456,17 +8539,17 @@ BaseCompiler::emitInitStackLocals()
     // this case we'll end up using 32-bit offsets on x64 for up to half of the
     // stores, though.)
 
     // Fully-unrolled case.
 
     if (initWords < 2 * unrollLimit)  {
         for (uint32_t i = low; i < high; i += wordSize)
             masm.storePtr(zero, Address(StackPointer, localOffsetToSPOffset(i + wordSize)));
-        freeGPR(zero);
+        freeI32(zero);
         return;
     }
 
     // Unrolled loop with a tail. Stores will use negative offsets. That's OK
     // for x86 and ARM, at least.
 
     // Compute pointer to the highest-addressed slot on the frame.
     RegI32 p = needI32();
@@ -8487,19 +8570,19 @@ BaseCompiler::emitInitStackLocals()
         masm.storePtr(zero, Address(p, -(wordSize * i)));
     masm.subPtr(Imm32(unrollLimit * wordSize), p);
     masm.branchPtr(Assembler::LessThan, lim, p, &again);
 
     // The tail.
     for (uint32_t i = 0; i < tailWords; ++i)
         masm.storePtr(zero, Address(p, -(wordSize * i)));
 
-    freeGPR(p);
-    freeGPR(lim);
-    freeGPR(zero);
+    freeI32(p);
+    freeI32(lim);
+    freeI32(zero);
 }
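
To make the unroll/tail split concrete: with wordSize = 4 and an assumed unrollLimit of 16, initWords = 37 takes the loop path (37 >= 2 * 16), so the loop presumably stores 32 words in two unrolled passes and the remaining tailWords = 5 are handled by the tail stores. (unrollLimit and the tailWords computation are defined earlier in the function, outside this hunk.)
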
 
 BaseCompiler::BaseCompiler(const ModuleEnvironment& env,
                            Decoder& decoder,
                            const FuncCompileInput& func,
                            const ValTypeVector& locals,
                            bool debugEnabled,
                            TempAllocator* alloc,
@@ -8520,65 +8603,42 @@ BaseCompiler::BaseCompiler(const ModuleE
       bceSafe_(0),
       stackAddOffset_(0),
       mode_(mode),
       latentOp_(LatentOp::None),
       latentType_(ValType::I32),
       latentIntCmp_(Assembler::Equal),
       latentDoubleCmp_(Assembler::DoubleEqual),
       masm(*masm),
-      availGPR_(GeneralRegisterSet::All()),
-      availFPU_(FloatRegisterSet::All()),
-#ifdef DEBUG
-      scratchRegisterTaken_(false),
-#endif
+      ra(*this),
 #ifdef JS_CODEGEN_X64
       specific_rax(RegI64(Register64(rax))),
       specific_rcx(RegI64(Register64(rcx))),
       specific_rdx(RegI64(Register64(rdx))),
 #endif
 #if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
       specific_eax(RegI32(eax)),
       specific_ecx(RegI32(ecx)),
       specific_edx(RegI32(edx)),
+      specific_edi(RegI32(edi)),
+      specific_esi(RegI32(esi)),
 #endif
 #ifdef JS_CODEGEN_X86
       specific_ecx_ebx(RegI64(Register64(ecx, ebx))),
       specific_edx_eax(RegI64(Register64(edx, eax))),
-      singleByteRegs_(GeneralRegisterSet(Registers::SingleByteRegs)),
       abiReturnRegI64(RegI64(Register64(edx, eax))),
 #endif
 #ifdef JS_CODEGEN_ARM
       abiReturnRegI64(ReturnReg64),
 #endif
       joinRegI32(RegI32(ReturnReg)),
       joinRegI64(RegI64(ReturnReg64)),
       joinRegF32(RegF32(ReturnFloat32Reg)),
       joinRegF64(RegF64(ReturnDoubleReg))
 {
-    // jit/RegisterAllocator.h: RegisterAllocator::RegisterAllocator()
-
-#if defined(JS_CODEGEN_X64)
-    availGPR_.take(HeapReg);
-#elif defined(JS_CODEGEN_ARM)
-    availGPR_.take(HeapReg);
-    availGPR_.take(ScratchRegARM);
-#elif defined(JS_CODEGEN_ARM64)
-    availGPR_.take(HeapReg);
-    availGPR_.take(HeapLenReg);
-#elif defined(JS_CODEGEN_X86)
-    availGPR_.take(ScratchRegX86);
-#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
-    availGPR_.take(HeapReg);
-#endif
-    availGPR_.take(FramePointer);
-
-#ifdef DEBUG
-    setupRegisterLeakCheck();
-#endif
 }
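
The availGPR_/availFPU_ sets, the per-platform reserved-register takes, and the debug leak check all move out of this constructor into the new allocator object; ra(*this) is all that remains. A sketch of the allocator constructor this implies, reconstructed from the lines deleted above (the class shape and member names beyond ra's visible uses are assumptions):

    BaseRegAlloc::BaseRegAlloc(BaseCompiler& bc)
      : bc(bc),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
    {
    #if defined(JS_CODEGEN_X64)
        availGPR.take(HeapReg);
    #elif defined(JS_CODEGEN_ARM)
        availGPR.take(HeapReg);
        availGPR.take(ScratchRegARM);
    #elif defined(JS_CODEGEN_ARM64)
        availGPR.take(HeapReg);
        availGPR.take(HeapLenReg);
    #elif defined(JS_CODEGEN_X86)
        availGPR.take(ScratchRegX86);
    #elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
        availGPR.take(HeapReg);
    #endif
        availGPR.take(FramePointer);
    #ifdef DEBUG
        setupRegisterLeakCheck();
    #endif
    }
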
 
 bool
 BaseCompiler::init()
 {
     if (!SigD_.append(ValType::F64))
         return false;
     if (!SigF_.append(ValType::F32))