Bug 1150337 - OdinMonkey: Optimize the full range of immediate offsets on x64. r=luke, a=abillings
author: Dan Gohman <sunfish@mozilla.com>
date: Mon, 27 Apr 2015 11:55:31 -0400
changeset: 267239 80cb954e2eb8c6f411fbebd6e6234cc3f21e6d8b
parent: 267238 b6b8350569d9729787b4e3d9c91fa3b92b2e0bc1
child: 267240 78c576630fed988ff067cfb66ad25b8de438b2fd
push id: 830
push user: raliiev@mozilla.com
push date: Fri, 19 Jun 2015 19:24:37 +0000
treeherder: mozilla-release@932614382a68
reviewers: luke, abillings
bugs: 1150337
milestone: 39.0a2
js/src/asmjs/AsmJSModule.cpp
js/src/asmjs/AsmJSSignalHandlers.cpp
js/src/asmjs/AsmJSValidate.h
js/src/jit-test/tests/asm.js/testHeapAccess.js
js/src/jit/Disassembler.h
js/src/jit/EffectiveAddressAnalysis.cpp
js/src/jit/EffectiveAddressAnalysis.h
js/src/jit/Ion.cpp
js/src/jit/MIR.h
js/src/jit/MIRGenerator.h
js/src/jit/MIRGraph.cpp
js/src/jit/shared/Disassembler-x86-shared.cpp
--- a/js/src/asmjs/AsmJSModule.cpp
+++ b/js/src/asmjs/AsmJSModule.cpp
@@ -91,16 +91,20 @@ AsmJSModule::AsmJSModule(ScriptSource* s
     mozilla::PodZero(&pod);
     pod.funcPtrTableAndExitBytes_ = SIZE_MAX;
     pod.functionBytes_ = UINT32_MAX;
     pod.minHeapLength_ = RoundUpToNextValidAsmJSHeapLength(0);
     pod.maxHeapLength_ = 0x80000000;
     pod.strict_ = strict;
     pod.usesSignalHandlers_ = canUseSignalHandlers;
 
+    // AsmJSCheckedImmediateRange should be defined to be at most the minimum
+    // heap length so that offsets can be folded into bounds checks.
+    MOZ_ASSERT(pod.minHeapLength_ - AsmJSCheckedImmediateRange <= pod.minHeapLength_);
+
     scriptSource_->incref();
 }
 
 AsmJSModule::~AsmJSModule()
 {
     MOZ_ASSERT(!interrupted_);
 
     scriptSource_->decref();
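
The new assertion relies on the standard unsigned-underflow idiom: for unsigned a and b, a - b wraps to a value greater than a exactly when b > a, so the check is equivalent to AsmJSCheckedImmediateRange <= pod.minHeapLength_. A minimal standalone sketch of the idiom (names and values are illustrative, not from the tree):

    #include <cassert>
    #include <cstdint>

    // For unsigned a and b, (a - b <= a) holds exactly when b <= a, because
    // underflow wraps around to a value larger than a.
    static bool
    SubtractionStaysBelow(uint64_t a, uint64_t b)
    {
        return a - b <= a;
    }

    int main()
    {
        assert(SubtractionStaysBelow(0x10000, 0x1000));   // b <= a: no underflow
        assert(!SubtractionStaysBelow(0x1000, 0x10000));  // b > a: wraps past a
        return 0;
    }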
--- a/js/src/asmjs/AsmJSSignalHandlers.cpp
+++ b/js/src/asmjs/AsmJSSignalHandlers.cpp
@@ -565,24 +565,24 @@ StoreValueFromRegister(EMULATOR_CONTEXT*
 
 MOZ_COLD static uint8_t*
 ComputeAccessAddress(EMULATOR_CONTEXT* context, const Disassembler::ComplexAddress& address)
 {
     MOZ_RELEASE_ASSERT(!address.isPCRelative(), "PC-relative addresses not supported yet");
 
     uintptr_t result = address.disp();
 
-    if (address.base() != Registers::Invalid) {
+    if (address.hasBase()) {
         uintptr_t base;
         StoreValueFromGPReg(&base, sizeof(uintptr_t),
                             AddressOfGPRegisterSlot(context, address.base()));
         result += base;
     }
 
-    if (address.index() != Registers::Invalid) {
+    if (address.hasIndex()) {
         uintptr_t index;
         StoreValueFromGPReg(&index, sizeof(uintptr_t),
                             AddressOfGPRegisterSlot(context, address.index()));
         result += index * (1 << address.scale());
     }
 
     return reinterpret_cast<uint8_t*>(result);
 }
@@ -603,25 +603,25 @@ EmulateHeapAccess(EMULATOR_CONTEXT* cont
     const Disassembler::ComplexAddress& address = access.address();
     MOZ_RELEASE_ASSERT(end > pc);
     MOZ_RELEASE_ASSERT(module.containsFunctionPC(end));
 
 #if defined(JS_CODEGEN_X64)
     // Check x64 asm.js heap access invariants.
     MOZ_RELEASE_ASSERT(address.disp() >= 0);
     MOZ_RELEASE_ASSERT(address.base() == HeapReg.code());
-    MOZ_RELEASE_ASSERT(address.index() != HeapReg.code());
+    MOZ_RELEASE_ASSERT(!address.hasIndex() || address.index() != HeapReg.code());
     MOZ_RELEASE_ASSERT(address.scale() == 0);
-    if (address.base() != Registers::Invalid) {
+    if (address.hasBase()) {
         uintptr_t base;
         StoreValueFromGPReg(&base, sizeof(uintptr_t),
                             AddressOfGPRegisterSlot(context, address.base()));
         MOZ_RELEASE_ASSERT(reinterpret_cast<uint8_t*>(base) == module.maybeHeap());
     }
-    if (address.index() != Registers::Invalid) {
+    if (address.hasIndex()) {
         uintptr_t index;
         StoreValueFromGPReg(&index, sizeof(uintptr_t),
                             AddressOfGPRegisterSlot(context, address.index()));
         MOZ_RELEASE_ASSERT(uint32_t(index) == index);
     }
 #endif
 
     // Determine the actual effective address of the faulting access. We can't
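
ComputeAccessAddress reconstructs the faulting instruction's x86 effective address, disp + base + (index << scale), reading base and index out of the saved register context only when the operand actually encodes them (the new hasBase()/hasIndex() queries). A minimal sketch of the address arithmetic itself, with the emulator-context plumbing stripped out (names are illustrative):

    #include <cstdint>

    // An x86 memory operand addresses disp + base + index * 2^scale; a scale
    // field of n encodes a multiplier of 1, 2, 4, or 8. Negative displacements
    // sign-extend, which the conversion through intptr_t makes explicit.
    static uintptr_t
    EffectiveAddress(int32_t disp, uintptr_t base, uintptr_t index, uint32_t scale)
    {
        return uintptr_t(intptr_t(disp)) + base + (index << scale);
    }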
--- a/js/src/asmjs/AsmJSValidate.h
+++ b/js/src/asmjs/AsmJSValidate.h
@@ -62,22 +62,22 @@ const size_t AsmJSPageSize = 4096;
 static_assert(jit::AsmJSCheckedImmediateRange <= jit::AsmJSImmediateRange,
               "AsmJSImmediateRange should be the size of an unconstrained "
               "address immediate");
 
 // To support the use of signal handlers for catching Out Of Bounds accesses,
 // the internal ArrayBuffer data array is inflated to 4GiB (only the
 // byteLength portion of which is accessible) so that out-of-bounds accesses
 // (made using a uint32 index) are guaranteed to raise a SIGSEGV.
-// Then, an additional extent is added to permit folding of small immediate
+// Then, an additional extent is added to permit folding of immediate
 // values into addresses. And finally, unaligned accesses and mask optimizations
 // might also try to access a few bytes after this limit, so just inflate it by
 // AsmJSPageSize.
 static const size_t AsmJSMappedSize = 4 * 1024ULL * 1024ULL * 1024ULL +
-                                      jit::AsmJSCheckedImmediateRange +
+                                      jit::AsmJSImmediateRange +
                                       AsmJSPageSize;
 
 #endif // defined(ASMJS_MAY_USE_SIGNAL_HANDLERS_FOR_OOB)
 
 // From the asm.js spec Linking section:
 //  the heap object's byteLength must be either
 //    2^n for n in [12, 24)
 //  or
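
Concretely, an x64 asm.js heap access computes heapBase + uint32(index) + offset, so once the guard region is widened from AsmJSCheckedImmediateRange to AsmJSImmediateRange, the farthest byte any access can touch is (2^32 - 1) + (AsmJSImmediateRange - 1) plus the access width, and the trailing AsmJSPageSize absorbs that width along with unaligned/mask slop. A sketch of the bound with stand-in constants (the immediate-range value here is hypothetical; the real constant lives in the jit headers):

    #include <cstddef>

    static const size_t FourGiB = 4 * 1024ULL * 1024ULL * 1024ULL;
    static const size_t PageSizeSketch = 4096;
    static const size_t ImmediateRangeSketch = 64 * 1024 * 1024;  // hypothetical value

    static const size_t MappedSizeSketch =
        FourGiB + ImmediateRangeSketch + PageSizeSketch;

    // Worst case: maximal uint32 index, maximal foldable displacement, and an
    // access whose width (plus unaligned/mask slop) fits in one page.
    static_assert((FourGiB - 1) + (ImmediateRangeSketch - 1) + PageSizeSketch
                      <= MappedSizeSketch,
                  "every folded-offset access lands inside the mapped region");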
--- a/js/src/jit-test/tests/asm.js/testHeapAccess.js
+++ b/js/src/jit-test/tests/asm.js/testHeapAccess.js
@@ -500,8 +500,25 @@ assertEq(asmLink(m, this, null, new Arra
 var buf = new ArrayBuffer(0x1000000);
 new Uint8Array(buf)[0x424242] = 0xAA;
 var f = asmLink(m, this, null, buf);
 assertEq(f(0),0);
 assertEq(f(0x424242),0xAA);
 assertEq(f(0x1000000),0);
 assertEq(asmLink(m, this, null, new ArrayBuffer(0x2000000))(0),0);
 assertEq(asmLink(m, this, null, new ArrayBuffer(0x3000000))(0),0);
+
+// Heap offsets
+var asmMod = function test (glob, env, b) {
+    'use asm';
+    var i8 = new glob.Int8Array(b);
+    function f(i) {
+        i = i | 0;
+        i = i & 1;
+        i = (i - 0x40000)|0;
+        i8[0x3ffff] = 0;
+        return i8[(i + 0x7fffe) >> 0] | 0;
+    }
+    return f;
+};
+var buffer = new ArrayBuffer(0x40000);
+var asm = asmMod(this, {}, buffer);
+assertEq(asm(-1),0);
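
A worked trace of the new test at asm(-1): i & 1 gives 1; (1 - 0x40000)|0 gives -0x3ffff; the load index is then -0x3ffff + 0x7fffe = 0x3ffff, the last byte of the 0x40000-byte heap. The 0x7fffe displacement is far larger than anything the old small-immediate folding handled, and the pointer is negative until the displacement is added, so the test checks that folding the full offset still bounds-checks correctly and reads back the 0 stored at i8[0x3ffff].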
--- a/js/src/jit/Disassembler.h
+++ b/js/src/jit/Disassembler.h
@@ -92,21 +92,31 @@ class ComplexAddress {
     bool isPCRelative() const {
         return isPCRelative_;
     }
 
     int32_t disp() const {
         return disp_;
     }
 
+    bool hasBase() const {
+        return base_ != Registers::Invalid;
+    }
+
     Register::Encoding base() const {
+        MOZ_ASSERT(hasBase());
         return base_;
     }
 
+    bool hasIndex() const {
+        return index_ != Registers::Invalid;
+    }
+
     Register::Encoding index() const {
+        MOZ_ASSERT(hasIndex());
         return index_;
     }
 
     uint32_t scale() const {
         return scale_;
     }
 
 #ifdef DEBUG
--- a/js/src/jit/EffectiveAddressAnalysis.cpp
+++ b/js/src/jit/EffectiveAddressAnalysis.cpp
@@ -96,44 +96,74 @@ AnalyzeLsh(TempAllocator& alloc, MLsh* l
         return;
 
     MEffectiveAddress* eaddr = MEffectiveAddress::New(alloc, base, index, scale, displacement);
     last->replaceAllUsesWith(eaddr);
     last->block()->insertAfter(last, eaddr);
 }
 
 template<typename MAsmJSHeapAccessType>
-static void
-AnalyzeAsmHeapAccess(MAsmJSHeapAccessType* ins, MIRGraph& graph)
+bool
+EffectiveAddressAnalysis::tryAddDisplacement(MAsmJSHeapAccessType* ins, int32_t o)
+{
+    // Compute the new offset. Check for overflow and negative. In theory it
+    // ought to be possible to support negative offsets, but it'd require
+    // more elaborate bounds checking mechanisms than we currently have.
+    MOZ_ASSERT(ins->offset() >= 0);
+    int32_t newOffset = uint32_t(ins->offset()) + o;
+    if (newOffset < 0)
+        return false;
+
+    // Compute the new offset to the end of the access. Check for overflow
+    // and negative here also.
+    int32_t newEnd = uint32_t(newOffset) + ins->byteSize();
+    if (newEnd < 0)
+        return false;
+    MOZ_ASSERT(uint32_t(newEnd) >= uint32_t(newOffset));
+
+    // Determine the range of valid offsets which can be folded into this
+    // instruction and check whether our computed offset is within that range.
+    size_t range = mir_->foldableOffsetRange(ins);
+    if (size_t(newEnd) > range)
+        return false;
+
+    // Everything checks out. This is the new offset.
+    ins->setOffset(newOffset);
+    return true;
+}
+
+template<typename MAsmJSHeapAccessType>
+void
+EffectiveAddressAnalysis::analyzeAsmHeapAccess(MAsmJSHeapAccessType* ins)
 {
     MDefinition* ptr = ins->ptr();
 
     if (ptr->isConstantValue()) {
         // Look for heap[i] where i is a constant offset, and fold the offset.
         // By doing the folding now, we simplify the task of codegen; the offset
         // is always the address mode immediate. This also allows it to avoid
         // a situation where the sum of a constant pointer value and a non-zero
         // offset doesn't actually fit into the address mode immediate.
         int32_t imm = ptr->constantValue().toInt32();
-        if (imm != 0 && ins->tryAddDisplacement(imm)) {
-            MInstruction* zero = MConstant::New(graph.alloc(), Int32Value(0));
+        if (imm != 0 && tryAddDisplacement(ins, imm)) {
+            MInstruction* zero = MConstant::New(graph_.alloc(), Int32Value(0));
             ins->block()->insertBefore(ins, zero);
             ins->replacePtr(zero);
         }
     } else if (ptr->isAdd()) {
         // Look for heap[a+i] where i is a constant offset, and fold the offset.
         // Alignment masks have already been moved out of the way by the
         // Alignment Mask Analysis pass.
         MDefinition* op0 = ptr->toAdd()->getOperand(0);
         MDefinition* op1 = ptr->toAdd()->getOperand(1);
         if (op0->isConstantValue())
             mozilla::Swap(op0, op1);
         if (op1->isConstantValue()) {
             int32_t imm = op1->constantValue().toInt32();
-            if (ins->tryAddDisplacement(imm))
+            if (tryAddDisplacement(ins, imm))
                 ins->replacePtr(op0);
         }
     }
 }
 
 // This analysis converts patterns of the form:
 //   truncate(x + (y << {0,1,2,3}))
 //   truncate(x + (y << {0,1,2,3}) + imm32)
@@ -154,15 +184,15 @@ EffectiveAddressAnalysis::analyze()
     for (ReversePostorderIterator block(graph_.rpoBegin()); block != graph_.rpoEnd(); block++) {
         for (MInstructionIterator i = block->begin(); i != block->end(); i++) {
             // Note that we don't check for MAsmJSCompareExchangeHeap
             // or MAsmJSAtomicBinopHeap, because the backend and the OOB
             // mechanism don't support non-zero offsets for them yet.
             if (i->isLsh())
                 AnalyzeLsh(graph_.alloc(), i->toLsh());
             else if (i->isAsmJSLoadHeap())
-                AnalyzeAsmHeapAccess(i->toAsmJSLoadHeap(), graph_);
+                analyzeAsmHeapAccess(i->toAsmJSLoadHeap());
             else if (i->isAsmJSStoreHeap())
-                AnalyzeAsmHeapAccess(i->toAsmJSStoreHeap(), graph_);
+                analyzeAsmHeapAccess(i->toAsmJSStoreHeap());
         }
     }
     return true;
 }
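
The net effect of tryAddDisplacement plus analyzeAsmHeapAccess is that heap[(ptr + K)|0] becomes a single access at ptr carrying K as a constant displacement whenever K passes the sign, overflow, and range checks. A standalone sketch of the displacement check mirroring the code above, with foldableOffsetRange modeled as a plain parameter:

    #include <cstddef>
    #include <cstdint>

    // Returns the folded offset, or -1 if adding k to an access of byteSize
    // bytes at `offset` would go negative, overflow, or escape the foldable
    // range (AsmJSCheckedImmediateRange or AsmJSImmediateRange, as applicable).
    static int32_t
    FoldDisplacement(int32_t offset, int32_t k, uint32_t byteSize, size_t range)
    {
        int32_t newOffset = int32_t(uint32_t(offset) + uint32_t(k));
        if (newOffset < 0)
            return -1;  // negative offsets would need fancier bounds checks

        int32_t newEnd = int32_t(uint32_t(newOffset) + byteSize);
        if (newEnd < 0)
            return -1;  // the end of the access must stay representable

        if (size_t(newEnd) > range)
            return -1;  // outside the permitted range: don't fold

        return newOffset;
    }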
--- a/js/src/jit/EffectiveAddressAnalysis.h
+++ b/js/src/jit/EffectiveAddressAnalysis.h
@@ -9,21 +9,28 @@
 
 namespace js {
 namespace jit {
 
 class MIRGraph;
 
 class EffectiveAddressAnalysis
 {
+    MIRGenerator* mir_;
     MIRGraph& graph_;
 
+    template<typename MAsmJSHeapAccessType>
+    bool tryAddDisplacement(MAsmJSHeapAccessType* ins, int32_t o);
+
+    template<typename MAsmJSHeapAccessType>
+    void analyzeAsmHeapAccess(MAsmJSHeapAccessType* ins);
+
   public:
-    explicit EffectiveAddressAnalysis(MIRGraph& graph)
-      : graph_(graph)
+    EffectiveAddressAnalysis(MIRGenerator* mir, MIRGraph& graph)
+      : mir_(mir), graph_(graph)
     {}
 
     bool analyze();
 };
 
 } /* namespace jit */
 } /* namespace js */
 
--- a/js/src/jit/Ion.cpp
+++ b/js/src/jit/Ion.cpp
@@ -1409,17 +1409,17 @@ OptimizeMIR(MIRGenerator* mir)
 
             IonSpewPass("Unroll Loops");
             AssertExtendedGraphCoherency(graph);
         }
     }
 
     if (mir->optimizationInfo().eaaEnabled()) {
         AutoTraceLog log(logger, TraceLogger_EffectiveAddressAnalysis);
-        EffectiveAddressAnalysis eaa(graph);
+        EffectiveAddressAnalysis eaa(mir, graph);
         if (!eaa.analyze())
             return false;
         IonSpewPass("Effective Address Analysis");
         AssertExtendedGraphCoherency(graph);
 
         if (mir->shouldCancel("Effective Address Analysis"))
             return false;
     }
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -12541,42 +12541,19 @@ class MAsmJSHeapAccess
     unsigned byteSize() const {
         return Scalar::isSimdType(accessType())
                ? Scalar::scalarByteSize(accessType()) * numSimdElems()
                : TypedArrayElemSize(accessType());
     }
     bool needsBoundsCheck() const { return needsBoundsCheck_; }
     void removeBoundsCheck() { needsBoundsCheck_ = false; }
     unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(accessType_)); return numSimdElems_; }
-
-    bool tryAddDisplacement(int32_t o) {
-        // Compute the new offset. Check for overflow and negative. In theory it
-        // ought to be possible to support negative offsets, but it'd require
-        // more elaborate bounds checking mechanisms than we currently have.
-        MOZ_ASSERT(offset_ >= 0);
-        int32_t newOffset = uint32_t(offset_) + o;
-        if (newOffset < 0)
-            return false;
-
-        // Compute the new offset to the end of the access. Check for overflow
-        // and negative here also.
-        int32_t newEnd = uint32_t(newOffset) + byteSize();
-        if (newEnd < 0)
-            return false;
-        MOZ_ASSERT(uint32_t(newEnd) >= uint32_t(newOffset));
-
-        // If we need bounds checking, keep it within the more restrictive
-        // AsmJSCheckedImmediateRange. Otherwise, just keep it within what
-        // the instruction set can support.
-        size_t range = needsBoundsCheck() ? AsmJSCheckedImmediateRange : AsmJSImmediateRange;
-        if (size_t(newEnd) > range)
-            return false;
-
-        offset_ = newOffset;
-        return true;
+    void setOffset(int32_t o) {
+        MOZ_ASSERT(o >= 0);
+        offset_ = o;
     }
 };
 
 class MAsmJSLoadHeap
   : public MUnaryInstruction,
     public MAsmJSHeapAccess,
     public NoTypePolicy::Data
 {
--- a/js/src/jit/MIRGenerator.h
+++ b/js/src/jit/MIRGenerator.h
@@ -229,25 +229,16 @@ class MIRGenerator
 
     const ObjectVector& nurseryObjects() const {
         return nurseryObjects_;
     }
 
     Label* outOfBoundsLabel() const {
         return outOfBoundsLabel_;
     }
-    bool needsAsmJSBoundsCheckBranch(const MAsmJSHeapAccess* access) const {
-        // A heap access needs a bounds-check branch if we're not relying on signal
-        // handlers to catch errors, and if it's not proven to be within bounds.
-        // We use signal-handlers on x64, but on x86 there isn't enough address
-        // space for a guard region.
-#if defined(ASMJS_MAY_USE_SIGNAL_HANDLERS_FOR_OOB)
-        if (usesSignalHandlersForAsmJSOOB_)
-            return false;
-#endif
-        return access->needsBoundsCheck();
-    }
+    bool needsAsmJSBoundsCheckBranch(const MAsmJSHeapAccess* access) const;
+    size_t foldableOffsetRange(const MAsmJSHeapAccess* access) const;
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_MIRGenerator_h */
--- a/js/src/jit/MIRGraph.cpp
+++ b/js/src/jit/MIRGraph.cpp
@@ -103,16 +103,55 @@ MIRGenerator::addAbortedPreliminaryGroup
     for (size_t i = 0; i < abortedPreliminaryGroups_.length(); i++) {
         if (group == abortedPreliminaryGroups_[i])
             return;
     }
     if (!abortedPreliminaryGroups_.append(group))
         CrashAtUnhandlableOOM("addAbortedPreliminaryGroup");
 }
 
+bool
+MIRGenerator::needsAsmJSBoundsCheckBranch(const MAsmJSHeapAccess* access) const
+{
+    // A heap access needs a bounds-check branch if we're not relying on signal
+    // handlers to catch errors, and if it's not proven to be within bounds.
+    // We use signal-handlers on x64, but on x86 there isn't enough address
+    // space for a guard region.
+#if defined(ASMJS_MAY_USE_SIGNAL_HANDLERS_FOR_OOB)
+    if (usesSignalHandlersForAsmJSOOB_)
+        return false;
+#endif
+    return access->needsBoundsCheck();
+}
+
+size_t
+MIRGenerator::foldableOffsetRange(const MAsmJSHeapAccess* access) const
+{
+    // This determines whether it's ok to fold up to AsmJSImmediateRange
+    // offsets, instead of just AsmJSCheckedImmediateRange.
+
+#if defined(ASMJS_MAY_USE_SIGNAL_HANDLERS_FOR_OOB)
+    // With signal-handler OOB handling, we reserve guard space for the full
+    // immediate size.
+    if (usesSignalHandlersForAsmJSOOB_)
+        return AsmJSImmediateRange;
+#endif
+
+    // On 32-bit platforms, if we've proven the access is in bounds after
+    // 32-bit wrapping, we can fold full offsets because they're added with
+    // 32-bit arithmetic.
+    if (sizeof(intptr_t) == sizeof(int32_t) && !access->needsBoundsCheck())
+        return AsmJSImmediateRange;
+
+    // Otherwise, only allow the checked size. This is always less than the
+    // minimum heap length, and allows explicit bounds checks to fold in the
+    // offset without overflow.
+    return AsmJSCheckedImmediateRange;
+}
+
 void
 MIRGraph::addBlock(MBasicBlock* block)
 {
     MOZ_ASSERT(block);
     block->setId(blockIdGen_++);
     blocks_.pushBack(block);
     numBlocks_++;
 }
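
Taken together: with signal-handler OOB handling, the guard region now covers the full AsmJSImmediateRange, so x64 can fold any encodable non-negative displacement; 32-bit platforms fold the full range only when the access is already proven in bounds, because pointer arithmetic there wraps at 32 bits; every other case stays within AsmJSCheckedImmediateRange, which the new AsmJSModule assertion keeps at or below the minimum heap length so that an explicit bounds check can add the offset without overflow.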
--- a/js/src/jit/shared/Disassembler-x86-shared.cpp
+++ b/js/src/jit/shared/Disassembler-x86-shared.cpp
@@ -526,27 +526,27 @@ js::jit::Disassembler::DumpHeapAccess(co
       case OtherOperand::FPR: fprintf(stderr, "fpr %s", X86Encoding::XMMRegName(access.otherOperand().fpr())); break;
       default: fprintf(stderr, "unknown");
     }
 
     fprintf(stderr, " @ ");
 
     if (access.address().isPCRelative()) {
         fprintf(stderr, MEM_o32r " ", ADDR_o32r(access.address().disp()));
-    } else if (access.address().index() != X86Encoding::invalid_reg) {
-        if (access.address().base() != X86Encoding::invalid_reg) {
+    } else if (access.address().hasIndex()) {
+        if (access.address().hasBase()) {
             fprintf(stderr, MEM_obs " ",
                     ADDR_obs(access.address().disp(), access.address().base(),
                              access.address().index(), access.address().scale()));
         } else {
             fprintf(stderr, MEM_os " ",
                     ADDR_os(access.address().disp(),
                             access.address().index(), access.address().scale()));
         }
-    } else if (access.address().base() != X86Encoding::invalid_reg) {
+    } else if (access.address().hasBase()) {
         fprintf(stderr, MEM_ob " ", ADDR_ob(access.address().disp(), access.address().base()));
     } else {
         fprintf(stderr, MEM_o " ", ADDR_o(access.address().disp()));
     }
 
     fprintf(stderr, "\n");
 }
 #endif