Bug 1181612: Make asm.js internal calls thread-local; r=luke
author: Benjamin Bouvier <benj@benj.me>
date: Thu, 05 Nov 2015 12:10:28 +0100
changeset: 293031 af53cabd8bbfc4f3953c129d33311efd67a260ee
parent: 293030 2920e4569fc430f130dfe7785144ce3218a97222
child: 293032 5dec2ebb1e712d6d756ef6e8c0e64931b8ff6373
push id: 8824
push user: raliiev@mozilla.com
push date: Mon, 14 Dec 2015 20:18:56 +0000
treeherder: mozilla-aurora@e2031358e2a6
reviewers: luke
bugs: 1181612
milestone: 45.0a1
Bug 1181612: Make asm.js internal calls thread-local; r=luke
js/src/asmjs/AsmJSCompile.cpp
js/src/asmjs/AsmJSCompile.h
js/src/asmjs/AsmJSGlobals.h
js/src/asmjs/AsmJSModule.cpp
js/src/asmjs/AsmJSModule.h
js/src/asmjs/AsmJSValidate.cpp
js/src/jit/MIR.h
js/src/jit/MacroAssembler-inl.h
js/src/jit/MacroAssembler.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm64/MacroAssembler-arm64.cpp
js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
js/src/jit/shared/Assembler-shared.h
js/src/jit/x86-shared/Assembler-x86-shared.h
js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
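
Before this patch, an internal (asm.js-to-asm.js) call was emitted against a per-function jit::Label lazily allocated in the shared ModuleCompileResults, state that every function compilation had to read and mutate. After this patch, each compilation records only the callee's function index next to the call site, emits the call with a placeholder target (callWithPatch), and the main thread patches every internal call to its final offset once all bodies have been compiled. A minimal standalone sketch of that two-phase scheme follows; the names are illustrative, not SpiderMonkey API:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct CallSiteFixup {
        uint32_t callerOffset; // offset just past the call's 4-byte rel32 field
        uint32_t targetIndex;  // index of the callee function in the module
    };

    struct ModuleImage {
        std::vector<uint8_t>  code;          // single linear code segment
        std::vector<uint32_t> entryOffsets;  // per-function entry points
        std::vector<CallSiteFixup> fixups;

        // Phase 1 (per function): emit the call with a zero displacement and
        // remember the callee by index only -- no shared Label is touched.
        void emitInternalCall(uint32_t targetIndex) {
            code.insert(code.end(), {0xE8, 0, 0, 0, 0}); // x86 call rel32 stub
            fixups.push_back({uint32_t(code.size()), targetIndex});
        }

        // Phase 2 (main thread, after all bodies exist): resolve each index
        // to its final entry offset and rewrite the displacement in place.
        void link() {
            for (const CallSiteFixup& f : fixups) {
                int32_t rel = int32_t(entryOffsets[f.targetIndex]) -
                              int32_t(f.callerOffset);
                std::memcpy(&code[f.callerOffset - 4], &rel, sizeof(rel));
            }
        }
    };
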
--- a/js/src/asmjs/AsmJSCompile.cpp
+++ b/js/src/asmjs/AsmJSCompile.cpp
@@ -110,61 +110,16 @@ class ModuleCompiler
     int64_t usecBefore()            { return compileResults_->usecBefore(); }
 
     bool usesSignalHandlersForOOB() const   { return compileInputs_.usesSignalHandlersForOOB; }
     CompileRuntime* runtime() const         { return compileInputs_.runtime; }
     CompileCompartment* compartment() const { return compileInputs_.compartment; }
 
     /***************************************************** Mutable interface */
 
-    bool getOrCreateFunctionEntry(uint32_t funcIndex, Label** label)
-    {
-        return compileResults_->getOrCreateFunctionEntry(funcIndex, label);
-    }
-
-    bool finishGeneratingFunction(AsmFunction& func, CodeGenerator& codegen,
-                                  const AsmJSFunctionLabels& labels)
-    {
-        // If we have hit OOM then invariants which we assert below may not
-        // hold, so abort now.
-        if (masm().oom())
-            return false;
-
-        // Code range
-        unsigned line = func.lineno();
-        unsigned column = func.column();
-        PropertyName* funcName = func.name();
-        if (!compileResults_->addCodeRange(AsmJSModule::FunctionCodeRange(funcName, line, labels)))
-            return false;
-
-        // Script counts
-        jit::IonScriptCounts* counts = codegen.extractScriptCounts();
-        if (counts && !compileResults_->addFunctionCounts(counts)) {
-            js_delete(counts);
-            return false;
-        }
-
-        // Slow functions
-        if (func.compileTime() >= 250) {
-            ModuleCompileResults::SlowFunction sf(funcName, func.compileTime(), line, column);
-            if (!compileResults_->slowFunctions().append(Move(sf)))
-                return false;
-        }
-
-#if defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-        // Perf and profiling information
-        unsigned begin = labels.nonProfilingEntry.offset();
-        unsigned end = labels.endAfterOOL.offset();
-        AsmJSModule::ProfiledFunction profiledFunc(funcName, begin, end, line, column);
-        if (!compileResults_->addProfiledFunction(profiledFunc))
-            return false;
-#endif // defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-        return true;
-    }
-
     void finish(ScopedJSDeletePtr<ModuleCompileResults>* results) {
         *results = compileResults_.forget();
     }
 };
 
 } // namespace js
 
 enum class AsmType : uint8_t {
@@ -796,17 +751,17 @@ class FunctionCompiler
   private:
     bool callPrivate(MAsmJSCall::Callee callee, const Call& call, MIRType returnType, MDefinition** def)
     {
         if (inDeadCode()) {
             *def = nullptr;
             return true;
         }
 
-        CallSiteDesc::Kind kind = CallSiteDesc::Kind(-1);  // initialize to silence GCC warning
+        CallSiteDesc::Kind kind = CallSiteDesc::Kind(-1);
         switch (callee.which()) {
           case MAsmJSCall::Callee::Internal: kind = CallSiteDesc::Relative; break;
           case MAsmJSCall::Callee::Dynamic:  kind = CallSiteDesc::Register; break;
           case MAsmJSCall::Callee::Builtin:  kind = CallSiteDesc::Register; break;
         }
 
         MAsmJSCall* ins = MAsmJSCall::New(alloc(), CallSiteDesc(call.lineno_, call.column_, kind),
                                           callee, call.regArgs_, returnType, call.spIncrement_);
@@ -814,20 +769,20 @@ class FunctionCompiler
             return false;
 
         curBlock_->add(ins);
         *def = ins;
         return true;
     }
 
   public:
-    bool internalCall(const Signature& sig, Label* entry, const Call& call, MDefinition** def)
+    bool internalCall(const Signature& sig, uint32_t funcIndex, const Call& call, MDefinition** def)
     {
         MIRType returnType = sig.retType().toMIRType();
-        return callPrivate(MAsmJSCall::Callee(entry), call, returnType, def);
+        return callPrivate(MAsmJSCall::Callee(AsmJSInternalCallee(funcIndex)), call, returnType, def);
     }
 
     bool funcPtrCall(const Signature& sig, uint32_t maskLit, uint32_t globalDataOffset, MDefinition* index,
                      const Call& call, MDefinition** def)
     {
         if (inDeadCode()) {
             *def = nullptr;
             return true;
@@ -1736,33 +1691,27 @@ ReadCallLineCol(FunctionCompiler& f, uin
     *line = f.readU32();
     *column = f.readU32();
 }
 
 static bool
 EmitInternalCall(FunctionCompiler& f, RetType retType, MDefinition** def)
 {
     uint32_t funcIndex = f.readU32();
-
-    Label* entry;
-    if (!f.m().getOrCreateFunctionEntry(funcIndex, &entry))
-        return false;
-
     const Signature& sig = *f.readSignature();
-
     MOZ_ASSERT_IF(sig.retType() != RetType::Void, sig.retType() == retType);
 
     uint32_t lineno, column;
     ReadCallLineCol(f, &lineno, &column);
 
     FunctionCompiler::Call call(f, lineno, column);
     if (!EmitCallArgs(f, sig, &call))
         return false;
 
-    return f.internalCall(sig, entry, call, def);
+    return f.internalCall(sig, funcIndex, call, def);
 }
 
 static bool
 EmitFuncPtrCall(FunctionCompiler& f, RetType retType, MDefinition** def)
 {
     uint32_t mask = f.readU32();
     uint32_t globalDataOffset = f.readU32();
 
@@ -3163,45 +3112,47 @@ js::GenerateAsmFunctionMIR(ModuleCompile
 
     f.checkPostconditions();
 
     func.accumulateCompileTime((PRMJ_Now() - before) / PRMJ_USEC_PER_MSEC);
     return true;
 }
 
 bool
-js::GenerateAsmFunctionCode(ModuleCompiler& m, AsmFunction& func, MIRGenerator& mir, LIRGraph& lir)
+js::GenerateAsmFunctionCode(ModuleCompiler& m, AsmFunction& func, MIRGenerator& mir, LIRGraph& lir,
+                            FunctionCompileResults* results)
 {
     JitContext jitContext(m.runtime(), /* CompileCompartment = */ nullptr, &mir.alloc());
 
     int64_t before = PRMJ_Now();
 
     // A single MacroAssembler is reused for all function compilations so
     // that there is a single linear code segment for each module. To avoid
     // spiking memory, a LifoAllocScope in the caller frees all MIR/LIR
     // after each function is compiled. This method is responsible for cleaning
     // out any dangling pointers that the MacroAssembler may have kept.
     m.masm().resetForNewCodeGenerator(mir.alloc());
 
     ScopedJSDeletePtr<CodeGenerator> codegen(js_new<CodeGenerator>(&mir, &lir, &m.masm()));
     if (!codegen)
         return false;
 
-    Label* funcEntry;
-    if (!m.getOrCreateFunctionEntry(func.funcIndex(), &funcEntry))
-        return false;
-
-    AsmJSFunctionLabels labels(*funcEntry, m.stackOverflowLabel());
+    Label entry;
+    AsmJSFunctionLabels labels(entry, m.stackOverflowLabel());
     if (!codegen->generateAsmJS(&labels))
         return false;
 
     func.accumulateCompileTime((PRMJ_Now() - before) / PRMJ_USEC_PER_MSEC);
 
-    if (!m.finishGeneratingFunction(func, *codegen, labels))
-        return false;
+    PropertyName* funcName = func.name();
+    unsigned line = func.lineno();
+
+    // Fill in the results of the function's compilation
+    AsmJSModule::FunctionCodeRange codeRange(funcName, line, labels);
+    results->finishCodegen(func, codeRange, *codegen->extractScriptCounts());
 
     // Unlike regular IonMonkey, which links and generates a new JitCode for
     // every function, we accumulate all the functions in the module in a
     // single MacroAssembler and link at end. Linking asm.js doesn't require a
     // CodeGenerator so we can destroy it now (via ScopedJSDeletePtr).
     return true;
 }
 
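The shape of the change in this file: GenerateAsmFunctionCode no longer reaches back into the shared ModuleCompiler to allocate entry labels or publish code ranges; it fills a caller-owned FunctionCompileResults, and the main thread alone (ModuleValidator::finishGeneratingFunction in AsmJSValidate.cpp below) folds those results into the module-wide tables. A hedged standalone model of that handoff, with simplified stand-in types:

    #include <cstdint>
    #include <vector>

    struct PerFunctionResult {   // filled by codegen, owned by the caller
        uint32_t entryOffset = 0;
        unsigned compileTimeMs = 0;
    };

    // Touches only its own output, so it depends on no module-wide state.
    bool generateCode(uint32_t /* funcIndex */, PerFunctionResult* out) {
        out->entryOffset = 0x40;  // stand-in for the entry label's offset
        out->compileTimeMs = 3;
        return true;
    }

    // Main thread only: the single writer of the module's entry-offset table.
    bool finishFunction(std::vector<uint32_t>& entryOffsets,
                        uint32_t funcIndex, const PerFunctionResult& r) {
        if (entryOffsets.size() <= funcIndex)
            entryOffsets.resize(funcIndex + 1);
        entryOffsets[funcIndex] = r.entryOffset;
        return true;
    }
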
--- a/js/src/asmjs/AsmJSCompile.h
+++ b/js/src/asmjs/AsmJSCompile.h
@@ -22,16 +22,17 @@
 #include "jit/CompileWrappers.h"
 
 namespace js {
 
 class AsmFunction;
 class LifoAlloc;
 class ModuleCompiler;
 class ModuleCompileResults;
+class FunctionCompileResults;
 
 namespace jit {
     class LIRGraph;
     class MIRGenerator;
 }
 
 struct ModuleCompileInputs
 {
@@ -71,14 +72,15 @@ class MOZ_RAII AsmModuleCompilerScope
         return *m_;
     }
 
     ~AsmModuleCompilerScope();
 };
 
 bool CreateAsmModuleCompiler(ModuleCompileInputs mci, AsmModuleCompilerScope* scope);
 bool GenerateAsmFunctionMIR(ModuleCompiler& m, LifoAlloc& lifo, AsmFunction& func, jit::MIRGenerator** mir);
-bool GenerateAsmFunctionCode(ModuleCompiler& m, AsmFunction& func, jit::MIRGenerator& mir, jit::LIRGraph& lir);
+bool GenerateAsmFunctionCode(ModuleCompiler& m, AsmFunction& func, jit::MIRGenerator& mir,
+                             jit::LIRGraph& lir, FunctionCompileResults* results);
 void FinishAsmModuleCompilation(ModuleCompiler& m, ScopedJSDeletePtr<ModuleCompileResults>* results);
 
 } // namespace js
 
 #endif // jit_AsmJSCompile_h
--- a/js/src/asmjs/AsmJSGlobals.h
+++ b/js/src/asmjs/AsmJSGlobals.h
@@ -1041,108 +1041,68 @@ class AsmFunction
     const VarInitializerVector& varInitializers() const { return varInitializers_; }
     size_t numLocals() const { return argTypes_.length() + varInitializers_.length(); }
     wasm::RetType returnedType() const {
         MOZ_ASSERT(returnedType_ != wasm::RetType::Which(-1));
         return returnedType_;
     }
 };
 
-const size_t LIFO_ALLOC_PRIMARY_CHUNK_SIZE = 1 << 12;
+class FunctionCompileResults
+{
+    const AsmFunction* func_;
+    jit::IonScriptCounts* counts_;
+    AsmJSModule::FunctionCodeRange codeRange_;
+
+  public:
+    FunctionCompileResults()
+      : func_(nullptr),
+        counts_(nullptr),
+        codeRange_()
+    {}
+
+    const AsmFunction& func() const { MOZ_ASSERT(func_); return *func_; }
+    const AsmJSModule::FunctionCodeRange& codeRange() const { return codeRange_; }
+    jit::IonScriptCounts* counts() const { return counts_; }
+
+    void finishCodegen(AsmFunction& func, AsmJSModule::FunctionCodeRange codeRange,
+                       jit::IonScriptCounts& counts)
+    {
+        func_ = &func;
+        codeRange_ = codeRange;
+        counts_ = &counts;
+    }
+};
 
 class ModuleCompileResults
 {
   public:
-    struct SlowFunction
-    {
-        SlowFunction(PropertyName* name, unsigned ms, unsigned line, unsigned column)
-         : name(name), ms(ms), line(line), column(column)
-        {}
-
-        PropertyName* name;
-        unsigned ms;
-        unsigned line;
-        unsigned column;
-    };
-
-    typedef Vector<SlowFunction                  , 0, SystemAllocPolicy> SlowFunctionVector;
-    typedef Vector<jit::Label*                   , 8, SystemAllocPolicy> LabelVector;
-    typedef Vector<AsmJSModule::FunctionCodeRange, 8, SystemAllocPolicy> FunctionCodeRangeVector;
-    typedef Vector<jit::IonScriptCounts*         , 0, SystemAllocPolicy> ScriptCountVector;
-#if defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-    typedef Vector<AsmJSModule::ProfiledFunction , 0, SystemAllocPolicy> ProfiledFunctionVector;
-#endif // defined(MOZ_VTUNE) || defined(JS_ION_PERF)
 
   private:
-    LifoAlloc               lifo_;
     jit::MacroAssembler     masm_;
 
-    SlowFunctionVector      slowFunctions_;
-    LabelVector             functionEntries_;
-    FunctionCodeRangeVector codeRanges_;
-    ScriptCountVector       functionCounts_;
-#if defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-    ProfiledFunctionVector  profiledFunctions_;
-#endif // defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-
     jit::NonAssertingLabel stackOverflowLabel_;
     jit::NonAssertingLabel asyncInterruptLabel_;
     jit::NonAssertingLabel syncInterruptLabel_;
     jit::NonAssertingLabel onDetachedLabel_;
     jit::NonAssertingLabel onConversionErrorLabel_;
     jit::NonAssertingLabel onOutOfBoundsLabel_;
     int64_t                usecBefore_;
 
   public:
     ModuleCompileResults()
-      : lifo_(LIFO_ALLOC_PRIMARY_CHUNK_SIZE),
-        masm_(jit::MacroAssembler::AsmJSToken()),
+      : masm_(jit::MacroAssembler::AsmJSToken()),
         usecBefore_(PRMJ_Now())
     {}
 
     jit::MacroAssembler& masm()           { return masm_; }
     jit::Label& stackOverflowLabel()      { return stackOverflowLabel_; }
     jit::Label& asyncInterruptLabel()     { return asyncInterruptLabel_; }
     jit::Label& syncInterruptLabel()      { return syncInterruptLabel_; }
     jit::Label& onOutOfBoundsLabel()      { return onOutOfBoundsLabel_; }
     jit::Label& onDetachedLabel()         { return onDetachedLabel_; }
     jit::Label& onConversionErrorLabel()  { return onConversionErrorLabel_; }
     int64_t usecBefore()                  { return usecBefore_; }
-
-    SlowFunctionVector& slowFunctions()   { return slowFunctions_; }
-
-    size_t numFunctionEntries() const     { return functionEntries_.length(); }
-    jit::Label* functionEntry(unsigned i) { return functionEntries_[i]; }
-
-    bool getOrCreateFunctionEntry(unsigned i, jit::Label** label) {
-        if (i == UINT32_MAX)
-            return false;
-        while (functionEntries_.length() <= i) {
-            jit::Label* newEntry = lifo_.new_<jit::Label>();
-            if (!newEntry || !functionEntries_.append(newEntry))
-                return false;
-        }
-        *label = functionEntries_[i];
-        return true;
-    }
-
-    size_t numCodeRanges() const { return codeRanges_.length(); }
-    bool addCodeRange(AsmJSModule::FunctionCodeRange range) { return codeRanges_.append(range); }
-    AsmJSModule::FunctionCodeRange& codeRange(unsigned i) { return codeRanges_[i]; }
-
-    size_t numFunctionCounts() const { return functionCounts_.length(); }
-    bool addFunctionCounts(jit::IonScriptCounts* counts) { return functionCounts_.append(counts); }
-    jit::IonScriptCounts* functionCount(unsigned i) { return functionCounts_[i]; }
-
-#if defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-    size_t numProfiledFunctions() const { return profiledFunctions_.length(); }
-    bool addProfiledFunction(AsmJSModule::ProfiledFunction func) {
-        return profiledFunctions_.append(func);
-    }
-    AsmJSModule::ProfiledFunction& profiledFunction(unsigned i) {
-        return profiledFunctions_[i];
-    }
-#endif // defined(MOZ_VTUNE) || defined(JS_ION_PERF)
 };
 
 } // namespace js
 
 #endif //jit_AsmJSGlobals_h
--- a/js/src/asmjs/AsmJSModule.cpp
+++ b/js/src/asmjs/AsmJSModule.cpp
@@ -317,17 +317,19 @@ AsmJSModule::finish(ExclusiveContext* cx
     // Copy over metadata.
     staticLinkData_.interruptExitOffset = interruptLabel.offset();
     staticLinkData_.outOfBoundsExitOffset = outOfBoundsLabel.offset();
 
     // Heap-access metadata used for link-time patching and fault-handling.
     heapAccesses_ = masm.extractAsmJSHeapAccesses();
 
     // Call-site metadata used for stack unwinding.
-    callSites_ = masm.extractCallSites();
+    const CallSiteAndTargetVector& callSites = masm.callSites();
+    if (!callSites_.appendAll(callSites))
+        return false;
 
     MOZ_ASSERT(pod.functionBytes_ % AsmJSPageSize == 0);
 
     // Absolute link metadata: absolute addresses that refer to some fixed
     // address in the address space.
     AbsoluteLinkArray& absoluteLinks = staticLinkData_.absoluteLinks;
     for (size_t i = 0; i < masm.numAsmJSAbsoluteLinks(); i++) {
         AsmJSAbsoluteLink src = masm.asmJSAbsoluteLink(i);
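
Call sites can no longer be moved out of the assembler wholesale: the assembler's vector now holds CallSiteAndTarget entries (a call site plus an optional callee index, defined in Assembler-shared.h below) while the finished module keeps plain CallSites, so AsmJSModule::finish copies each entry, slicing it down to its CallSite base. A standalone illustration of that copy, using hypothetical stand-in types:

    #include <vector>

    struct CallSite { unsigned returnAddressOffset = 0; };
    struct CallSiteAndTarget : CallSite { unsigned targetIndex = 0; };

    // Copying derived elements into a vector of the base keeps the CallSite
    // part and drops targetIndex, which the finished module does not need.
    std::vector<CallSite> copyForModule(const std::vector<CallSiteAndTarget>& in) {
        return std::vector<CallSite>(in.begin(), in.end());
    }
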
--- a/js/src/asmjs/AsmJSModule.h
+++ b/js/src/asmjs/AsmJSModule.h
@@ -107,16 +107,20 @@ struct MOZ_STACK_CLASS AsmJSFunctionLabe
     jit::Label  profilingEntry;
     jit::Label& nonProfilingEntry;
     jit::Label  profilingJump;
     jit::Label  profilingEpilogue;
     jit::Label  profilingReturn;
     jit::Label  endAfterOOL;
     mozilla::Maybe<jit::Label> overflowThunk;
     jit::Label& overflowExit;
+
+  private:
+    AsmJSFunctionLabels(const AsmJSFunctionLabels&) = delete;
+    AsmJSFunctionLabels& operator=(const AsmJSFunctionLabels&) = delete;
 };
 
 // Represents the type and value of an asm.js numeric literal.
 //
 // A literal is a double iff the literal contains a decimal point (even if the
 // fractional part is 0). Otherwise, integers may be classified:
 //  fixnum: [0, 2^31)
 //  negative int: [-2^31, 0)
@@ -626,16 +630,20 @@ class AsmJSModule
     };
 
     class FunctionCodeRange : public CodeRange
     {
       private:
         PropertyName* name_;
 
       public:
+        FunctionCodeRange()
+          : CodeRange(), name_(nullptr)
+        {}
+
         FunctionCodeRange(PropertyName* name, uint32_t lineNumber, const AsmJSFunctionLabels& l)
           : CodeRange(UINT32_MAX, lineNumber, l), name_(name)
         {}
 
         PropertyName* name() const { return name_; }
 
         void initNameIndex(uint32_t nameIndex) {
             MOZ_ASSERT(nameIndex_ == UINT32_MAX);
@@ -1154,24 +1162,24 @@ class AsmJSModule
         return true;
     }
     bool addCodeRange(CodeRange::Kind kind, uint32_t begin, uint32_t end) {
         return codeRanges_.append(CodeRange(kind, begin, end));
     }
     bool addCodeRange(CodeRange::Kind kind, uint32_t begin, uint32_t pret, uint32_t end) {
         return codeRanges_.append(CodeRange(kind, begin, pret, end));
     }
-    bool addFunctionCodeRange(PropertyName* name, FunctionCodeRange&& codeRange)
+    bool addFunctionCodeRange(PropertyName* name, FunctionCodeRange codeRange)
     {
         MOZ_ASSERT(!isFinished());
         MOZ_ASSERT(name->isTenured());
         if (names_.length() >= UINT32_MAX)
             return false;
         codeRange.initNameIndex(names_.length());
-        return names_.append(name) && codeRanges_.append(Move(codeRange));
+        return names_.append(name) && codeRanges_.append(codeRange);
     }
     bool addBuiltinThunkCodeRange(AsmJSExit::BuiltinKind builtin, uint32_t begin,
                                   uint32_t profilingReturn, uint32_t end)
     {
         return builtinThunkOffsets_.append(begin) &&
                codeRanges_.append(CodeRange(builtin, begin, profilingReturn, end));
     }
     bool addExit(unsigned ffiIndex, unsigned* exitIndex) {
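
Two small enablers in this header: FunctionCodeRange gains a default constructor because FunctionCompileResults (AsmJSGlobals.h) now holds one by value before codegen fills it in, and addFunctionCodeRange drops its rvalue reference because the range is a small copyable record, so pass-by-value accepts lvalues and temporaries alike. A toy illustration of the by-value point:

    #include <vector>

    struct Range { unsigned begin = 0, end = 0; };  // small, trivially copyable

    bool addRange(std::vector<Range>& out, Range r) {  // by value: no Move() needed
        out.push_back(r);
        return true;
    }
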
--- a/js/src/asmjs/AsmJSValidate.cpp
+++ b/js/src/asmjs/AsmJSValidate.cpp
@@ -579,16 +579,18 @@ TypedArrayLoadType(Scalar::Type viewType
         return Type::MaybeFloat;
       case Scalar::Float64:
         return Type::MaybeDouble;
       default:;
     }
     MOZ_CRASH("Unexpected array type");
 }
 
+const size_t LIFO_ALLOC_PRIMARY_CHUNK_SIZE = 1 << 12;
+
 namespace {
 
 // The ModuleValidator encapsulates the entire validation of an asm.js module.
 // Its lifetime goes from the validation of the top components of an asm.js
 // module (all the globals), the emission of bytecode for all the functions in
 // the module and the validation of function's pointer tables. It also finishes
 // the compilation of all the module's stubs.
 //
@@ -880,22 +882,35 @@ class MOZ_STACK_CLASS ModuleValidator
                 hn = AddToHash(hn, args[i].which());
             return hn;
         }
         static bool match(const ExitDescriptor& lhs, const Lookup& rhs) {
             return lhs.name_ == rhs.name_ && *lhs.sig_ == *rhs.sig_;
         }
     };
 
+    struct SlowFunction
+    {
+        SlowFunction(PropertyName* name, unsigned ms, unsigned line, unsigned column)
+         : name(name), ms(ms), line(line), column(column)
+        {}
+
+        PropertyName* name;
+        unsigned ms;
+        unsigned line;
+        unsigned column;
+    };
+
   private:
     typedef HashMap<PropertyName*, Global*> GlobalMap;
     typedef HashMap<PropertyName*, MathBuiltin> MathNameMap;
     typedef HashMap<PropertyName*, AsmJSAtomicsBuiltinFunction> AtomicsNameMap;
     typedef HashMap<PropertyName*, AsmJSSimdOperation> SimdOperationNameMap;
     typedef Vector<ArrayView> ArrayViewVector;
+    typedef Vector<SlowFunction> SlowFunctionVector;
 
   public:
     typedef HashMap<ExitDescriptor, unsigned, ExitDescriptor> ExitMap;
 
   private:
     ExclusiveContext*                       cx_;
     AsmJSParser&                            parser_;
 
@@ -919,16 +934,19 @@ class MOZ_STACK_CLASS ModuleValidator
     uint32_t                                errorOffset_;
     bool                                    errorOverRecursed_;
 
     bool                                    canValidateChangeHeap_;
     bool                                    hasChangeHeap_;
     bool                                    supportsSimd_;
     bool                                    atomicsPresent_;
 
+    Vector<uint32_t>                        functionEntryOffsets_;
+    SlowFunctionVector                      slowFunctions_;
+
     ScopedJSDeletePtr<ModuleCompileResults> compileResults_;
     DebugOnly<bool>                         finishedFunctionBodies_;
 
   public:
     ModuleValidator(ExclusiveContext* cx, AsmJSParser& parser)
       : cx_(cx),
         parser_(parser),
         moduleLifo_(LIFO_ALLOC_PRIMARY_CHUNK_SIZE),
@@ -944,16 +962,18 @@ class MOZ_STACK_CLASS ModuleValidator
         moduleFunctionName_(nullptr),
         errorString_(nullptr),
         errorOffset_(UINT32_MAX),
         errorOverRecursed_(false),
         canValidateChangeHeap_(false),
         hasChangeHeap_(false),
         supportsSimd_(cx->jitSupportsSimd()),
         atomicsPresent_(false),
+        functionEntryOffsets_(cx),
+        slowFunctions_(cx),
         compileResults_(nullptr),
         finishedFunctionBodies_(false)
     {
         MOZ_ASSERT(moduleFunctionNode_->pn_funbox == parser.pc->sc->asFunctionBox());
     }
 
     ~ModuleValidator() {
         if (errorString_) {
@@ -1394,16 +1414,19 @@ class MOZ_STACK_CLASS ModuleValidator
         return *functions_[i];
     }
     unsigned numFuncPtrTables() const {
         return funcPtrTables_.length();
     }
     FuncPtrTable& funcPtrTable(unsigned i) const {
         return *funcPtrTables_[i];
     }
+    uint32_t functionEntryOffset(unsigned i) {
+        return functionEntryOffsets_[i];
+    }
 
     const Global* lookupGlobal(PropertyName* name) const {
         if (GlobalMap::Ptr p = globals_.lookup(name))
             return p->value();
         return nullptr;
     }
 
     Func* lookupFunction(PropertyName* name) {
@@ -1440,19 +1463,53 @@ class MOZ_STACK_CLASS ModuleValidator
     // End-of-compilation utils
     MacroAssembler& masm()           { return compileResults_->masm(); }
     Label& stackOverflowLabel()      { return compileResults_->stackOverflowLabel(); }
     Label& asyncInterruptLabel()     { return compileResults_->asyncInterruptLabel(); }
     Label& syncInterruptLabel()      { return compileResults_->syncInterruptLabel(); }
     Label& onDetachedLabel()         { return compileResults_->onDetachedLabel(); }
     Label& onOutOfBoundsLabel()      { return compileResults_->onOutOfBoundsLabel(); }
     Label& onConversionErrorLabel()  { return compileResults_->onConversionErrorLabel(); }
-    Label* functionEntry(unsigned i) { return compileResults_->functionEntry(i); }
     ExitMap::Range allExits() const  { return exits_.all(); }
 
+    bool finishGeneratingFunction(FunctionCompileResults& results) {
+        const AsmFunction& func = results.func();
+        unsigned i = func.funcIndex();
+        if (functionEntryOffsets_.length() <= i && !functionEntryOffsets_.resize(i + 1))
+            return false;
+
+        AsmJSModule::FunctionCodeRange codeRange = results.codeRange();
+        functionEntryOffsets_[i] = codeRange.entry();
+
+        PropertyName* funcName = func.name();
+        unsigned line = func.lineno();
+        unsigned column = func.column();
+
+        // These must be done before the module is done with function bodies.
+        if (results.counts() && !module().addFunctionCounts(results.counts()))
+            return false;
+        if (!module().addFunctionCodeRange(codeRange.name(), codeRange))
+            return false;
+
+        unsigned compileTime = func.compileTime();
+        if (compileTime >= 250) {
+            if (!slowFunctions_.append(SlowFunction(funcName, compileTime, line, column)))
+                return false;
+        }
+
+#if defined(MOZ_VTUNE) || defined(JS_ION_PERF)
+        // Perf and profiling information
+        AsmJSModule::ProfiledFunction pf(funcName, codeRange.entry(), codeRange.end(), line, column);
+        if (!module().addProfiledFunction(Move(pf)))
+            return false;
+#endif // defined(MOZ_VTUNE) || defined(JS_ION_PERF)
+
+        return true;
+    }
+
     bool finishGeneratingEntry(unsigned exportIndex, Label* begin) {
         MOZ_ASSERT(finishedFunctionBodies_);
         module_->exportedFunction(exportIndex).initCodeOffset(begin->offset());
         uint32_t end = masm().currentOffset();
         return module_->addCodeRange(AsmJSModule::CodeRange::Entry, begin->offset(), end);
     }
     bool finishGeneratingInterpExit(unsigned exitIndex, Label* begin, Label* profilingReturn) {
         MOZ_ASSERT(finishedFunctionBodies_);
@@ -1499,18 +1556,17 @@ class MOZ_STACK_CLASS ModuleValidator
         // Finally, convert all the function-pointer table elements into
         // RelativeLinks that will be patched by AsmJSModule::staticallyLink.
         for (unsigned tableIndex = 0; tableIndex < numFuncPtrTables(); tableIndex++) {
             ModuleValidator::FuncPtrTable& table = funcPtrTable(tableIndex);
             unsigned tableBaseOffset = module_->offsetOfGlobalData() + table.globalDataOffset();
             for (unsigned elemIndex = 0; elemIndex < table.numElems(); elemIndex++) {
                 AsmJSModule::RelativeLink link(AsmJSModule::RelativeLink::RawPointer);
                 link.patchAtOffset = tableBaseOffset + elemIndex * sizeof(uint8_t*);
-                Label* entry = functionEntry(table.elem(elemIndex).funcIndex());
-                link.targetOffset = entry->offset();
+                link.targetOffset = functionEntryOffset(table.elem(elemIndex).funcIndex());
                 if (!module_->addRelativeLink(link))
                     return false;
             }
         }
 
         *module = module_.forget();
         return true;
     }
@@ -1525,34 +1581,24 @@ class MOZ_STACK_CLASS ModuleValidator
             module_->setViewsAreShared();
         }
         module_->startFunctionBodies();
     }
     bool finishFunctionBodies(ScopedJSDeletePtr<ModuleCompileResults>* compileResults) {
         // Take ownership of compilation results
         compileResults_ = compileResults->forget();
 
-        // These must be done before the module is done with function bodies.
-        for (size_t i = 0; i < compileResults_->numFunctionCounts(); ++i) {
-            if (!module().addFunctionCounts(compileResults_->functionCount(i)))
-                return false;
-        }
-
-#if defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-        for (size_t i = 0; i < compileResults_->numProfiledFunctions(); ++i) {
-            if (!module().addProfiledFunction(Move(compileResults_->profiledFunction(i))))
-                return false;
-        }
-#endif // defined(MOZ_VTUNE) || defined(JS_ION_PERF)
-
-        // Hand in code ranges to the AsmJSModule
-        for (size_t i = 0; i < compileResults_->numCodeRanges(); ++i) {
-            AsmJSModule::FunctionCodeRange& codeRange = compileResults_->codeRange(i);
-            if (!module().addFunctionCodeRange(codeRange.name(), Move(codeRange)))
-                return false;
+        // Patch internal calls to their final positions
+        for (auto& cs : masm().callSites()) {
+            if (!cs.isInternal())
+                continue;
+            MOZ_ASSERT(cs.kind() == CallSiteDesc::Relative);
+            uint32_t callerOffset = cs.returnAddressOffset();
+            uint32_t calleeOffset = functionEntryOffset(cs.targetIndex());
+            masm().patchCall(callerOffset, calleeOffset);
         }
 
         // When an interrupt is triggered, all function code is mprotected and,
         // for sanity, stub code (particularly the interrupt stub) is not.
         // Protection works at page granularity, so we need to ensure that no
         // stub code gets into the function code pages.
         // TODO; this is no longer true and could be removed, see also
         // bug 1200609.
@@ -1564,29 +1610,28 @@ class MOZ_STACK_CLASS ModuleValidator
         return true;
     }
 
     void buildCompilationTimeReport(JS::AsmJSCacheResult cacheResult, ScopedJSFreePtr<char>* out) {
 #ifndef JS_MORE_DETERMINISTIC
         ScopedJSFreePtr<char> slowFuns;
         int64_t usecAfter = PRMJ_Now();
         int msTotal = (usecAfter - compileResults_->usecBefore()) / PRMJ_USEC_PER_MSEC;
-        ModuleCompileResults::SlowFunctionVector& slowFunctions = compileResults_->slowFunctions();
-        if (!slowFunctions.empty()) {
-            slowFuns.reset(JS_smprintf("; %d functions compiled slowly: ", slowFunctions.length()));
+        if (!slowFunctions_.empty()) {
+            slowFuns.reset(JS_smprintf("; %d functions compiled slowly: ", slowFunctions_.length()));
             if (!slowFuns)
                 return;
-            for (unsigned i = 0; i < slowFunctions.length(); i++) {
-                ModuleCompileResults::SlowFunction& func = slowFunctions[i];
+            for (unsigned i = 0; i < slowFunctions_.length(); i++) {
+                ModuleValidator::SlowFunction& func = slowFunctions_[i];
                 JSAutoByteString name;
                 if (!AtomToPrintableString(cx_, func.name, &name))
                     return;
                 slowFuns.reset(JS_smprintf("%s%s:%u:%u (%ums)%s", slowFuns.get(),
                                            name.ptr(), func.line, func.column, func.ms,
-                                           i+1 < slowFunctions.length() ? ", " : ""));
+                                           i+1 < slowFunctions_.length() ? ", " : ""));
                 if (!slowFuns)
                     return;
             }
         }
         const char* cacheString = "";
         switch (cacheResult) {
           case JS::AsmJSCache_Success:
             cacheString = "stored in cache";
@@ -6534,17 +6579,20 @@ CheckFunctionsSequential(ModuleValidator
 
             lir = GenerateLIR(mir);
             if (!lir)
                 return m.failOffset(func->srcBegin(), "internal compiler failure (probably out of memory)");
 
             func->accumulateCompileTime((PRMJ_Now() - before) / PRMJ_USEC_PER_MSEC);
         }
 
-        if (!GenerateAsmFunctionCode(mc, *func, *mir, *lir))
+        FunctionCompileResults results;
+        if (!GenerateAsmFunctionCode(mc, *func, *mir, *lir, &results))
+            return false;
+        if (!m.finishGeneratingFunction(results))
             return false;
     }
 
     if (!CheckAllFunctionsDefined(m))
         return false;
 
     FinishAsmModuleCompilation(mc, compileResults);
     return true;
@@ -6615,28 +6663,31 @@ GetFinishedCompilation(ModuleCompiler& m
         }
         HelperThreadState().wait(GlobalHelperThreadState::CONSUMER);
     }
 
     return nullptr;
 }
 
 static bool
-GetUsedTask(ModuleCompiler& m, ParallelGroupState& group, AsmJSParallelTask** outTask)
+GetUsedTask(ModuleValidator& m, ModuleCompiler& mc, ParallelGroupState& group, AsmJSParallelTask** outTask)
 {
     // Block until a used LifoAlloc becomes available.
-    AsmJSParallelTask* task = GetFinishedCompilation(m, group);
+    AsmJSParallelTask* task = GetFinishedCompilation(mc, group);
     if (!task)
         return false;
 
     auto& func = *reinterpret_cast<AsmFunction*>(task->func);
     func.accumulateCompileTime(task->compileTime);
 
     // Perform code generation on the main thread.
-    if (!GenerateAsmFunctionCode(m, func, *task->mir, *task->lir))
+    FunctionCompileResults results;
+    if (!GenerateAsmFunctionCode(mc, func, *task->mir, *task->lir, &results))
+        return false;
+    if (!m.finishGeneratingFunction(results))
         return false;
 
     group.compiledJobs++;
 
     // Clear the LifoAlloc for use by another helper.
     TempAllocator& tempAlloc = task->mir->alloc();
     tempAlloc.TempAllocator::~TempAllocator();
     task->lifo.releaseAll();
@@ -6678,17 +6729,17 @@ CheckFunctionsParallel(ModuleValidator& 
     AsmJSParallelTask* task = nullptr;
     for (unsigned i = 0;; i++) {
         TokenKind tk;
         if (!PeekToken(m.parser(), &tk))
             return false;
         if (tk != TOK_FUNCTION)
             break;
 
-        if (!task && !GetUnusedTask(group, i, &task) && !GetUsedTask(mc, group, &task))
+        if (!task && !GetUnusedTask(group, i, &task) && !GetUsedTask(m, mc, group, &task))
             return false;
 
         AsmFunction* func;
         if (!CheckFunction(m, task->lifo, &func))
             return false;
 
         // In the case of the change-heap function, no function is produced.
         if (!func)
@@ -6706,17 +6757,17 @@ CheckFunctionsParallel(ModuleValidator& 
 
         group.outstandingJobs++;
         task = nullptr;
     }
 
     // Block for all outstanding helpers to complete.
     while (group.outstandingJobs > 0) {
         AsmJSParallelTask* ignored = nullptr;
-        if (!GetUsedTask(mc, group, &ignored))
+        if (!GetUsedTask(m, mc, group, &ignored))
             return false;
     }
 
     if (!CheckAllFunctionsDefined(m))
         return false;
 
     MOZ_ASSERT(group.outstandingJobs == 0);
     MOZ_ASSERT(group.compiledJobs == m.numFunctions());
@@ -7176,17 +7227,19 @@ GenerateEntry(ModuleValidator& m, unsign
                 MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected stack arg type");
             }
             break;
         }
     }
 
     // Call into the real function.
     masm.assertStackAlignment(AsmJSStackAlignment);
-    masm.call(CallSiteDesc(CallSiteDesc::Relative), m.functionEntry(func.funcIndex()));
+    Label funcLabel;
+    funcLabel.bind(m.functionEntryOffset(func.funcIndex()));
+    masm.call(CallSiteDesc(CallSiteDesc::Relative), &funcLabel);
 
     // Recover the stack pointer value before dynamic alignment.
     masm.loadAsmJSActivation(scratch);
     masm.loadStackPtr(Address(scratch, AsmJSActivation::offsetOfEntrySP()));
     masm.setFramePushed(FramePushedForEntrySP);
 
     // Recover the 'argv' pointer which was saved before aligning the stack.
     masm.Pop(argv);
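
One subtlety in GenerateEntry above: entry stubs are generated after finishFunctionBodies, so every function's entry offset is already final; binding a fresh Label at that offset lets the existing call(CallSiteDesc, Label*) path resolve the relative call at emission time instead of queueing a fixup. A toy model of why a bound label needs no later patching:

    #include <cassert>
    #include <cstdint>

    struct ToyLabel {
        bool bound = false;
        uint32_t offset = 0;
        void bind(uint32_t off) { bound = true; offset = off; }
    };

    // With a bound target the displacement is computable immediately.
    int32_t emitRelativeCall(const ToyLabel& target, uint32_t pc) {
        assert(target.bound);  // stubs are emitted after all function bodies
        return int32_t(target.offset) - int32_t(pc);
    }
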
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -13702,27 +13702,27 @@ class MAsmJSCall final
 {
   public:
     class Callee {
       public:
         enum Which { Internal, Dynamic, Builtin };
       private:
         Which which_;
         union {
-            Label* internal_;
+            AsmJSInternalCallee internal_;
             MDefinition* dynamic_;
             AsmJSImmKind builtin_;
         } u;
       public:
         Callee() {}
-        explicit Callee(Label* callee) : which_(Internal) { u.internal_ = callee; }
+        explicit Callee(AsmJSInternalCallee callee) : which_(Internal) { u.internal_ = callee; }
         explicit Callee(MDefinition* callee) : which_(Dynamic) { u.dynamic_ = callee; }
         explicit Callee(AsmJSImmKind callee) : which_(Builtin) { u.builtin_ = callee; }
         Which which() const { return which_; }
-        Label* internal() const { MOZ_ASSERT(which_ == Internal); return u.internal_; }
+        AsmJSInternalCallee internal() const { MOZ_ASSERT(which_ == Internal); return u.internal_; }
         MDefinition* dynamic() const { MOZ_ASSERT(which_ == Dynamic); return u.dynamic_; }
         AsmJSImmKind builtin() const { MOZ_ASSERT(which_ == Builtin); return u.builtin_; }
     };
 
   private:
     CallSiteDesc desc_;
     Callee callee_;
     FixedList<AnyRegister> argRegs_;
--- a/js/src/jit/MacroAssembler-inl.h
+++ b/js/src/jit/MacroAssembler-inl.h
@@ -74,25 +74,32 @@ MacroAssembler::PushWithPatch(ImmPtr imm
 }
 
 // ===============================================================
 // Simple call functions.
 
 void
 MacroAssembler::call(const CallSiteDesc& desc, const Register reg)
 {
-    call(reg);
-    append(desc, currentOffset(), framePushed());
+    CodeOffsetLabel l = call(reg);
+    append(desc, l, framePushed());
 }
 
 void
 MacroAssembler::call(const CallSiteDesc& desc, Label* label)
 {
-    call(label);
-    append(desc, currentOffset(), framePushed());
+    CodeOffsetLabel l = call(label);
+    append(desc, l, framePushed());
+}
+
+void
+MacroAssembler::call(const CallSiteDesc& desc, AsmJSInternalCallee callee)
+{
+    CodeOffsetLabel l = callWithPatch();
+    append(desc, l, framePushed(), callee.index);
 }
 
 // ===============================================================
 // ABI function calls.
 
 void
 MacroAssembler::passABIArg(Register reg)
 {
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -471,28 +471,32 @@ class MacroAssembler : public MacroAssem
     // Manipulated by the AutoGenericRegisterScope class.
     AllocatableRegisterSet debugTrackedRegisters_;
 #endif // DEBUG
 
   public:
     // ===============================================================
     // Simple call functions.
 
-    void call(Register reg) PER_SHARED_ARCH;
+    CodeOffsetLabel call(Register reg) PER_SHARED_ARCH;
+    CodeOffsetLabel call(Label* label) PER_SHARED_ARCH;
     void call(const Address& addr) DEFINED_ON(x86_shared);
-    void call(Label* label) PER_SHARED_ARCH;
     void call(ImmWord imm) PER_SHARED_ARCH;
     // Call a target native function, which is neither traceable nor movable.
     void call(ImmPtr imm) PER_SHARED_ARCH;
     void call(AsmJSImmPtr imm) PER_SHARED_ARCH;
     // Call a target JitCode, which must be traceable, and may be movable.
     void call(JitCode* c) PER_SHARED_ARCH;
 
     inline void call(const CallSiteDesc& desc, const Register reg);
     inline void call(const CallSiteDesc& desc, Label* label);
+    inline void call(const CallSiteDesc& desc, AsmJSInternalCallee callee);
+
+    CodeOffsetLabel callWithPatch() PER_SHARED_ARCH;
+    void patchCall(uint32_t callerOffset, uint32_t calleeOffset) PER_SHARED_ARCH;
 
     // Push the return address and make a call. On platforms where this function
     // is not defined, push the link register (pushReturnAddress) at the entry
     // point of the callee.
     void callAndPushReturnAddress(Register reg) DEFINED_ON(mips_shared, x86_shared);
     void callAndPushReturnAddress(Label* label) DEFINED_ON(mips_shared, x86_shared);
 
     void pushReturnAddress() DEFINED_ON(arm, arm64);
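
The signature change from void to CodeOffsetLabel ripples through every PER_SHARED_ARCH definition below: each call() now reports where it ended, which is the return-address offset that the call-site metadata records (see append() in Assembler-shared.h), while callWithPatch()/patchCall() form the new pair for calls whose target is unknown at emission time. A toy model of the return-the-offset pattern:

    #include <cstdint>
    #include <vector>

    struct ToyMasm {
        std::vector<uint8_t> code;
        uint32_t currentOffset() const { return uint32_t(code.size()); }

        // Formerly void; returning the post-call offset lets the caller tag
        // the exact return address with metadata.
        uint32_t call() {
            code.insert(code.end(), {0xE8, 0, 0, 0, 0});
            return currentOffset();
        }
    };
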
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -5093,27 +5093,29 @@ MacroAssembler::reserveStack(uint32_t am
     if (amount)
         ma_sub(Imm32(amount), sp);
     adjustFrame(amount);
 }
 
 // ===============================================================
 // Simple call functions.
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Register reg)
 {
     as_blx(reg);
-}
-
-void
+    return CodeOffsetLabel(currentOffset());
+}
+
+CodeOffsetLabel
 MacroAssembler::call(Label* label)
 {
-    // For now, assume that it'll be nearby?
+    // For now, assume that it'll be nearby.
     as_bl(label, Always);
+    return CodeOffsetLabel(currentOffset());
 }
 
 void
 MacroAssembler::call(ImmWord imm)
 {
     call(ImmPtr((void*)imm.value));
 }
 
@@ -5143,16 +5145,30 @@ MacroAssembler::call(JitCode* c)
     else
         rs = L_LDR;
 
     ScratchRegisterScope scratch(*this);
     ma_movPatchable(ImmPtr(c->raw()), scratch, Always, rs);
     callJitNoProfiler(scratch);
 }
 
+CodeOffsetLabel
+MacroAssembler::callWithPatch()
+{
+    // For now, assume that it'll be nearby.
+    as_bl(BOffImm(), Always, /* documentation */ nullptr);
+    return CodeOffsetLabel(currentOffset());
+}
+void
+MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset)
+{
+    BufferOffset inst(callerOffset - 4);
+    as_bl(BufferOffset(calleeOffset).diffB<BOffImm>(inst), Always, inst);
+}
+
 void
 MacroAssembler::pushReturnAddress()
 {
     push(lr);
 }
 
 // ===============================================================
 // ABI function calls.
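
The ARM fixup above leans on two facts: the recorded offset is taken after the 4-byte BL, so the instruction itself sits at callerOffset - 4, and a BL's 24-bit immediate encodes a word offset relative to PC + 8. A rough standalone model of that encoding (A32, condition AL); the real code goes through as_bl and BOffImm instead:

    #include <cstdint>

    // Encode "bl target" located at blAddr: imm24 = (target - (blAddr + 8)) >> 2.
    uint32_t encodeBl(uint32_t blAddr, uint32_t target) {
        int32_t off = (int32_t(target) - int32_t(blAddr + 8)) >> 2;
        return 0xEB000000u | (uint32_t(off) & 0x00FFFFFFu);
    }
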
--- a/js/src/jit/arm64/MacroAssembler-arm64.cpp
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@@ -497,28 +497,30 @@ MacroAssembler::reserveStack(uint32_t am
     // It would save some instructions if we had a fixed frame size.
     vixl::MacroAssembler::Claim(Operand(amount));
     adjustFrame(amount);
 }
 
 // ===============================================================
 // Simple call functions.
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Register reg)
 {
     syncStackPtr();
     Blr(ARMRegister(reg, 64));
+    return CodeOffsetLabel(currentOffset());
 }
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Label* label)
 {
     syncStackPtr();
     Bl(label);
+    return CodeOffsetLabel(currentOffset());
 }
 
 void
 MacroAssembler::call(ImmWord imm)
 {
     call(ImmPtr((void*)imm.value));
 }
 
@@ -546,16 +548,28 @@ MacroAssembler::call(JitCode* c)
     vixl::UseScratchRegisterScope temps(this);
     const ARMRegister scratch64 = temps.AcquireX();
     syncStackPtr();
     BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
     addPendingJump(off, ImmPtr(c->raw()), Relocation::JITCODE);
     blr(scratch64);
 }
 
+CodeOffsetLabel
+MacroAssembler::callWithPatch()
+{
+    MOZ_CRASH("NYI");
+    return CodeOffsetLabel();
+}
+void
+MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset)
+{
+    MOZ_CRASH("NYI");
+}
+
 void
 MacroAssembler::pushReturnAddress()
 {
     push(lr);
 }
 
 // ===============================================================
 // ABI function calls.
--- a/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
+++ b/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
@@ -850,27 +850,40 @@ MacroAssembler::Pop(const ValueOperand& 
     popValue(val);
     framePushed_ -= sizeof(Value);
 }
 
 
 // ===============================================================
 // Simple call functions.
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Register reg)
 {
     as_jalr(reg);
     as_nop();
+    return CodeOffsetLabel(currentOffset());
 }
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Label* label)
 {
     ma_bal(label);
+    return CodeOffsetLabel(currentOffset());
+}
+
+CodeOffsetLabel
+MacroAssembler::callWithPatch()
+{
+    MOZ_CRASH("NYI");
+}
+void
+MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset)
+{
+    MOZ_CRASH("NYI");
 }
 
 void
 MacroAssembler::call(AsmJSImmPtr target)
 {
     movePtr(target, CallReg);
     call(CallReg);
 }
--- a/js/src/jit/shared/Assembler-shared.h
+++ b/js/src/jit/shared/Assembler-shared.h
@@ -654,17 +654,17 @@ class CallSiteDesc
     };
     CallSiteDesc() {}
     explicit CallSiteDesc(Kind kind)
       : line_(0), column_(0), kind_(kind)
     {}
     CallSiteDesc(uint32_t line, uint32_t column, Kind kind)
       : line_(line), column_(column), kind_(kind)
     {
-        MOZ_ASSERT(column <= INT32_MAX);
+        MOZ_ASSERT(column_ == column, "column must fit in 31 bits");
     }
     uint32_t line() const { return line_; }
     uint32_t column() const { return column_; }
     Kind kind() const { return Kind(kind_); }
 };
 
 // Adds to CallSiteDesc the metadata necessary to walk the stack given an
 // initial stack-pointer.
@@ -689,16 +689,33 @@ class CallSite : public CallSiteDesc
     // function was called. In particular, this includes the pushed return
     // address on all archs (whether or not the call instruction pushes the
     // return address (x86/x64) or the prologue does (ARM/MIPS)).
     uint32_t stackDepth() const { return stackDepth_; }
 };
 
 typedef Vector<CallSite, 0, SystemAllocPolicy> CallSiteVector;
 
+class CallSiteAndTarget : public CallSite
+{
+    uint32_t targetIndex_;
+
+  public:
+    explicit CallSiteAndTarget(CallSite cs, uint32_t targetIndex)
+      : CallSite(cs), targetIndex_(targetIndex)
+    { }
+
+    static const uint32_t NOT_INTERNAL = UINT32_MAX;
+
+    bool isInternal() const { return targetIndex_ != NOT_INTERNAL; }
+    uint32_t targetIndex() const { MOZ_ASSERT(isInternal()); return targetIndex_; }
+};
+
+typedef Vector<CallSiteAndTarget, 0, SystemAllocPolicy> CallSiteAndTargetVector;
+
 // As an invariant across architectures, within asm.js code:
 //   $sp % AsmJSStackAlignment = (sizeof(AsmJSFrame) + masm.framePushed) % AsmJSStackAlignment
 // Thus, AsmJSFrame represents the bytes pushed after the call (which occurred
 // with a AsmJSStackAlignment-aligned StackPointer) that are not included in
 // masm.framePushed.
 struct AsmJSFrame
 {
     // The caller's saved frame pointer. In non-profiling mode, internal
@@ -911,20 +928,34 @@ class AsmJSAbsoluteAddress
 struct AsmJSAbsoluteLink
 {
     AsmJSAbsoluteLink(CodeOffsetLabel patchAt, AsmJSImmKind target)
       : patchAt(patchAt), target(target) {}
     CodeOffsetLabel patchAt;
     AsmJSImmKind target;
 };
 
+// Represents a call from an asm.js function to another asm.js function,
+// represented by the index of the callee in the Module Validator
+struct AsmJSInternalCallee
+{
+    uint32_t index;
+
+    // Provide a default constructor for embedding it in unions
+    AsmJSInternalCallee() = default;
+
+    explicit AsmJSInternalCallee(uint32_t calleeIndex)
+      : index(calleeIndex)
+    {}
+};
+
 // The base class of all Assemblers for all archs.
 class AssemblerShared
 {
-    Vector<CallSite, 0, SystemAllocPolicy> callsites_;
+    CallSiteAndTargetVector callsites_;
     Vector<AsmJSHeapAccess, 0, SystemAllocPolicy> asmJSHeapAccesses_;
     Vector<AsmJSGlobalAccess, 0, SystemAllocPolicy> asmJSGlobalAccesses_;
     Vector<AsmJSAbsoluteLink, 0, SystemAllocPolicy> asmJSAbsoluteLinks_;
 
   protected:
     Vector<CodeLabel, 0, SystemAllocPolicy> codeLabels_;
 
     bool enoughMemory_;
@@ -947,23 +978,25 @@ class AssemblerShared
     bool oom() const {
         return !enoughMemory_;
     }
 
     bool embedsNurseryPointers() const {
         return embedsNurseryPointers_;
     }
 
-    void append(const CallSiteDesc& desc, size_t currentOffset, size_t framePushed) {
+    void append(const CallSiteDesc& desc, CodeOffsetLabel label, size_t framePushed,
+                uint32_t targetIndex = CallSiteAndTarget::NOT_INTERNAL)
+    {
         // framePushed does not include sizeof(AsmJSFrame), so add it in here (see
         // CallSite::stackDepth).
-        CallSite callsite(desc, currentOffset, framePushed + sizeof(AsmJSFrame));
-        enoughMemory_ &= callsites_.append(callsite);
+        CallSite callsite(desc, label.offset(), framePushed + sizeof(AsmJSFrame));
+        enoughMemory_ &= callsites_.append(CallSiteAndTarget(callsite, targetIndex));
     }
-    CallSiteVector&& extractCallSites() { return Move(callsites_); }
+    const CallSiteAndTargetVector& callSites() const { return callsites_; }
 
     void append(AsmJSHeapAccess access) { enoughMemory_ &= asmJSHeapAccesses_.append(access); }
     AsmJSHeapAccessVector&& extractAsmJSHeapAccesses() { return Move(asmJSHeapAccesses_); }
 
     void append(AsmJSGlobalAccess access) { enoughMemory_ &= asmJSGlobalAccesses_.append(access); }
     size_t numAsmJSGlobalAccesses() const { return asmJSGlobalAccesses_.length(); }
     AsmJSGlobalAccess asmJSGlobalAccess(size_t i) const { return asmJSGlobalAccesses_[i]; }
 
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@@ -985,41 +985,51 @@ class AssemblerX86Shared : public Assemb
 
     void ret() {
         masm.ret();
     }
     void retn(Imm32 n) {
         // Remove the size of the return address which is included in the frame.
         masm.ret_i(n.value - sizeof(void*));
     }
-    void call(Label* label) {
+    CodeOffsetLabel call(Label* label) {
         if (label->bound()) {
             masm.linkJump(masm.call(), JmpDst(label->offset()));
         } else {
             JmpSrc j = masm.call();
             JmpSrc prev = JmpSrc(label->use(j.offset()));
             masm.setNextJump(j, prev);
         }
-    }
-    void call(Register reg) {
+        return CodeOffsetLabel(masm.currentOffset());
+    }
+    CodeOffsetLabel call(Register reg) {
         masm.call_r(reg.encoding());
+        return CodeOffsetLabel(masm.currentOffset());
     }
     void call(const Operand& op) {
         switch (op.kind()) {
           case Operand::REG:
             masm.call_r(op.reg());
             break;
           case Operand::MEM_REG_DISP:
             masm.call_m(op.disp(), op.base());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
+    CodeOffsetLabel callWithPatch() {
+        return CodeOffsetLabel(masm.call().offset());
+    }
+    void patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
+        unsigned char* code = masm.data();
+        X86Encoding::SetRel32(code + callerOffset, code + calleeOffset);
+    }
+
     void breakpoint() {
         masm.int3();
     }
 
     static bool HasSSE2() { return CPUInfo::IsSSE2Present(); }
     static bool HasSSE3() { return CPUInfo::IsSSE3Present(); }
     static bool HasSSE41() { return CPUInfo::IsSSE41Present(); }
     static bool SupportsFloatingPoint() { return CPUInfo::IsSSE2Present(); }
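
The x86 pair makes the offset convention concrete: callWithPatch() returns the offset just past the emitted call, and patchCall() writes calleeOffset - callerOffset into the rel32 field, which occupies the instruction's last four bytes. A standalone model of the same rewrite (assuming the recorded offset points past the instruction, as above):

    #include <cstdint>
    #include <cstring>

    // rel32 is relative to the end of the call and sits in its last 4 bytes.
    void patchRel32(uint8_t* code, uint32_t callerOffset, uint32_t calleeOffset) {
        int32_t rel = int32_t(calleeOffset) - int32_t(callerOffset);
        std::memcpy(code + callerOffset - 4, &rel, sizeof(rel));
    }
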
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -406,26 +406,26 @@ MacroAssembler::Pop(const ValueOperand& 
 {
     popValue(val);
     framePushed_ -= sizeof(Value);
 }
 
 // ===============================================================
 // Simple call functions.
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Register reg)
 {
-    Assembler::call(reg);
+    return Assembler::call(reg);
 }
 
-void
+CodeOffsetLabel
 MacroAssembler::call(Label* label)
 {
-    Assembler::call(label);
+    return Assembler::call(label);
 }
 
 void
 MacroAssembler::call(const Address& addr)
 {
     Assembler::call(Operand(addr.base, addr.offset));
 }
 
@@ -450,16 +450,27 @@ MacroAssembler::call(ImmPtr target)
 }
 
 void
 MacroAssembler::call(JitCode* target)
 {
     Assembler::call(target);
 }
 
+CodeOffsetLabel
+MacroAssembler::callWithPatch()
+{
+    return Assembler::callWithPatch();
+}
+void
+MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset)
+{
+    Assembler::patchCall(callerOffset, calleeOffset);
+}
+
 void
 MacroAssembler::callAndPushReturnAddress(Register reg)
 {
     call(reg);
 }
 
 void
 MacroAssembler::callAndPushReturnAddress(Label* label)