Bug 969722 - Part 1: Remove ThreadPoolWorker subclasses for ease of inlining. (r=nmatsakis)
author       Shu-yu Guo <shu@rfrn.org>
date         Fri, 14 Feb 2014 13:59:13 -0800
changeset    171238  023aed557989b2cf12825c82b701814ef1bf4b09
parent       171237  a40bcf02bb6025f64ee57cf702c7926edc2f3782
child        171239  c1218ef1628ebb0f7174e0f9b7086e37f701c61f
push id      270
push user    pvanderbeken@mozilla.com
push date    Thu, 06 Mar 2014 09:24:21 +0000
reviewers    nmatsakis
bugs         969722
milestone    30.0a1
Bug 969722 - Part 1: Remove ThreadPoolWorker subclasses for ease of inlining. (r=nmatsakis)
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/vm/ForkJoin.cpp
js/src/vm/ForkJoin.h
js/src/vm/SelfHosting.cpp
js/src/vm/ThreadPool.cpp
js/src/vm/ThreadPool.h
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -1763,11 +1763,10 @@ CodeGeneratorX86Shared::visitNegF(LNegF 
 {
     FloatRegister input = ToFloatRegister(ins->input());
     JS_ASSERT(input == ToFloatRegister(ins->output()));
 
     masm.negateFloat(input);
     return true;
 }
 
-
 } // namespace jit
 } // namespace js
--- a/js/src/vm/ForkJoin.cpp
+++ b/js/src/vm/ForkJoin.cpp
@@ -344,60 +344,58 @@ class ForkJoinShared : public ParallelJo
 
     bool gcRequested_;              // True if a worker requested a GC
     JS::gcreason::Reason gcReason_; // Reason given to request GC
     Zone *gcZone_;                  // Zone for GC, or nullptr for full
 
     /////////////////////////////////////////////////////////////////////////
     // Asynchronous Flags
     //
-    // These can be read without the lock (hence the |volatile| declaration).
-    // All fields should be *written with the lock*, however.
+    // These are atomic, and may therefore be accessed without the lock.
 
     // Set to true when parallel execution should abort.
-    volatile bool abort_;
+    mozilla::Atomic<bool, mozilla::ReleaseAcquire> abort_;
 
     // Set to true when a worker bails for a fatal reason.
-    volatile bool fatal_;
+    mozilla::Atomic<bool, mozilla::ReleaseAcquire> fatal_;
 
   public:
     ForkJoinShared(JSContext *cx,
                    ThreadPool *threadPool,
                    HandleFunction fun,
                    uint16_t sliceFrom,
                    uint16_t sliceTo,
                    ParallelBailoutRecord *records);
     ~ForkJoinShared();
 
     bool init();
 
     ParallelResult execute();
 
     // Invoked from parallel worker threads:
-    virtual bool executeFromWorker(uint32_t workerId, uintptr_t stackLimit) MOZ_OVERRIDE;
+    virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) MOZ_OVERRIDE;
 
     // Invoked only from the main thread:
-    virtual bool executeFromMainThread() MOZ_OVERRIDE;
+    virtual bool executeFromMainThread(ThreadPoolWorker *worker) MOZ_OVERRIDE;
 
     // Executes the user-supplied function on a worker or the main thread.
-    void executePortion(PerThreadData *perThread, uint32_t workerId);
+    void executePortion(PerThreadData *perThread, ThreadPoolWorker *worker);
 
     // Moves all the per-thread arenas into the main compartment and processes
     // any pending requests for a GC. This can only safely be invoked on the
     // main thread after the workers have completed.
     void transferArenasToCompartmentAndProcessGCRequests();
 
-    // Invoked during processing by worker threads to "check in".
-    bool check(ForkJoinContext &cx);
 
     // Requests a GC, either full or specific to a zone.
     void requestGC(JS::gcreason::Reason reason);
     void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
 
     // Requests that computation abort.
+    void setAbortFlagDueToInterrupt(ForkJoinContext &cx);
     void setAbortFlagAndTriggerOperationCallback(bool fatal);
 
     // Set the fatal flag for the next abort.
     void setPendingAbortFatal() { fatal_ = true; }
 
     JSRuntime *runtime() { return cx_->runtime(); }
     JS::Zone *zone() { return cx_->zone(); }
     JSCompartment *compartment() { return cx_->compartment(); }
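
The switch above from |volatile bool| to mozilla::Atomic<bool, mozilla::ReleaseAcquire> gives the lock-free reads defined semantics: workers poll the flag on their hot path without taking the monitor, and a release store makes writes performed before raising the flag visible to any reader that observes it. A minimal standalone sketch of that pattern, using std::atomic as a stand-in for mozilla::Atomic (names here are illustrative, not taken from the patch):

    #include <atomic>

    struct AbortFlags {
        std::atomic<bool> abort{false};
        std::atomic<bool> fatal{false};

        // Set when the parallel section should wind down; callers may or may
        // not hold an external lock for their other bookkeeping.
        void raise(bool isFatal) {
            if (isFatal)
                fatal.store(true, std::memory_order_release);
            abort.store(true, std::memory_order_release);
        }

        // Polled by workers on every check(); no lock required.
        bool aborted() const { return abort.load(std::memory_order_acquire); }
    };
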
@@ -593,22 +591,22 @@ ForkJoinOperation::apply()
 
     JS_ASSERT_IF(!jit::IsBaselineEnabled(cx_), !jit::IsIonEnabled(cx_));
     if (!jit::IsBaselineEnabled(cx_) || !jit::IsIonEnabled(cx_))
         return sequentialExecution(true);
 
     SpewBeginOp(cx_, "ForkJoinOperation");
 
     // How many workers do we have, counting the main thread.
-    unsigned numWorkersWithMain = cx_->runtime()->threadPool.numWorkers() + 1;
+    unsigned numWorkers = cx_->runtime()->threadPool.numWorkers();
 
-    if (!bailoutRecords_.resize(numWorkersWithMain))
+    if (!bailoutRecords_.resize(numWorkers))
         return SpewEndOp(ExecutionFatal);
 
-    for (uint32_t i = 0; i < numWorkersWithMain; i++)
+    for (uint32_t i = 0; i < numWorkers; i++)
         bailoutRecords_[i].init(cx_);
 
     if (enqueueInitialScript(&status) == RedLight)
         return SpewEndOp(status);
 
     Spew(SpewOps, "Execution mode: %s", ForkJoinModeString(mode_));
     switch (mode_) {
       case ForkJoinModeNormal:
@@ -628,17 +626,17 @@ ForkJoinOperation::apply()
         }
         break;
 
       case NumForkJoinModes:
         MOZ_ASSUME_UNREACHABLE("Invalid mode");
     }
 
     while (bailouts < MAX_BAILOUTS) {
-        for (uint32_t i = 0; i < numWorkersWithMain; i++)
+        for (uint32_t i = 0; i < numWorkers; i++)
             bailoutRecords_[i].reset(cx_);
 
         if (compileForParallelExecution(&status) == RedLight)
             return SpewEndOp(status);
 
         JS_ASSERT(worklist_.length() == 0);
         if (parallelExecution(&status) == RedLight)
             return SpewEndOp(status);
@@ -1366,17 +1364,17 @@ ForkJoinShared::init()
 
     if (!Monitor::init())
         return false;
 
     cxLock_ = PR_NewLock();
     if (!cxLock_)
         return false;
 
-    for (unsigned i = 0; i < (threadPool_->numWorkers() + 1); i++) {
+    for (unsigned i = 0; i < threadPool_->numWorkers(); i++) {
         Allocator *allocator = cx_->new_<Allocator>(cx_->zone());
         if (!allocator)
             return false;
 
         if (!allocators_.append(allocator)) {
             js_delete(allocator);
             return false;
         }
@@ -1434,72 +1432,72 @@ ForkJoinShared::execute()
     // Everything went swimmingly. Give yourself a pat on the back.
     return jobResult;
 }
 
 void
 ForkJoinShared::transferArenasToCompartmentAndProcessGCRequests()
 {
     JSCompartment *comp = cx_->compartment();
-    for (unsigned i = 0; i < (threadPool_->numWorkers() + 1); i++)
+    for (unsigned i = 0; i < threadPool_->numWorkers(); i++)
         comp->adoptWorkerAllocator(allocators_[i]);
 
     if (gcRequested_) {
         if (!gcZone_)
             TriggerGC(cx_->runtime(), gcReason_);
         else
             TriggerZoneGC(gcZone_, gcReason_);
         gcRequested_ = false;
         gcZone_ = nullptr;
     }
 }
 
 bool
-ForkJoinShared::executeFromWorker(uint32_t workerId, uintptr_t stackLimit)
+ForkJoinShared::executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit)
 {
     PerThreadData thisThread(cx_->runtime());
     if (!thisThread.init()) {
         setAbortFlagAndTriggerOperationCallback(true);
         return false;
     }
     TlsPerThreadData.set(&thisThread);
 
 #ifdef JS_ARM_SIMULATOR
     stackLimit = Simulator::StackLimit();
 #endif
 
     // Don't use setIonStackLimit() because that acquires the ionStackLimitLock, and the
     // lock has not been initialized in these cases.
     thisThread.jitStackLimit = stackLimit;
-    executePortion(&thisThread, workerId);
+    executePortion(&thisThread, worker);
     TlsPerThreadData.set(nullptr);
 
     return !abort_;
 }
 
 bool
-ForkJoinShared::executeFromMainThread()
+ForkJoinShared::executeFromMainThread(ThreadPoolWorker *worker)
 {
-    executePortion(&cx_->mainThread(), threadPool_->numWorkers());
+    executePortion(&cx_->mainThread(), worker);
     return !abort_;
 }
 
 void
-ForkJoinShared::executePortion(PerThreadData *perThread, uint32_t workerId)
+ForkJoinShared::executePortion(PerThreadData *perThread, ThreadPoolWorker *worker)
 {
     // WARNING: This code runs ON THE PARALLEL WORKER THREAD.
     // Be careful when accessing cx_.
 
     // ForkJoinContext already contains an AutoAssertNoGC; however, the analysis
     // does not propagate this type information. We duplicate the assertion
     // here for maximum clarity.
     JS::AutoAssertNoGC nogc(runtime());
 
-    Allocator *allocator = allocators_[workerId];
-    ForkJoinContext cx(perThread, workerId, allocator, this, &records_[workerId]);
+    Allocator *allocator = allocators_[worker->id()];
+    ForkJoinContext cx(perThread, worker, allocator, this, &records_[worker->id()]);
     AutoSetForkJoinContext autoContext(&cx);
 
 #ifdef DEBUG
     // Set the maximum worker and slice number for prettier spewing.
     cx.maxWorkerId = threadPool_->numWorkers();
 #endif
 
     Spew(SpewOps, "Up");
@@ -1518,55 +1516,41 @@ ForkJoinShared::executePortion(PerThread
         // op and reaching this point.  In that case, we just fail
         // and fallback.
         Spew(SpewOps, "Down (Script no longer present)");
         cx.bailoutRecord->setCause(ParallelBailoutMainScriptNotPresent);
         setAbortFlagAndTriggerOperationCallback(false);
     } else {
         ParallelIonInvoke<2> fii(cx_->runtime(), fun_, 2);
 
-        fii.args[0] = Int32Value(workerId);
+        fii.args[0] = Int32Value(worker->id());
         fii.args[1] = BooleanValue(false);
 
         bool ok = fii.invoke(perThread);
         JS_ASSERT(ok == !cx.bailoutRecord->topScript);
         if (!ok)
             setAbortFlagAndTriggerOperationCallback(false);
     }
 
     Spew(SpewOps, "Down");
 }
 
-bool
-ForkJoinShared::check(ForkJoinContext &cx)
+void
+ForkJoinShared::setAbortFlagDueToInterrupt(ForkJoinContext &cx)
 {
     JS_ASSERT(cx_->runtime()->interruptPar);
-
-    if (abort_)
-        return false;
-
-    // Note: We must check if the main thread has exited successfully here, as
-    // without a main thread the worker threads which are tripping on the
-    // interrupt flag would never exit.
-    if (cx.isMainThread() || !threadPool_->isMainThreadActive()) {
-        JS_ASSERT(!cx_->runtime()->gcIsNeeded);
+    // The GC Needed flag should not be set during parallel
+    // execution.  Instead, one of the requestGC() or
+    // requestZoneGC() methods should be invoked.
+    JS_ASSERT(!cx_->runtime()->gcIsNeeded);
 
-        if (cx_->runtime()->interruptPar) {
-            // The GC Needed flag should not be set during parallel
-            // execution.  Instead, one of the requestGC() or
-            // requestZoneGC() methods should be invoked.
-            JS_ASSERT(!cx_->runtime()->gcIsNeeded);
-
-            cx.bailoutRecord->setCause(ParallelBailoutInterrupt);
-            setAbortFlagAndTriggerOperationCallback(false);
-            return false;
-        }
+    if (!abort_) {
+        cx.bailoutRecord->setCause(ParallelBailoutInterrupt);
+        setAbortFlagAndTriggerOperationCallback(false);
     }
-
-    return true;
 }
 
 void
 ForkJoinShared::setAbortFlagAndTriggerOperationCallback(bool fatal)
 {
     AutoLockMonitor lock(*this);
 
     abort_ = true;
@@ -1605,25 +1589,25 @@ ForkJoinShared::requestZoneGC(JS::Zone *
         gcRequested_ = true;
     }
 }
 
 /////////////////////////////////////////////////////////////////////////////
 // ForkJoinContext
 //
 
-ForkJoinContext::ForkJoinContext(PerThreadData *perThreadData, uint32_t workerId,
+ForkJoinContext::ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
                                  Allocator *allocator, ForkJoinShared *shared,
                                  ParallelBailoutRecord *bailoutRecord)
   : ThreadSafeContext(shared->runtime(), perThreadData, Context_ForkJoin),
-    workerId(workerId),
     bailoutRecord(bailoutRecord),
     targetRegionStart(nullptr),
     targetRegionEnd(nullptr),
-    shared(shared),
+    shared_(shared),
+    worker_(worker),
     acquiredJSContext_(false),
     nogc_(shared->runtime())
 {
     /*
      * Unsafely set the zone. This is used to track malloc counters and to
      * trigger GCs and is otherwise not thread-safe to access.
      */
     zone_ = shared->zone();
@@ -1635,77 +1619,78 @@ ForkJoinContext::ForkJoinContext(PerThre
     compartment_ = shared->compartment();
 
     allocator_ = allocator;
 }
 
 bool
 ForkJoinContext::isMainThread() const
 {
-    return perThreadData == &shared->runtime()->mainThread;
+    return perThreadData == &shared_->runtime()->mainThread;
 }
 
 JSRuntime *
 ForkJoinContext::runtime()
 {
-    return shared->runtime();
+    return shared_->runtime();
 }
 
 JSContext *
 ForkJoinContext::acquireJSContext()
 {
-    JSContext *cx = shared->acquireJSContext();
+    JSContext *cx = shared_->acquireJSContext();
     JS_ASSERT(!acquiredJSContext_);
     acquiredJSContext_ = true;
     return cx;
 }
 
 void
 ForkJoinContext::releaseJSContext()
 {
     JS_ASSERT(acquiredJSContext_);
     acquiredJSContext_ = false;
-    return shared->releaseJSContext();
+    return shared_->releaseJSContext();
 }
 
 bool
 ForkJoinContext::hasAcquiredJSContext() const
 {
     return acquiredJSContext_;
 }
 
 bool
 ForkJoinContext::check()
 {
-    if (runtime()->interruptPar)
-        return shared->check(*this);
-    else
-        return true;
+    if (runtime()->interruptPar) {
+        shared_->setAbortFlagDueToInterrupt(*this);
+        return false;
+    }
+    return true;
 }
 
 void
 ForkJoinContext::requestGC(JS::gcreason::Reason reason)
 {
-    shared->requestGC(reason);
+    shared_->requestGC(reason);
     bailoutRecord->setCause(ParallelBailoutRequestedGC);
-    shared->setAbortFlagAndTriggerOperationCallback(false);
+    shared_->setAbortFlagAndTriggerOperationCallback(false);
 }
 
 void
 ForkJoinContext::requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason)
 {
-    shared->requestZoneGC(zone, reason);
+    shared_->requestZoneGC(zone, reason);
     bailoutRecord->setCause(ParallelBailoutRequestedZoneGC);
-    shared->setAbortFlagAndTriggerOperationCallback(false);
+    shared_->setAbortFlagAndTriggerOperationCallback(false);
 }
 
 bool
 ForkJoinContext::setPendingAbortFatal(ParallelBailoutCause cause)
 {
-    shared->setPendingAbortFatal();
+    shared_->setPendingAbortFatal();
     bailoutRecord->setCause(cause);
     return false;
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // ParallelBailoutRecord
 
 void
@@ -1896,18 +1881,18 @@ class ParallelSpewer
         char buf[BufferSize];
 
         if (ForkJoinContext *cx = ForkJoinContext::current()) {
             // Print the format first into a buffer to right-justify the
             // worker ids.
             char bufbuf[BufferSize];
             JS_snprintf(bufbuf, BufferSize, "[%%sParallel:%%0%du%%s] ",
                         NumberOfDigits(cx->maxWorkerId));
-            JS_snprintf(buf, BufferSize, bufbuf, workerColor(cx->workerId),
-                        cx->workerId, reset());
+            JS_snprintf(buf, BufferSize, bufbuf, workerColor(cx->workerId()),
+                        cx->workerId(), reset());
         } else {
             JS_snprintf(buf, BufferSize, "[Parallel:M] ");
         }
 
         for (uint32_t i = 0; i < depth; i++)
             JS_snprintf(buf + strlen(buf), BufferSize, "  ");
 
         JS_vsnprintf(buf + strlen(buf), BufferSize, fmt, ap);
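
A note on the arithmetic running through this file: ThreadPool::numWorkers() now counts the main thread (it returns cpuCount rather than cpuCount - 1), so every former "numWorkers() + 1" collapses to plain numWorkers(). On a 4-core machine, for example, both the old 3 + 1 and the new 4 size the bailoutRecords_ and allocators_ vectors at four entries, one per worker including the main thread's.
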
--- a/js/src/vm/ForkJoin.h
+++ b/js/src/vm/ForkJoin.h
@@ -307,19 +307,16 @@ struct ParallelBailoutRecord {
                   jsbytecode *pc);
 };
 
 struct ForkJoinShared;
 
 class ForkJoinContext : public ThreadSafeContext
 {
   public:
-    // The worker that is doing the work.
-    const uint32_t workerId;
-
     // Bailout record used to record the reason this thread stopped executing
     ParallelBailoutRecord *const bailoutRecord;
 
 #ifdef DEBUG
     // Records the last instr. to execute on this thread.
     IonLIRTraceData traceData;
 
     // The maximum worker id.
@@ -338,27 +335,26 @@ class ForkJoinContext : public ThreadSaf
     // anyhow. But due to sequential fallback it is possible for handles into
     // other regions to escape into global variables in the sequential
     // execution and then get accessed by later parallel sections. Thus we
     // must be careful and ensure that the write is going through a handle
     // into the correct *region* of the buffer.
     uint8_t *targetRegionStart;
     uint8_t *targetRegionEnd;
 
-    ForkJoinContext(PerThreadData *perThreadData, uint32_t workerId,
+    ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
                     Allocator *allocator, ForkJoinShared *shared,
                     ParallelBailoutRecord *bailoutRecord);
 
+    // Get the worker id. The main thread's worker, by convention, has id 0.
+    uint32_t workerId() const { return worker_->id(); }
+
     // Get a slice of work for the worker associated with the context.
-    bool getSlice(uint16_t *sliceId) {
-        ThreadPool &pool = runtime()->threadPool;
-        return (isMainThread()
-                ? pool.getSliceForMainThread(sliceId)
-                : pool.getSliceForWorker(workerId, sliceId));
-    }
+    bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
 
     // True if this is the main thread, false if it is one of the parallel workers.
     bool isMainThread() const;
 
     // When the code would normally trigger a GC, we don't trigger it
     // immediately but instead record that request here.  This will
     // cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
     // |TriggerCompartmentGC()| as appropriate once the parallel
@@ -383,17 +379,17 @@ class ForkJoinContext : public ThreadSaf
 
     // During the parallel phase, this method should be invoked
     // periodically, for example on every backedge, similar to the
     // interrupt check.  If it returns false, then the parallel phase
     // has been aborted and so you should bailout.  The function may
     // also rendezvous to perform GC or do other similar things.
     //
     // This function is guaranteed to have no effect if
-    // runtime()->interrupt is zero.  Ion-generated code takes
+    // runtime()->interruptPar is zero.  Ion-generated code takes
     // advantage of this by inlining the checks on those flags before
     // actually calling this function.  If this function ends up
     // getting called a lot from outside ion code, we can refactor
     // it into an inlined version with this check that calls a slower
     // version.
     bool check();
 
     // Be wary, the runtime is shared between all threads!
@@ -411,17 +407,19 @@ class ForkJoinContext : public ThreadSaf
     static bool initialize();
 
   private:
     friend class AutoSetForkJoinContext;
 
     // Initialized by initialize()
     static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
 
-    ForkJoinShared *const shared;
+    ForkJoinShared *const shared_;
+
+    ThreadPoolWorker *worker_;
 
     bool acquiredJSContext_;
 
     // ForkJoinContext is allocated on the stack. It would be dangerous to GC
     // with it live because of the GC pointer fields stored in the context.
     JS::AutoAssertNoGC nogc_;
 };
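
The contract spelled out for check() above, namely that it has no effect unless runtime()->interruptPar is set, is what lets jitted code inline the cheap flag test and only call out of line on the rare path. A sketch of that shape, using hypothetical standalone types rather than SpiderMonkey's real ones:

    #include <atomic>
    #include <cstdint>

    struct Runtime {
        std::atomic<uint32_t> interruptPar{0};
    };

    // Out-of-line slow path; in the real code this records a bailout cause and
    // raises the shared abort flag.
    bool CheckSlow(Runtime &rt) {
        (void) rt;
        return false;
    }

    // What a loop backedge effectively does: one load and a branch in the
    // common case, a call only when an interrupt has been requested.
    inline bool CheckFast(Runtime &rt) {
        if (rt.interruptPar.load(std::memory_order_relaxed) == 0)
            return true;
        return CheckSlow(rt);
    }
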
 
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -294,17 +294,17 @@ intrinsic_ForkJoin(JSContext *cx, unsign
 /*
  * ForkJoinWorkerNumWorkers(): Returns the number of workers in the fork join
  * thread pool, including the main thread.
  */
 static bool
 intrinsic_ForkJoinNumWorkers(JSContext *cx, unsigned argc, Value *vp)
 {
     CallArgs args = CallArgsFromVp(argc, vp);
-    args.rval().setInt32(cx->runtime()->threadPool.numWorkers() + 1);
+    args.rval().setInt32(cx->runtime()->threadPool.numWorkers());
     return true;
 }
 
 /*
  * ForkJoinGetSlice(id): Returns the id of the next slice to be worked
  * on.
  *
  * Acts as the identity function when called from outside of a ForkJoin
--- a/js/src/vm/ThreadPool.cpp
+++ b/js/src/vm/ThreadPool.cpp
@@ -13,185 +13,88 @@
 #include "vm/ForkJoin.h"
 #include "vm/Monitor.h"
 #include "vm/Runtime.h"
 
 using namespace js;
 
 const size_t WORKER_THREAD_STACK_SIZE = 1*1024*1024;
 
-/////////////////////////////////////////////////////////////////////////////
-// ThreadPoolBaseWorker
-//
-// Base class for worker threads in the pool.
-
-class js::ThreadPoolBaseWorker
+static inline uint32_t
+ComposeSliceBounds(uint16_t from, uint16_t to)
 {
-  protected:
-    const uint32_t workerId_;
-    ThreadPool *pool_;
-
-  private:
-    // Slices this thread is responsible for.
-    //
-    // This a uint32 composed of two uint16s (the lower and upper bounds) so
-    // that we may do a single CAS. See {Compose,Decompose}SliceBounds
-    // functions below.
-    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_;
-
-  protected:
-    static uint32_t ComposeSliceBounds(uint16_t from, uint16_t to) {
-        MOZ_ASSERT(from <= to);
-        return (uint32_t(from) << 16) | to;
-    }
-
-    static void DecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to) {
-        *from = bounds >> 16;
-        *to = bounds & uint16_t(~0);
-        MOZ_ASSERT(*from <= *to);
-    }
-
-    bool hasWork() const {
-        uint16_t from, to;
-        DecomposeSliceBounds(sliceBounds_, &from, &to);
-        return from != to;
-    }
-
-    bool popSliceFront(uint16_t *sliceId);
-    bool popSliceBack(uint16_t *sliceId);
-    bool stealFrom(ThreadPoolBaseWorker *victim, uint16_t *sliceId);
-
-  public:
-    ThreadPoolBaseWorker(uint32_t workerId, ThreadPool *pool)
-      : workerId_(workerId),
-        pool_(pool),
-        sliceBounds_(0)
-    { }
-
-    void submitSlices(uint16_t sliceFrom, uint16_t sliceTo) {
-        MOZ_ASSERT(!hasWork());
-        sliceBounds_ = ComposeSliceBounds(sliceFrom, sliceTo);
-    }
-
-    void abort();
-};
+    MOZ_ASSERT(from <= to);
+    return (uint32_t(from) << 16) | to;
+}
 
-/////////////////////////////////////////////////////////////////////////////
-// ThreadPoolWorker
-//
-// Each |ThreadPoolWorker| just hangs around waiting for slices to be added to
-// its worklist. Whenever something is added, it gets executed. Once the
-// worker's state is set to |TERMINATED|, the worker will exit as soon as its
-// queue is empty.
-
-class js::ThreadPoolWorker : public ThreadPoolBaseWorker
+static inline void
+DecomposeSliceBounds(uint32_t bounds, uint16_t *from, uint16_t *to)
 {
-    friend class ThreadPoolMainWorker;
-
-    // Current point in the worker's lifecycle.
-    //
-    // Modified only while holding the ThreadPoolWorker's lock.
-    volatile enum WorkerState {
-        CREATED, ACTIVE, TERMINATED
-    } state_;
-
-    // The thread's main function
-    static void ThreadMain(void *arg);
-    void run();
-
-  public:
-    ThreadPoolWorker(uint32_t workerId, ThreadPool *pool)
-      : ThreadPoolBaseWorker(workerId, pool),
-        state_(CREATED)
-    { }
-
-    // Get a slice of work, from ourself or steal work from other workers
-    // (or from the main thread).
-    bool getSlice(uint16_t *sliceId);
-
-    // Invoked from main thread; signals worker to start.
-    bool start();
-
-    // Invoked from main thread; signals the worker loop to return.
-    void terminate(AutoLockMonitor &lock);
-};
-
-// ThreadPoolMainWorker
-//
-// This class abstracts the main thread as a worker thread with a private
-// queue to allow for work stealing.
-
-class js::ThreadPoolMainWorker : public ThreadPoolBaseWorker
-{
-    friend class ThreadPoolWorker;
-
-  public:
-    bool isActive;
-
-    ThreadPoolMainWorker(ThreadPool *pool)
-      : ThreadPoolBaseWorker(0, pool),
-        isActive(false)
-    { }
-
-    // Get a slice of work, from ourself or steal work from other workers.
-    bool getSlice(uint16_t *sliceId);
-
-    // Execute a job on the main thread.
-    void executeJob();
-};
+    *from = bounds >> 16;
+    *to = bounds & uint16_t(~0);
+    MOZ_ASSERT(*from <= *to);
+}
 
 bool
-ThreadPoolBaseWorker::popSliceFront(uint16_t *sliceId)
+ThreadPoolWorker::hasWork() const
+{
+    uint16_t from, to;
+    DecomposeSliceBounds(sliceBounds_, &from, &to);
+    return from != to;
+}
+
+bool
+ThreadPoolWorker::popSliceFront(uint16_t *sliceId)
 {
     uint32_t bounds;
     uint16_t from, to;
     do {
         bounds = sliceBounds_;
         DecomposeSliceBounds(bounds, &from, &to);
         if (from == to)
             return false;
     } while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from + 1, to)));
 
     *sliceId = from;
     pool_->pendingSlices_--;
     return true;
 }
 
 bool
-ThreadPoolBaseWorker::popSliceBack(uint16_t *sliceId)
+ThreadPoolWorker::popSliceBack(uint16_t *sliceId)
 {
     uint32_t bounds;
     uint16_t from, to;
     do {
         bounds = sliceBounds_;
         DecomposeSliceBounds(bounds, &from, &to);
         if (from == to)
             return false;
     } while (!sliceBounds_.compareExchange(bounds, ComposeSliceBounds(from, to - 1)));
 
     *sliceId = to - 1;
     pool_->pendingSlices_--;
     return true;
 }
 
 void
-ThreadPoolBaseWorker::abort()
+ThreadPoolWorker::discardSlices()
 {
     uint32_t bounds;
     uint16_t from, to;
     do {
         bounds = sliceBounds_;
         DecomposeSliceBounds(bounds, &from, &to);
     } while (!sliceBounds_.compareExchange(bounds, 0));
 
     pool_->pendingSlices_ -= to - from;
 }
 
 bool
-ThreadPoolBaseWorker::stealFrom(ThreadPoolBaseWorker *victim, uint16_t *sliceId)
+ThreadPoolWorker::stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId)
 {
     // Instead of popping the slice from the front by incrementing sliceFrom_,
     // decrement sliceTo_. Usually this gives us better locality.
     if (!victim->popSliceBack(sliceId))
         return false;
 #ifdef DEBUG
     pool_->stolenSlices_++;
 #endif
@@ -199,163 +102,140 @@ ThreadPoolBaseWorker::stealFrom(ThreadPo
 }
 
 bool
 ThreadPoolWorker::start()
 {
 #ifndef JS_THREADSAFE
     return false;
 #else
+    if (isMainThread())
+        return true;
+
     MOZ_ASSERT(state_ == CREATED);
 
     // Set state to active now, *before* the thread starts:
     state_ = ACTIVE;
 
     if (!PR_CreateThread(PR_USER_THREAD,
-                         ThreadMain, this,
+                         HelperThreadMain, this,
                          PR_PRIORITY_NORMAL, PR_LOCAL_THREAD,
                          PR_UNJOINABLE_THREAD,
                          WORKER_THREAD_STACK_SIZE))
     {
         // If the thread failed to start, call it TERMINATED.
         state_ = TERMINATED;
         return false;
     }
 
     return true;
 #endif
 }
 
 void
-ThreadPoolWorker::ThreadMain(void *arg)
+ThreadPoolWorker::HelperThreadMain(void *arg)
 {
     ThreadPoolWorker *worker = (ThreadPoolWorker*) arg;
-    worker->run();
-}
-
-bool
-ThreadPoolWorker::getSlice(uint16_t *sliceId)
-{
-    // First see whether we have any work ourself.
-    if (popSliceFront(sliceId))
-        return true;
-
-    // Try to steal work.
-    if (!pool_->workStealing())
-        return false;
-
-    ThreadPoolBaseWorker *victim;
-    do {
-        if (!pool_->hasWork())
-            return false;
-
-        // Add one to add the main thread into the mix.
-        uint32_t victimId = rand() % (pool_->numWorkers() + 1);
-
-        // By convention consider worker id 0 the main thread.
-        if (victimId == 0)
-            victim = pool_->mainWorker_;
-        else
-            victim = pool_->workers_[victimId - 1];
-    } while (!stealFrom(victim, sliceId));
-
-    return true;
+    worker->helperLoop();
 }
 
 void
-ThreadPoolWorker::run()
+ThreadPoolWorker::helperLoop()
 {
+    MOZ_ASSERT(!isMainThread());
+
     // This is hokey in the extreme.  To compute the stack limit,
     // subtract the size of the stack from the address of a local
     // variable and give a 100k buffer.  Is there a better way?
     // (Note: 2k proved to be fine on Mac, but too little on Linux)
     uintptr_t stackLimitOffset = WORKER_THREAD_STACK_SIZE - 100*1024;
     uintptr_t stackLimit = (((uintptr_t)&stackLimitOffset) +
                              stackLimitOffset * JS_STACK_GROWTH_DIRECTION);
 
+
     for (;;) {
         // Wait for work to arrive or for us to terminate.
         {
             AutoLockMonitor lock(*pool_);
             while (state_ == ACTIVE && !pool_->hasWork())
                 lock.wait();
 
             if (state_ == TERMINATED) {
                 pool_->join(lock);
                 return;
             }
 
             pool_->activeWorkers_++;
         }
 
-        if (!pool_->job()->executeFromWorker(workerId_, stackLimit))
+        if (!pool_->job()->executeFromWorker(this, stackLimit))
             pool_->abortJob();
 
         // Join the pool.
         {
             AutoLockMonitor lock(*pool_);
             pool_->join(lock);
         }
     }
 }
 
 void
-ThreadPoolWorker::terminate(AutoLockMonitor &lock)
+ThreadPoolWorker::submitSlices(uint16_t sliceFrom, uint16_t sliceTo)
 {
-    MOZ_ASSERT(lock.isFor(*pool_));
-    MOZ_ASSERT(state_ != TERMINATED);
-    state_ = TERMINATED;
-}
-
-void
-ThreadPoolMainWorker::executeJob()
-{
-    if (!pool_->job()->executeFromMainThread())
-        pool_->abortJob();
+    MOZ_ASSERT(!hasWork());
+    sliceBounds_ = ComposeSliceBounds(sliceFrom, sliceTo);
 }
 
 bool
-ThreadPoolMainWorker::getSlice(uint16_t *sliceId)
+ThreadPoolWorker::getSlice(ForkJoinContext *cx, uint16_t *sliceId)
 {
     // First see whether we have any work ourself.
     if (popSliceFront(sliceId))
         return true;
 
     // Try to steal work.
     if (!pool_->workStealing())
         return false;
 
-    // Pick a random target with work left over.
     ThreadPoolWorker *victim;
     do {
         if (!pool_->hasWork())
             return false;
 
+        // The main thread's worker is in the pool too, so it may be picked.
         victim = pool_->workers_[rand() % pool_->numWorkers()];
     } while (!stealFrom(victim, sliceId));
 
     return true;
 }
 
+void
+ThreadPoolWorker::terminate(AutoLockMonitor &lock)
+{
+    MOZ_ASSERT(lock.isFor(*pool_));
+    MOZ_ASSERT(state_ != TERMINATED);
+    state_ = TERMINATED;
+}
+
 /////////////////////////////////////////////////////////////////////////////
 // ThreadPool
 //
 // The |ThreadPool| starts up workers, submits work to them, and shuts
 // them down when requested.
 
 ThreadPool::ThreadPool(JSRuntime *rt)
   : runtime_(rt),
-    mainWorker_(nullptr),
     activeWorkers_(0),
     joinBarrier_(nullptr),
     job_(nullptr),
 #ifdef DEBUG
     stolenSlices_(0),
 #endif
-    pendingSlices_(0)
+    pendingSlices_(0),
+    isMainThreadActive_(false)
 { }
 
 ThreadPool::~ThreadPool()
 {
     terminateWorkers();
 #ifdef JS_THREADSAFE
     if (joinBarrier_)
         PR_DestroyCondVar(joinBarrier_);
@@ -374,41 +254,34 @@ ThreadPool::init()
     return true;
 #endif
 }
 
 uint32_t
 ThreadPool::numWorkers() const
 {
 #ifdef JS_THREADSAFE
-    // Subtract one for the main thread, which always exists.
-    return WorkerThreadState().cpuCount - 1;
+    return WorkerThreadState().cpuCount;
 #else
-    return 0;
+    return 1;
 #endif
 }
 
 bool
 ThreadPool::workStealing() const
 {
 #ifdef DEBUG
     if (char *stealEnv = getenv("JS_THREADPOOL_STEAL"))
         return !!strtol(stealEnv, nullptr, 10);
 #endif
 
     return true;
 }
 
 bool
-ThreadPool::isMainThreadActive() const
-{
-    return mainWorker_ && mainWorker_->isActive;
-}
-
-bool
 ThreadPool::lazyStartWorkers(JSContext *cx)
 {
     // Starts the workers if they have not already been started.  If
     // something goes wrong, reports an error and ensures that all
     // partially started threads are terminated.  Therefore, upon exit
     // from this function, the workers array is either full (upon
     // success) or empty (upon failure).
 
@@ -459,27 +332,25 @@ ThreadPool::terminateWorkers()
         AutoLockMonitor lock(*this);
 
         // Signal to the workers they should quit.
         for (uint32_t i = 0; i < workers_.length(); i++)
             workers_[i]->terminate(lock);
 
         // Wake up all the workers. Set the number of active workers to the
         // number of helper threads (the main thread's worker never joins) so
         // we can make sure they all join.
-        activeWorkers_ = workers_.length();
+        activeWorkers_ = workers_.length() - 1;
         lock.notifyAll();
 
         // Wait for all workers to join.
         waitForWorkers(lock);
 
         while (workers_.length() > 0)
             js_delete(workers_.popCopy());
     }
-
-    js_delete(mainWorker_);
 }
 
 void
 ThreadPool::terminate()
 {
     terminateWorkers();
 }
 
@@ -503,100 +374,76 @@ ThreadPool::waitForWorkers(AutoLockMonit
 ParallelResult
 ThreadPool::executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceFrom, uint16_t sliceMax)
 {
     MOZ_ASSERT(sliceFrom < sliceMax);
     MOZ_ASSERT(CurrentThreadCanAccessRuntime(runtime_));
     MOZ_ASSERT(activeWorkers_ == 0);
     MOZ_ASSERT(!hasWork());
 
-    // Create the main thread worker and off-main-thread workers if necessary.
-    if (!mainWorker_) {
-        mainWorker_ = cx->new_<ThreadPoolMainWorker>(this);
-        if (!mainWorker_) {
-            terminateWorkersAndReportOOM(cx);
-            return TP_FATAL;
-        }
-    }
-
     if (!lazyStartWorkers(cx))
         return TP_FATAL;
 
     // Evenly distribute slices to the workers.
     uint16_t numSlices = sliceMax - sliceFrom;
-    uint16_t slicesPerWorker = numSlices / (numWorkers() + 1);
-    uint16_t leftover = numSlices % (numWorkers() + 1);
+    uint16_t slicesPerWorker = numSlices / numWorkers();
+    uint16_t leftover = numSlices % numWorkers();
     uint16_t sliceTo = sliceFrom;
     for (uint32_t workerId = 0; workerId < numWorkers(); workerId++) {
         if (leftover > 0) {
             sliceTo += slicesPerWorker + 1;
             leftover--;
         } else {
             sliceTo += slicesPerWorker;
         }
         workers_[workerId]->submitSlices(sliceFrom, sliceTo);
         sliceFrom = sliceTo;
     }
     MOZ_ASSERT(leftover == 0);
-    mainWorker_->submitSlices(sliceFrom, sliceFrom + slicesPerWorker);
 
     // Notify the worker threads that there's work now.
     {
         job_ = job;
         pendingSlices_ = numSlices;
 #ifdef DEBUG
         stolenSlices_ = 0;
 #endif
         AutoLockMonitor lock(*this);
         lock.notifyAll();
     }
 
     // Do work on the main thread.
-    mainWorker_->isActive = true;
-    mainWorker_->executeJob();
-    mainWorker_->isActive = false;
+    isMainThreadActive_ = true;
+    if (!job->executeFromMainThread(mainThreadWorker()))
+        abortJob();
+    isMainThreadActive_ = false;
 
     // Wait for all threads to join. While there are no pending slices at this
     // point, the slices themselves may not be finished processing.
     {
         AutoLockMonitor lock(*this);
         waitForWorkers(lock);
     }
 
     // Guard against errors in the self-hosted slice processing function. If
     // we still have work at this point, it is the user function's fault.
     MOZ_ASSERT(!hasWork(), "User function did not process all the slices!");
 
     // Everything went swimmingly. Give yourself a pat on the back.
     return TP_SUCCESS;
 }
 
-bool
-ThreadPool::getSliceForWorker(uint32_t workerId, uint16_t *sliceId)
-{
-    MOZ_ASSERT(workers_[workerId]);
-    return workers_[workerId]->getSlice(sliceId);
-}
-
-bool
-ThreadPool::getSliceForMainThread(uint16_t *sliceId)
-{
-    MOZ_ASSERT(mainWorker_);
-    return mainWorker_->getSlice(sliceId);
-}
-
 void
 ThreadPool::abortJob()
 {
-    mainWorker_->abort();
     for (uint32_t workerId = 0; workerId < numWorkers(); workerId++)
-        workers_[workerId]->abort();
+        workers_[workerId]->discardSlices();
 
     // Spin until pendingSlices_ reaches 0.
     //
-    // The reason for this is that while calling abort() clears all workers'
-    // bounds, the pendingSlices_ cache might still be > 0 due to
+    // The reason for this is that while calling discardSlices() clears all
+    // workers' bounds, the pendingSlices_ cache might still be > 0 due to
     // still-executing calls to popSliceBack or popSliceFront in other
     // threads. When those finish, we will be sure that !hasWork(), which is
     // important to ensure that an aborted worker does not start again due to
     // the thread pool having more work.
     while (hasWork());
 }
--- a/js/src/vm/ThreadPool.h
+++ b/js/src/vm/ThreadPool.h
@@ -6,42 +6,109 @@
 
 #ifndef vm_ThreadPool_h
 #define vm_ThreadPool_h
 
 #include "mozilla/Atomics.h"
 
 #include "jsalloc.h"
 #include "jslock.h"
+#include "jsmath.h"
 #include "jspubtd.h"
 
 #include "js/Vector.h"
 #include "vm/Monitor.h"
 
 struct JSRuntime;
 struct JSCompartment;
 
 namespace js {
 
-class ThreadPoolBaseWorker;
-class ThreadPoolWorker;
-class ThreadPoolMainWorker;
+class ThreadPool;
+
+/////////////////////////////////////////////////////////////////////////////
+// ThreadPoolWorker
+//
+// Class for worker threads in the pool. All threads (i.e. helpers and main
+// thread) have a worker associated with them. By convention, the worker id of
+// the main thread is 0.
+
+class ThreadPoolWorker
+{
+    const uint32_t workerId_;
+    ThreadPool *pool_;
+
+    // Slices this thread is responsible for.
+    //
+    // This is a uint32 composed of two uint16s (the lower and upper bounds) so
+    // that we may do a single CAS. See {Compose,Decompose}SliceBounds
+    // functions below.
+    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sliceBounds_;
+
+    // Current point in the worker's lifecycle.
+    volatile enum WorkerState {
+        CREATED, ACTIVE, TERMINATED
+    } state_;
+
+    // The thread's main function.
+    static void HelperThreadMain(void *arg);
+    void helperLoop();
 
+    bool hasWork() const;
+    bool popSliceFront(uint16_t *sliceId);
+    bool popSliceBack(uint16_t *sliceId);
+    bool stealFrom(ThreadPoolWorker *victim, uint16_t *sliceId);
+
+  public:
+    ThreadPoolWorker(uint32_t workerId, ThreadPool *pool)
+      : workerId_(workerId),
+        pool_(pool),
+        sliceBounds_(0),
+        state_(CREATED)
+    { }
+
+    uint32_t id() const { return workerId_; }
+    bool isMainThread() const { return id() == 0; }
+
+    // Submits a new set of slices. Assumes !hasWork().
+    void submitSlices(uint16_t sliceFrom, uint16_t sliceTo);
+
+    // Get the next slice; work stealing happens here if work stealing is
+    // on. Returns false if there are no more slices to hand out.
+    bool getSlice(ForkJoinContext *cx, uint16_t *sliceId);
+
+    // Discard remaining slices. Used for aborting jobs.
+    void discardSlices();
+
+    // Invoked from the main thread; signals worker to start.
+    bool start();
+
+    // Invoked from the main thread; signals the worker loop to return.
+    void terminate(AutoLockMonitor &lock);
+
+    static size_t offsetOfSliceBounds() {
+        return offsetof(ThreadPoolWorker, sliceBounds_);
+    }
+};
+
+/////////////////////////////////////////////////////////////////////////////
 // A ParallelJob is the main runnable abstraction in the ThreadPool.
 //
 // The unit of work here is in terms of threads, *not* slices. The
 // user-provided function has the responsibility of getting slices of work via
 // the |ForkJoinGetSlice| intrinsic.
+
 class ParallelJob
 {
   public:
-    virtual bool executeFromWorker(uint32_t workerId, uintptr_t stackLimit) = 0;
-    virtual bool executeFromMainThread() = 0;
+    virtual bool executeFromWorker(ThreadPoolWorker *worker, uintptr_t stackLimit) = 0;
+    virtual bool executeFromMainThread(ThreadPoolWorker *mainWorker) = 0;
 };
 
+/////////////////////////////////////////////////////////////////////////////
 // ThreadPool used for parallel JavaScript execution. Unless you are building
 // a new kind of parallel service, it is very likely that you do not wish to
 // interact with the threadpool directly. In particular, if you wish to
 // execute JavaScript in parallel, you probably want to look at |js::ForkJoin|
 // in |forkjoin.cpp|.
 //
 // The ThreadPool always maintains a fixed pool of worker threads.  You can
 // query the number of worker threads via the method |numWorkers()|.  Note
@@ -74,92 +141,87 @@ class ParallelJob
 // therefore the execution overhead introduced is almost zero with balanced
 // workloads. The way a |ParallelJob| is divided into multiple slices has to
 // be specified by the instance implementing the job (e.g., |ForkJoinShared|
 // in |ForkJoin.cpp|).
 
 class ThreadPool : public Monitor
 {
   private:
-    friend class ThreadPoolBaseWorker;
     friend class ThreadPoolWorker;
-    friend class ThreadPoolMainWorker;
 
     // Initialized at startup only.
     JSRuntime *const runtime_;
 
-    // Worker threads and the main thread worker have different
-    // logic. Initialized lazily.
+    // Initialized lazily.
     js::Vector<ThreadPoolWorker *, 8, SystemAllocPolicy> workers_;
-    ThreadPoolMainWorker *mainWorker_;
 
     // The number of active workers. Should only access under lock.
     uint32_t activeWorkers_;
     PRCondVar *joinBarrier_;
 
     // The current job.
     ParallelJob *job_;
 
 #ifdef DEBUG
     // Number of stolen slices in the last parallel job.
     mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stolenSlices_;
 #endif
 
     // Number of pending slices in the current job.
     mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> pendingSlices_;
 
+    // Whether the main thread is currently processing slices.
+    bool isMainThreadActive_;
+
     bool lazyStartWorkers(JSContext *cx);
     void terminateWorkers();
     void terminateWorkersAndReportOOM(JSContext *cx);
     void join(AutoLockMonitor &lock);
     void waitForWorkers(AutoLockMonitor &lock);
+    ThreadPoolWorker *mainThreadWorker() { return workers_[0]; }
 
   public:
     ThreadPool(JSRuntime *rt);
     ~ThreadPool();
 
     bool init();
 
-    // Return number of worker threads in the pool, not counting the main thread.
+    // Return number of worker threads in the pool, counting the main thread.
     uint32_t numWorkers() const;
 
     // Returns whether we have any pending slices.
     bool hasWork() const { return pendingSlices_ != 0; }
 
     // Returns the current job. Must have one.
     ParallelJob *job() const {
         MOZ_ASSERT(job_);
         return job_;
     }
 
     // Returns whether or not the scheduler should perform work stealing.
     bool workStealing() const;
 
     // Returns whether or not the main thread is working.
-    bool isMainThreadActive() const;
+    bool isMainThreadActive() const { return isMainThreadActive_; }
 
 #ifdef DEBUG
     // Return the number of stolen slices in the last parallel job.
     uint16_t stolenSlices() { return stolenSlices_; }
 #endif
 
     // Wait until all worker threads have finished their current set
     // of slices and then return.  You must not submit new jobs after
     // invoking |terminate()|.
     void terminate();
 
     // Execute the given ParallelJob using the main thread and any available worker.
     // Blocks until the main thread has completed execution.
     ParallelResult executeJob(JSContext *cx, ParallelJob *job, uint16_t sliceStart,
                               uint16_t numSlices);
 
-    // Get the next slice; work stealing happens here if work stealing is
-    // on. Returns false if there are no more slices to hand out.
-    bool getSliceForWorker(uint32_t workerId, uint16_t *sliceId);
-    bool getSliceForMainThread(uint16_t *sliceId);
-
     // Abort the current job.
     void abortJob();
 };
 
 } // namespace js
 
 #endif /* vm_ThreadPool_h */
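
For orientation, a sketch of what a client of the revised interface looks like: a single job class now receives the ThreadPoolWorker on both entry points and drains its slices through getSlice(), with the main thread entering through the same worker object (workers_[0]). The CountingJob class and the nullptr ForkJoinContext argument below are assumptions of this sketch, not code from the patch; the real client is ForkJoinShared in ForkJoin.cpp.

    #include "mozilla/Assertions.h"
    #include "mozilla/Atomics.h"
    #include "mozilla/Attributes.h"

    #include "vm/ThreadPool.h"

    // Hypothetical ParallelJob that just counts the slices it processes.
    class CountingJob : public js::ParallelJob
    {
        mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> processed_;

        bool drain(js::ThreadPoolWorker *worker) {
            uint16_t sliceId;
            // getSlice() pops this worker's own slices first and then steals.
            // Passing nullptr for the ForkJoinContext is an assumption here.
            while (worker->getSlice(nullptr, &sliceId))
                processed_++;
            return true;  // returning false would abort the job
        }

      public:
        CountingJob() : processed_(0) { }

        virtual bool executeFromWorker(js::ThreadPoolWorker *worker,
                                       uintptr_t stackLimit) MOZ_OVERRIDE
        {
            return drain(worker);
        }

        virtual bool executeFromMainThread(js::ThreadPoolWorker *mainWorker) MOZ_OVERRIDE
        {
            MOZ_ASSERT(mainWorker->isMainThread());  // worker id 0 by convention
            return drain(mainWorker);
        }

        uint32_t processed() const { return processed_; }
    };

    // Usage sketch: pool.executeJob(cx, &job, 0, 16) hands out slices [0, 16)
    // across numWorkers() workers and blocks until they have all joined.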