/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
//   timer while the profiler is active. It involves writing a stack trace plus
//   a variety of other values (memory measurements, responsiveness
//   measurements, markers, etc.) into the main ProfileBuffer. The sampling is
//   done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
//   get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
//   call (profiler_get_backtrace()). It involves writing a stack trace and
//   little else into a temporary ProfileBuffer, and wrapping that up in a
//   ProfilerBacktrace that can be subsequently used in a marker. The sampling
//   is done on-thread, and so Registers::SyncPopulate() is used to get the
//   register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
//   API call (profiler_suspend_and_sample_thread()). It involves getting a
//   stack trace via a ProfilerStackCollector; it does not write to a
//   ProfileBuffer. The sampling is done from off-thread, and so uses
//   SuspendAndSampleAndResumeThread() to get the register values.
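//
// As an illustrative sketch (not part of the API contract), a synchronous
// sample is typically captured and later attached to a marker, roughly like:
//
//   UniqueProfilerBacktrace bt = profiler_get_backtrace();
//   // ... later, hand the backtrace to a ProfilerMarkerPayload so the
//   // captured stack appears alongside the marker in the profile.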

#include <algorithm>
#include <ostream>
#include <fstream>
#include <sstream>
#include <errno.h>

#include "platform.h"
#include "PlatformMacros.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Atomics.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Vector.h"
#include "GeckoProfiler.h"
#include "VTuneProfiler.h"
#include "GeckoProfilerReporter.h"
#include "ProfilerIOInterposeObserver.h"
#include "mozilla/AutoProfilerLabel.h"
#include "mozilla/ExtensionPolicyService.h"
#include "mozilla/Scheduler.h"
#include "mozilla/StackWalk.h"
#include "mozilla/StaticPtr.h"
#include "mozilla/SystemGroup.h"
#include "mozilla/ThreadLocal.h"
#include "mozilla/TimeStamp.h"
#include "mozilla/Tuple.h"
#include "mozilla/extensions/WebExtensionPolicy.h"
#include "ThreadInfo.h"
#include "nsIHttpProtocolHandler.h"
#include "nsIObserverService.h"
#include "nsIPropertyBag2.h"
#include "nsIXULAppInfo.h"
#include "nsIXULRuntime.h"
#include "nsDirectoryServiceUtils.h"
#include "nsDirectoryServiceDefs.h"
#include "nsJSPrincipals.h"
#include "nsMemoryReporterManager.h"
#include "nsScriptSecurityManager.h"
#include "nsXULAppAPI.h"
#include "nsProfilerStartParams.h"
#include "ProfilerParent.h"
#include "mozilla/Services.h"
#include "nsThreadUtils.h"
#include "ProfilerMarkerPayload.h"
#include "memory_hooks.h"
#include "shared-libraries.h"
#include "prdtoa.h"
#include "prtime.h"

#if defined(XP_WIN)
#include <processthreadsapi.h>  // for GetCurrentProcessId()
#else
#include <unistd.h> // for getpid()
#endif // defined(XP_WIN)

#ifdef MOZ_TASK_TRACER
#include "GeckoTaskTracer.h"
#endif

#if defined(GP_OS_android)
# include "FennecJNINatives.h"
# include "FennecJNIWrappers.h"
#endif

// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif

// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif

// AArch64 Win64 builds use frame pointers.
#if defined(GP_PLAT_arm64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif

// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif

// ARM Linux and Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
# define HAVE_NATIVE_UNWIND
# define USE_EHABI_STACKWALK
# include "EHABIStackWalk.h"
#endif

// The remaining Linux and Android builds use LUL, which uses DWARF info to
// unwind stacks.
#if defined(GP_PLAT_amd64_linux) || \
    defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_mips64_linux) || \
    defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
# define HAVE_NATIVE_UNWIND
# define USE_LUL_STACKWALK
# include "lul/LulMain.h"
# include "lul/platform-linux-lul.h"

// On Linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.)
//
// FramePointerStackWalk can produce incomplete stacks when the current entry
// is in a shared library without frame pointers; however, LUL can take a long
// time to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
# if defined(MOZ_PROFILING)
#  define USE_FRAME_POINTER_STACK_WALK
# endif
#endif

// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
# define HAVE_FASTINIT_NATIVE_UNWIND
#endif
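
// To summarize the configuration above (a quick reference derived from the
// directives, not an additional source of truth):
//
//   x86 Windows               -> FramePointerStackWalk
//   amd64 Windows             -> MozStackWalk
//   arm64 Windows             -> FramePointerStackWalk
//   Mac (MOZ_PROFILING only)  -> FramePointerStackWalk
//   ARM Linux/Android         -> EHABIStackWalk
//   other Linux/Android       -> LUL, plus FramePointerStackWalk for
//                                backtrace samples when MOZ_PROFILING is set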

#ifdef MOZ_VALGRIND
# include <valgrind/memcheck.h>
#else
# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len)   ((void)0)
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android)
#include <ucontext.h>
#endif

using namespace mozilla;
using mozilla::profiler::detail::RacyFeatures;

LazyLogModule gProfilerLog("prof");

#if defined(GP_OS_android)
class GeckoJavaSampler : public java::GeckoJavaSampler::Natives<GeckoJavaSampler>
{
private:
  GeckoJavaSampler();

public:
  static double GetProfilerTime() {
    if (!profiler_is_active()) {
      return 0.0;
    }
    return profiler_time();
  }
};
#endif

class PSMutex : public StaticMutex {};

typedef BaseAutoLock<PSMutex&> PSAutoLock;

// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
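
// A minimal sketch of the locking pattern used throughout this file: take the
// lock, then pass it along (as PSLockRef) to prove that gPSMutex is held:
//
//   PSAutoLock lock(gPSMutex);
//   if (ActivePS::Exists(lock)) {
//     double interval = ActivePS::Interval(lock);
//     // ... use other locked state ...
//   }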

#define PS_GET(type_, name_) \
  static type_ name_(PSLockRef) { return sInstance->m##name_; }

#define PS_GET_LOCKLESS(type_, name_) \
  static type_ name_() { return sInstance->m##name_; }

#define PS_GET_AND_SET(type_, name_) \
  PS_GET(type_, name_) \
  static void Set##name_(PSLockRef, type_ a##name_) \
    { sInstance->m##name_ = a##name_; }
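
// For example, PS_GET_AND_SET(bool, IsPaused) expands to:
//
//   static bool IsPaused(PSLockRef) { return sInstance->mIsPaused; }
//   static void SetIsPaused(PSLockRef, bool aIsPaused)
//     { sInstance->mIsPaused = aIsPaused; }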

// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.

// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
//
// - each thread's RacyRegisteredThread object is accessible without locking via
//   TLSRegisteredThread::RacyRegisteredThread().
class CorePS
{
private:
  CorePS()
    : mProcessStartTime(TimeStamp::ProcessCreation())
#ifdef USE_LUL_STACKWALK
    , mLul(nullptr)
#endif
  {}

  ~CorePS()
  {
  }

public:
  static void Create(PSLockRef aLock) { sInstance = new CorePS(); }

  static void Destroy(PSLockRef aLock)
  {
    delete sInstance;
    sInstance = nullptr;
  }

  // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
  // being locked. This is because CorePS is instantiated so early on the main
  // thread that we don't have to worry about it being racy.
  static bool Exists() { return !!sInstance; }

  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
                        size_t& aProfSize, size_t& aLulSize)
  {
    aProfSize += aMallocSizeOf(sInstance);

    for (auto& registeredThread : sInstance->mRegisteredThreads) {
      aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
    }

    for (auto& registeredPage : sInstance->mRegisteredPages) {
      aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Measurement of the following things may be added later if DMD finds it
    // is worthwhile:
    // - CorePS::mRegisteredThreads itself (its elements' children are
    // measured above)
    // - CorePS::mRegisteredPages itself (its elements' children are
    // measured above)
    // - CorePS::mInterposeObserver

#if defined(USE_LUL_STACKWALK)
    if (sInstance->mLul) {
      aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
    }
#endif
  }

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)

  PS_GET(const nsTArray<UniquePtr<RegisteredThread>>&, RegisteredThreads)

  static void AppendRegisteredThread(PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread)
  {
    sInstance->mRegisteredThreads.AppendElement(std::move(aRegisteredThread));
  }

  static void RemoveRegisteredThread(PSLockRef, RegisteredThread* aRegisteredThread)
  {
    // Remove aRegisteredThread from mRegisteredThreads.
    // Can't use RemoveElement() because we can't equality-compare a UniquePtr
    // to a raw pointer.
    sInstance->mRegisteredThreads.RemoveElementsBy(
      [&](UniquePtr<RegisteredThread>& rt) { return rt.get() == aRegisteredThread; });
  }

  PS_GET(nsTArray<RefPtr<PageInformation>>&, RegisteredPages)

  static void AppendRegisteredPage(
    PSLockRef,
    RefPtr<PageInformation>&& aRegisteredPage)
  {
#ifdef DEBUG
    struct RegisteredPageComparator
    {
      bool Equals(PageInformation* aA,
                  PageInformation* aB) const
      {
        return aA->Equals(aB);
      }
    };
    MOZ_ASSERT(!sInstance->mRegisteredPages.Contains(
      aRegisteredPage, RegisteredPageComparator()));
#endif
    sInstance->mRegisteredPages.AppendElement(
      std::move(aRegisteredPage));
  }

  static void RemoveRegisteredPages(PSLockRef,
                                    const nsID& aRegisteredDocShellId)
  {
    // Remove the page(s) with the given DocShell Id from mRegisteredPages.
    sInstance->mRegisteredPages.RemoveElementsBy(
      [&](RefPtr<PageInformation>& rd) {
        return rd->DocShellId().Equals(aRegisteredDocShellId);
      });
  }

  PS_GET(const nsTArray<BaseProfilerCount*>&, Counters)

  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter)
  {
    // We don't own the counters; they may be stored in static objects.
    sInstance->mCounters.AppendElement(aCounter);
  }

  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter)
  {
    // We may be called to remove a counter after the profiler is stopped or
    // late in shutdown.
    if (sInstance) {
      sInstance->mCounters.RemoveElement(aCounter);
    }
  }

#ifdef USE_LUL_STACKWALK
  static lul::LUL* Lul(PSLockRef) { return sInstance->mLul.get(); }
  static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul)
  {
    sInstance->mLul = std::move(aLul);
  }
#endif

private:
  // The singleton instance
  static CorePS* sInstance;

  // The time that the process started.
  const TimeStamp mProcessStartTime;

  // Info on all the registered threads.
  // ThreadIds in mRegisteredThreads are unique.
  nsTArray<UniquePtr<RegisteredThread>> mRegisteredThreads;

  // Info on all the registered pages.
  // DocShellId and DocShellHistoryId pairs in mRegisteredPages are unique.
  nsTArray<RefPtr<PageInformation>> mRegisteredPages;

  // Non-owning pointers to all active counters
  nsTArray<BaseProfilerCount*> mCounters;

#ifdef USE_LUL_STACKWALK
  // LUL's state. Null prior to the first activation, non-null thereafter.
  UniquePtr<lul::LUL> mLul;
#endif
};

CorePS* CorePS::sInstance = nullptr;

class SamplerThread;

static SamplerThread*
NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval);

struct LiveProfiledThreadData
{
  RegisteredThread* mRegisteredThread;
  UniquePtr<ProfiledThreadData> mProfiledThreadData;
};

// This class contains the profiler's global state that is valid only when the
// profiler is active. When not instantiated, the profiler is inactive.
//
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
// CorePS.
//
class ActivePS
{
private:
  static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount)
  {
    // Filter out any features unavailable in this platform/configuration.
    aFeatures &= profiler_get_available_features();

#if defined(GP_OS_android)
    if (!jni::IsFennec()) {
      aFeatures &= ~ProfilerFeature::Java;
    }
#endif

    // Always enable ProfilerFeature::Threads if we have a filter, because
    // users sometimes ask to filter by a list of threads but forget to
    // explicitly specify ProfilerFeature::Threads.
    if (aFilterCount > 0) {
      aFeatures |= ProfilerFeature::Threads;
    }

    return aFeatures;
  }

  ActivePS(PSLockRef aLock, uint32_t aCapacity, double aInterval,
           uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
           const Maybe<double>& aDuration)
    : mGeneration(sNextGeneration++)
    , mCapacity(aCapacity)
    , mDuration(aDuration)
    , mInterval(aInterval)
    , mFeatures(AdjustFeatures(aFeatures, aFilterCount))
    , mBuffer(MakeUnique<ProfileBuffer>(aCapacity))
      // The new sampler thread doesn't start sampling immediately because the
      // main loop within Run() is blocked until this function's caller unlocks
      // gPSMutex.
    , mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval))
    , mInterposeObserver(ProfilerFeature::HasMainThreadIO(aFeatures)
                         ? new ProfilerIOInterposeObserver()
                         : nullptr)
    , mIsPaused(false)
#if defined(GP_OS_linux)
    , mWasPaused(false)
#endif
  {
    // Deep copy aFilters.
    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
    for (uint32_t i = 0; i < aFilterCount; ++i) {
      mFilters[i] = aFilters[i];
    }

    if (mInterposeObserver) {
      // We need to register the observer on the main thread, because we want
      // to observe IO that happens on the main thread.
      if (NS_IsMainThread()) {
        IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
      } else {
        RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
        NS_DispatchToMainThread(
          NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
            IOInterposer::Register(IOInterposeObserver::OpAll, observer);
          }));
      }
    }
  }

  ~ActivePS()
  {
    if (mInterposeObserver) {
      // We need to unregister the observer on the main thread, because that's
      // where we've registered it.
      if (NS_IsMainThread()) {
        IOInterposer::Unregister(IOInterposeObserver::OpAll, mInterposeObserver);
      } else {
        RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
        NS_DispatchToMainThread(
          NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
            IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
          }));
      }
    }
  }

  bool ThreadSelected(const char* aThreadName)
  {
    MOZ_RELEASE_ASSERT(sInstance);

    if (mFilters.empty()) {
      return true;
    }

    std::string name = aThreadName;
    std::transform(name.begin(), name.end(), name.begin(), ::tolower);

    for (uint32_t i = 0; i < mFilters.length(); ++i) {
      std::string filter = mFilters[i];
      std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);

      // Crude, non-UTF-8-compatible, case-insensitive substring search.
      if (name.find(filter) != std::string::npos) {
        return true;
      }

      // If the filter starts with "pid:", check for a pid match.
      if (filter.find("pid:") == 0) {
        std::string mypid = std::to_string(
#ifdef XP_WIN
          GetCurrentProcessId()
#else
          getpid()
#endif
        );
        if (filter.compare(4, std::string::npos, mypid) == 0) {
          return true;
        }
      }
    }

    return false;
  }
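
  // For example, with filters {"GeckoMain", "pid:123"} (illustrative values):
  // a thread named "GeckoMain" matches the first filter via case-insensitive
  // substring search, and every thread matches the second filter if the
  // current process id is 123. An empty filter list selects all threads.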

public:
  static void Create(PSLockRef aLock, uint32_t aCapacity, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, const Maybe<double>& aDuration)
  {
    sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures,
                             aFilters, aFilterCount, aDuration);
  }

  static MOZ_MUST_USE SamplerThread* Destroy(PSLockRef aLock)
  {
    auto samplerThread = sInstance->mSamplerThread;
    delete sInstance;
    sInstance = nullptr;

    return samplerThread;
  }

  static bool Exists(PSLockRef) { return !!sInstance; }

  static bool Equals(PSLockRef,
                     uint32_t aCapacity, const Maybe<double>& aDuration,
                     double aInterval, uint32_t aFeatures,
                     const char** aFilters, uint32_t aFilterCount)
  {
    if (sInstance->mCapacity != aCapacity ||
        sInstance->mDuration != aDuration ||
        sInstance->mInterval != aInterval ||
        sInstance->mFeatures != aFeatures ||
        sInstance->mFilters.length() != aFilterCount) {
      return false;
    }

    for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
      if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
        return false;
      }
    }
    return true;
  }

  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf)
  {
    size_t n = aMallocSizeOf(sInstance);

    n += sInstance->mBuffer->SizeOfIncludingThis(aMallocSizeOf);

    // Measurement of the following members may be added later if DMD finds it
    // is worthwhile:
    // - mLiveProfiledThreads (both the array itself, and the contents)
    // - mDeadProfiledThreads (both the array itself, and the contents)
    //

    return n;
  }

  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo)
  {
    MOZ_RELEASE_ASSERT(sInstance);

    return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
            sInstance->ThreadSelected(aInfo->Name()));
  }

  PS_GET(uint32_t, Generation)

  PS_GET(uint32_t, Capacity)

  PS_GET(Maybe<double>, Duration)

  PS_GET(double, Interval)

  PS_GET(uint32_t, Features)

  #define PS_GET_FEATURE(n_, str_, Name_) \
    static bool Feature##Name_(PSLockRef) \
    { \
      return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
    }

  PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)

  #undef PS_GET_FEATURE

  PS_GET(const Vector<std::string>&, Filters)

  static ProfileBuffer& Buffer(PSLockRef) { return *sInstance->mBuffer.get(); }

  static const nsTArray<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef)
  {
    return sInstance->mLiveProfiledThreads;
  }

  // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
  // for all threads that should be included in a profile, both for threads
  // that are still registered, and for threads that have been unregistered but
  // still have data in the buffer.
  // For threads that have already been unregistered, the RegisteredThread
  // pointer will be null.
  // The returned array is sorted by thread register time.
  // Do not hold on to the return value across thread registration or profiler
  // restarts.
  static nsTArray<Pair<RegisteredThread*, ProfiledThreadData*>> ProfiledThreads(PSLockRef)
  {
    nsTArray<Pair<RegisteredThread*, ProfiledThreadData*>> array;
    for (auto& t : sInstance->mLiveProfiledThreads) {
      array.AppendElement(MakePair(t.mRegisteredThread, t.mProfiledThreadData.get()));
    }
    for (auto& t : sInstance->mDeadProfiledThreads) {
      array.AppendElement(MakePair((RegisteredThread*)nullptr, t.get()));
    }

    class ThreadRegisterTimeComparator {
    public:
      bool Equals(const Pair<RegisteredThread*, ProfiledThreadData*>& a,
                  const Pair<RegisteredThread*, ProfiledThreadData*>& b) const
      {
        return a.second()->Info()->RegisterTime() == b.second()->Info()->RegisterTime();
      }

      bool LessThan(const Pair<RegisteredThread*, ProfiledThreadData*>& a,
                    const Pair<RegisteredThread*, ProfiledThreadData*>& b) const
      {
        return a.second()->Info()->RegisterTime() < b.second()->Info()->RegisterTime();
      }
    };
    array.Sort(ThreadRegisterTimeComparator());
    return array;
  }

  static nsTArray<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock)
  {
    nsTArray<RefPtr<PageInformation>> array;
    for (auto& d : CorePS::RegisteredPages(aLock)) {
      array.AppendElement(d);
    }
    for (auto& d : sInstance->mDeadProfiledPages) {
      array.AppendElement(d);
    }
    // Unlike the threads, we don't need to sort the pages, since we won't
    // show them as a list.
    return array;
  }

  // Do a linear search through mLiveProfiledThreads to find the
  // ProfiledThreadData object for a RegisteredThread.
  static ProfiledThreadData* GetProfiledThreadData(PSLockRef,
                                                   RegisteredThread* aRegisteredThread)
  {
    for (size_t i = 0; i < sInstance->mLiveProfiledThreads.Length(); i++) {
      LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
      if (thread.mRegisteredThread == aRegisteredThread) {
        return thread.mProfiledThreadData.get();
      }
    }
    return nullptr;
  }

  static ProfiledThreadData*
  AddLiveProfiledThread(PSLockRef, RegisteredThread* aRegisteredThread,
                        UniquePtr<ProfiledThreadData>&& aProfiledThreadData)
  {
    sInstance->mLiveProfiledThreads.AppendElement(
      LiveProfiledThreadData{ aRegisteredThread, std::move(aProfiledThreadData) });

    // Return a weak pointer to the ProfiledThreadData object.
    return sInstance->mLiveProfiledThreads.LastElement().mProfiledThreadData.get();
  }

  static void UnregisterThread(PSLockRef aLockRef, RegisteredThread* aRegisteredThread)
  {
    DiscardExpiredDeadProfiledThreads(aLockRef);

    // Find the right entry in the mLiveProfiledThreads array and remove the
    // element, moving the ProfiledThreadData object for the thread into the
    // mDeadProfiledThreads array.
    // The thread's RegisteredThread object gets destroyed here.
    for (size_t i = 0; i < sInstance->mLiveProfiledThreads.Length(); i++) {
      LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
      if (thread.mRegisteredThread == aRegisteredThread) {
        thread.mProfiledThreadData->NotifyUnregistered(sInstance->mBuffer->mRangeEnd);
        sInstance->mDeadProfiledThreads.AppendElement(std::move(thread.mProfiledThreadData));
        sInstance->mLiveProfiledThreads.RemoveElementAt(i);
        return;
      }
    }
  }

  PS_GET_AND_SET(bool, IsPaused)

#if defined(GP_OS_linux)
  PS_GET_AND_SET(bool, WasPaused)
#endif

  static void DiscardExpiredDeadProfiledThreads(PSLockRef)
  {
    uint64_t bufferRangeStart = sInstance->mBuffer->mRangeStart;
    // Discard any dead threads that were unregistered before bufferRangeStart.
    sInstance->mDeadProfiledThreads.RemoveElementsBy(
      [bufferRangeStart](UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
        Maybe<uint64_t> bufferPosition =
          aProfiledThreadData->BufferPositionWhenUnregistered();
        MOZ_RELEASE_ASSERT(bufferPosition, "should have unregistered this thread");
        return *bufferPosition < bufferRangeStart;
      });
  }

  static void UnregisterPages(PSLockRef aLock,
                              const nsID& aRegisteredDocShellId)
  {
    auto& registeredPages = CorePS::RegisteredPages(aLock);
    for (size_t i = 0; i < registeredPages.Length(); i++) {
      RefPtr<PageInformation>& page = registeredPages[i];
      if (page->DocShellId().Equals(aRegisteredDocShellId)) {
        page->NotifyUnregistered(sInstance->mBuffer->mRangeEnd);
        sInstance->mDeadProfiledPages.AppendElement(std::move(page));
        registeredPages.RemoveElementAt(i--);
      }
    }
  }

  static void DiscardExpiredPages(PSLockRef)
  {
    uint64_t bufferRangeStart = sInstance->mBuffer->mRangeStart;
    // Discard any dead pages that were unregistered before
    // bufferRangeStart.
    sInstance->mDeadProfiledPages.RemoveElementsBy(
      [bufferRangeStart](RefPtr<PageInformation>& aProfiledPage) {
        Maybe<uint64_t> bufferPosition =
          aProfiledPage->BufferPositionWhenUnregistered();
        MOZ_RELEASE_ASSERT(bufferPosition,
                           "should have unregistered this page");
        return *bufferPosition < bufferRangeStart;
      });
  }

private:
  // The singleton instance.
  static ActivePS* sInstance;

  // We need to track activity generations. If we didn't we could have the
  // following scenario.
  //
  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
  //   gPSMutex, deletes the SamplerThread (which does a join).
  //
  // - profiler_start() runs on a different thread, locks gPSMutex,
  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
  //   completes.
  //
  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
  //   and continues as if the start/stop pair didn't occur. Also
  //   profiler_stop() is stuck, unable to finish.
  //
  // By checking ActivePS *and* the generation, we can avoid this scenario.
  // sNextGeneration is used to track the next generation number; it is static
  // because it must persist across different ActivePS instantiations.
  const uint32_t mGeneration;
  static uint32_t sNextGeneration;

  // The maximum number of entries in mBuffer.
  const uint32_t mCapacity;

  // The maximum duration of entries in mBuffer.
  const Maybe<double> mDuration;

  // The interval between samples, measured in milliseconds.
  const double mInterval;

  // The profile features that are enabled.
  const uint32_t mFeatures;

  // Substrings of names of threads we want to profile.
  Vector<std::string> mFilters;

  // The buffer into which all samples are recorded. Always non-null. Always
  // used in conjunction with ActivePS::m{Live,Dead}ProfiledThreads.
  const UniquePtr<ProfileBuffer> mBuffer;

  // ProfiledThreadData objects for any threads that were profiled at any point
  // during this run of the profiler:
  //  - mLiveProfiledThreads contains all threads that are still registered, and
  //  - mDeadProfiledThreads contains all threads that have already been
  //    unregistered but for which there is still data in the profile buffer.
  nsTArray<LiveProfiledThreadData> mLiveProfiledThreads;
  nsTArray<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;

  // Info on all the dead pages.
  // Registered pages are moved to this array after unregistration.
  // We keep them in case the profile data still needs them, and discard them
  // once we are sure they won't be needed anymore.
  nsTArray<RefPtr<PageInformation>> mDeadProfiledPages;

  // The current sampler thread. This class is not responsible for destroying
  // the SamplerThread object; the Destroy() method returns it so the caller
  // can destroy it.
  SamplerThread* const mSamplerThread;

  // The interposer that records main thread I/O.
  const RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;

  // Is the profiler paused?
  bool mIsPaused;

#if defined(GP_OS_linux)
  // Used to record whether the profiler was paused just before forking. False
  // at all times except just before/after forking.
  bool mWasPaused;
#endif
};

ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;

#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET

// The mutex that guards accesses to CorePS and ActivePS.
static PSMutex gPSMutex;

Atomic<uint32_t, MemoryOrdering::Relaxed, recordreplay::Behavior::DontPreserve>
  RacyFeatures::sActiveAndFeatures(0);

// Each live thread has a RegisteredThread, and we store a reference to it in TLS.
// This class encapsulates that TLS.
class TLSRegisteredThread
{
public:
  static bool Init(PSLockRef)
  {
    bool ok1 = sRegisteredThread.init();
    bool ok2 = AutoProfilerLabel::sProfilingStack.init();
    return ok1 && ok2;
  }

  // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
  static class RegisteredThread* RegisteredThread(PSLockRef)
  {
    return sRegisteredThread.get();
  }

  // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
  static class RacyRegisteredThread* RacyRegisteredThread()
  {
    class RegisteredThread* registeredThread = sRegisteredThread.get();
    return registeredThread ? &registeredThread->RacyRegisteredThread()
                            : nullptr;
  }

  // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
  // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
  // is marginally slower because it requires an extra pointer indirection.
  static ProfilingStack* Stack() { return AutoProfilerLabel::sProfilingStack.get(); }

  static void SetRegisteredThread(PSLockRef,
                                  class RegisteredThread* aRegisteredThread)
  {
    sRegisteredThread.set(aRegisteredThread);
    AutoProfilerLabel::sProfilingStack.set(
      aRegisteredThread
        ? &aRegisteredThread->RacyRegisteredThread().ProfilingStack()
        : nullptr);
  }

private:
  // This is a non-owning reference to the RegisteredThread;
  // CorePS::mRegisteredThreads is the owning reference. On thread
  // deregistration, this reference is cleared and the RegisteredThread is
  // destroyed.
  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};

MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;

// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
//   AutoProfilerLabel.
//
// - The class functions are hot and must be defined in GeckoProfiler.h so they
//   can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
//   GeckoProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;

// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";

////////////////////////////////////////////////////////////////////////
// BEGIN sampling/unwinding code

// The registers used for stack unwinding and a few other sampling purposes.
// The ctor zero-initializes mPC/mSP/mFP/mLR; users are responsible for
// filling in all the fields (including mContext, where present) before use.
class Registers
{
public:
  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}

#if defined(HAVE_NATIVE_UNWIND)
  // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
  void SyncPopulate();
#endif

  void Clear() { memset(this, 0, sizeof(*this)); }

  // These fields are filled in by
  // SamplerThread::SuspendAndSampleAndResumeThread() for periodic and
  // backtrace samples, and by SyncPopulate() for synchronous samples.
  Address mPC;    // Instruction pointer.
  Address mSP;    // Stack pointer.
  Address mFP;    // Frame pointer.
  Address mLR;    // ARM link register.
#if defined(GP_OS_linux) || defined(GP_OS_android)
  // This contains all the registers, which means it duplicates the four fields
  // above. This is ok.
  ucontext_t* mContext; // The context from the signal handler.
#endif
};

// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;
static const size_t MAX_JS_FRAMES     = 1024;

struct NativeStack
{
  void* mPCs[MAX_NATIVE_FRAMES];
  void* mSPs[MAX_NATIVE_FRAMES];
  size_t mCount;  // Number of frames filled.

  NativeStack()
    : mPCs(), mSPs(), mCount(0)
  {}
};

Atomic<bool> WALKING_JS_STACK(false);

struct AutoWalkJSStack
{
  bool walkAllowed;

  AutoWalkJSStack() : walkAllowed(false) {
    walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
  }

  ~AutoWalkJSStack() {
    if (walkAllowed) {
      WALKING_JS_STACK = false;
    }
  }
};

// Merges the profiling stack, native stack, and JS stack, outputting the
// details to aCollector.
static void
MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
            const RegisteredThread& aRegisteredThread, const Registers& aRegs,
            const NativeStack& aNativeStack,
            ProfilerStackCollector& aCollector)
{
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  const ProfilingStack& profilingStack =
    aRegisteredThread.RacyRegisteredThread().ProfilingStack();
  const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
  uint32_t profilingStackFrameCount = profilingStack.stackSize();
  JSContext* context = aRegisteredThread.GetJSContext();

  // Make a copy of the JS stack into a JSFrame array. This is necessary since,
  // like the native stack, the JS stack is iterated youngest-to-oldest and we
  // need to iterate oldest-to-youngest when adding frames to aCollector.

  // Non-periodic sampling passes Nothing() as the buffer write position to
  // ProfilingFrameIterator to avoid incorrectly resetting the buffer position
  // of sampled JIT frames inside the JS engine.
  Maybe<uint64_t> samplePosInBuffer;
  if (!aIsSynchronous) {
    // aCollector.SamplePositionInBuffer() will return Nothing() when
    // profiler_suspend_and_sample_thread is called from the background hang
    // reporter.
    samplePosInBuffer = aCollector.SamplePositionInBuffer();
  }
  uint32_t jsCount = 0;
  JS::ProfilingFrameIterator::Frame jsFrames[MAX_JS_FRAMES];

  // Only walk the JIT stack if the profiling frame iterator is turned on.
  if (context && JS::IsProfilingEnabledForContext(context)) {
    AutoWalkJSStack autoWalkJSStack;
    const uint32_t maxFrames = ArrayLength(jsFrames);

    if (autoWalkJSStack.walkAllowed) {
      JS::ProfilingFrameIterator::RegisterState registerState;
      registerState.pc = aRegs.mPC;
      registerState.sp = aRegs.mSP;
      registerState.lr = aRegs.mLR;
      registerState.fp = aRegs.mFP;

      JS::ProfilingFrameIterator jsIter(context, registerState, samplePosInBuffer);
      for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
        if (aIsSynchronous || jsIter.isWasm()) {
          uint32_t extracted =
            jsIter.extractStack(jsFrames, jsCount, maxFrames);
          jsCount += extracted;
          if (jsCount == maxFrames) {
            break;
          }
        } else {
          Maybe<JS::ProfilingFrameIterator::Frame> frame =
            jsIter.getPhysicalFrameWithoutLabel();
          if (frame.isSome()) {
            jsFrames[jsCount++] = frame.value();
          }
        }
      }
    }
  }

  // While the profiling stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to
  // aCollector oldest-to-youngest. Thus, iterate over the profiling stack
  // forwards and over the JS and native arrays backwards. Note: this means
  // the terminating condition for jsIndex and nativeIndex is reaching a value
  // below 0.
  uint32_t profilingStackIndex = 0;
  int32_t jsIndex = jsCount - 1;
  int32_t nativeIndex = aNativeStack.mCount - 1;

  uint8_t* lastLabelFrameStackAddr = nullptr;
  uint8_t* jitEndStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
         nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest.
    uint8_t* profilingStackAddr = nullptr;
    uint8_t* jsStackAddr = nullptr;
    uint8_t* nativeStackAddr = nullptr;
    uint8_t* jsActivationAddr = nullptr;

    if (profilingStackIndex != profilingStackFrameCount) {
      const js::ProfilingStackFrame& profilingStackFrame =
        profilingStackFrames[profilingStackIndex];

      if (profilingStackFrame.isLabelFrame() ||
          profilingStackFrame.isSpMarkerFrame()) {
        lastLabelFrameStackAddr = (uint8_t*) profilingStackFrame.stackAddress();
      }

      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
      // To avoid both the profiling stack frame and jit frame being recorded
      // (and showing up twice), the interpreter marks the interpreter
      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
      if (profilingStackFrame.isOSRFrame()) {
        profilingStackIndex++;
        continue;
      }

      MOZ_ASSERT(lastLabelFrameStackAddr);
      profilingStackAddr = lastLabelFrameStackAddr;
    }

    if (jsIndex >= 0) {
      jsStackAddr = (uint8_t*) jsFrames[jsIndex].stackAddress;
      jsActivationAddr = (uint8_t*) jsFrames[jsIndex].activation;
    }

    if (nativeIndex >= 0) {
      nativeStackAddr = (uint8_t*) aNativeStack.mSPs[nativeIndex];
    }

    // If there's a native stack frame which has the same SP as a profiling
    // stack frame, pretend we didn't see the native stack frame.  Ditto for a
    // native stack frame which has the same SP as a JS stack frame.  In effect
    // this means profiling stack frames or JS frames trump conflicting native
    // frames.
    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
                            jsStackAddr == nativeStackAddr)) {
      nativeStackAddr = nullptr;
      nativeIndex--;
      MOZ_ASSERT(profilingStackAddr || jsStackAddr);
    }

    // Sanity checks.
    MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != jsStackAddr &&
                                      profilingStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
                               jsStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
                                   nativeStackAddr != jsStackAddr);

    // Check to see if profiling stack frame is top-most.
    if (profilingStackAddr > jsStackAddr && profilingStackAddr > nativeStackAddr) {
      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
      const js::ProfilingStackFrame& profilingStackFrame =
        profilingStackFrames[profilingStackIndex];

      // Sp marker frames are just annotations and should not be recorded in
      // the profile.
      if (!profilingStackFrame.isSpMarkerFrame()) {
        // The JIT only allows the top-most frame to have a nullptr pc.
        MOZ_ASSERT_IF(profilingStackFrame.isJsFrame() &&
                      profilingStackFrame.script() && !profilingStackFrame.pc(),
                      &profilingStackFrame == &profilingStack.frames[profilingStack.stackSize() - 1]);
        aCollector.CollectProfilingStackFrame(profilingStackFrame);
      }
      profilingStackIndex++;
      continue;
    }

    // Check to see if the JS JIT stack frame is top-most.
    if (jsStackAddr > nativeStackAddr) {
      MOZ_ASSERT(jsIndex >= 0);
      const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex];
      jitEndStackAddr = (uint8_t*) jsFrame.endStackAddress;
      // Stringifying non-wasm JIT frames is delayed until streaming time. To
      // re-lookup the entry in the JitcodeGlobalTable, we need to store the
      // JIT code address (OptInfoAddr) in the circular buffer.
      //
      // Note that we cannot do this when we are synchronously sampling the
      // current thread; that is, when called from profiler_get_backtrace. The
      // captured backtrace is usually externally stored for an indeterminate
      // amount of time, such as in nsRefreshDriver. Problematically, the
      // stored backtrace may be alive across a GC during which the profiler
      // itself is disabled. In that case, the JS engine is free to discard its
      // JIT code. This means that if we inserted such OptInfoAddr entries into
      // the buffer, nsRefreshDriver would now be holding on to a backtrace
      // with stale JIT code return addresses.
      if (aIsSynchronous ||
          jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
        aCollector.CollectWasmFrame(jsFrame.label);
      } else {
        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
        aCollector.CollectJitReturnAddr(jsFrame.returnAddress);
      }

      jsIndex--;
      continue;
    }

    // If we reach here, there must be a native stack frame and it must be the
    // frame with the greatest stack address, i.e. the oldest one.
    if (nativeStackAddr &&
        // If the latest JS frame was JIT, this could be the native frame that
        // corresponds to it. In that case, skip the native frame, because
        // there's no need for the same frame to be present twice in the stack.
        // The JS frame can be considered the symbolicated version of the
        // native frame.
        (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
        // This might still be a JIT operation; check to make sure that it is
        // not in range of the next JS frame's activation address.
        (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
      MOZ_ASSERT(nativeIndex >= 0);
      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
      aCollector.CollectNativeLeafAddr(addr);
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }

  // Update the JS context with the current profile sample buffer generation.
  //
  // Only do this for periodic samples. We don't want to do this for
  // synchronous samples, and we also don't want to do it for calls to
  // profiler_suspend_and_sample_thread() from the background hang reporter -
  // in that case, aCollector.BufferRangeStart() will return Nothing().
  if (!aIsSynchronous && context && aCollector.BufferRangeStart()) {
    uint64_t bufferRangeStart = *aCollector.BufferRangeStart();
    JS::SetJSContextProfilerSampleBufferRangeStart(context, bufferRangeStart);
  }
}

#if defined(GP_OS_windows)
static HANDLE GetThreadHandle(PlatformData* aData);
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
static void
StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
{
  NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
  MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
  nativeStack->mSPs[nativeStack->mCount] = aSP;
  nativeStack->mPCs[nativeStack->mCount] = aPC;
  nativeStack->mCount++;
}
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK)
static void
DoFramePointerBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, NativeStack& aNativeStack)
{
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
  // but it doesn't matter because StackWalkCallback() doesn't use the frame
  // number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  const void* stackEnd = aRegisteredThread.StackTop();
  if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
    FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                          &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
                          const_cast<void*>(stackEnd));
  }
}
#endif

#if defined(USE_MOZ_STACK_WALK)
static void
DoMozStackWalkBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, NativeStack& aNativeStack)
{
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
  // it doesn't matter because StackWalkCallback() doesn't use the frame number
  // argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
  MOZ_ASSERT(thread);
  MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                     &aNativeStack, thread, /* context */ nullptr);
}
#endif

#ifdef USE_EHABI_STACKWALK
static void
DoEHABIBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread,
                 const Registers& aRegs, NativeStack& aNativeStack)
{
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  const mcontext_t* mcontext = &aRegs.mContext->uc_mcontext;
  mcontext_t savedContext;
  const ProfilingStack& profilingStack =
    aRegisteredThread.RacyRegisteredThread().ProfilingStack();

  // The profiling stack contains an "EnterJIT" frame whenever we enter
  // JIT code with profiling enabled; the stack pointer value points to
  // the saved registers. We use this to resume unwinding after
  // encountering JIT code.
  for (uint32_t i = profilingStack.stackSize(); i > 0; --i) {
    // The profiling stack grows towards higher indices, so we iterate
    // backwards (from callee to caller).
    const js::ProfilingStackFrame& frame = profilingStack.frames[i - 1];
    if (!frame.isJsFrame() && strcmp(frame.label(), "EnterJIT") == 0) {
      // Found JIT entry frame.  Unwind up to that point (i.e., force
      // the stack walk to stop before the block of saved registers;
      // note that it yields nondecreasing stack pointers), then restore
      // the saved state.
      uint32_t* vSP = reinterpret_cast<uint32_t*>(frame.stackAddress());

      aNativeStack.mCount +=
        EHABIStackWalk(*mcontext, /* stackBase = */ vSP,
                       aNativeStack.mSPs + aNativeStack.mCount,
                       aNativeStack.mPCs + aNativeStack.mCount,
                       MAX_NATIVE_FRAMES - aNativeStack.mCount);

      memset(&savedContext, 0, sizeof(savedContext));

      // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
      savedContext.arm_r4  = *vSP++;
      savedContext.arm_r5  = *vSP++;
      savedContext.arm_r6  = *vSP++;
      savedContext.arm_r7  = *vSP++;
      savedContext.arm_r8  = *vSP++;
      savedContext.arm_r9  = *vSP++;
      savedContext.arm_r10 = *vSP++;
      savedContext.arm_fp  = *vSP++;
      savedContext.arm_lr  = *vSP++;
      savedContext.arm_sp  = reinterpret_cast<uint32_t>(vSP);
      savedContext.arm_pc  = savedContext.arm_lr;
      mcontext = &savedContext;
    }
  }

  // Now unwind whatever's left (starting from either the last EnterJIT frame
  // or, if no EnterJIT was found, the original registers).
  aNativeStack.mCount +=
    EHABIStackWalk(*mcontext, const_cast<void*>(aRegisteredThread.StackTop()),
                   aNativeStack.mSPs + aNativeStack.mCount,
                   aNativeStack.mPCs + aNativeStack.mCount,
                   MAX_NATIVE_FRAMES - aNativeStack.mCount);
}
#endif

#ifdef USE_LUL_STACKWALK

// See the comment at the callsite for why this function is necessary.
#if defined(MOZ_HAVE_ASAN_BLACKLIST)
MOZ_ASAN_BLACKLIST static void
ASAN_memcpy(void* aDst, const void* aSrc, size_t aLen)
{
  // The obvious thing to do here is call memcpy(). However, although
  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
  // false positive still manifests! So we must implement memcpy() ourselves
  // within this function.
  char* dst = static_cast<char*>(aDst);
  const char* src = static_cast<const char*>(aSrc);

  for (size_t i = 0; i < aLen; i++) {
    dst[i] = src[i];
  }
}
#endif

static void
DoLULBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread,
               const Registers& aRegs, NativeStack& aNativeStack)
{
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  const mcontext_t* mc = &aRegs.mContext->uc_mcontext;

  lul::UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));

#if defined(GP_PLAT_amd64_linux)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
  startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
  startRegs.pc  = lul::TaggedUWord(mc->pc);
  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
  startRegs.sp  = lul::TaggedUWord(mc->sp);
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
#elif defined(GP_PLAT_mips64_linux)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
  startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
#else
# error "Unknown plat"
#endif

  // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
  // stack's registered top point.  Do some basic sanity checks too.  This
  // assumes that the TaggedUWord holding the stack pointer value is valid, but
  // it should be, since it was constructed that way in the code just above.

  // We could construct |stackImg| so that LUL reads directly from the stack in
  // question, rather than from a copy of it.  That would reduce overhead and
  // space use a bit.  However, it gives a problem with dynamic analysis tools
  // (ASan, TSan, Valgrind) which is that such tools will report invalid or
  // racing memory accesses, and such accesses will be reported deep inside LUL.
  // By taking a copy here, we can either sanitise the copy (for Valgrind) or
  // copy it using an unchecked memcpy (for ASan, TSan).  That way we don't have
  // to try and suppress errors inside LUL.
  //
  // N_STACK_BYTES is set to 160KB.  This is big enough to hold all stacks
  // observed in some minutes of testing, whilst keeping the size of this
  // function (DoLULBacktrace)'s frame reasonable.  Most stacks observed in
  // practice are small, 4KB or less, and so the copy costs are insignificant
  // compared to other profiler overhead.
  //
  // |stackImg| is allocated on this (the sampling thread's) stack.  That
  // implies that the frame for this function is at least N_STACK_BYTES large.
  // In general it would be considered unacceptable to have such a large frame
  // on a stack, but it only exists for the unwinder thread, and so is not
  // expected to be a problem.  Allocating it on the heap is troublesome because
  // this function runs whilst the sampled thread is suspended, so any heap
  // allocation risks deadlock.  Allocating it as a global variable is not
  // thread safe, which would be a problem if we ever allow multiple sampler
  // threads.  Hence allocating it on the stack seems to be the least-worst
  // option.

  lul::StackImage stackImg;

  {
#if defined(GP_PLAT_amd64_linux)
    uintptr_t rEDZONE_SIZE = 128;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
#elif defined(GP_PLAT_mips64_linux)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
#else
#   error "Unknown plat"
#endif
    uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
    uintptr_t ws  = sizeof(void*);
    start &= ~(ws-1);
    end   &= ~(ws-1);
    uintptr_t nToCopy = 0;
    if (start < end) {
      nToCopy = end - start;
      if (nToCopy > lul::N_STACK_BYTES) {
        nToCopy = lul::N_STACK_BYTES;
      }
    }
    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
    stackImg.mLen       = nToCopy;
    stackImg.mStartAvma = start;
    if (nToCopy > 0) {
      // If this is a vanilla memcpy(), ASAN makes the following complaint:
      //
      //   ERROR: AddressSanitizer: stack-buffer-underflow ...
      //   ...
      //   HINT: this may be a false positive if your program uses some custom
      //   stack unwind mechanism or swapcontext
      //
      // This code is very much a custom stack unwind mechanism! So we use an
      // alternative memcpy() implementation that is ignored by ASAN.
#if defined(MOZ_HAVE_ASAN_BLACKLIST)
      ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
#else
      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
#endif
      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
    }
  }

  size_t framePointerFramesAcquired = 0;
  lul::LUL* lul = CorePS::Lul(aLock);
  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
              reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
              &aNativeStack.mCount, &framePointerFramesAcquired,
              MAX_NATIVE_FRAMES, &startRegs, &stackImg);

  // Update stats in the LUL stats object.  Unfortunately this requires
  // three global memory operations.
  lul->mStats.mContext += 1;
  lul->mStats.mCFI     += aNativeStack.mCount - 1 - framePointerFramesAcquired;
  lul->mStats.mFP      += framePointerFramesAcquired;
}

#endif

#ifdef HAVE_NATIVE_UNWIND
static void
DoNativeBacktrace(PSLockRef aLock, const RegisteredThread& aRegisteredThread,
                  const Registers& aRegs, NativeStack& aNativeStack)
{
  // This method determines which stackwalker is used for periodic and
  // synchronous samples. (Backtrace samples are treated differently; see
  // profiler_suspend_and_sample_thread() for details.) The only part of the
  // ordering that matters is that LUL must precede FRAME_POINTER, because on
  // Linux they can both be present.
#if defined(USE_LUL_STACKWALK)
  DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#elif defined(USE_EHABI_STACKWALK)
  DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#elif defined(USE_FRAME_POINTER_STACK_WALK)
  DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#elif defined(USE_MOZ_STACK_WALK)
  DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
#else
  #error "Invalid configuration"
#endif
}
#endif

// Writes some components shared by periodic and synchronous profiles to
// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
// and DoPeriodicSample().)
//
// The grammar for entry sequences is in a comment above
// ProfileBuffer::StreamSamplesToJSON.
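// For illustration only (that grammar is authoritative): each sample written
// here begins with a ThreadId entry and a Time entry, followed by the
// entries for the merged stack.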
static inline void
DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
               RegisteredThread& aRegisteredThread, const TimeStamp& aNow,
               const Registers& aRegs, Maybe<uint64_t>* aLastSample,
               ProfileBuffer& aBuffer)
{
  // WARNING: this function runs within the profiler's "critical section".

  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));

  uint64_t samplePos =
    aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
  if (aLastSample) {
    *aLastSample = Some(samplePos);
  }

  TimeDuration delta = aNow - CorePS::ProcessStartTime();
  aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));

  ProfileBufferCollector collector(aBuffer, ActivePS::Features(aLock),
                                   samplePos);
  NativeStack nativeStack;
#if defined(HAVE_NATIVE_UNWIND)
  if (ActivePS::FeatureStackWalk(aLock)) {
    DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack);

    MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
                aRegs, nativeStack, collector);
  } else
#endif
  {
    MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
                aRegs, nativeStack, collector);

    // We can't walk the whole native stack, but we can record the top frame.
    if (ActivePS::FeatureLeaf(aLock)) {
      aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
    }
  }
}

// Writes the components of a synchronous sample to the given ProfileBuffer.
static void
DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
             const TimeStamp& aNow, const Registers& aRegs,
             ProfileBuffer& aBuffer)
{
  // WARNING: this function runs within the profiler's "critical section".

  DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aNow,
                 aRegs, /* aLastSample = */ nullptr, aBuffer);
}

// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
static void
DoPeriodicSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
                 ProfiledThreadData& aProfiledThreadData,
                 const TimeStamp& aNow, const Registers& aRegs)
{
  // WARNING: this function runs within the profiler's "critical section".

  ProfileBuffer& buffer = ActivePS::Buffer(aLock);

  DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aNow,
                 aRegs, &aProfiledThreadData.LastSample(), buffer);

  ProfilerMarkerLinkedList* pendingMarkersList =
    aRegisteredThread.RacyRegisteredThread().GetPendingMarkers();
  while (pendingMarkersList && pendingMarkersList->peek()) {
    ProfilerMarker* marker = pendingMarkersList->popHead();
    buffer.AddStoredMarker(marker);
    buffer.AddEntry(ProfileBufferEntry::Marker(marker));
  }

  ThreadResponsiveness* resp = aProfiledThreadData.GetThreadResponsiveness();
  if (resp && resp->HasData()) {
    double delta = resp->GetUnresponsiveDuration(
      (aNow - CorePS::ProcessStartTime()).ToMilliseconds());
    buffer.AddEntry(ProfileBufferEntry::Responsiveness(delta));
  }
}

// END sampling/unwinding code
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN saving/streaming code

const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;
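// (2^53 - 1 is the largest integer a JS Number, an IEEE-754 double, can
// represent exactly; it matches JS's Number.MAX_SAFE_INTEGER.)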

static int64_t
SafeJSInteger(uint64_t aValue) {
  return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
}

static void
AddSharedLibraryInfoToStream(JSONWriter& aWriter, const SharedLibrary& aLib)
{
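  // Emits one array element per library; illustrative shape (values made
  // up):
  //   { "start": ..., "end": ..., "offset": ...,
  //     "name": "libxul.so", "path": "/usr/lib/firefox/libxul.so",
  //     "debugName": "libxul.so", "debugPath": "...", "breakpadId": "...",
  //     "arch": "x86_64" }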
  aWriter.StartObjectElement();
  aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
  aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
  aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
  aWriter.StringProperty("name", NS_ConvertUTF16toUTF8(aLib.GetModuleName()).get());
  aWriter.StringProperty("path", NS_ConvertUTF16toUTF8(aLib.GetModulePath()).get());
  aWriter.StringProperty("debugName", NS_ConvertUTF16toUTF8(aLib.GetDebugName()).get());
  aWriter.StringProperty("debugPath", NS_ConvertUTF16toUTF8(aLib.GetDebugPath()).get());
  aWriter.StringProperty("breakpadId", aLib.GetBreakpadId().get());
  aWriter.StringProperty("arch", aLib.GetArch().c_str());
  aWriter.EndObject();
}

void
AppendSharedLibraries(JSONWriter& aWriter)
{
  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
  info.SortByAddress();
  for (size_t i = 0; i < info.GetSize(); i++) {
    AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
  }
}

#ifdef MOZ_TASK_TRACER
static void
StreamNameAndThreadId(JSONWriter& aWriter, const char* aName, int aThreadId)
{
  aWriter.StartObjectElement();
  {
    if (XRE_GetProcessType() == GeckoProcessType_Plugin) {
      // TODO Add the proper plugin name
      aWriter.StringProperty("name", "Plugin");
    } else {
      aWriter.StringProperty("name", aName);
    }
    aWriter.IntProperty("tid", aThreadId);
  }
  aWriter.EndObject();
}
#endif

static void
StreamTaskTracer(PSLockRef aLock, SpliceableJSONWriter& aWriter)
{
#ifdef MOZ_TASK_TRACER
  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  aWriter.StartArrayProperty("data");
  {
    UniquePtr<nsTArray<nsCString>> data =
      tasktracer::GetLoggedData(CorePS::ProcessStartTime());
    for (uint32_t i = 0; i < data->Length(); ++i) {
      aWriter.StringElement((data->ElementAt(i)).get());
    }
  }
  aWriter.EndArray();

  aWriter.StartArrayProperty("threads");
  {
    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
    nsTArray<Pair<RegisteredThread*, ProfiledThreadData*>> threads =
      ActivePS::ProfiledThreads(aLock);
    for (auto& thread : threads) {
      RefPtr<ThreadInfo> info = thread.second()->Info();
      StreamNameAndThreadId(aWriter, info->Name(), info->ThreadId());
    }
  }
  aWriter.EndArray();

  aWriter.DoubleProperty(
    "start", static_cast<double>(tasktracer::GetStartTime()));
#endif
}

static void
StreamCategories(SpliceableJSONWriter& aWriter)
{
  // Same order as ProfilingStackFrame::Category.
  // The list of available color names is:
  // transparent, grey, purple, yellow, orange, lightblue, green, blue, magenta
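  // Each category is streamed as one array element of the form
  // (illustrative): { "name": "Idle", "color": "transparent" }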
  aWriter.Start();
  aWriter.StringProperty("name", "Idle");
  aWriter.StringProperty("color", "transparent");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "Other");
  aWriter.StringProperty("color", "grey");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "Layout");
  aWriter.StringProperty("color", "purple");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "JavaScript");
  aWriter.StringProperty("color", "yellow");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "GC / CC");
  aWriter.StringProperty("color", "orange");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "Network");
  aWriter.StringProperty("color", "lightblue");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "Graphics");
  aWriter.StringProperty("color", "green");
  aWriter.EndObject();
  aWriter.Start();
  aWriter.StringProperty("name", "DOM");
  aWriter.StringProperty("color", "blue");
  aWriter.EndObject();
}

static void
StreamMetaJSCustomObject(PSLockRef aLock, SpliceableJSONWriter& aWriter,
                         bool aIsShuttingDown)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  aWriter.IntProperty("version", 14);

  // The "startTime" field holds the number of milliseconds since midnight
  // January 1, 1970 GMT. This grotty code computes (Now - (Now -
  // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
  TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
  aWriter.DoubleProperty(
    "startTime", static_cast<double>(PR_Now()/1000.0 - delta.ToMilliseconds()));

  // Write the shutdownTime field. Unlike startTime, shutdownTime is not an
  // absolute time stamp: It's relative to startTime. This is consistent with
  // all other (non-"startTime") times anywhere in the profile JSON.
  if (aIsShuttingDown) {
    aWriter.DoubleProperty("shutdownTime", profiler_time());
  } else {
    aWriter.NullProperty("shutdownTime");
  }

  aWriter.StartArrayProperty("categories");
  StreamCategories(aWriter);
  aWriter.EndArray();

  if (!NS_IsMainThread()) {
    // Leave the rest of the properties out if we're not on the main thread.
    // At the moment, the only case in which this function is called on a
    // background thread is if we're in a content process and are going to
    // send this profile to the parent process. In that case, the parent
    // process profile's "meta" object already has the rest of the properties,
    // and the parent process profile is dumped on that process's main thread.
    return;
  }

  aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
  aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));

#ifdef DEBUG
  aWriter.IntProperty("debug", 1);
#else
  aWriter.IntProperty("debug", 0);
#endif

  aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0);

  bool asyncStacks = Preferences::GetBool("javascript.options.asyncstack");
  aWriter.IntProperty("asyncstack", asyncStacks);

  aWriter.IntProperty("processType", XRE_GetProcessType());

  nsresult res;
  nsCOMPtr<nsIHttpProtocolHandler> http =
    do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);

  if (!NS_FAILED(res)) {
    nsAutoCString string;

    res = http->GetPlatform(string);
    if (!NS_FAILED(res)) {
      aWriter.StringProperty("platform", string.Data());
    }

    res = http->GetOscpu(string);
    if (!NS_FAILED(res)) {
      aWriter.StringProperty("oscpu", string.Data());
    }

    res = http->GetMisc(string);
    if (!NS_FAILED(res)) {
      aWriter.StringProperty("misc", string.Data());
    }
  }

  nsCOMPtr<nsIXULRuntime> runtime = do_GetService("@mozilla.org/xre/runtime;1");
  if (runtime) {
    nsAutoCString string;

    res = runtime->GetXPCOMABI(string);
    if (!NS_FAILED(res))
      aWriter.StringProperty("abi", string.Data());

    res = runtime->GetWidgetToolkit(string);
    if (!NS_FAILED(res))
      aWriter.StringProperty("toolkit", string.Data());
  }

  nsCOMPtr<nsIXULAppInfo> appInfo =
    do_GetService("@mozilla.org/xre/app-info;1");

  if (appInfo) {
    nsAutoCString string;
    res = appInfo->GetName(string);
    if (!NS_FAILED(res))
      aWriter.StringProperty("product", string.Data());

    res = appInfo->GetAppBuildID(string);
    if (!NS_FAILED(res))
      aWriter.StringProperty("appBuildID", string.Data());

    res = appInfo->GetSourceURL(string);
    if (!NS_FAILED(res))
      aWriter.StringProperty("sourceURL", string.Data());
  }

  nsCOMPtr<nsIPropertyBag2> systemInfo =
    do_GetService("@mozilla.org/system-info;1");
  if (systemInfo) {
    int32_t cpus;
    res = systemInfo->GetPropertyAsInt32(NS_LITERAL_STRING("cpucores"), &cpus);
    if (!NS_FAILED(res)) {
      aWriter.IntProperty("physicalCPUs", cpus);
    }
    res = systemInfo->GetPropertyAsInt32(NS_LITERAL_STRING("cpucount"), &cpus);
    if (!NS_FAILED(res)) {
      aWriter.IntProperty("logicalCPUs", cpus);
    }
  }

  // We should avoid collecting extension metadata for the profiler while
  // XPCOM is shutting down, since it cannot create a new
  // ExtensionPolicyService.
  if (!gXPCOMShuttingDown) {
    aWriter.StartObjectProperty("extensions");
    {
      {
        JSONSchemaWriter schema(aWriter);
        schema.WriteField("id");
        schema.WriteField("name");
        schema.WriteField("baseURL");
      }

      aWriter.StartArrayProperty("data");
      {
        nsTArray<RefPtr<WebExtensionPolicy>> exts;
        ExtensionPolicyService::GetSingleton().GetAll(exts);

        for (auto& ext : exts) {
          aWriter.StartArrayElement(JSONWriter::SingleLineStyle);

          nsAutoString id;
          ext->GetId(id);
          aWriter.StringElement(NS_ConvertUTF16toUTF8(id).get());

          aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name()).get());

          auto url = ext->GetURL(NS_LITERAL_STRING(""));
          if (url.isOk()) {
            aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap()).get());
          }

          aWriter.EndArray();
        }
      }
      aWriter.EndArray();
    }
    aWriter.EndObject();
  }
}

static void
StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  ActivePS::DiscardExpiredPages(aLock);
  for (const auto& page : ActivePS::ProfiledPages(aLock)) {
    page->StreamJSON(aWriter);
  }
}

#if defined(GP_OS_android)
static UniquePtr<ProfileBuffer>
CollectJavaThreadProfileData()
{
  // locked_profiler_start uses a sample count of 1000 for the Java thread.
  // This entry size is enough for now, but we may have to estimate it
  // if the sample count becomes customizable.
  auto buffer = MakeUnique<ProfileBuffer>(1000 * 1000);

  int sampleId = 0;
  while (true) {
    double sampleTime = java::GeckoJavaSampler::GetSampleTime(0, sampleId);
    if (sampleTime == 0.0) {
      break;
    }

    buffer->AddThreadIdEntry(0);
    buffer->AddEntry(ProfileBufferEntry::Time(sampleTime));
    bool parentFrameWasIdleFrame = false;
    int frameId = 0;
    while (true) {
      jni::String::LocalRef frameName =
        java::GeckoJavaSampler::GetFrameName(0, sampleId, frameId++);
      if (!frameName) {
        break;
      }
      nsCString frameNameString = frameName->ToCString();

      // Compute a category for the frame:
      //  - IDLE for the wait function android.os.MessageQueue.nativePollOnce()
      //  - OTHER for any function that's directly called by that wait function
      //  - no category on everything else
      Maybe<js::ProfilingStackFrame::Category> category;
      if (frameNameString.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) {
        category = Some(js::ProfilingStackFrame::Category::IDLE);
        parentFrameWasIdleFrame = true;
      } else if (parentFrameWasIdleFrame) {
        category = Some(js::ProfilingStackFrame::Category::OTHER);
        parentFrameWasIdleFrame = false;
      }

      buffer->CollectCodeLocation("", frameNameString.get(), 0,
          Nothing(), Nothing(), category);
    }
    sampleId++;
  }
  return buffer;
}
#endif

static void
locked_profiler_stream_json_for_this_process(PSLockRef aLock,
                                             SpliceableJSONWriter& aWriter,
                                             double aSinceTime,
                                             bool aIsShuttingDown)
{
  LOG("locked_profiler_stream_json_for_this_process");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  double collectionStart = profiler_time();

  ProfileBuffer& buffer = ActivePS::Buffer(aLock);

  // Put shared library info
  aWriter.StartArrayProperty("libs");
  AppendSharedLibraries(aWriter);
  aWriter.EndArray();

  // Put meta data
  aWriter.StartObjectProperty("meta");
  {
    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown);
  }
  aWriter.EndObject();

  // Put page data
  aWriter.StartArrayProperty("pages");
  {
    StreamPages(aLock, aWriter);
  }
  aWriter.EndArray();

  buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(), aSinceTime);
  buffer.StreamMemoryToJSON(aWriter, CorePS::ProcessStartTime(), aSinceTime);

  // TaskTracer data doesn't belong in the circular buffer.
  if (ActivePS::FeatureTaskTracer(aLock)) {
    aWriter.StartObjectProperty("tasktracer");
    StreamTaskTracer(aLock, aWriter);
    aWriter.EndObject();
  }

  // Lists the samples for each thread profile
  aWriter.StartArrayProperty("threads");
  {
    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
    nsTArray<Pair<RegisteredThread*, ProfiledThreadData*>> threads =
      ActivePS::ProfiledThreads(aLock);
    for (auto& thread : threads) {
      RegisteredThread* registeredThread = thread.first();
      JSContext* cx =
        registeredThread ? registeredThread->GetJSContext() : nullptr;
      ProfiledThreadData* profiledThreadData = thread.second();
      profiledThreadData->StreamJSON(buffer, cx, aWriter,
                                     CorePS::ProcessStartTime(), aSinceTime);
    }

#if defined(GP_OS_android)
    if (ActivePS::FeatureJava(aLock)) {
      java::GeckoJavaSampler::Pause();

      UniquePtr<ProfileBuffer> javaBuffer = CollectJavaThreadProfileData();

      // The thread id of the Java main thread is 0. If we ever support
      // profiling other Java threads, we will have to get their thread ids
      // and names via JNI.
      RefPtr<ThreadInfo> threadInfo = new ThreadInfo(
        "Java Main Thread", 0, false, CorePS::ProcessStartTime());
      ProfiledThreadData profiledThreadData(
        threadInfo, nullptr, ActivePS::FeatureResponsiveness(aLock));
      profiledThreadData.StreamJSON(*javaBuffer.get(), nullptr, aWriter,
                                    CorePS::ProcessStartTime(), aSinceTime);

      java::GeckoJavaSampler::Unpause();
    }
#endif

  }
  aWriter.EndArray();

  aWriter.StartArrayProperty("pausedRanges");
  {
    buffer.StreamPausedRangesToJSON(aWriter, aSinceTime);
  }
  aWriter.EndArray();

  double collectionEnd = profiler_time();

  // Record timestamps for the collection into the buffer, so that consumers
  // know why we didn't collect any samples for its duration.
  // We put these entries into the buffer after we've collected the profile,
  // so they'll be visible for the *next* profile collection (if they haven't
  // been overwritten due to buffer wraparound by then).
  buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStart));
  buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEnd));
}

bool
profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
                                      double aSinceTime,
                                      bool aIsShuttingDown)
{
  LOG("profiler_stream_json_for_this_process");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    return false;
  }

  locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime,
                                               aIsShuttingDown);

  return true;
}

// END saving/streaming code
////////////////////////////////////////////////////////////////////////

static void
PrintUsageThenExit(int aExitCode)
{
  MOZ_RELEASE_ASSERT(NS_IsMainThread());
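
  // For example (illustrative invocation), to profile start-up with a 5ms
  // sampling interval and only the main thread sampled, one might launch
  // the browser with:
  //   MOZ_PROFILER_STARTUP=1 MOZ_PROFILER_STARTUP_INTERVAL=5 \
  //   MOZ_PROFILER_STARTUP_FILTERS=GeckoMain ./firefox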

  printf(
    "\n"
    "Profiler environment variable usage:\n"
    "\n"
    "  MOZ_PROFILER_HELP\n"
    "  If set to any value, prints this message.\n"
    "\n"
    "  MOZ_LOG\n"
    "  Enables logging. The levels of logging available are\n"
    "  'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n"
    "\n"
    "  MOZ_PROFILER_STARTUP\n"
    "  If set to any value, starts the profiler immediately on start-up.\n"
    "  Useful if you want to profile code that runs very early.\n"
    "\n"
    "  MOZ_PROFILER_STARTUP_ENTRIES=<1..>\n"
    "  If MOZ_PROFILER_STARTUP is set, specifies the number of entries in\n"
    "  the profiler's circular buffer when the profiler is first started.\n"
    "  If unset, the platform default is used.\n"
    "\n"
    "  MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
    "  If MOZ_PROFILER_STARTUP is set, specifies the maximum lifetime, in\n"
    "  seconds, of entries in the profiler's circular buffer when the\n"
    "  profiler is first started.\n"
    "  If unset, the lifetime of the entries is restricted only by\n"
    "  MOZ_PROFILER_STARTUP_ENTRIES (or its default value), with no\n"
    "  additional time duration restriction.\n"
    "\n"
    "  MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
    "  If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
    "  measured in milliseconds, when the profiler is first started.\n"
    "  If unset, the platform default is used.\n"
    "\n"
    "  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
    "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
    "  the integer value of the features bitfield.\n"
    "  If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
    "\n"
    "  MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
    "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
    "  a comma-separated list of strings.\n"
    "  Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
    "  If unset, the platform default is used.\n"
    "\n"
    "  MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
    "  If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n"
    "  comma-separated list of strings. A given thread will be sampled if any\n"
    "  of the filters is a case-insensitive substring of the thread name.\n"
    "  If unset, a default is used.\n"
    "\n"
    "  MOZ_PROFILER_SHUTDOWN\n"
    "  If set, the profiler saves a profile to the named file on shutdown.\n"
    "\n"
    "  MOZ_PROFILER_LUL_TEST\n"
    "  If set to any value, runs LUL unit tests at startup.\n"
    "\n"
    "  This platform %s native unwinding.\n"
    "\n",
#if defined(HAVE_NATIVE_UNWIND)
    "supports"
#else
    "does not support"
#endif
  );

  exit(aExitCode);
}

////////////////////////////////////////////////////////////////////////
// BEGIN Sampler

#if defined(GP_OS_linux) || defined(GP_OS_android)
struct SigHandlerCoordinator;
#endif

// Sampler performs setup and teardown of the state required to sample with the
// profiler. Sampler may exist when ActivePS is not present.
//
// SuspendAndSampleAndResumeThread must only be called from a single thread,
// and must not sample the thread it is being called from. A separate Sampler
// instance must be used for each thread which wants to capture samples.

// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
//
// With the exception of SamplerThread, all Sampler objects must be Disable-d
// before releasing the lock which was used to create them. This avoids races
// on linux with the SIGPROF signal handler.

class Sampler
{
public:
  // Sets up the profiler such that it can begin sampling.
  explicit Sampler(PSLockRef aLock);

  // Disable the sampler, restoring it to its previous state. This must be
  // called once, and only once, before the Sampler is destroyed.
  void Disable(PSLockRef aLock);

  // This method suspends and resumes the samplee thread. It calls the
  // passed-in function-like object aProcessRegs (passing it a populated
  // |const Registers&| arg) while the samplee thread is suspended.
  //
  // Func must be a function-like object of type `void(const Registers&)`.
  template<typename Func>
  void SuspendAndSampleAndResumeThread(PSLockRef aLock,
                                       const RegisteredThread& aRegisteredThread,
                                       const Func& aProcessRegs);

private:
#if defined(GP_OS_linux) || defined(GP_OS_android)
  // Used to restore the SIGPROF handler when ours is removed.
  struct sigaction mOldSigprofHandler;

  // This process' ID. Needed as an argument for tgkill in
  // SuspendAndSampleAndResumeThread.
  int mMyPid;

  // The sampler thread's ID.  Used to assert that it is not sampling itself,
  // which would lead to deadlock.
  int mSamplerTid;

public:
  // This is the one-and-only variable used to communicate between the sampler
  // thread and the samplee thread's signal handler. It's static because the
  // samplee thread's signal handler is static.
  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
#endif
};

// END Sampler
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN SamplerThread

// The sampler thread controls sampling and runs whenever the profiler is
// active. It periodically runs through all registered threads, finds those
// that should be sampled, then pauses and samples them.

class SamplerThread : public Sampler
{
public:
  // Creates a sampler thread, but doesn't start it.
  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                double aIntervalMilliseconds);
  ~SamplerThread();

  // This runs on (is!) the sampler thread.
  void Run();

  // This runs on the main thread.
  void Stop(PSLockRef aLock);

private:
  // This suspends the calling thread for the given number of microseconds.
  // Best effort timing.
  void SleepMicro(uint32_t aMicroseconds);

  // The activity generation, for detecting when the sampler thread must stop.
  const uint32_t mActivityGeneration;

  // The interval between samples, measured in microseconds.
  const int mIntervalMicroseconds;

  // The OS-specific handle for the sampler thread.
#if defined(GP_OS_windows)
  HANDLE mThread;
#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || defined(GP_OS_android)
  pthread_t mThread;
#endif

  SamplerThread(const SamplerThread&) = delete;
  void operator=(const SamplerThread&) = delete;
};

// This function is required because we need to create a SamplerThread within
// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
// could probably be removed by moving some code around.
static SamplerThread*
NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval)
{
  return new SamplerThread(aLock, aGeneration, aInterval);
}

// This function is the sampler thread.  This implementation is used for all
// targets.
void
SamplerThread::Run()
{
  PR_SetCurrentThreadName("SamplerThread");

  // This will be positive if we are running behind schedule (sampling less
  // frequently than desired) and negative if we are ahead of schedule.
  TimeDuration lastSleepOvershoot = 0;
  TimeStamp sampleStart = TimeStamp::Now();

  while (true) {
    // This scope is for |lock|. It ends before we sleep below.
    {
      PSAutoLock lock(gPSMutex);

      if (!ActivePS::Exists(lock)) {
        return;
      }

      // At this point profiler_stop() might have been called, and
      // profiler_start() might have been called on another thread. If this
      // happens the generation won't match.
      if (ActivePS::Generation(lock) != mActivityGeneration) {
        return;
      }

      ActivePS::Buffer(lock).DeleteExpiredStoredMarkers();

      if (!ActivePS::IsPaused(lock)) {
        const nsTArray<LiveProfiledThreadData>& liveThreads =
          ActivePS::LiveProfiledThreads(lock);

        TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
        ProfileBuffer& buffer = ActivePS::Buffer(lock);

        // Report memory use
        int64_t rssMemory = 0;
        int64_t ussMemory = 0;
        if (ActivePS::FeatureMemory(lock)) {
          rssMemory = nsMemoryReporterManager::ResidentFast();
#if defined(GP_OS_linux) || defined(GP_OS_android)
          ussMemory = nsMemoryReporterManager::ResidentUnique();
#endif
          if (rssMemory != 0) {
            buffer.AddEntry(ProfileBufferEntry::ResidentMemory(rssMemory));
            if (ussMemory != 0) {
              buffer.AddEntry(ProfileBufferEntry::UnsharedMemory(ussMemory));
            }
          }
          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
        }

        // handle per-process generic counters
        const nsTArray<BaseProfilerCount*>& counters =
          CorePS::Counters(lock);
        TimeStamp now = TimeStamp::Now();
        for (auto& counter : counters) {
          // create Buffer entries for each counter
          buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
          // XXX support keyed maps of counts
          // In the future, we'll support keyed counters - for example,
          // counters with a key which is a thread ID. For "simple" counters
          // we'll just use a key of 0.
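          // Each counter sample therefore appends, in order: CounterId,
          // Time, CounterKey(0), Count, and (if non-zero) Number.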
          int64_t count;
          uint64_t number;
          counter->Sample(count, number);
          buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
          buffer.AddEntry(ProfileBufferEntry::Count(count));
          if (number) {
            buffer.AddEntry(ProfileBufferEntry::Number(number));
          }
        }

        for (auto& thread : liveThreads) {
          RegisteredThread* registeredThread = thread.mRegisteredThread;
          ProfiledThreadData* profiledThreadData =
            thread.mProfiledThreadData.get();
          RefPtr<ThreadInfo> info = registeredThread->Info();

          // If the thread is asleep and has been sampled before in the same
          // sleep episode, find and copy the previous sample, as that's
          // cheaper than taking a new sample.
          if (registeredThread->RacyRegisteredThread().
                CanDuplicateLastSampleDueToSleep()) {
            bool dup_ok =
              ActivePS::Buffer(lock).DuplicateLastSample(
                info->ThreadId(), CorePS::ProcessStartTime(),
                profiledThreadData->LastSample());
            if (dup_ok) {
              continue;
            }
          }

          ThreadResponsiveness* resp = profiledThreadData->GetThreadResponsiveness();
          if (resp) {
            resp->Update();
          }

          now = TimeStamp::Now();
          SuspendAndSampleAndResumeThread(lock, *registeredThread,
                                          [&](const Registers& aRegs) {
            DoPeriodicSample(lock, *registeredThread, *profiledThreadData, now,
                             aRegs);
            // Only report these once per sample-time (if they are 0 we don't
            // put them in the buffer, so for the rest of the threads we
            // won't insert them).
            rssMemory = 0;
            ussMemory = 0;
          });
        }

#if defined(USE_LUL_STACKWALK)
        // The LUL unwind object accumulates frame statistics. Periodically we
        // should poke it to give it a chance to print those statistics.  This
        // involves doing I/O (fprintf, __android_log_print, etc.) and so
        // can't safely be done from the critical section inside
        // SuspendAndSampleAndResumeThread, which is why it is done here.
        CorePS::Lul(lock)->MaybeShowStats();
#endif
      }

      Maybe<double> duration = ActivePS::Duration(lock);
      if (duration) {
        ActivePS::Buffer(lock).DiscardSamplesBeforeTime(
        (TimeStamp::Now() - TimeDuration::FromSeconds(*duration) -
         CorePS::ProcessStartTime())
          .ToMilliseconds());
      }
    }
    // gPSMutex is not held after this point.

    // Calculate how long a sleep to request.  After the sleep, measure how
    // long we actually slept and take the difference into account when
    // calculating the sleep interval for the next iteration.  This is an
    // attempt to keep "on schedule" in the presence of inaccuracy in the
    // actual sleep intervals.
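    // For example (illustrative numbers): with a 1000us interval, if 100us
    // of sampling work elapsed since sampleStart and the previous sleep
    // overshot by 200us, we request a sleep of 1000 - 100 - 200 = 700us.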
    TimeStamp targetSleepEndTime =
      sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
    TimeStamp beforeSleep = TimeStamp::Now();
    TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
    double sleepTime = std::max(0.0, (targetSleepDuration -
                                      lastSleepOvershoot).ToMicroseconds());
    SleepMicro(static_cast<uint32_t>(sleepTime));
    sampleStart = TimeStamp::Now();
    lastSleepOvershoot =
      sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
  }
}

// We #include these files directly because it means those files can use
// declarations from this file trivially.  These provide target-specific
// implementations of all SamplerThread methods except Run().
#if defined(GP_OS_windows)
# include "platform-win32.cpp"
#elif defined(GP_OS_darwin)
# include "platform-macos.cpp"
#elif defined(GP_OS_linux) || defined(GP_OS_android)
# include "platform-linux-android.cpp"
#else
# error "bad platform"
#endif

UniquePlatformData
AllocPlatformData(int aThreadId)
{
  return UniquePlatformData(new PlatformData(aThreadId));
}

void
PlatformDataDestructor::operator()(PlatformData* aData)
{
  delete aData;
}

// END SamplerThread
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN externally visible functions

MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)

NS_IMETHODIMP
GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport,
                                      nsISupports* aData, bool aAnonymize)
{
  MOZ_RELEASE_ASSERT(NS_IsMainThread());

  size_t profSize = 0;
  size_t lulSize = 0;

  {
    PSAutoLock lock(gPSMutex);

    if (CorePS::Exists()) {
      CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
    }

    if (ActivePS::Exists(lock)) {
      profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
    }
  }

  MOZ_COLLECT_REPORT(
    "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize,
    "Memory used by the Gecko Profiler's global state (excluding memory used "
    "by LUL).");

#if defined(USE_LUL_STACKWALK)
  MOZ_COLLECT_REPORT(
    "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize,
    "Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
#endif

  return NS_OK;
}

NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)

static bool
HasFeature(const char** aFeatures, uint32_t aFeatureCount, const char* aFeature)
{
  for (size_t i = 0; i < aFeatureCount; i++) {
    if (strcmp(aFeatures[i], aFeature) == 0) {
      return true;
    }
  }
  return false;
}

uint32_t
ParseFeaturesFromStringArray(const char** aFeatures, uint32_t aFeatureCount)
{
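  // For example (illustrative, assuming "js" and "stackwalk" are among the
  // strings defined by PROFILER_FOR_EACH_FEATURE): {"js", "stackwalk"}
  // yields ProfilerFeature::JS | ProfilerFeature::StackWalk.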
  #define ADD_FEATURE_BIT(n_, str_, Name_) \
    if (HasFeature(aFeatures, aFeatureCount, str_)) { \
      features |= ProfilerFeature::Name_; \
    }

  uint32_t features = 0;
  PROFILER_FOR_EACH_FEATURE(ADD_FEATURE_BIT)

  #undef ADD_FEATURE_BIT

  return features;
}

// Find the RegisteredThread for the current thread. This should only be called
// in places where TLSRegisteredThread can't be used.
static RegisteredThread*
FindCurrentThreadRegisteredThread(PSLockRef aLock)
{
  int id = Thread::GetCurrentId();
  const nsTArray<UniquePtr<RegisteredThread>>& registeredThreads =
    CorePS::RegisteredThreads(aLock);
  for (auto& registeredThread : registeredThreads) {
    if (registeredThread->Info()->ThreadId() == id) {
      return registeredThread.get();
    }
  }

  return nullptr;
}

static ProfilingStack*
locked_register_thread(PSLockRef aLock, const char* aName, void* aStackTop)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  MOZ_RELEASE_ASSERT(!FindCurrentThreadRegisteredThread(aLock));

  VTUNE_REGISTER_THREAD(aName);

  if (!TLSRegisteredThread::Init(aLock)) {
    return nullptr;
  }

  RefPtr<ThreadInfo> info =
    new ThreadInfo(aName, Thread::GetCurrentId(), NS_IsMainThread());
  UniquePtr<RegisteredThread> registeredThread =
    MakeUnique<RegisteredThread>(info, NS_GetCurrentThreadNoCreate(),
                                 aStackTop);

  TLSRegisteredThread::SetRegisteredThread(aLock, registeredThread.get());

  if (ActivePS::Exists(aLock) &&
      ActivePS::ShouldProfileThread(aLock, info)) {
    registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
    nsCOMPtr<nsIEventTarget> eventTarget = registeredThread->GetEventTarget();
    ProfiledThreadData* profiledThreadData =
      ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
        MakeUnique<ProfiledThreadData>(info, eventTarget,
                                       ActivePS::FeatureResponsiveness(aLock)));

    if (ActivePS::FeatureJS(aLock)) {
      // This StartJSSampling() call is on-thread, so we can poll manually to
      // start JS sampling immediately.
      registeredThread->StartJSSampling(
        ActivePS::FeatureTrackOptimizations(aLock));
      registeredThread->PollJSSampling();
      if (registeredThread->GetJSContext()) {
        profiledThreadData->NotifyReceivedJSContext(
          ActivePS::Buffer(aLock).mRangeEnd);
      }
    }
  }

  ProfilingStack* profilingStack =
    &registeredThread->RacyRegisteredThread().ProfilingStack();

  CorePS::AppendRegisteredThread(aLock, std::move(registeredThread));

  return profilingStack;
}

static void
NotifyObservers(const char* aTopic, nsISupports* aSubject = nullptr)
{
  if (!NS_IsMainThread()) {
    // Dispatch a task to the main thread that notifies observers.
    // If NotifyObservers is called both on and off the main thread within a
    // short time, the order of the notifications can be different from the
    // order of the calls to NotifyObservers.
    // Getting the order 100% right isn't that important at the moment, because
    // these notifications are only observed in the parent process, where the
    // profiler_* functions are currently only called on the main thread.
    nsCOMPtr<nsISupports> subject = aSubject;
    NS_DispatchToMainThread(NS_NewRunnableFunction(
      "NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
    return;
  }

  if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
    os->NotifyObservers(aSubject, aTopic, nullptr);
  }
}

static void
NotifyProfilerStarted(const int aCapacity, const Maybe<double>& aDuration,
                      double aInterval, uint32_t aFeatures,
                      const char** aFilters, uint32_t aFilterCount)
{
  nsTArray<nsCString> filtersArray;
  for (size_t i = 0; i < aFilterCount; ++i) {
    filtersArray.AppendElement(aFilters[i]);
  }

  nsCOMPtr<nsIProfilerStartParams> params =
    new nsProfilerStartParams(aCapacity, aDuration, aInterval,
                              aFeatures, filtersArray);

  ProfilerParent::ProfilerStarted(params);
  NotifyObservers("profiler-started", params);
}

static void
locked_profiler_start(PSLockRef aLock, uint32_t aCapacity, double aInterval,
                      uint32_t aFeatures, const char** aFilters,
                      uint32_t aFilterCount, const Maybe<double>& aDuration);

// This basically duplicates AutoProfilerLabel's constructor.
ProfilingStack*
MozGlueLabelEnter(const char* aLabel, const char* aDynamicString, void* aSp)
{
  ProfilingStack* profilingStack = AutoProfilerLabel::sProfilingStack.get();
  if (profilingStack) {
    profilingStack->pushLabelFrame(aLabel, aDynamicString, aSp,
                                   js::ProfilingStackFrame::Category::OTHER);
  }
  return profilingStack;
}

// This basically duplicates AutoProfilerLabel's destructor.
void
MozGlueLabelExit(ProfilingStack* sProfilingStack)
{
  if (sProfilingStack) {
    sProfilingStack->pop();
  }
}

static nsTArray<const char*>
SplitAtCommas(const char* aString, UniquePtr<char[]>& aStorage)
{
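  // For example, SplitAtCommas("GeckoMain,Compositor", storage) returns
  // { "GeckoMain", "Compositor" }, with the characters owned by aStorage.
  // Adjacent commas produce empty-string elements.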
  size_t len = strlen(aString);
  aStorage = MakeUnique<char[]>(len + 1);
  PodCopy(aStorage.get(), aString, len + 1);

  // Iterate over all characters in aStorage and split at commas, by
  // overwriting commas with the null char.
  nsTArray<const char*> array;
  size_t currentElementStart = 0;
  for (size_t i = 0; i <= len; i++) {
    if (aStorage[i] == ',') {
      aStorage[i] = '\0';
    }
    if (aStorage[i] == '\0') {
      array.AppendElement(&aStorage[currentElementStart]);
      currentElementStart = i + 1;
    }
  }
  return array;
}

void
profiler_init(void* aStackTop)
{
  LOG("profiler_init");

  VTUNE_INIT();

  MOZ_RELEASE_ASSERT(!CorePS::Exists());

  if (getenv("MOZ_PROFILER_HELP")) {
    PrintUsageThenExit(0); // terminates execution
  }

  SharedLibraryInfo::Initialize();

  uint32_t features =
#if defined(GP_OS_android)
                      ProfilerFeature::Java |
#endif
                      ProfilerFeature::JS |
                      ProfilerFeature::Leaf |
#if defined(HAVE_NATIVE_UNWIND)
                      ProfilerFeature::StackWalk |
#endif
                      ProfilerFeature::Threads |
                      ProfilerFeature::Responsiveness |
                      0;

  UniquePtr<char[]> filterStorage;

  nsTArray<const char*> filters;
  filters.AppendElement("GeckoMain");
  filters.AppendElement("Compositor");
  filters.AppendElement("DOM Worker");

  int capacity = PROFILER_DEFAULT_ENTRIES;
  Maybe<double> duration = Nothing();
  double interval = PROFILER_DEFAULT_INTERVAL;

  {
    PSAutoLock lock(gPSMutex);

    // We've passed the possible failure point. Instantiate CorePS, which
    // indicates that the profiler has initialized successfully.
    CorePS::Create(lock);

    locked_register_thread(lock, kMainThreadName, aStackTop);

    // Platform-specific initialization.
    PlatformInit(lock);

#ifdef MOZ_TASK_TRACER
    tasktracer::InitTaskTracer();
#endif

#if defined(GP_OS_android)
    if (jni::IsFennec()) {
      GeckoJavaSampler::Init();
    }
#endif

    // Setup support for pushing/popping labels in mozglue.
    RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);

    // (Linux-only) We could create CorePS::mLul and read unwind info into it
    // at this point. That would match the lifetime implied by destruction of
    // it in profiler_shutdown() just below. However, that gives a big delay on
    // startup, even if no profiling is actually to be done. So, instead, it is
    // created on demand at the first call to PlatformStart().

    const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
    if (!startupEnv || startupEnv[0] == '\0') {
      return;
    }

    LOG("- MOZ_PROFILER_STARTUP is set");

    const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
    if (startupCapacity && startupCapacity[0] != '\0') {
      errno = 0;
      capacity = strtol(startupCapacity, nullptr, 10);
      if (errno == 0 && capacity > 0) {
        LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %d", capacity);
      } else {
        LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
            startupCapacity);
        PrintUsageThenExit(1);
      }
    }

    const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
    if (startupDuration && startupDuration[0] != '\0') {
      errno = 0;
      double durationVal = PR_strtod(startupDuration, nullptr);
      if (errno == 0 && durationVal >= 0.0) {
        if (durationVal > 0.0) {
          duration = Some(durationVal);
        }
        LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal);
      } else {
        LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
            startupDuration);
        PrintUsageThenExit(1);
      }
    }

    const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
    if (startupInterval && startupInterval[0] != '\0') {
      errno = 0;
      interval = PR_strtod(startupInterval, nullptr);
      if (errno == 0 && interval > 0.0 && interval <= 1000.0) {
        LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
      } else {
        LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
            startupInterval);
        PrintUsageThenExit(1);
      }
    }

    const char* startupFeaturesBitfield =
      getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
    if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
      errno = 0;
      features = strtol(startupFeaturesBitfield, nullptr, 10);
      if (errno == 0 && features != 0) {
        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
      } else {
        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
            startupFeaturesBitfield);
        PrintUsageThenExit(1);
      }
    } else {
      const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
      if (startupFeatures && startupFeatures[0] != '\0') {
        // Interpret startupFeatures as a list of feature strings, separated by
        // commas.
        UniquePtr<char[]> featureStringStorage;
        nsTArray<const char*> featureStringArray =
          SplitAtCommas(startupFeatures, featureStringStorage);
        features = ParseFeaturesFromStringArray(featureStringArray.Elements(),
                                                featureStringArray.Length());
        LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
      }
    }

    const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
    if (startupFilters && startupFilters[0] != '\0') {
      filters = SplitAtCommas(startupFilters, filterStorage);
      LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
    }

    locked_profiler_start(lock, capacity, interval, features,
                          filters.Elements(), filters.Length(), duration);
  }

  // We do this with gPSMutex unlocked. The comment in profiler_stop() explains
  // why.
  NotifyProfilerStarted(capacity, duration, interval, features,
                        filters.Elements(), filters.Length());
}

static void
locked_profiler_save_profile_to_file(PSLockRef aLock, const char* aFilename,
                                     bool aIsShuttingDown);

static SamplerThread*
locked_profiler_stop(PSLockRef aLock);

void
profiler_shutdown()
{
  LOG("profiler_shutdown");

  VTUNE_SHUTDOWN();

  MOZ_RELEASE_ASSERT(NS_IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // If the profiler is active we must get a handle to the SamplerThread before
  // ActivePS is destroyed, in order to delete it.
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock(gPSMutex);

    // Save the profile on shutdown if requested.
    if (ActivePS::Exists(lock)) {
      const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
      if (filename) {
        locked_profiler_save_profile_to_file(lock, filename,
                                             /* aIsShuttingDown */ true);
      }

      samplerThread = locked_profiler_stop(lock);
    }

    CorePS::Destroy(lock);

    // We just destroyed CorePS and the ThreadInfos it contains, so we can
    // clear this thread's TLSRegisteredThread.
    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);

#ifdef MOZ_TASK_TRACER
    tasktracer::ShutdownTaskTracer();
#endif
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }
}

static bool
WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
                         double aSinceTime,
                         bool aIsShuttingDown)
{
  LOG("WriteProfileToJSONWriter");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  aWriter.Start();
  {
    if (!profiler_stream_json_for_this_process(
          aWriter, aSinceTime, aIsShuttingDown)) {
      return false;
    }

    // Don't include profiles from other processes because this is a
    // synchronous function.
    aWriter.StartArrayProperty("processes");
    aWriter.EndArray();
  }
  aWriter.End();
  return true;
}

UniquePtr<char[]>
profiler_get_profile(double aSinceTime, bool aIsShuttingDown)
{
  LOG("profiler_get_profile");

  SpliceableChunkedJSONWriter b;
  if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown)) {
    return nullptr;
  }
  return b.WriteFunc()->CopyData();
}

void
profiler_get_profile_json_into_lazily_allocated_buffer(
  const std::function<char*(size_t)>& aAllocator,
  double aSinceTime,
  bool aIsShuttingDown)
{
  LOG("profiler_get_profile_json_into_lazily_allocated_buffer");

  SpliceableChunkedJSONWriter b;
  if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown)) {
    return;
  }

  b.WriteFunc()->CopyDataIntoLazilyAllocatedBuffer(aAllocator);
}

void
profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration, double* aInterval,
                          uint32_t* aFeatures, Vector<const char*>* aFilters)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) ||
      NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) ||
      NS_WARN_IF(!aFilters)) {
    return;
  }

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    *aCapacity = 0;
    *aDuration = Nothing();
    *aInterval = 0;
    *aFeatures = 0;
    aFilters->clear();
    return;
  }

  *aCapacity = ActivePS::Capacity(lock);
  *aDuration = ActivePS::Duration(lock);
  *aInterval = ActivePS::Interval(lock);
  *aFeatures = ActivePS::Features(lock);

  const Vector<std::string>& filters = ActivePS::Filters(lock);
  MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
  for (uint32_t i = 0; i < filters.length(); ++i) {
    (*aFilters)[i] = filters[i].c_str();
  }
}

AutoSetProfilerEnvVarsForChildProcess::AutoSetProfilerEnvVarsForChildProcess(
  MOZ_GUARD_OBJECT_NOTIFIER_ONLY_PARAM_IN_IMPL)
  : mSetCapacity()
  , mSetInterval()
  , mSetFeaturesBitfield()
  , mSetFilters()
{
  MOZ_GUARD_OBJECT_NOTIFIER_INIT;

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    PR_SetEnv("MOZ_PROFILER_STARTUP=");
    return;
  }

  PR_SetEnv("MOZ_PROFILER_STARTUP=1");
  SprintfLiteral(mSetCapacity, "MOZ_PROFILER_STARTUP_ENTRIES=%d",
                 ActivePS::Capacity(lock));
  PR_SetEnv(mSetCapacity);

  // Use AppendFloat instead of SprintfLiteral with %f because the decimal
  // separator used by %f is locale-dependent. But the string we produce needs
  // to be parseable by strtod, which only accepts the period character as a
  // decimal separator. AppendFloat always uses the period character.
  nsCString setInterval;
  setInterval.AppendLiteral("MOZ_PROFILER_STARTUP_INTERVAL=");
  setInterval.AppendFloat(ActivePS::Interval(lock));
  strncpy(mSetInterval, setInterval.get(), MOZ_ARRAY_LENGTH(mSetInterval));
  mSetInterval[MOZ_ARRAY_LENGTH(mSetInterval) - 1] = '\0';
  PR_SetEnv(mSetInterval);

  SprintfLiteral(mSetFeaturesBitfield,
                 "MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=%d",
                 ActivePS::Features(lock));
  PR_SetEnv(mSetFeaturesBitfield);

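  // Join the filters with commas; profiler_init() in the child process will
  // split this value again with SplitAtCommas(), so a filter name that
  // itself contains a comma would not survive the round trip.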
  std::string filtersString;
  const Vector<std::string>& filters = ActivePS::Filters(lock);
  for (uint32_t i = 0; i < filters.length(); ++i) {
    filtersString += filters[i];
    if (i != filters.length() - 1) {
      filtersString += ",";
    }
  }
  SprintfLiteral(mSetFilters, "MOZ_PROFILER_STARTUP_FILTERS=%s",
                 filtersString.c_str());
  PR_SetEnv(mSetFilters);
}

AutoSetProfilerEnvVarsForChildProcess::~AutoSetProfilerEnvVarsForChildProcess()
{
  // Our current process doesn't look at these variables after startup, so we
  // can just unset all the variables. This allows us to use literal strings,
  // which will be valid for the whole lifetime of the program and can be
  // passed to PR_SetEnv without problems.
  PR_SetEnv("MOZ_PROFILER_STARTUP=");
  PR_SetEnv("MOZ_PROFILER_STARTUP_ENTRIES=");
  PR_SetEnv("MOZ_PROFILER_STARTUP_INTERVAL=");
  PR_SetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=");
  PR_SetEnv("MOZ_PROFILER_STARTUP_FILTERS=");
}

static void
locked_profiler_save_profile_to_file(PSLockRef aLock, const char* aFilename,
                                     bool aIsShuttingDown = false)
{
  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  std::ofstream stream;
  stream.open(aFilename);
  if (stream.is_open()) {
    SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
    w.Start();
    {
      locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
                                                   aIsShuttingDown);

      // Don't include profiles from other processes because this is a
      // synchronous function.
      w.StartArrayProperty("processes");
      w.EndArray();
    }
    w.End();

    stream.close();
  }
}

void
profiler_save_profile_to_file(const char* aFilename)
{
  LOG("profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    return;
  }

  locked_profiler_save_profile_to_file(lock, aFilename);
}

uint32_t
profiler_get_available_features()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  uint32_t features = 0;

  #define ADD_FEATURE(n_, str_, Name_) ProfilerFeature::Set##Name_(features);

  // Add all the possible features.
  PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)

  #undef ADD_FEATURE

  // Now remove features not supported on this platform/configuration.
#if !defined(GP_OS_android)
  ProfilerFeature::ClearJava(features);
#endif
#if !defined(HAVE_NATIVE_UNWIND)
  ProfilerFeature::ClearStackWalk(features);
#endif
#if !defined(MOZ_TASK_TRACER)
  ProfilerFeature::ClearTaskTracer(features);
#endif

  return features;
}

Maybe<ProfilerBufferInfo>
profiler_get_buffer_info()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    return Nothing();
  }

  return Some(ProfilerBufferInfo {
    ActivePS::Buffer(lock).mRangeStart,
    ActivePS::Buffer(lock).mRangeEnd,
    ActivePS::Capacity(lock)
  });
}

static void
PollJSSamplingForCurrentThread()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  RegisteredThread* registeredThread =
    TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    return;
  }

  registeredThread->PollJSSampling();
}

// When the profiler is started on a background thread, we can't synchronously
// call PollJSSampling on the main thread's ThreadInfo. And the next regular
// call to PollJSSampling on the main thread would only happen once the main
// thread triggers a JS interrupt callback.
// This means that all the JS execution between profiler_start() and the first
// JS interrupt would happen with JS sampling disabled, and we wouldn't get any
// JS function information for that period of time.
// So in order to start JS sampling as soon as possible, we dispatch a runnable
// to the main thread which manually calls PollJSSamplingForCurrentThread().
// In some cases this runnable will lose the race with the next JS interrupt.
// That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls.
static void
TriggerPollJSSamplingOnMainThread()
{
  nsCOMPtr<nsIThread> mainThread;
  nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread));
  if (NS_SUCCEEDED(rv) && mainThread) {
    nsCOMPtr<nsIRunnable> task =
      NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread", []() {
        PollJSSamplingForCurrentThread();
      });
    SystemGroup::Dispatch(TaskCategory::Other, task.forget());
  }
}

static void
locked_profiler_start(PSLockRef aLock, uint32_t aCapacity, double aInterval,
                      uint32_t aFeatures, const char** aFilters,
                      uint32_t aFilterCount, const Maybe<double>& aDuration)
{
  if (LOG_TEST) {
    LOG("locked_profiler_start");
    LOG("- capacity  = %d", aCapacity);
    LOG("- duration  = %.2f", aDuration ? *aDuration : -1);
    LOG("- interval = %.2f", aInterval);

    #define LOG_FEATURE(n_, str_, Name_) \
      if (ProfilerFeature::Has##Name_(aFeatures)) { \
        LOG("- feature  = %s", str_); \
      }

    PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)

    #undef LOG_FEATURE

    for (uint32_t i = 0; i < aFilterCount; i++) {
      LOG("- threads  = %s", aFilters[i]);
    }
  }

  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));

#if defined(GP_PLAT_amd64_windows)
  InitializeWin64ProfilerHooks();
#endif

  // Fall back to the default values if the passed-in values are unreasonable.
  uint32_t capacity = aCapacity > 0 ? aCapacity : PROFILER_DEFAULT_ENTRIES;
  Maybe<double> duration = aDuration;
  if (aDuration && *aDuration <= 0) {
    duration = Nothing();
  }
  double interval = aInterval > 0 ? aInterval : PROFILER_DEFAULT_INTERVAL;

  ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters,
                   aFilterCount, duration);

  // Set up profiling for each registered thread, if appropriate.
  int tid = Thread::GetCurrentId();
  const nsTArray<UniquePtr<RegisteredThread>>& registeredThreads =
    CorePS::RegisteredThreads(aLock);
  for (auto& registeredThread : registeredThreads) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    if (ActivePS::ShouldProfileThread(aLock, info)) {
      registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
      nsCOMPtr<nsIEventTarget> eventTarget = registeredThread->GetEventTarget();
      ProfiledThreadData* profiledThreadData =
        ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
          MakeUnique<ProfiledThreadData>(info, eventTarget,
                                         ActivePS::FeatureResponsiveness(aLock)));
      if (ActivePS::FeatureJS(aLock)) {
        registeredThread->StartJSSampling(
          ActivePS::FeatureTrackOptimizations(aLock));
        if (info->ThreadId() == tid) {
          // We can manually poll the current thread so it starts sampling
          // immediately.
          registeredThread->PollJSSampling();
        } else if (info->IsMainThread()) {
          // Dispatch a runnable to the main thread to call PollJSSampling(),
          // so that we don't have to wait for the next JS interrupt callback
          // in order to start profiling JS.
          TriggerPollJSSamplingOnMainThread();
        }
      }
      registeredThread->RacyRegisteredThread().ReinitializeOnResume();
      if (registeredThread->GetJSContext()) {
        profiledThreadData->NotifyReceivedJSContext(0);
      }
    }
  }

#ifdef MOZ_TASK_TRACER
  if (ActivePS::FeatureTaskTracer(aLock)) {
    tasktracer::StartLogging();
  }
#endif

#if defined(GP_OS_android)
  if (ActivePS::FeatureJava(aLock)) {
    int javaInterval = interval;
    // Java sampling doesn't accurately keep up with 1ms sampling.
    if (javaInterval < 10) {
      javaInterval = 10;
    }
    java::GeckoJavaSampler::Start(javaInterval, 1000);
  }
#endif

  // At the very end, set up RacyFeatures.
  RacyFeatures::SetActive(ActivePS::Features(aLock));
}

void
profiler_start(uint32_t aCapacity, double aInterval, uint32_t aFeatures,
               const char** aFilters, uint32_t aFilterCount,
               const Maybe<double>& aDuration)
{
  LOG("profiler_start");


  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock(gPSMutex);

    // Initialize if necessary.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    // Reset the current state if the profiler is running.
    if (ActivePS::Exists(lock)) {
      samplerThread = locked_profiler_stop(lock);
    }

    locked_profiler_start(lock, aCapacity, aInterval, aFeatures,
                          aFilters, aFilterCount, aDuration);
  }

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  // start counting memory allocations (outside of lock)
  mozilla::profiler::install_memory_counter(true);
#endif

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }
  NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
                        aFilters, aFilterCount);
}
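
// An illustrative call, as a sketch only: the feature set and thread filters
// are hypothetical, and the PROFILER_DEFAULT_* constants are the same
// fallbacks used by locked_profiler_start() above.
//
//   const char* filters[] = { "GeckoMain", "Compositor" };
//   profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
//                  ProfilerFeature::JS | ProfilerFeature::StackWalk,
//                  filters, MOZ_ARRAY_LENGTH(filters),
//                  /* aDuration = */ Nothing());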

void
profiler_ensure_started(uint32_t aCapacity, double aInterval,
                        uint32_t aFeatures, const char** aFilters,
                        uint32_t aFilterCount, const Maybe<double>& aDuration)
{
  LOG("profiler_ensure_started");

  bool startedProfiler = false;
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock(gPSMutex);

    // Initialize if necessary.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    if (ActivePS::Exists(lock)) {
      // The profiler is active.
      if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
                            aFilters, aFilterCount)) {
        // Stop and restart with different settings.
        samplerThread = locked_profiler_stop(lock);
        locked_profiler_start(lock, aCapacity, aInterval, aFeatures,
                              aFilters, aFilterCount, aDuration);
        startedProfiler = true;
      }
    } else {
      // The profiler is stopped.
      locked_profiler_start(lock, aCapacity, aInterval, aFeatures,
                            aFilters, aFilterCount, aDuration);
      startedProfiler = true;
    }
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }
  if (startedProfiler) {
    NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
                          aFilters, aFilterCount);
  }
}

static MOZ_MUST_USE SamplerThread*
locked_profiler_stop(PSLockRef aLock)
{
  LOG("locked_profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  // At the very start, clear RacyFeatures.
  RacyFeatures::SetInactive();

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  mozilla::profiler::install_memory_counter(false);
#endif

#if defined(GP_OS_android)
  if (ActivePS::FeatureJava(aLock)) {
    java::GeckoJavaSampler::Stop();
  }
#endif

#ifdef MOZ_TASK_TRACER
  if (ActivePS::FeatureTaskTracer(aLock)) {
    tasktracer::StopLogging();
  }
#endif

  // Stop sampling live threads.
  int tid = Thread::GetCurrentId();
  const nsTArray<LiveProfiledThreadData>& liveProfiledThreads =
    ActivePS::LiveProfiledThreads(aLock);
  for (auto& thread : liveProfiledThreads) {
    RegisteredThread* registeredThread = thread.mRegisteredThread;
    registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
    if (ActivePS::FeatureJS(aLock)) {
      registeredThread->StopJSSampling();
      RefPtr<ThreadInfo> info = registeredThread->Info();
      if (info->ThreadId() == tid) {
        // We can manually poll the current thread so it stops profiling
        // immediately.
        registeredThread->PollJSSampling();
      } else if (info->IsMainThread()) {
        // Dispatch a runnable to the main thread to call PollJSSampling(),
        // so that we don't have to wait for the next JS interrupt callback
        // in order to stop profiling JS.
        TriggerPollJSSamplingOnMainThread();
      }
    }
  }

  // The Stop() call doesn't actually stop Run(); that happens in this
  // function's caller when the sampler thread is destroyed. Stop() just gives
  // the SamplerThread a chance to do some cleanup with gPSMutex locked.
  SamplerThread* samplerThread = ActivePS::Destroy(aLock);
  samplerThread->Stop(aLock);

  return samplerThread;
}

void
profiler_stop()
{
  LOG("profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  SamplerThread* samplerThread;
  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

    samplerThread = locked_profiler_stop(lock);
  }

  // We notify observers with gPSMutex unlocked. Otherwise we might get a
  // deadlock, if code run by these functions calls a profiler function that
  // locks gPSMutex, for example when it wants to insert a marker.
  // (This has been seen in practice in bug 1346356, when we were still firing
  // these notifications synchronously.)
  ProfilerParent::ProfilerStopped();
  NotifyObservers("profiler-stopped");

  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
  // return so the join operation within the destructor can complete, but Run()
  // needs to lock gPSMutex to return.
  //
  // Because this call occurs with gPSMutex unlocked, it -- including the final
  // iteration of Run()'s loop -- must be able to detect deactivation and
  // return in a way that's safe with respect to other gPSMutex-locking
  // operations that may have occurred in the meantime.
  delete samplerThread;
}

bool
profiler_is_paused()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    return false;
  }

  return ActivePS::IsPaused(lock);
}

void
profiler_pause()
{
  LOG("profiler_pause");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

    ActivePS::SetIsPaused(lock, true);
    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
  }

  // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
  ProfilerParent::ProfilerPaused();
  NotifyObservers("profiler-paused");
}

void
profiler_resume()
{
  LOG("profiler_resume");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Resume(profiler_time()));
    ActivePS::SetIsPaused(lock, false);
  }

  // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
  ProfilerParent::ProfilerResumed();
  NotifyObservers("profiler-resumed");
}
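
// One way a caller might use this pair, as a sketch (the bracketed work is
// hypothetical): pausing keeps the profiler active but suspends sampling
// while something sensitive happens.
//
//   profiler_pause();
//   // ... work that should not be recorded or perturbed by sampling ...
//   profiler_resume();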

bool
profiler_feature_active(uint32_t aFeature)
{
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  return RacyFeatures::IsActiveWithFeature(aFeature);
}
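
// A sketch of typical usage, guarding feature-specific work (the feature
// chosen here is only an example):
//
//   if (profiler_feature_active(ProfilerFeature::JS)) {
//     // ... do work that is only needed when JS sampling is on ...
//   }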

void
profiler_add_sampled_counter(BaseProfilerCount* aCounter)
{
  DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
  PSAutoLock lock(gPSMutex);
  CorePS::AppendCounter(lock, aCounter);
}

void
profiler_remove_sampled_counter(BaseProfilerCount* aCounter)
{
  DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
  PSAutoLock lock(gPSMutex);
  // Note: we don't enforce a final sample, though we could do so if the
  // profiler was active.
  CorePS::RemoveCounter(lock, aCounter);
}

ProfilingStack*
profiler_register_thread(const char* aName, void* aGuessStackTop)
{
  DEBUG_LOG("profiler_register_thread(%s)", aName);

  MOZ_ASSERT_IF(NS_IsMainThread(), Scheduler::IsCooperativeThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // Make sure we have a nsThread wrapper for the current thread, and that NSPR
  // knows its name.
  (void) NS_GetCurrentThread();
  NS_SetCurrentThreadName(aName);

  PSAutoLock lock(gPSMutex);

  void* stackTop = GetStackTop(aGuessStackTop);
  return locked_register_thread(lock, aName, stackTop);
}

void
profiler_unregister_thread()
{
  MOZ_ASSERT_IF(NS_IsMainThread(), Scheduler::IsCooperativeThread());

  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    return;
  }

  PSAutoLock lock(gPSMutex);

  // We don't call RegisteredThread::StopJSSampling() here; there's no point
  // doing that for a JS thread that is in the process of disappearing.

  RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock);
  MOZ_RELEASE_ASSERT(registeredThread ==
                     TLSRegisteredThread::RegisteredThread(lock));
  if (registeredThread) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    DEBUG_LOG("profiler_unregister_thread: %s", info->Name());

    if (ActivePS::Exists(lock)) {
      ActivePS::UnregisterThread(lock, registeredThread);
    }

    // Clear the pointer to the RegisteredThread object that we're about to
    // destroy.
    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);

    // Remove the thread from the list of registered threads. This deletes the
    // registeredThread object.
    CorePS::RemoveRegisteredThread(lock, registeredThread);
  } else {
    // There are two ways FindCurrentThreadRegisteredThread() might have failed.
    //
    // - TLSRegisteredThread::Init() failed in locked_register_thread().
    //
    // - We've already called profiler_unregister_thread() for this thread.
    //   (Whether or not it should, this does happen in practice.)
    //
    // Either way, TLSRegisteredThread should be empty.
    MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock));
  }
}
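
// The expected pairing on a non-main thread, as a sketch (the thread body and
// names are hypothetical). Passing the address of a local variable gives
// GetStackTop() a guess at the top of this thread's stack:
//
//   void MyThreadMain() {
//     char stackTop;
//     profiler_register_thread("MyThread", &stackTop);
//     // ... thread body ...
//     profiler_unregister_thread();
//   }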

void
profiler_register_page(const nsID& aDocShellId,
                       uint32_t aHistoryId,
                       const nsCString& aUrl,
                       bool aIsSubFrame)
{
  DEBUG_LOG("profiler_register_page(%s, %u, %s, %d)",
            aDocShellId.ToString(),
            aHistoryId,
            aUrl.get(),
            aIsSubFrame);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  // If the profiler is not active, delete all previous page entries for the
  // given DocShell, since we won't need them.
  if (!ActivePS::Exists(lock)) {
    CorePS::RemoveRegisteredPages(lock, aDocShellId);
  }

  RefPtr<PageInformation> pageInfo =
    new PageInformation(aDocShellId, aHistoryId, aUrl, aIsSubFrame);
  CorePS::AppendRegisteredPage(lock, std::move(pageInfo));

  // After appending the given page to CorePS, look for the expired
  // pages and remove them if there are any.
  if (ActivePS::Exists(lock)) {
    ActivePS::DiscardExpiredPages(lock);
  }
}

void
profiler_unregister_pages(const nsID& aRegisteredDocShellId)
{
  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    return;
  }

  PSAutoLock lock(gPSMutex);

  // During unregistration, if the profiler is active we have to keep the page
  // information, since there may be some markers associated with the given
  // page. But if the profiler is not active, we have no reason to keep the
  // page information, because there can't be any marker associated with it.
  if (ActivePS::Exists(lock)) {
    ActivePS::UnregisterPages(lock, aRegisteredDocShellId);
  } else {
    CorePS::RemoveRegisteredPages(lock, aRegisteredDocShellId);
  }
}

void
profiler_thread_sleep()
{
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  RacyRegisteredThread* racyRegisteredThread =
    TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread) {
    return;
  }

  racyRegisteredThread->SetSleeping();
}

void
profiler_thread_wake()
{
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  RacyRegisteredThread* racyRegisteredThread =
    TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread) {
    return;
  }

  racyRegisteredThread->SetAwake();
}

bool
mozilla::profiler::detail::IsThreadBeingProfiled()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  const RacyRegisteredThread* racyRegisteredThread =
    TLSRegisteredThread::RacyRegisteredThread();
  return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled();
}

bool
profiler_thread_is_sleeping()
{
  MOZ_RELEASE_ASSERT(NS_IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  RacyRegisteredThread* racyRegisteredThread =
    TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread) {
    return false;
  }
  return racyRegisteredThread->IsSleeping();
}

void
profiler_js_interrupt_callback()
{
  // This function runs on JS threads being sampled.
  PollJSSamplingForCurrentThread();
}

double
profiler_time()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
  return delta.ToMilliseconds();
}

UniqueProfilerBacktrace
profiler_get_backtrace()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock) || ActivePS::FeaturePrivacy(lock)) {
    return nullptr;
  }

  RegisteredThread* registeredThread =
    TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
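    // A backtrace can only be captured from a registered thread. Assert in
    // debug builds; in release builds, just return null.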
    MOZ_ASSERT(registeredThread);
    return nullptr;
  }

  int tid = Thread::GetCurrentId();

  TimeStamp now = TimeStamp::Now();

  Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
  regs.SyncPopulate();
#else
  regs.Clear();
#endif

  // 1000 should be plenty for a single backtrace.
  auto buffer = MakeUnique<ProfileBuffer>(1000);

  DoSyncSample(lock, *registeredThread, now, regs, *buffer.get());

  return UniqueProfilerBacktrace(
    new ProfilerBacktrace("SyncProfile", tid, std::move(buffer)));
}
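
// A sketch of synchronous use; the null check matters because the profiler
// may be inactive or running with the privacy feature:
//
//   UniqueProfilerBacktrace bt = profiler_get_backtrace();
//   if (bt) {
//     // e.g. hand it to a marker payload as its "cause" backtrace
//   }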

void
ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace)
{
  delete aBacktrace;
}

static void
racy_profiler_add_marker(const char* aMarkerName,
                         UniquePtr<ProfilerMarkerPayload> aPayload)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // We don't assert that RacyFeatures::IsActiveWithoutPrivacy() or
  // RacyRegisteredThread::IsBeingProfiled() is true here, because it's
  // possible that the result has changed since we tested it in the caller.
  //
  // Because of this imprecision it's possible to miss a marker or record one
  // we shouldn't. Either way is not a big deal.

  RacyRegisteredThread* racyRegisteredThread =
    TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread || !racyRegisteredThread->IsBeingProfiled()) {
    return;
  }

  TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull())
                       ? aPayload->GetStartTime()
                       : TimeStamp::Now();
  TimeDuration delta = origin - CorePS::ProcessStartTime();
  racyRegisteredThread->AddPendingMarker(aMarkerName, std::move(aPayload),
                                         delta.ToMilliseconds());
}

void
profiler_add_marker(const char* aMarkerName,
                    UniquePtr<ProfilerMarkerPayload> aPayload)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  if (!RacyFeatures::IsActiveWithoutPrivacy()) {
    return;
  }

  racy_profiler_add_marker(aMarkerName, std::move(aPayload));
}

void
profiler_add_marker(const char* aMarkerName)
{
  profiler_add_marker(aMarkerName, nullptr);
}
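
// Both overloads in a sketch. The marker names are illustrative, and the
// TRACING_EVENT value is assumed from TracingKind; the payload is constructed
// the same way profiler_tracing() does below:
//
//   profiler_add_marker("MyEvent");  // no payload
//   profiler_add_marker("MyTracedEvent",
//                       MakeUnique<TracingMarkerPayload>(
//                         "MyCategory", TRACING_EVENT, Nothing(), Nothing()));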

void
profiler_add_network_marker(nsIURI* aURI,
                            int32_t aPriority,
                            uint64_t aChannelId,
                            NetworkLoadType aType,
                            mozilla::TimeStamp aStart,
                            mozilla::TimeStamp aEnd,
                            int64_t aCount,
                            mozilla::net::CacheDisposition aCacheDisposition,
                            const mozilla::net::TimingStruct* aTimings,
                            nsIURI* aRedirectURI)
{
  if (!profiler_is_active()) {
    return;
  }
  // The calls below allocate/free/etc., so avoid them when the profiler is
  // not active.
  nsAutoCString spec;
  nsAutoCString redirect_spec;
  if (aURI) {
    aURI->GetAsciiSpec(spec);
  }
  if (aRedirectURI) {
    aRedirectURI->GetAsciiSpec(redirect_spec);
  }
  // The top 32 bits of the channel id are the process id of the load; keep
  // only the lower 32 bits, which identify the load within that process.
  uint32_t id = static_cast<uint32_t>(aChannelId & 0xFFFFFFFF);
  char name[2048];
  SprintfLiteral(name, "Load %u: %s", id, PromiseFlatCString(spec).get());
  profiler_add_marker(name,
                      MakeUnique<NetworkMarkerPayload>(static_cast<int64_t>(aChannelId),
                                                       PromiseFlatCString(spec).get(),
                                                       aType,
                                                       aStart,
                                                       aEnd,
                                                       aPriority,
                                                       aCount,
                                                       aCacheDisposition,
                                                       aTimings,
                                                       PromiseFlatCString(redirect_spec).get()));
}

// This logic needs to add a marker for a different thread, so we actually need
// to lock here.
void
profiler_add_marker_for_thread(int aThreadId,
                               const char* aMarkerName,
                               UniquePtr<ProfilerMarkerPayload> aPayload)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);
  if (!ActivePS::Exists(lock)) {
    return;
  }

  // Create the ProfilerMarker which we're going to store.
  TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull())
                   ? aPayload->GetStartTime()
                   : TimeStamp::Now();
  TimeDuration delta = origin - CorePS::ProcessStartTime();
  ProfilerMarker* marker =
    new ProfilerMarker(aMarkerName, aThreadId, std::move(aPayload),
                       delta.ToMilliseconds());

#ifdef DEBUG
  // Assert that our thread ID makes sense
  bool realThread = false;
  const nsTArray<UniquePtr<RegisteredThread>>& registeredThreads =
    CorePS::RegisteredThreads(lock);
  for (auto& thread : registeredThreads) {
    RefPtr<ThreadInfo> info = thread->Info();
    if (info->ThreadId() == aThreadId) {
      realThread = true;
      break;
    }
  }
  MOZ_ASSERT(realThread, "Invalid thread id");
#endif

  // Insert the marker into the buffer
  ProfileBuffer& buffer = ActivePS::Buffer(lock);
  buffer.AddStoredMarker(marker);
  buffer.AddEntry(ProfileBufferEntry::Marker(marker));
}

void
profiler_tracing(const char* aCategory,
                 const char* aMarkerName,
                 TracingKind aKind,
                 const Maybe<nsID>& aDocShellId,
                 const Maybe<uint32_t>& aDocShellHistoryId)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  VTUNE_TRACING(aMarkerName, aKind);

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  if (!RacyFeatures::IsActiveWithoutPrivacy()) {
    return;
  }

  auto payload = MakeUnique<TracingMarkerPayload>(
    aCategory, aKind, aDocShellId, aDocShellHistoryId);
  racy_profiler_add_marker(aMarkerName, std::move(payload));
}

void
profiler_tracing(const char* aCategory,
                 const char* aMarkerName,
                 TracingKind aKind,
                 UniqueProfilerBacktrace aCause,
                 const Maybe<nsID>& aDocShellId,
                 const Maybe<uint32_t>& aDocShellHistoryId)
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  VTUNE_TRACING(aMarkerName, aKind);

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  if (!RacyFeatures::IsActiveWithoutPrivacy()) {
    return;
  }

  auto payload = MakeUnique<TracingMarkerPayload>(
    aCategory, aKind, aDocShellId, aDocShellHistoryId, std::move(aCause));
  racy_profiler_add_marker(aMarkerName, std::move(payload));
}
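
// A sketch of the interval form, which is how tracing markers are typically
// paired (category and name are illustrative; the TRACING_INTERVAL_* values
// are assumed from TracingKind):
//
//   profiler_tracing("Paint", "Rasterize", TRACING_INTERVAL_START,
//                    Nothing(), Nothing());
//   // ... the work being measured ...
//   profiler_tracing("Paint", "Rasterize", TRACING_INTERVAL_END,
//                    Nothing(), Nothing());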

void
profiler_set_js_context(JSContext* aCx)
{
  MOZ_ASSERT(aCx);

  PSAutoLock lock(gPSMutex);

  RegisteredThread* registeredThread =
    TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    return;
  }

  registeredThread->SetJSContext(aCx);

  // This call is on-thread, so we can call PollJSSampling() to start JS
  // sampling immediately.
  registeredThread->PollJSSampling();

  if (ActivePS::Exists(lock)) {
    ProfiledThreadData* profiledThreadData =
      ActivePS::GetProfiledThreadData(lock, registeredThread);
    if (profiledThreadData) {
      profiledThreadData->NotifyReceivedJSContext(
        ActivePS::Buffer(lock).mRangeEnd);
    }
  }
}

void
profiler_clear_js_context()
{
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  RegisteredThread* registeredThread =
    TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    return;
  }

  JSContext* cx = registeredThread->GetJSContext();
  if (!cx) {
    return;
  }

  if (ActivePS::Exists(lock) && ActivePS::FeatureJS(lock)) {
    ProfiledThreadData* profiledThreadData =
      ActivePS::GetProfiledThreadData(lock, registeredThread);
    if (profiledThreadData) {
      profiledThreadData->NotifyAboutToLoseJSContext(cx,
                                                     CorePS::ProcessStartTime(),
                                                     ActivePS::Buffer(lock));

      // Notify the JS context that profiling for this context has stopped.
      // Do this by calling StopJSSampling and PollJSSampling before
      // nulling out the JSContext.
      registeredThread->StopJSSampling();
      registeredThread->PollJSSampling();

      registeredThread->ClearJSContext();

      // Tell the thread that we'd like to have JS sampling on this
      // thread again, once it gets a new JSContext (if ever).
      registeredThread->StartJSSampling(
        ActivePS::FeatureTrackOptimizations(lock));
      return;
    }
  }

  registeredThread->ClearJSContext();
}

int
profiler_current_thread_id()
{
  return Thread::GetCurrentId();
}

// NOTE: aCollector's methods will be called while the target thread is paused.
// Doing things in those methods like allocating -- which may try to claim
// locks -- is a surefire way to deadlock.
void
profiler_suspend_and_sample_thread(int aThreadId,
                                   uint32_t aFeatures,
                                   ProfilerStackCollector& aCollector,
                                   bool aSampleNative /* = true */)
{
  // Lock the profiler mutex
  PSAutoLock lock(gPSMutex);

  const nsTArray<UniquePtr<RegisteredThread>>& registeredThreads =
    CorePS::RegisteredThreads(lock);
  for (auto& thread : registeredThreads) {
    RefPtr<ThreadInfo> info = thread->Info();
    RegisteredThread& registeredThread = *thread.get();

    if (info->ThreadId() == aThreadId) {
      if (info->IsMainThread()) {
        aCollector.SetIsMainThread();
      }

      // Allocate the space for the native stack
      NativeStack nativeStack;

      // Suspend, sample, and then resume the target thread.
      Sampler sampler(lock);
      sampler.SuspendAndSampleAndResumeThread(lock, registeredThread,
                                              [&](const Registers& aRegs) {
        // The target thread is now suspended. Collect a native backtrace, and
        // call the callback.
        bool isSynchronous = false;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
        if (aSampleNative) {
          // We can only use FramePointerStackWalk or MozStackWalk from
          // suspend_and_sample_thread as other stackwalking methods may not be
          // initialized.
# if defined(USE_FRAME_POINTER_STACK_WALK)
          DoFramePointerBacktrace(lock, registeredThread, aRegs, nativeStack);
# elif defined(USE_MOZ_STACK_WALK)
          DoMozStackWalkBacktrace(lock, registeredThread, aRegs, nativeStack);
# else
#  error "Invalid configuration"
# endif

          MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                      nativeStack, aCollector);
        } else
#endif
        {
          MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                      nativeStack, aCollector);

          if (ProfilerFeature::HasLeaf(aFeatures)) {
            aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
          }
        }
      });

      // NOTE: Make sure to disable the sampler before it is destroyed, in case
      // the profiler is running at the same time.
      sampler.Disable(lock);
      break;
    }
  }
}
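
// A sketch of a caller; the collector subclass is hypothetical, and per the
// note above its methods run while the target thread is suspended, so they
// must not allocate or take locks:
//
//   class SimpleCollector final : public ProfilerStackCollector {
//     // ... implement the collection methods, writing only into
//     //     preallocated storage ...
//   };
//
//   SimpleCollector collector;
//   profiler_suspend_and_sample_thread(aTargetThreadId,
//                                      ProfilerFeature::Leaf, collector,
//                                      /* aSampleNative = */ true);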

// END externally visible functions
////////////////////////////////////////////////////////////////////////