Bug 1368915 (part 4) - Clean up NativeStack. r=mstange.
authorNicholas Nethercote <nnethercote@mozilla.com>
Fri, 02 Jun 2017 09:41:55 +1000
changeset 410755 2f636eeb949d07746a412fbf327ce4e4015f2aad
parent 410754 e7d5a07cede0aa1d2de6fdf13b7f522aa4a1cc22
child 410756 0145ab10f5cc62bdf5cee9453be305809e5f7840
push id7391
push usermtabara@mozilla.com
push dateMon, 12 Jun 2017 13:08:53 +0000
treeherdermozilla-beta@2191d7f87e2e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmstange
bugs1368915
milestone55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1368915 (part 4) - Clean up NativeStack. r=mstange. This patch puts the arrays inside NativeStack, gives NativeStack a constructor, and renames its fields using "mFoo" form. The patch also moves MAX_NATIVE_FRAMES from the LUL-only code and applies it globally. This increases the max native frame count from 256 to 1024 on the LUL platforms, which makes things consistent with other platforms.
tools/profiler/core/platform.cpp
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -791,22 +791,29 @@ AddPseudoEntry(PSLockRef aLock, ProfileB
 
   if (lineno != -1) {
     aBuffer->addTag(ProfileBufferEntry::LineNumber(lineno));
   }
 
   aBuffer->addTag(ProfileBufferEntry::Category(uint32_t(entry.category())));
 }
 
+// The maximum number of native frames obtained. Setting it too high risks the
+// unwinder wasting a lot of time looping on corrupted stacks.
+static const size_t MAX_NATIVE_FRAMES = 1024;
+
 struct NativeStack
 {
-  void** pc_array;
-  void** sp_array;
-  size_t size;
-  size_t count;
+  void* mPCs[MAX_NATIVE_FRAMES];
+  void* mSPs[MAX_NATIVE_FRAMES];
+  size_t mCount;  // Number of entries filled.
+
+  NativeStack()
+    : mCount(0)
+  {}
 };
 
 Atomic<bool> WALKING_JS_STACK(false);
 
 struct AutoWalkJSStack
 {
   bool walkAllowed;
 
@@ -884,17 +891,17 @@ MergeStacksIntoProfile(PSLockRef aLock, 
 
   // While the pseudo-stack array is ordered oldest-to-youngest, the JS and
   // native arrays are ordered youngest-to-oldest. We must add frames to aInfo
   // oldest-to-youngest. Thus, iterate over the pseudo-stack forwards and JS
   // and native arrays backwards. Note: this means the terminating condition
   // jsIndex and nativeIndex is being < 0.
   uint32_t pseudoIndex = 0;
   int32_t jsIndex = jsCount - 1;
-  int32_t nativeIndex = aNativeStack.count - 1;
+  int32_t nativeIndex = aNativeStack.mCount - 1;
 
   uint8_t* lastPseudoCppStackAddr = nullptr;
 
   // Iterate as long as there is at least one frame remaining.
   while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) {
     // There are 1 to 3 frames available. Find and add the oldest.
     uint8_t* pseudoStackAddr = nullptr;
     uint8_t* jsStackAddr = nullptr;
@@ -921,17 +928,17 @@ MergeStacksIntoProfile(PSLockRef aLock, 
       pseudoStackAddr = lastPseudoCppStackAddr;
     }
 
     if (jsIndex >= 0) {
       jsStackAddr = (uint8_t*) jsFrames[jsIndex].stackAddress;
     }
 
     if (nativeIndex >= 0) {
-      nativeStackAddr = (uint8_t*) aNativeStack.sp_array[nativeIndex];
+      nativeStackAddr = (uint8_t*) aNativeStack.mSPs[nativeIndex];
     }
 
     // If there's a native stack entry which has the same SP as a pseudo stack
     // entry, pretend we didn't see the native stack entry.  Ditto for a native
     // stack entry which has the same SP as a JS stack entry.  In effect this
     // means pseudo or JS entries trump conflicting native entries.
     if (nativeStackAddr && (pseudoStackAddr == nativeStackAddr ||
                             jsStackAddr == nativeStackAddr)) {
@@ -993,17 +1000,17 @@ MergeStacksIntoProfile(PSLockRef aLock, 
       jsIndex--;
       continue;
     }
 
     // If we reach here, there must be a native stack entry and it must be the
     // greatest entry.
     if (nativeStackAddr) {
       MOZ_ASSERT(nativeIndex >= 0);
-      void* addr = (void*)aNativeStack.pc_array[nativeIndex];
+      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
       aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr(addr));
     }
     if (nativeIndex >= 0) {
       nativeIndex--;
     }
   }
 
   // Update the JS context with the current profile sample buffer generation.
@@ -1022,42 +1029,35 @@ MergeStacksIntoProfile(PSLockRef aLock, 
 static uintptr_t GetThreadHandle(PlatformData* aData);
 #endif
 
 #ifdef USE_NS_STACKWALK
 static void
 StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, void* aClosure)
 {
   NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
-  MOZ_ASSERT(nativeStack->count < nativeStack->size);
-  nativeStack->sp_array[nativeStack->count] = aSP;
-  nativeStack->pc_array[nativeStack->count] = aPC;
-  nativeStack->count++;
+  MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
+  nativeStack->mSPs[nativeStack->mCount] = aSP;
+  nativeStack->mPCs[nativeStack->mCount] = aPC;
+  nativeStack->mCount++;
 }
 
 static void
 DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
-  void* pc_array[1000];
-  void* sp_array[1000];
-  NativeStack nativeStack = {
-    pc_array,
-    sp_array,
-    ArrayLength(pc_array),
-    0
-  };
+  NativeStack nativeStack;
 
   // Start with the current function. We use 0 as the frame number here because
   // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
   // This is a bit weird but it doesn't matter because StackWalkCallback()
   // doesn't use the frame number argument.
   StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &nativeStack);
 
-  uint32_t maxFrames = uint32_t(nativeStack.size - nativeStack.count);
+  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - nativeStack.mCount);
 
 #if defined(GP_OS_darwin) || (defined(GP_PLAT_x86_windows))
   void* stackEnd = aSample.mStackTop;
   if (aSample.mFP >= aSample.mSP && aSample.mFP <= stackEnd) {
     FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                           &nativeStack, reinterpret_cast<void**>(aSample.mFP),
                           stackEnd);
   }
@@ -1074,24 +1074,17 @@ DoNativeBacktrace(PSLockRef aLock, Profi
 }
 #endif
 
 #ifdef USE_EHABI_STACKWALK
 static void
 DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
-  void* pc_array[1000];
-  void* sp_array[1000];
-  NativeStack nativeStack = {
-    pc_array,
-    sp_array,
-    ArrayLength(pc_array),
-    0
-  };
+  NativeStack nativeStack;
 
   const mcontext_t* mcontext = &aSample.mContext->uc_mcontext;
   mcontext_t savedContext;
   NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
 
   // The pseudostack contains an "EnterJIT" frame whenever we enter
   // JIT code with profiling enabled; the stack pointer value points
   // the saved registers.  We use this to unwind resume unwinding
@@ -1102,21 +1095,21 @@ DoNativeBacktrace(PSLockRef aLock, Profi
     js::ProfileEntry& entry = racyInfo->entries[i - 1];
     if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) {
       // Found JIT entry frame.  Unwind up to that point (i.e., force
       // the stack walk to stop before the block of saved registers;
       // note that it yields nondecreasing stack pointers), then restore
       // the saved state.
       uint32_t* vSP = reinterpret_cast<uint32_t*>(entry.stackAddress());
 
-      nativeStack.count += EHABIStackWalk(*mcontext,
-                                          /* stackBase = */ vSP,
-                                          sp_array + nativeStack.count,
-                                          pc_array + nativeStack.count,
-                                          nativeStack.size - nativeStack.count);
+      nativeStack.mCount +=
+        EHABIStackWalk(*mcontext, /* stackBase = */ vSP,
+                       nativeStack.mSPs + nativeStack.mCount,
+                       nativeStack.mPCs + nativeStack.mCount,
+                       MAX_NATIVE_FRAMES - nativeStack.mCount);
 
       memset(&savedContext, 0, sizeof(savedContext));
 
       // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
       savedContext.arm_r4  = *vSP++;
       savedContext.arm_r5  = *vSP++;
       savedContext.arm_r6  = *vSP++;
       savedContext.arm_r7  = *vSP++;
@@ -1128,21 +1121,21 @@ DoNativeBacktrace(PSLockRef aLock, Profi
       savedContext.arm_sp  = reinterpret_cast<uint32_t>(vSP);
       savedContext.arm_pc  = savedContext.arm_lr;
       mcontext = &savedContext;
     }
   }
 
   // Now unwind whatever's left (starting from either the last EnterJIT frame
   // or, if no EnterJIT was found, the original registers).
-  nativeStack.count += EHABIStackWalk(*mcontext,
-                                      aSample.mStackTop,
-                                      sp_array + nativeStack.count,
-                                      pc_array + nativeStack.count,
-                                      nativeStack.size - nativeStack.count);
+  nativeStack.mCount +=
+    EHABIStackWalk(*mcontext, aSample.mStackTop,
+                   nativeStack.mSPs + nativeStack.mCount,
+                   nativeStack.mPCs + nativeStack.mCount,
+                   MAX_NATIVE_FRAMES - nativeStack.mCount);
 
   MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 }
 #endif
 
 #ifdef USE_LUL_STACKWALK
 
 // See the comment at the callsite for why this function is necessary.
@@ -1264,59 +1257,49 @@ DoNativeBacktrace(PSLockRef aLock, Profi
       ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
 #else
       memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
 #endif
       (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
     }
   }
 
-  // The maximum number of frames that LUL will produce.  Setting it
-  // too high gives a risk of it wasting a lot of time looping on
-  // corrupted stacks.
-  const int MAX_NATIVE_FRAMES = 256;
+  NativeStack nativeStack;
 
   size_t scannedFramesAllowed = 0;
 
-  uintptr_t framePCs[MAX_NATIVE_FRAMES];
-  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
-  size_t framesAvail = ArrayLength(framePCs);
-  size_t framesUsed  = 0;
   size_t scannedFramesAcquired = 0, framePointerFramesAcquired = 0;
   lul::LUL* lul = CorePS::Lul(aLock);
-  lul->Unwind(&framePCs[0], &frameSPs[0],
-              &framesUsed, &framePointerFramesAcquired, &scannedFramesAcquired,
-              framesAvail, scannedFramesAllowed,
+  lul->Unwind(reinterpret_cast<uintptr_t*>(nativeStack.mPCs),
+              reinterpret_cast<uintptr_t*>(nativeStack.mSPs),
+              &nativeStack.mCount, &framePointerFramesAcquired,
+              &scannedFramesAcquired,
+              MAX_NATIVE_FRAMES, scannedFramesAllowed,
               &startRegs, &stackImg);
 
-  NativeStack nativeStack = {
-    reinterpret_cast<void**>(framePCs),
-    reinterpret_cast<void**>(frameSPs),
-    ArrayLength(framePCs),
-    framesUsed
-  };
-
   MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 
   // Update stats in the LUL stats object.  Unfortunately this requires
   // three global memory operations.
   lul->mStats.mContext += 1;
-  lul->mStats.mCFI     += framesUsed - 1 - framePointerFramesAcquired -
-                                           scannedFramesAcquired;
+  lul->mStats.mCFI     += nativeStack.mCount - 1 - framePointerFramesAcquired -
+                          scannedFramesAcquired;
   lul->mStats.mFP      += framePointerFramesAcquired;
   lul->mStats.mScanned += scannedFramesAcquired;
 }
 
 #endif
 
 static void
 DoSampleStackTrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                    const TickSample& aSample)
 {
-  NativeStack nativeStack = { nullptr, nullptr, 0, 0 };
+  NativeStack nativeStack;
+
+  // |nativeStack| is empty at this point.
   MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 
   if (ActivePS::FeatureLeaf(aLock)) {
     aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
   }
 }
 
 // This function is called for each sampling period with the current program