Bug 696376 - Change how we find critical ranges so that it works on 10.6 too. r=dbaron.
author Rafael Ávila de Espíndola <respindola@mozilla.com>
Fri, 02 Dec 2011 19:26:04 -0500
changeset 82002 c9a74f4ee1f7f6bea7b5851c800a4116bd68952e
parent 82001 827420c6b7564afa55839541fbcbf7c1b54620ba
child 82003 dcf31efc8c12acad913f829c769298758b03310b
push id unknown
push user unknown
push date unknown
reviewers dbaron
bugs 696376
milestone 11.0a1
Bug 696376 - Change how we find critical ranges so that it works on 10.6 too. r=dbaron. Currently we use dlsym on pthread_cond_wait$UNIX2003 to find a function that indicates that new_sem_from_pool is on the stack. This works on 10.5, but on 10.6 I could not find a single reliable indicator that would work with dlsym. The good news is that dladdr works with any symbol, not just exported ones. To find the address of new_sem_from_pool, we set up a malloc logger and force a call to new_sem_from_pool. From the logger callback we walk the stack trying dladdr on every address. To force a call to new_sem_from_pool, the initialization code has to be the first to use semaphores, so it is now run from NS_LogInit. This works on 10.6 and 10.5 (but we have to look for "pthread_cond_wait$UNIX2003"). In 10.7 the call to malloc is gone, so we don't have to worry about critical addresses on it anymore.
tools/trace-malloc/lib/nsTraceMalloc.c
xpcom/base/nsStackWalk.cpp
xpcom/base/nsStackWalkPrivate.h
xpcom/base/nsTraceRefcntImpl.cpp
--- a/tools/trace-malloc/lib/nsTraceMalloc.c
+++ b/tools/trace-malloc/lib/nsTraceMalloc.c
@@ -952,17 +952,17 @@ backtrace(tm_thread *t, int skip, int *i
     stack_buffer_info *info = &t->backtrace_buf;
     void ** new_stack_buffer;
     size_t new_stack_buffer_size;
     nsresult rv;
 
     t->suppress_tracing++;
 
     if (!stacks_enabled) {
-#if defined(XP_MACOSX) && defined(__i386)
+#if defined(XP_MACOSX)
         /* Walk the stack, even if stacks_enabled is false. We do this to
            check if we must set immediate_abort. */
         info->entries = 0;
         rv = NS_StackWalk(stack_callback, skip, info);
         *immediate_abort = rv == NS_ERROR_UNEXPECTED;
         if (rv == NS_ERROR_UNEXPECTED || info->entries == 0) {
             t->suppress_tracing--;
             return NULL;
--- a/xpcom/base/nsStackWalk.cpp
+++ b/xpcom/base/nsStackWalk.cpp
@@ -36,19 +36,157 @@
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 /* API for getting a stack trace of the C/C++ stack on the current thread */
 
 #include "mozilla/Util.h"
+#include "nsDebug.h"
+#include "nsStackWalkPrivate.h"
 
 #include "nsStackWalk.h"
 
+// The presence of this address on the stack must stop the stack walk. If
+// there is no such address, the structure will be {NULL, true}.
+struct CriticalAddress {
+  void* mAddr;
+  bool mInit;
+};
+static CriticalAddress gCriticalAddress;
+
+#if defined(HAVE_DLOPEN) || defined(XP_MACOSX)
+#include <dlfcn.h>
+#endif
+
+#ifdef XP_MACOSX
+#include <pthread.h>
+#include <errno.h>
+#include <CoreServices/CoreServices.h>
+
+typedef void
+malloc_logger_t(uint32_t type, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+                uintptr_t result, uint32_t num_hot_frames_to_skip);
+extern malloc_logger_t *malloc_logger;
+
+static void
+stack_callback(void *pc, void *closure)
+{
+  const char *name = reinterpret_cast<char *>(closure);
+  Dl_info info;
+
+  // On Leopard dladdr returns the wrong value for "new_sem_from_pool". The
+  // stack shows up as having two pthread_cond_wait$UNIX2003 frames. The
+  // correct one is the first that we find on our way up, so the
+  // following check for gCriticalAddress.mAddr is critical.
+  if (gCriticalAddress.mAddr || dladdr(pc, &info) == 0  ||
+      info.dli_sname == NULL || strcmp(info.dli_sname, name) != 0)
+    return;
+  gCriticalAddress.mAddr = pc;
+}
+
+#define MAC_OS_X_VERSION_10_7_HEX 0x00001070
+#define MAC_OS_X_VERSION_10_6_HEX 0x00001060
+
+static PRInt32 OSXVersion()
+{
+  static PRInt32 gOSXVersion = 0x0;
+  if (gOSXVersion == 0x0) {
+    OSErr err = ::Gestalt(gestaltSystemVersion, (SInt32*)&gOSXVersion);
+    MOZ_ASSERT(err == noErr);
+  }
+  return gOSXVersion;
+}
+
+static bool OnLionOrLater()
+{
+  return (OSXVersion() >= MAC_OS_X_VERSION_10_7_HEX);
+}
+
+static bool OnSnowLeopardOrLater()
+{
+  return (OSXVersion() >= MAC_OS_X_VERSION_10_6_HEX);
+}
+
+static void
+my_malloc_logger(uint32_t type, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+                 uintptr_t result, uint32_t num_hot_frames_to_skip)
+{
+  static bool once = false;
+  if (once)
+    return;
+  once = true;
+
+  // On Leopard dladdr returns the wrong value for "new_sem_from_pool". The
+  // stack shows up as having two pthread_cond_wait$UNIX2003 frames.
+  const char *name = OnSnowLeopardOrLater() ? "new_sem_from_pool" :
+    "pthread_cond_wait$UNIX2003";
+  NS_StackWalk(stack_callback, 0, const_cast<char*>(name));
+}
+
+void
+StackWalkInitCriticalAddress()
+{
+  if(gCriticalAddress.mInit)
+    return;
+  gCriticalAddress.mInit = true;
+  // We must not do work when 'new_sem_from_pool' calls realloc, since
+  // it holds a non-reentrant spin-lock and we will quickly deadlock.
+  // new_sem_from_pool is not directly accessible using dlsym, so
+  // we force a situation where new_sem_from_pool is on the stack and
+  // use dladdr to check the addresses.
+
+  MOZ_ASSERT(malloc_logger == NULL);
+  malloc_logger = my_malloc_logger;
+
+  pthread_cond_t cond;
+  int r = pthread_cond_init(&cond, 0);
+  MOZ_ASSERT(r == 0);
+  pthread_mutex_t mutex;
+  r = pthread_mutex_init(&mutex,0);
+  MOZ_ASSERT(r == 0);
+  r = pthread_mutex_lock(&mutex);
+  MOZ_ASSERT(r == 0);
+  struct timespec abstime = {0, 1};
+  r = pthread_cond_timedwait_relative_np(&cond, &mutex, &abstime);
+  malloc_logger = NULL;
+
+  // On Lion, malloc is no longer called from pthread_cond_*wait*. This prevents
+  // us from finding the address, but that is fine, since with no call to malloc
+  // there is no critical address.
+  MOZ_ASSERT(OnLionOrLater() || gCriticalAddress.mAddr != NULL);
+  MOZ_ASSERT(r == ETIMEDOUT);
+  r = pthread_mutex_unlock(&mutex);
+  MOZ_ASSERT(r == 0);
+  r = pthread_mutex_destroy(&mutex);
+  MOZ_ASSERT(r == 0);
+  r = pthread_cond_destroy(&cond);
+  MOZ_ASSERT(r == 0);
+}
+
+static bool IsCriticalAddress(void* aPC)
+{
+  return gCriticalAddress.mAddr == aPC;
+}
+#else
+static bool IsCriticalAddress(void* aPC)
+{
+  return false;
+}
+// We still initialize gCriticalAddress.mInit so that this code behaves
+// the same on all platforms. Otherwise a failure to init would be visible
+// only on OS X.
+void
+StackWalkInitCriticalAddress()
+{
+  gCriticalAddress.mInit = true;
+}
+#endif
+
 #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64)) // WIN32 x86 stack walking code
 
 #include "nscore.h"
 #include <windows.h>
 #include <process.h>
 #include <stdio.h>
 #include "plstr.h"
 #include "mozilla/FunctionTimer.h"
@@ -650,16 +788,17 @@ WalkStackThread(void* aData)
  * Otherwise StackWalk will return FALSE when it hits a frame in a DLL
  * whose in memory address doesn't match its in-file address.
  */
 
 EXPORT_XPCOM_API(nsresult)
 NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames,
              void *aClosure)
 {
+    MOZ_ASSERT(gCriticalAddress.mInit);
     HANDLE myProcess, myThread;
     DWORD walkerReturn;
     struct WalkStackData data;
 
     if (!EnsureImageHlpInitialized())
         return false;
 
     // Have to duplicate handle to get a real handle.
@@ -1135,22 +1274,16 @@ NS_FormatCodeAddressDetails(void *aPC, c
 // On glibc 2.1, the Dl_info api defined in <dlfcn.h> is only exposed
 // if __USE_GNU is defined.  I suppose its some kind of standards
 // adherence thing.
 //
 #if (__GLIBC_MINOR__ >= 1) && !defined(__USE_GNU)
 #define __USE_GNU
 #endif
 
-#if defined(HAVE_DLOPEN) || defined(XP_MACOSX)
-#include <dlfcn.h>
-#endif
-
-
-
 // This thing is exported by libstdc++
 // Yes, this is a gcc only hack
 #if defined(MOZ_DEMANGLE_SYMBOLS)
 #include <cxxabi.h>
 #include <stdlib.h> // for free()
 #endif // MOZ_DEMANGLE_SYMBOLS
 
 void DemangleSymbol(const char * aSymbol, 
@@ -1343,16 +1476,17 @@ cs_operate(int (*operate_func)(void *, v
 {
     cswalkstack(csgetframeptr(), operate_func, usrarg);
 }
 
 EXPORT_XPCOM_API(nsresult)
 NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames,
              void *aClosure)
 {
+    MOZ_ASSERT(gCriticalAddress.mInit);
     struct my_user_args args;
 
     if (!initialized)
         myinit();
 
     args.callback = aCallback;
     args.skipFrames = aSkipFrames; /* XXX Not handled! */
     args.closure = aClosure;
@@ -1417,68 +1551,22 @@ NS_FormatCodeAddressDetails(void *aPC, c
 #else
 #define HAVE___LIBC_STACK_END 0
 #endif
 
 #if HAVE___LIBC_STACK_END
 extern void *__libc_stack_end; // from ld-linux.so
 #endif
 
-#ifdef XP_MACOSX
-struct AddressRange {
-  void* mStart;
-  void* mEnd;
-};
-// Addresses in this range must stop the stack walk
-static AddressRange gCriticalRange;
-
-static void FindFunctionAddresses(const char* aName, AddressRange* aRange)
-{
-  aRange->mStart = dlsym(RTLD_DEFAULT, aName);
-  if (!aRange->mStart)
-    return;
-  aRange->mEnd = aRange->mStart;
-  while (true) {
-    Dl_info info;
-    if (!dladdr(aRange->mEnd, &info))
-      break;
-    if (strcmp(info.dli_sname, aName))
-      break;
-    aRange->mEnd = (char*)aRange->mEnd + 1;
-  }
-}
-
-static void InitCriticalRanges()
-{
-  if (gCriticalRange.mStart)
-    return;
-  // We must not do work when 'new_sem_from_pool' calls realloc, since
-  // it holds a non-reentrant spin-lock and we will quickly deadlock.
-  // new_sem_from_pool is not directly accessible using dladdr but its
-  // code is bundled with pthread_cond_wait$UNIX2003 (on
-  // Leopard anyway).
-  FindFunctionAddresses("pthread_cond_wait$UNIX2003", &gCriticalRange);
-}
-
-static bool InCriticalRange(void* aPC)
-{
-  return gCriticalRange.mStart &&
-    gCriticalRange.mStart <= aPC && aPC < gCriticalRange.mEnd;
-}
-#else
-static void InitCriticalRanges() {}
-static bool InCriticalRange(void* aPC) { return false; }
-#endif
-
 EXPORT_XPCOM_API(nsresult)
 NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames,
              void *aClosure)
 {
+  MOZ_ASSERT(gCriticalAddress.mInit);
   // Stack walking code courtesy Kipp's "leaky".
-  InitCriticalRanges();
 
   // Get the frame pointer
   void **bp;
 #if defined(__i386) 
   __asm__( "movl %%ebp, %0" : "=g"(bp));
 #else
   // It would be nice if this worked uniformly, but at least on i386 and
   // x86_64, it stopped working with gcc 4.1, because it points to the
@@ -1501,18 +1589,18 @@ NS_StackWalk(NS_WalkStackCallback aCallb
       break;
     }
 #if (defined(__ppc__) && defined(XP_MACOSX)) || defined(__powerpc64__)
     // ppc mac or powerpc64 linux
     void *pc = *(bp+2);
 #else // i386 or powerpc32 linux
     void *pc = *(bp+1);
 #endif
-    if (InCriticalRange(pc)) {
-      printf("Aborting stack trace, PC in critical range\n");
+    if (IsCriticalAddress(pc)) {
+      printf("Aborting stack trace, PC is critical\n");
       return NS_ERROR_UNEXPECTED;
     }
     if (--skip < 0) {
       (*aCallback)(pc, aClosure);
     }
     bp = next;
   }
   return NS_OK;
@@ -1528,34 +1616,42 @@ struct unwind_info {
     int skip;
     void *closure;
 };
 
 static _Unwind_Reason_Code
 unwind_callback (struct _Unwind_Context *context, void *closure)
 {
     unwind_info *info = static_cast<unwind_info *>(closure);
-    if (--info->skip < 0) {
-        void *pc = reinterpret_cast<void *>(_Unwind_GetIP(context));
+    void *pc = reinterpret_cast<void *>(_Unwind_GetIP(context));
+    if (IsCriticalAddress(pc)) {
+        printf("Aborting stack trace, PC is critical\n");
+        /* We just want to stop the walk, so any error code will do.
+           Using _URC_NORMAL_STOP would probably be the most accurate,
+           but it is not defined on Android for ARM. */
+        return _URC_FOREIGN_EXCEPTION_CAUGHT;
+    }
+    if (--info->skip < 0)
         (*info->callback)(pc, info->closure);
-    }
     return _URC_NO_REASON;
 }
 
 EXPORT_XPCOM_API(nsresult)
 NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames,
              void *aClosure)
 {
+    MOZ_ASSERT(gCriticalAddress.mInit);
     unwind_info info;
     info.callback = aCallback;
     info.skip = aSkipFrames + 1;
     info.closure = aClosure;
 
-    _Unwind_Backtrace(unwind_callback, &info);
-
+    _Unwind_Reason_Code t = _Unwind_Backtrace(unwind_callback, &info);
+    if (t != _URC_END_OF_STACK)
+        return NS_ERROR_UNEXPECTED;
     return NS_OK;
 }
 
 #endif
 
 EXPORT_XPCOM_API(nsresult)
 NS_DescribeCodeAddress(void *aPC, nsCodeAddressDetails *aDetails)
 {
@@ -1615,16 +1711,17 @@ NS_FormatCodeAddressDetails(void *aPC, c
 #endif
 
 #else // unsupported platform.
 
 EXPORT_XPCOM_API(nsresult)
 NS_StackWalk(NS_WalkStackCallback aCallback, PRUint32 aSkipFrames,
              void *aClosure)
 {
+    MOZ_ASSERT(gCriticalAddress.mInit);
     return NS_ERROR_NOT_IMPLEMENTED;
 }
 
 EXPORT_XPCOM_API(nsresult)
 NS_DescribeCodeAddress(void *aPC, nsCodeAddressDetails *aDetails)
 {
     aDetails->library[0] = '\0';
     aDetails->loffset = 0;
new file mode 100644
--- /dev/null
+++ b/xpcom/base/nsStackWalkPrivate.h
@@ -0,0 +1,43 @@
+/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is NS_WalkTheStack.
+ *
+ * The Initial Developer of the Original Code is the Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *  Mozilla Corporation (original author)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+/**
+ * Initialize the critical address for this platform so that we can
+ * abort stack walks when needed.
+ */
+void
+StackWalkInitCriticalAddress(void);
--- a/xpcom/base/nsTraceRefcntImpl.cpp
+++ b/xpcom/base/nsTraceRefcntImpl.cpp
@@ -45,16 +45,17 @@
 #include "prprf.h"
 #include "prlog.h"
 #include "plstr.h"
 #include "prlink.h"
 #include <stdlib.h>
 #include "nsCOMPtr.h"
 #include "nsCRT.h"
 #include <math.h>
+#include "nsStackWalkPrivate.h"
 #include "nsStackWalk.h"
 #include "nsString.h"
 
 #include "nsXULAppAPI.h"
 #ifdef XP_WIN
 #include <process.h>
 #define getpid _getpid
 #else
@@ -918,16 +919,18 @@ nsTraceRefcntImpl::DemangleSymbol(const 
 }
 
 
 //----------------------------------------------------------------------
 
 EXPORT_XPCOM_API(void)
 NS_LogInit()
 {
+  // FIXME: This is called multiple times, we should probably not allow that.
+  StackWalkInitCriticalAddress();
 #ifdef NS_IMPL_REFCNT_LOGGING
   if (++gInitCount)
     nsTraceRefcntImpl::SetActivityIsLegal(true);
 #endif
 
 #ifdef NS_TRACE_MALLOC
   // XXX we don't have to worry about shutting down trace-malloc; it
   // handles this itself, through an atexit() callback.