author | Vladan Djeric <vdjeric@mozilla.com> |
Mon, 12 Mar 2012 07:07:05 -0400 (2012-03-12) | |
changeset 88773 | 35c41def4857fccbadb899abc5fb6b3037f3bf98 |
parent 88772 | fead9a65565f75cb8472f771b801e6453561ea06 |
child 88774 | f419ad71f1cdcc961b2fc87acd4a0a0b82ed61dd |
push id | 7000 |
push user | vdjeric@mozilla.com |
push date | Mon, 12 Mar 2012 11:08:34 +0000 (2012-03-12) |
treeherder | mozilla-inbound@35c41def4857 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | ehsan |
bugs | 712109 |
milestone | 13.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/toolkit/components/telemetry/Telemetry.cpp +++ b/toolkit/components/telemetry/Telemetry.cpp @@ -128,22 +128,30 @@ public: ~TelemetryImpl(); static bool CanRecord(); static already_AddRefed<nsITelemetry> CreateTelemetryInstance(); static void ShutdownTelemetry(); static void RecordSlowStatement(const nsACString &statement, const nsACString &dbName, PRUint32 delay); + static void RecordChromeHang(PRUint32 duration, + const Telemetry::HangStack &callStack, + SharedLibraryInfo &moduleMap); static nsresult GetHistogramEnumId(const char *name, Telemetry::ID *id); struct StmtStats { PRUint32 hitCount; PRUint32 totalTime; }; typedef nsBaseHashtableET<nsCStringHashKey, StmtStats> SlowSQLEntryType; + struct HangReport { + PRUint32 duration; + Telemetry::HangStack callStack; + SharedLibraryInfo moduleMap; + }; private: static bool StatementReflector(SlowSQLEntryType *entry, JSContext *cx, JSObject *obj); bool AddSQLInfo(JSContext *cx, JSObject *rootObj, bool mainThread); // Like GetHistogramById, but returns the underlying C++ object, not the JS one. nsresult GetHistogramByName(const nsACString &name, Histogram **ret); @@ -176,16 +184,18 @@ private: bool mCanRecord; static TelemetryImpl *sTelemetry; AutoHashtable<SlowSQLEntryType> mSlowSQLOnMainThread; AutoHashtable<SlowSQLEntryType> mSlowSQLOnOtherThread; // This gets marked immutable in debug builds, so we can't use // AutoHashtable here. nsTHashtable<nsCStringHashKey> mTrackedDBs; Mutex mHashMutex; + nsTArray<HangReport> mHangReports; + Mutex mHangReportsMutex; }; TelemetryImpl* TelemetryImpl::sTelemetry = NULL; // A initializer to initialize histogram collection StatisticsRecorder gStatisticsRecorder; // Hardcoded probes @@ -455,17 +465,18 @@ WrapAndReturnHistogram(Histogram *h, JSC *ret = OBJECT_TO_JSVAL(obj); JS_SetPrivate(obj, h); return NS_OK; } TelemetryImpl::TelemetryImpl(): mHistogramMap(Telemetry::HistogramCount), mCanRecord(XRE_GetProcessType() == GeckoProcessType_Default), -mHashMutex("Telemetry::mHashMutex") +mHashMutex("Telemetry::mHashMutex"), +mHangReportsMutex("Telemetry::mHangReportsMutex") { // A whitelist to prevent Telemetry reporting on Addon & Thunderbird DBs const char *trackedDBs[] = { "addons.sqlite", "chromeappsstore.sqlite", "content-prefs.sqlite", "cookies.sqlite", "downloads.sqlite", "extensions.sqlite", "formhistory.sqlite", "index.sqlite", "permissions.sqlite", "places.sqlite", "search.sqlite", "signons.sqlite", "urlclassifier3.sqlite", "webappsstore.sqlite" @@ -938,16 +949,154 @@ TelemetryImpl::GetSlowSQL(JSContext *cx, // Add info about slow SQL queries on other threads if (!AddSQLInfo(cx, root_obj, false)) return NS_ERROR_FAILURE; return NS_OK; } NS_IMETHODIMP +TelemetryImpl::GetChromeHangs(JSContext *cx, jsval *ret) +{ + MutexAutoLock hangReportMutex(mHangReportsMutex); + JSObject *reportArray = JS_NewArrayObject(cx, 0, nsnull); + if (!reportArray) { + return NS_ERROR_FAILURE; + } + *ret = OBJECT_TO_JSVAL(reportArray); + + // Each hang report is an object in the 'chromeHangs' array + for (size_t i = 0; i < mHangReports.Length(); ++i) { + JSObject *reportObj = JS_NewObject(cx, NULL, NULL, NULL); + if (!reportObj) { + return NS_ERROR_FAILURE; + } + jsval reportObjVal = OBJECT_TO_JSVAL(reportObj); + if (!JS_SetElement(cx, reportArray, i, &reportObjVal)) { + return NS_ERROR_FAILURE; + } + + // Record the hang duration (expressed in seconds) + JSBool ok = JS_DefineProperty(cx, reportObj, "duration", + INT_TO_JSVAL(mHangReports[i].duration), + NULL, NULL, JSPROP_ENUMERATE); + if (!ok) { + return NS_ERROR_FAILURE; + } + + // Represent call stack PCs as strings + // (JS can't represent all 64-bit integer values) + JSObject *pcArray = JS_NewArrayObject(cx, 0, nsnull); + if (!pcArray) { + return NS_ERROR_FAILURE; + } + ok = JS_DefineProperty(cx, reportObj, "stack", OBJECT_TO_JSVAL(pcArray), + NULL, NULL, JSPROP_ENUMERATE); + if (!ok) { + return NS_ERROR_FAILURE; + } + + const PRUint32 pcCount = mHangReports[i].callStack.Length(); + for (size_t pcIndex = 0; pcIndex < pcCount; ++pcIndex) { + nsCAutoString pcString; + pcString.AppendPrintf("0x%p", mHangReports[i].callStack[pcIndex]); + JSString *str = JS_NewStringCopyZ(cx, pcString.get()); + if (!str) { + return NS_ERROR_FAILURE; + } + jsval v = STRING_TO_JSVAL(str); + if (!JS_SetElement(cx, pcArray, pcIndex, &v)) { + return NS_ERROR_FAILURE; + } + } + + // Record memory map info + JSObject *moduleArray = JS_NewArrayObject(cx, 0, nsnull); + if (!moduleArray) { + return NS_ERROR_FAILURE; + } + ok = JS_DefineProperty(cx, reportObj, "memoryMap", + OBJECT_TO_JSVAL(moduleArray), + NULL, NULL, JSPROP_ENUMERATE); + if (!ok) { + return NS_ERROR_FAILURE; + } + + const PRUint32 moduleCount = mHangReports[i].moduleMap.GetSize(); + for (size_t moduleIndex = 0; moduleIndex < moduleCount; ++moduleIndex) { + // Current module + const SharedLibrary &module = + mHangReports[i].moduleMap.GetEntry(moduleIndex); + + JSObject *moduleInfoArray = JS_NewArrayObject(cx, 0, nsnull); + if (!moduleInfoArray) { + return NS_ERROR_FAILURE; + } + jsval val = OBJECT_TO_JSVAL(moduleInfoArray); + if (!JS_SetElement(cx, moduleArray, moduleIndex, &val)) { + return NS_ERROR_FAILURE; + } + + // Start address + nsCAutoString addressString; + addressString.AppendPrintf("0x%p", module.GetStart()); + JSString *str = JS_NewStringCopyZ(cx, addressString.get()); + if (!str) { + return NS_ERROR_FAILURE; + } + val = STRING_TO_JSVAL(str); + if (!JS_SetElement(cx, moduleInfoArray, 0, &val)) { + return NS_ERROR_FAILURE; + } + + // Module name + str = JS_NewStringCopyZ(cx, module.GetName()); + if (!str) { + return NS_ERROR_FAILURE; + } + val = STRING_TO_JSVAL(str); + if (!JS_SetElement(cx, moduleInfoArray, 1, &val)) { + return NS_ERROR_FAILURE; + } + + // Module size in memory + val = INT_TO_JSVAL(int32_t(module.GetEnd() - module.GetStart())); + if (!JS_SetElement(cx, moduleInfoArray, 2, &val)) { + return NS_ERROR_FAILURE; + } + + // "PDB Age" identifier + val = INT_TO_JSVAL(0); +#if defined(MOZ_PROFILING) && defined(XP_WIN) + val = INT_TO_JSVAL(module.GetPdbAge()); +#endif + if (!JS_SetElement(cx, moduleInfoArray, 3, &val)) { + return NS_ERROR_FAILURE; + } + + // "PDB Signature" GUID + char guidString[NSID_LENGTH] = { 0 }; +#if defined(MOZ_PROFILING) && defined(XP_WIN) + module.GetPdbSignature().ToProvidedString(guidString); +#endif + str = JS_NewStringCopyZ(cx, guidString); + if (!str) { + return NS_ERROR_FAILURE; + } + val = STRING_TO_JSVAL(str); + if (!JS_SetElement(cx, moduleInfoArray, 4, &val)) { + return NS_ERROR_FAILURE; + } + } + } + + return NS_OK; +} + +NS_IMETHODIMP TelemetryImpl::GetRegisteredHistograms(JSContext *cx, jsval *ret) { size_t count = ArrayLength(gHistograms); JSObject *info = JS_NewObject(cx, NULL, NULL, NULL); if (!info) return NS_ERROR_FAILURE; JS::AutoObjectRooter root(cx, info); @@ -1373,16 +1522,44 @@ TelemetryImpl::RecordSlowStatement(const return; entry->mData.hitCount = 0; entry->mData.totalTime = 0; } entry->mData.hitCount++; entry->mData.totalTime += delay; } +void +TelemetryImpl::RecordChromeHang(PRUint32 duration, + const Telemetry::HangStack &callStack, + SharedLibraryInfo &moduleMap) +{ + MOZ_ASSERT(sTelemetry); + if (!sTelemetry->mCanRecord) { + return; + } + + MutexAutoLock hangReportMutex(sTelemetry->mHangReportsMutex); + + // Only report the modules which changed since the first hang report + if (sTelemetry->mHangReports.Length()) { + SharedLibraryInfo &firstModuleMap = + sTelemetry->mHangReports[0].moduleMap; + for (size_t i = 0; i < moduleMap.GetSize(); ++i) { + if (firstModuleMap.Contains(moduleMap.GetEntry(i))) { + moduleMap.RemoveEntries(i, i + 1); + --i; + } + } + } + + HangReport newReport = { duration, callStack, moduleMap }; + sTelemetry->mHangReports.AppendElement(newReport); +} + NS_IMPL_THREADSAFE_ISUPPORTS1(TelemetryImpl, nsITelemetry) NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsITelemetry, TelemetryImpl::CreateTelemetryInstance) #define NS_TELEMETRY_CID \ {0xaea477f2, 0xb3a2, 0x469c, {0xaa, 0x29, 0x0a, 0x82, 0xd1, 0x32, 0xb8, 0x29}} NS_DEFINE_NAMED_CID(NS_TELEMETRY_CID); const Module::CIDEntry kTelemetryCIDs[] = { @@ -1454,16 +1631,23 @@ RecordSlowSQLStatement(const nsACString void Init() { // Make the service manager hold a long-lived reference to the service nsCOMPtr<nsITelemetry> telemetryService = do_GetService("@mozilla.org/base/telemetry;1"); MOZ_ASSERT(telemetryService); } +void RecordChromeHang(PRUint32 duration, + const Telemetry::HangStack &callStack, + SharedLibraryInfo &moduleMap) +{ + TelemetryImpl::RecordChromeHang(duration, callStack, moduleMap); +} + } // namespace Telemetry } // namespace mozilla NSMODULE_DEFN(nsTelemetryModule) = &kTelemetryModule; /** * The XRE_TelemetryAdd function is to be used by embedding applications * that can't use mozilla::Telemetry::Accumulate() directly.
--- a/toolkit/components/telemetry/Telemetry.h +++ b/toolkit/components/telemetry/Telemetry.h @@ -37,16 +37,19 @@ * ***** END LICENSE BLOCK ***** */ #ifndef Telemetry_h__ #define Telemetry_h__ #include "mozilla/GuardObjects.h" #include "mozilla/TimeStamp.h" #include "mozilla/StartupTimeline.h" +#include "nsTArray.h" +#include "nsStringGlue.h" +#include "shared-libraries.h" namespace base { class Histogram; } namespace mozilla { namespace Telemetry { @@ -151,11 +154,27 @@ void RecordSlowSQLStatement(const nsACSt const nsACString &dbName, PRUint32 delay); /** * Threshold for a statement to be considered slow, in milliseconds */ const PRUint32 kSlowStatementThreshold = 100; +/** + * nsTArray of pointers representing PCs on a call stack + */ +typedef nsTArray<uintptr_t> HangStack; + +/** + * Record the main thread's call stack after it hangs. + * + * @param duration - Approximate duration of main thread hang in seconds + * @param callStack - Array of PCs from the hung call stack + * @param moduleMap - Array of info about modules in memory (for symbolication) + */ +void RecordChromeHang(PRUint32 duration, + const HangStack &callStack, + SharedLibraryInfo &moduleMap); + } // namespace Telemetry } // namespace mozilla #endif // Telemetry_h__
--- a/toolkit/components/telemetry/TelemetryPing.js +++ b/toolkit/components/telemetry/TelemetryPing.js @@ -448,16 +448,17 @@ TelemetryPing.prototype = { if (havePreviousSession) { payloadObj.histograms = this.getHistograms(this._prevSession.snapshots); } else { payloadObj.simpleMeasurements = getSimpleMeasurements(); payloadObj.histograms = this.getHistograms(Telemetry.histogramSnapshots); payloadObj.slowSQL = Telemetry.slowSQL; + payloadObj.chromeHangs = Telemetry.chromeHangs; payloadObj.addonHistograms = this.getAddonHistograms(); } if (Object.keys(this._slowSQLStartup.mainThread).length || Object.keys(this._slowSQLStartup.otherThreads).length) { payloadObj.slowSQLStartup = this._slowSQLStartup; } return { previous: !!havePreviousSession,
--- a/toolkit/components/telemetry/nsITelemetry.idl +++ b/toolkit/components/telemetry/nsITelemetry.idl @@ -65,17 +65,17 @@ interface nsITelemetryLoadSessionDataCal }; [scriptable, function, uuid(40065f26-afd2-4417-93de-c1de9adb1548)] interface nsITelemetrySaveSessionDataCallback : nsISupports { void handle(in bool success); }; -[scriptable, uuid(db854295-478d-4de9-8211-d73ed7d81cd0)] +[scriptable, uuid(f23a2c8d-9286-42e9-ab1b-ed287eeade6d)] interface nsITelemetry : nsISupports { /** * Histogram types: * HISTOGRAM_EXPONENTIAL - buckets increase exponentially * HISTOGRAM_LINEAR - buckets increase linearly * HISTOGRAM_BOOLEAN - For storing 0/1 values * HISTOGRAM_FLAG - For storing a single value; its count is always == 1. @@ -113,16 +113,24 @@ interface nsITelemetry : nsISupports * otherThreads: Slow statements that executed on a non-main thread * sqlString - String of the offending prepared statement * hit count - The number of times this statement required longer than the threshold time to execute * total time - The sum of all execution times above the threshold time for this statement */ [implicit_jscontext] readonly attribute jsval slowSQL; + /* + * An array of chrome hang reports. Each element is a hang report represented + * as an object containing the hang duration, call stack PCs and information + * about modules in memory. + */ + [implicit_jscontext] + readonly attribute jsval chromeHangs; + /** * An object whose properties are the names of histograms defined in * TelemetryHistograms.h and whose corresponding values are the textual * comments associated with said histograms. */ [implicit_jscontext] readonly attribute jsval registeredHistograms;
--- a/tools/profiler/Makefile.in +++ b/tools/profiler/Makefile.in @@ -43,16 +43,17 @@ srcdir = @srcdir@ VPATH = $(srcdir) include $(DEPTH)/config/autoconf.mk EXPORTS = \ sampler.h \ sps_sampler.h \ thread_helper.h \ + shared-libraries.h \ $(NULL) LOCAL_INCLUDES += \ -I$(topsrcdir)/ipc/chromium/src \ $(NULL) MODULE = profiler MODULE_NAME = nsProfilerModule
--- a/tools/profiler/nsIProfiler.idl +++ b/tools/profiler/nsIProfiler.idl @@ -49,12 +49,16 @@ interface nsIProfiler : nsISupports void GetResponsivenessTimes(out PRUint32 aCount, [retval, array, size_is(aCount)] out double aResult); void GetFeatures(out PRUint32 aCount, [retval, array, size_is(aCount)] out string aFeatures); /** * Returns a JSON string of an array of shared library objects. * Every object has three properties: start, end, and name. * start and end are integers describing the address range that the library * occupies in memory. name is the path of the library as a string. + * + * On Windows profiling builds, the shared library objects will have + * additional pdbSignature and pdbAge properties for uniquely identifying + * shared library versions for stack symbolication. */ AString getSharedLibraryInformation(); };
--- a/tools/profiler/nsProfiler.cpp +++ b/tools/profiler/nsProfiler.cpp @@ -91,16 +91,20 @@ nsProfiler::GetProfile(char **aProfile) static void AddSharedLibraryInfoToStream(std::ostream& aStream, SharedLibrary& aLib) { aStream << "{"; aStream << "\"start\":" << aLib.GetStart(); aStream << ",\"end\":" << aLib.GetEnd(); aStream << ",\"name\":\"" << aLib.GetName() << "\""; +#ifdef XP_WIN + aStream << ",\"pdbSignature\":\"" << aLib.GetPdbSignature().ToString() << "\""; + aStream << ",\"pdbAge\":" << aLib.GetPdbAge(); +#endif aStream << "}"; } static std::string GetSharedLibraryInfoString() { SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); if (info.GetSize() == 0)
--- a/tools/profiler/shared-libraries-win32.cc +++ b/tools/profiler/shared-libraries-win32.cc @@ -16,16 +16,17 @@ * * The Initial Developer of the Original Code is * Mozilla Foundation. * Portions created by the Initial Developer are Copyright (C) 2011 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Jeff Muizelaar <jmuizelaar@mozilla.com> + * Vladan Djeric <vdjeric@mozilla.com> * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your @@ -33,33 +34,94 @@ * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #include <windows.h> #include <tlhelp32.h> +#include <Dbghelp.h> #include "shared-libraries.h" #include "nsWindowsHelpers.h" +#define CV_SIGNATURE 0x53445352 // 'SDSR' + +struct CodeViewRecord70 +{ + uint32_t signature; + GUID pdbSignature; + uint32_t pdbAge; + uint8_t pdbFileName[1]; +}; + +static bool GetPdbInfo(uintptr_t aStart, nsID& aSignature, uint32_t& aAge) +{ + if (!aStart) { + return false; + } + + PIMAGE_DOS_HEADER dosHeader = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart); + if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) { + return false; + } + + PIMAGE_NT_HEADERS ntHeaders = reinterpret_cast<PIMAGE_NT_HEADERS>( + aStart + dosHeader->e_lfanew); + if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) { + return false; + } + + uint32_t relativeVirtualAddress = + ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress; + if (!relativeVirtualAddress) { + return false; + } + + PIMAGE_DEBUG_DIRECTORY debugDirectory = + reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart + relativeVirtualAddress); + if (!debugDirectory || debugDirectory->Type != IMAGE_DEBUG_TYPE_CODEVIEW) { + return false; + } + + CodeViewRecord70 *debugInfo = reinterpret_cast<CodeViewRecord70 *>( + aStart + debugDirectory->AddressOfRawData); + if (!debugInfo || debugInfo->signature != CV_SIGNATURE) { + return false; + } + + aAge = debugInfo->pdbAge; + GUID& pdbSignature = debugInfo->pdbSignature; + aSignature.m0 = pdbSignature.Data1; + aSignature.m1 = pdbSignature.Data2; + aSignature.m2 = pdbSignature.Data3; + memcpy(aSignature.m3, pdbSignature.Data4, sizeof(pdbSignature.Data4)); + return true; +} + SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { SharedLibraryInfo sharedLibraryInfo; nsAutoHandle snap(CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId())); MODULEENTRY32 module = {0}; module.dwSize = sizeof(MODULEENTRY32); if (Module32First(snap, &module)) { do { - SharedLibrary shlib((uintptr_t)module.modBaseAddr, - (uintptr_t)module.modBaseAddr+module.modBaseSize, - 0, // DLLs are always mapped at offset 0 on Windows - module.szModule); - sharedLibraryInfo.AddSharedLibrary(shlib); + nsID pdbSig; + uint32_t pdbAge; + if (GetPdbInfo((uintptr_t)module.modBaseAddr, pdbSig, pdbAge)) { + SharedLibrary shlib((uintptr_t)module.modBaseAddr, + (uintptr_t)module.modBaseAddr+module.modBaseSize, + 0, // DLLs are always mapped at offset 0 on Windows + pdbSig, + pdbAge, + module.szModule); + sharedLibraryInfo.AddSharedLibrary(shlib); + } } while (Module32Next(snap, &module)); } return sharedLibraryInfo; }
--- a/tools/profiler/shared-libraries.h +++ b/tools/profiler/shared-libraries.h @@ -31,83 +31,159 @@ * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ +#include <algorithm> #include <vector> #include <string.h> #include <stdlib.h> #include <mozilla/StandardInteger.h> +#include <nsID.h> class SharedLibrary { public: - SharedLibrary(unsigned long aStart, unsigned long aEnd, unsigned long aOffset, char *aName) + + SharedLibrary(unsigned long aStart, + unsigned long aEnd, + unsigned long aOffset, +#ifdef XP_WIN + nsID aPdbSignature, + unsigned long aPdbAge, +#endif + char *aName) : mStart(aStart) , mEnd(aEnd) , mOffset(aOffset) +#ifdef XP_WIN + , mPdbSignature(aPdbSignature) + , mPdbAge(aPdbAge) +#endif , mName(strdup(aName)) {} SharedLibrary(const SharedLibrary& aEntry) : mStart(aEntry.mStart) , mEnd(aEntry.mEnd) , mOffset(aEntry.mOffset) +#ifdef XP_WIN + , mPdbSignature(aEntry.mPdbSignature) + , mPdbAge(aEntry.mPdbAge) +#endif , mName(strdup(aEntry.mName)) {} SharedLibrary& operator=(const SharedLibrary& aEntry) { // Gracefully handle self assignment if (this == &aEntry) return *this; mStart = aEntry.mStart; mEnd = aEntry.mEnd; mOffset = aEntry.mOffset; +#ifdef XP_WIN + mPdbSignature = aEntry.mPdbSignature; + mPdbAge = aEntry.mPdbAge; +#endif if (mName) free(mName); mName = strdup(aEntry.mName); return *this; } + bool operator==(const SharedLibrary& other) const + { + bool equal = ((mStart == other.mStart) && + (mEnd == other.mEnd) && + (mOffset == other.mOffset) && + (mName && other.mName && (strcmp(mName, other.mName) == 0))); +#ifdef XP_WIN + equal = equal && + (mPdbSignature.Equals(other.mPdbSignature)) && + (mPdbAge == other.mPdbAge); +#endif + return equal; + } + ~SharedLibrary() { free(mName); + mName = NULL; } - uintptr_t GetStart() { return mStart; } - uintptr_t GetEnd() { return mEnd; } - char* GetName() { return mName; } + uintptr_t GetStart() const { return mStart; } + uintptr_t GetEnd() const { return mEnd; } +#ifdef XP_WIN + nsID GetPdbSignature() const { return mPdbSignature; } + uint32_t GetPdbAge() const { return mPdbAge; } +#endif + char* GetName() const { return mName; } private: explicit SharedLibrary() {} uintptr_t mStart; uintptr_t mEnd; uintptr_t mOffset; +#ifdef XP_WIN + // Windows-specific PDB file identifiers + nsID mPdbSignature; + uint32_t mPdbAge; +#endif char *mName; }; +static bool +CompareAddresses(const SharedLibrary& first, const SharedLibrary& second) +{ + return first.GetStart() < second.GetStart(); +} + class SharedLibraryInfo { public: static SharedLibraryInfo GetInfoForSelf(); SharedLibraryInfo() {} void AddSharedLibrary(SharedLibrary entry) { mEntries.push_back(entry); } SharedLibrary& GetEntry(size_t i) { return mEntries[i]; } - size_t GetSize() + // Removes items in the range [first, last) + // i.e. element at the "last" index is not removed + void RemoveEntries(size_t first, size_t last) + { + mEntries.erase(mEntries.begin() + first, mEntries.begin() + last); + } + + bool Contains(const SharedLibrary& searchItem) const + { + return (mEntries.end() != + std::find(mEntries.begin(), mEntries.end(), searchItem)); + } + + size_t GetSize() const { return mEntries.size(); } + + void SortByAddress() + { + std::sort(mEntries.begin(), mEntries.end(), CompareAddresses); + } + + void Clear() + { + mEntries.clear(); + } + private: std::vector<SharedLibrary> mEntries; };
--- a/xpcom/threads/HangMonitor.cpp +++ b/xpcom/threads/HangMonitor.cpp @@ -33,37 +33,45 @@ * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #include "mozilla/HangMonitor.h" #include "mozilla/Monitor.h" #include "mozilla/Preferences.h" +#include "mozilla/Telemetry.h" #include "nsXULAppAPI.h" #include "nsThreadUtils.h" +#include "nsStackWalk.h" #ifdef MOZ_CRASHREPORTER #include "nsExceptionHandler.h" #endif #ifdef XP_WIN #include <windows.h> #endif +#if defined(MOZ_PROFILING) && defined(XP_WIN) + #define REPORT_CHROME_HANGS +#endif + namespace mozilla { namespace HangMonitor { /** * A flag which may be set from within a debugger to disable the hang * monitor. */ volatile bool gDebugDisableHangMonitor = false; const char kHangMonitorPrefName[] = "hangmonitor.timeout"; +const char kTelemetryPrefName[] = "toolkit.telemetry.enabled"; + // Monitor protects gShutdown and gTimeout, but not gTimestamp which rely on // being atomically set by the processor; synchronization doesn't really matter // in this use case. Monitor* gMonitor; // The timeout preference, in seconds. PRInt32 gTimeout; @@ -71,21 +79,38 @@ PRThread* gThread; // Set when shutdown begins to signal the thread to exit immediately. bool gShutdown; // The timestamp of the last event notification, or PR_INTERVAL_NO_WAIT if // we're currently not processing events. volatile PRIntervalTime gTimestamp; +#ifdef REPORT_CHROME_HANGS +// Main thread ID used in reporting chrome hangs under Windows +static HANDLE winMainThreadHandle = NULL; + +// Default timeout for reporting chrome hangs to Telemetry (10 seconds) +static const PRInt32 DEFAULT_CHROME_HANG_INTERVAL = 10; +#endif + // PrefChangedFunc int PrefChanged(const char*, void*) { PRInt32 newval = Preferences::GetInt(kHangMonitorPrefName); +#ifdef REPORT_CHROME_HANGS + // Monitor chrome hangs on the profiling branch if Telemetry enabled + if (newval == 0) { + PRBool telemetryEnabled = Preferences::GetBool(kTelemetryPrefName); + if (telemetryEnabled) { + newval = DEFAULT_CHROME_HANG_INTERVAL; + } + } +#endif MonitorAutoLock lock(*gMonitor); if (newval != gTimeout) { gTimeout = newval; lock.Notify(); } return 0; } @@ -106,27 +131,100 @@ Crash() #ifdef MOZ_CRASHREPORTER CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("Hang"), NS_LITERAL_CSTRING("1")); #endif NS_RUNTIMEABORT("HangMonitor triggered"); } +#ifdef REPORT_CHROME_HANGS +static void +ChromeStackWalker(void *aPC, void *aClosure) +{ + MOZ_ASSERT(aClosure); + Telemetry::HangStack *callStack = + reinterpret_cast< Telemetry::HangStack* >(aClosure); + callStack->AppendElement(reinterpret_cast<uintptr_t>(aPC)); +} + +static void +GetChromeHangReport(Telemetry::HangStack &callStack, SharedLibraryInfo &moduleMap) +{ + MOZ_ASSERT(winMainThreadHandle); + moduleMap = SharedLibraryInfo::GetInfoForSelf(); + moduleMap.SortByAddress(); + + DWORD ret = ::SuspendThread(winMainThreadHandle); + if (ret == -1) { + callStack.Clear(); + moduleMap.Clear(); + return; + } + NS_StackWalk(ChromeStackWalker, 0, &callStack, + reinterpret_cast<uintptr_t>(winMainThreadHandle)); + ret = ::ResumeThread(winMainThreadHandle); + if (ret == -1) { + callStack.Clear(); + moduleMap.Clear(); + return; + } + + // Remove all modules not referenced by a PC on the stack + Telemetry::HangStack sortedStack = callStack; + sortedStack.Sort(); + + size_t moduleIndex = 0; + size_t stackIndex = 0; + bool unreferencedModule = true; + while (stackIndex < sortedStack.Length() && moduleIndex < moduleMap.GetSize()) { + uintptr_t pc = sortedStack[stackIndex]; + SharedLibrary& module = moduleMap.GetEntry(moduleIndex); + uintptr_t moduleStart = module.GetStart(); + uintptr_t moduleEnd = module.GetEnd() - 1; + if (moduleStart <= pc && pc <= moduleEnd) { + // If the current PC is within the current module, mark module as used + unreferencedModule = false; + ++stackIndex; + } else if (pc > moduleEnd) { + if (unreferencedModule) { + // Remove module if no PCs within its address range + moduleMap.RemoveEntries(moduleIndex, moduleIndex + 1); + } else { + // Module was referenced on stack, but current PC belongs to later module + unreferencedModule = true; + ++moduleIndex; + } + } else { + // PC does not belong to any module + ++stackIndex; + } + } + + // Clean up remaining unreferenced modules, i.e. module addresses > max(pc) + if (moduleIndex + 1 < moduleMap.GetSize()) { + moduleMap.RemoveEntries(moduleIndex + 1, moduleMap.GetSize()); + } +} +#endif + void ThreadMain(void*) { MonitorAutoLock lock(*gMonitor); // In order to avoid issues with the hang monitor incorrectly triggering // during a general system stop such as sleeping, the monitor thread must // run twice to trigger hang protection. PRIntervalTime lastTimestamp = 0; int waitCount = 0; + Telemetry::HangStack hangStack; + SharedLibraryInfo hangModuleMap; + while (true) { if (gShutdown) { return; // Exit the thread } // avoid rereading the volatile value in this loop PRIntervalTime timestamp = gTimestamp; @@ -138,25 +236,37 @@ ThreadMain(void*) timestamp = 1; // lowest legal PRInterval value } if (timestamp != PR_INTERVAL_NO_WAIT && timestamp == lastTimestamp && gTimeout > 0) { ++waitCount; if (waitCount == 2) { +#ifdef REPORT_CHROME_HANGS + GetChromeHangReport(hangStack, hangModuleMap); +#else PRInt32 delay = PRInt32(PR_IntervalToSeconds(now - timestamp)); if (delay > gTimeout) { MonitorAutoUnlock unlock(*gMonitor); Crash(); } +#endif } } else { +#ifdef REPORT_CHROME_HANGS + if (waitCount >= 2) { + PRUint32 hangDuration = PR_IntervalToSeconds(now - lastTimestamp); + Telemetry::RecordChromeHang(hangDuration, hangStack, hangModuleMap); + hangStack.Clear(); + hangModuleMap.Clear(); + } +#endif lastTimestamp = timestamp; waitCount = 0; } PRIntervalTime timeout; if (gTimeout <= 0) { timeout = PR_INTERVAL_NO_TIMEOUT; } @@ -177,16 +287,24 @@ Startup() return; NS_ASSERTION(!gMonitor, "Hang monitor already initialized"); gMonitor = new Monitor("HangMonitor"); Preferences::RegisterCallback(PrefChanged, kHangMonitorPrefName, NULL); PrefChanged(NULL, NULL); +#ifdef REPORT_CHROME_HANGS + Preferences::RegisterCallback(PrefChanged, kTelemetryPrefName, NULL); + winMainThreadHandle = + OpenThread(THREAD_ALL_ACCESS, FALSE, GetCurrentThreadId()); + if (!winMainThreadHandle) + return; +#endif + // Don't actually start measuring hangs until we hit the main event loop. // This potentially misses a small class of really early startup hangs, // but avoids dealing with some xpcshell tests and other situations which // start XPCOM but don't ever start the event loop. Suspend(); gThread = PR_CreateThread(PR_USER_THREAD, ThreadMain,