Bug 1198209 - Add a "Total Memory" Histogram. r=vladan a=ritu
author Chris H-C <chutten@mozilla.com>
Mon, 21 Dec 2015 13:57:00 +0100
changeset 310546 e86d52914d4d35e3c16cce13a49e9e650473139a
parent 310545 ef7943dc6f2f370ebbf355dab51f5bab595316dd
child 310547 95ca96f282c92a3cf83df6adb1b8464d02636a5d
push id 5513
push user raliiev@mozilla.com
push date Mon, 25 Jan 2016 13:55:34 +0000
treeherder mozilla-beta@5ee97dd05b5c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers vladan, ritu
bugs 1198209
milestone 45.0a2
Bug 1198209 - Add a "Total Memory" Histogram. r=vladan a=ritu We now have all the necessary measurement APIs to get a full memory picture for a running multi-process instance. However, there is no way to correlate a particular RSS measurement in the chrome (parent) process with the USS measurements of its content processes. So have TelemetrySession collect them together and report the total (parent RSS + sum of child USS) in the MEMORY_TOTAL histogram.
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/TelemetrySession.jsm
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -472,16 +472,26 @@
     "expires_in_version": "never",
     "kind": "exponential",
     "low": "32 * 1024",
     "high": "16 * 1024 * 1024",
     "n_buckets": 100,
     "bug_numbers": [1226196],
     "description": "Resident memory size (KB)"
   },
+  "MEMORY_TOTAL": {
+    "alert_emails": ["memshrink-telemetry-alerts@mozilla.com"],
+    "bug_numbers": [1198209],
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "low": "32 * 1024",
+    "high": "16 * 1024 * 1024",
+    "n_buckets": 100,
+    "description": "Total Memory Across All Processes (KB)"
+  },
   "MEMORY_UNIQUE": {
     "alert_emails": ["memshrink-telemetry-alerts@mozilla.com"],
     "bug_numbers": [1198209],
     "expires_in_version": "never",
     "kind": "exponential",
     "low": "32 * 1024",
     "high": "16 * 1024 * 1024",
     "n_buckets": 100,
--- a/toolkit/components/telemetry/TelemetrySession.jsm
+++ b/toolkit/components/telemetry/TelemetrySession.jsm
@@ -53,16 +53,18 @@ const PREF_BRANCH = "toolkit.telemetry."
 const PREF_PREVIOUS_BUILDID = PREF_BRANCH + "previousBuildID";
 const PREF_FHR_UPLOAD_ENABLED = "datareporting.healthreport.uploadEnabled";
 const PREF_ASYNC_PLUGIN_INIT = "dom.ipc.plugins.asyncInit.enabled";
 const PREF_UNIFIED = PREF_BRANCH + "unified";
 
 
 const MESSAGE_TELEMETRY_PAYLOAD = "Telemetry:Payload";
 const MESSAGE_TELEMETRY_GET_CHILD_PAYLOAD = "Telemetry:GetChildPayload";
+const MESSAGE_TELEMETRY_USS = "Telemetry:USS";
+const MESSAGE_TELEMETRY_GET_CHILD_USS = "Telemetry:GetChildUSS";
 
 const DATAREPORTING_DIRECTORY = "datareporting";
 const ABORTED_SESSION_FILE_NAME = "aborted-session-ping";
 
 // Whether the FHR/Telemetry unification features are enabled.
 // Changing this pref requires a restart.
 const IS_UNIFIED_TELEMETRY = Preferences.get(PREF_UNIFIED, false);
 
@@ -89,16 +91,19 @@ const SCHEDULER_MIDNIGHT_TOLERANCE_MS = 
 const IDLE_TIMEOUT_SECONDS = Preferences.get("toolkit.telemetry.idleTimeout", 5 * 60);
 
 // The frequency at which we persist session data to the disk to prevent data loss
 // in case of aborted sessions (currently 5 minutes).
 const ABORTED_SESSION_UPDATE_INTERVAL_MS = 5 * 60 * 1000;
 
 const TOPIC_CYCLE_COLLECTOR_BEGIN = "cycle-collector-begin";
 
+// How long to wait, in milliseconds, for all the child memory reports to come in.
+const TOTAL_MEMORY_COLLECTOR_TIMEOUT = 200;
+
 var gLastMemoryPoll = null;
 
 var gWasDebuggerAttached = false;
 
 function getLocale() {
   return Cc["@mozilla.org/chrome/chrome-registry;1"].
          getService(Ci.nsIXULChromeRegistry).
          getSelectedLocale('global');
@@ -661,16 +666,25 @@ var Impl = {
   // length measurements.
   _subsessionStartTimeMonotonic: 0,
   // The active ticks counted when the subsession starts
   _subsessionStartActiveTicks: 0,
   // A task performing delayed initialization of the chrome process
   _delayedInitTask: null,
   // The deferred promise resolved when the initialization task completes.
   _delayedInitTaskDeferred: null,
+  // Timer id for the timeout that guards against children being tardy in giving back their memory reports; undefined when no collection is in flight.
+  _totalMemoryTimeout: undefined,
+  // An accumulator of total memory across all processes. Only valid once the final child reports.
+  _totalMemory: null,
+  // A Set of outstanding USS report ids
+  _childrenToHearFrom: null,
+  // Monotonically increasing id for USS reports
+  _nextTotalMemoryId: 1,
+
 
   get _log() {
     if (!this._logger) {
       this._logger = Log.repository.getLoggerWithMessagePrefix(LOGGER_NAME, LOGGER_PREFIX);
     }
     return this._logger;
   },
 
@@ -1051,16 +1065,42 @@ var Impl = {
     c("MEMORY_JS_COMPARTMENTS_USER", "JSMainRuntimeCompartmentsUser");
     b("MEMORY_IMAGES_CONTENT_USED_UNCOMPRESSED", "imagesContentUsedUncompressed");
     b("MEMORY_STORAGE_SQLITE", "storageSQLite");
     cc("LOW_MEMORY_EVENTS_VIRTUAL", "lowMemoryEventsVirtual");
     cc("LOW_MEMORY_EVENTS_PHYSICAL", "lowMemoryEventsPhysical");
     c("GHOST_WINDOWS", "ghostWindows");
     cc("PAGE_FAULTS_HARD", "pageFaultsHard");
 
+    if (!Utils.isContentProcess && !this._totalMemoryTimeout) {
+      // Only the chrome process should gather total memory
+      // total = parent RSS + sum(child USS)
+      this._totalMemory = mgr.residentFast;
+      if (ppmm.childCount > 1) {
+        // Do not report if we time out waiting for the children to respond
+        this._totalMemoryTimeout = setTimeout(
+          () => {
+            this._totalMemoryTimeout = undefined;
+            this._childrenToHearFrom.clear();
+          },
+          TOTAL_MEMORY_COLLECTOR_TIMEOUT);
+        this._childrenToHearFrom = new Set();
+        for (let i = 1; i < ppmm.childCount; i++) {
+          ppmm.getChildAt(i).sendAsyncMessage(MESSAGE_TELEMETRY_GET_CHILD_USS, {id: this._nextTotalMemoryId});
+          this._childrenToHearFrom.add(this._nextTotalMemoryId);
+          this._nextTotalMemoryId++;
+        }
+      } else {
+        boundHandleMemoryReport(
+          "MEMORY_TOTAL",
+          Ci.nsIMemoryReporter.UNITS_BYTES,
+          this._totalMemory);
+      }
+    }
+
     histogram.add(new Date() - startTime);
   },
 
   handleMemoryReport: function(id, units, amount) {
     let val;
     if (units == Ci.nsIMemoryReporter.UNITS_BYTES) {
       val = Math.floor(amount / 1024);
     }
@@ -1332,16 +1372,17 @@ var Impl = {
     if (AppConstants.platform === "android") {
       Services.obs.addObserver(this, "application-background", false);
     }
     Services.obs.addObserver(this, "xul-window-visible", false);
     this._hasWindowRestoredObserver = true;
     this._hasXulWindowVisibleObserver = true;
 
     ppml.addMessageListener(MESSAGE_TELEMETRY_PAYLOAD, this);
+    ppml.addMessageListener(MESSAGE_TELEMETRY_USS, this);
 
     // Delay full telemetry initialization to give the browser time to
     // run various late initializers. Otherwise our gathered memory
     // footprint and other numbers would be too optimistic.
     this._delayedInitTaskDeferred = Promise.defer();
     this._delayedInitTask = new DeferredTask(function* () {
       try {
         this._initialized = true;
@@ -1397,16 +1438,17 @@ var Impl = {
 
     if (!Telemetry.canRecordBase) {
       this._log.trace("setupContentProcess - base recording is disabled, not initializing");
       return;
     }
 
     Services.obs.addObserver(this, "content-child-shutdown", false);
     cpml.addMessageListener(MESSAGE_TELEMETRY_GET_CHILD_PAYLOAD, this);
+    cpml.addMessageListener(MESSAGE_TELEMETRY_GET_CHILD_USS, this);
 
     this.gatherStartupHistograms();
 
     let delayedTask = new DeferredTask(function* () {
       this._initialized = true;
 
       this.attachObservers();
       this.gatherMemory();
@@ -1456,23 +1498,63 @@ var Impl = {
 
       break;
     }
     case MESSAGE_TELEMETRY_GET_CHILD_PAYLOAD:
     {
       this.sendContentProcessPing("saved-session");
       break;
     }
+    case MESSAGE_TELEMETRY_USS:
+    {
+      if (this._totalMemoryTimeout && this._childrenToHearFrom.delete(message.data.id)) {
+        this._totalMemory += message.data.bytes;
+        if (this._childrenToHearFrom.size == 0) {
+          clearTimeout(this._totalMemoryTimeout);
+          this._totalMemoryTimeout = undefined;
+          this.handleMemoryReport(
+            "MEMORY_TOTAL",
+            Ci.nsIMemoryReporter.UNITS_BYTES,
+            this._totalMemory);
+        }
+      } else {
+        this._log.trace("Child USS report was missed");
+      }
+      break;
+    }
+    case MESSAGE_TELEMETRY_GET_CHILD_USS:
+    {
+      this.sendContentProcessUSS(message.data.id);
+      break
+    }
     default:
       throw new Error("Telemetry.receiveMessage: bad message name");
     }
   },
 
   _processUUID: generateUUID(),
 
+  sendContentProcessUSS: function sendContentProcessUSS(aMessageId) {
+    this._log.trace("sendContentProcessUSS");
+
+    let mgr;
+    try {
+      mgr = Cc["@mozilla.org/memory-reporter-manager;1"].
+            getService(Ci.nsIMemoryReporterManager);
+    } catch (e) {
+      // OK to skip memory reporters in xpcshell
+      return;
+    }
+
+    cpmm.sendAsyncMessage(
+      MESSAGE_TELEMETRY_USS,
+      {bytes: mgr.residentUnique, id: aMessageId}
+    );
+  },
+
   sendContentProcessPing: function sendContentProcessPing(reason) {
     this._log.trace("sendContentProcessPing - Reason " + reason);
     const isSubsession = !this._isClassicReason(reason);
     let payload = this.getSessionPayload(reason, isSubsession);
     payload.childUUID = this._processUUID;
     cpmm.sendAsyncMessage(MESSAGE_TELEMETRY_PAYLOAD, payload);
   },