Bug 1344174 - Add measure for memory distribution among child processes. r=chutten a=gchang
author Gabor Krizsanits <gkrizsanits@mozilla.com>
Fri, 07 Apr 2017 12:14:45 +0200
changeset 395767 a5df1ac45705449a8aac4faca6a6f258d5d529fa
parent 395766 5fcb7e7b5f2db66d890645d41cbd88ba6e72c532
child 395768 18daa17d013b5e7a2ad5d9df9f4c4e786aefa4ce
push id 1468
push user asasaki@mozilla.com
push date Mon, 05 Jun 2017 19:31:07 +0000
treeherder mozilla-release@0641fc6ee9d1 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers chutten, gchang
bugs 1344174
milestone 54.0a2
Bug 1344174 - Add measure for memory distribution among child processes. r=chutten a=gchang
dom/ipc/tests/browser.ini
dom/ipc/tests/browser_memory_distribution_telemetry.js
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/TelemetrySession.jsm
--- a/dom/ipc/tests/browser.ini
+++ b/dom/ipc/tests/browser.ini
@@ -1,6 +1,8 @@
 [DEFAULT]
 support-files =
   file_disableScript.html
   file_domainPolicy_base.html
 
 [browser_domainPolicy.js]
+[browser_memory_distribution_telemetry.js]
+skip-if = !e10s # This is an e10s only probe.
new file mode 100644
--- /dev/null
+++ b/dom/ipc/tests/browser_memory_distribution_telemetry.js
@@ -0,0 +1,71 @@
+"use strict";
+
+var session = Cu.import("resource://gre/modules/TelemetrySession.jsm", {});
+
+const DUMMY_PAGE_DATA_URI = `data:text/html,
+    <html>
+      <head>
+        <meta charset="utf-8"/>
+        <title>Dummy</title>
+      </head>
+      <body>
+        <h1 id='header'>Just a regular everyday normal page.</h1>
+      </body>
+    </html>`;
+
+/**
+ * Tests the MEMORY_DISTRIBUTION_AMONG_CONTENT probe by opening a few tabs, then triggering
+ * the memory probes and waiting for the "gather-memory-telemetry-finished" notification.
+ */
+add_task(function* test_memory_distribution() {
+  waitForExplicitFinish();
+
+  if (SpecialPowers.getIntPref("dom.ipc.processCount", 1) < 2) {
+    ok(true, "Skip this test if e10s-multi is disabled.");
+    finish();
+    return;
+  }
+
+  yield SpecialPowers.pushPrefEnv({set: [["toolkit.telemetry.enabled", true]]});
+  Services.telemetry.canRecordExtended = true;
+
+  let histogram = Services.telemetry.getKeyedHistogramById("MEMORY_DISTRIBUTION_AMONG_CONTENT");
+  histogram.clear();
+
+  let tab1 = yield BrowserTestUtils.openNewForegroundTab(gBrowser, DUMMY_PAGE_DATA_URI);
+  let tab2 = yield BrowserTestUtils.openNewForegroundTab(gBrowser, DUMMY_PAGE_DATA_URI);
+  let tab3 = yield BrowserTestUtils.openNewForegroundTab(gBrowser, DUMMY_PAGE_DATA_URI);
+
+  let finishedGathering = new Promise(resolve => {
+    let obs = function () {
+      Services.obs.removeObserver(obs, "gather-memory-telemetry-finished");
+      resolve();
+    }
+    Services.obs.addObserver(obs, "gather-memory-telemetry-finished");
+  });
+
+  session.TelemetrySession.getPayload();
+
+  yield finishedGathering;
+
+  let s = histogram.snapshot();
+  ok("0 - 10 tabs" in s, "We should have some samples by now in this bucket.")
+  for (var key in s) {
+    is(key, "0 - 10 tabs");
+    let fewTabsSnapshot = s[key];
+    ok(fewTabsSnapshot.sum > 0, "Zero difference between all the content processes is unlikely, what happened?");
+    ok(fewTabsSnapshot.sum < 80, "20 percentage difference on average is unlikely, what happened?");
+    let c = fewTabsSnapshot.counts;
+    for (let i = 10; i < c.length; i++) {
+      // If this check fails it means that one of the content processes uses at least 20% more or 20% less than the mean.
+      is(c[i], 0, "All the buckets above 10 should be empty");
+    }
+  }
+
+  histogram.clear();
+
+  yield BrowserTestUtils.removeTab(tab3);
+  yield BrowserTestUtils.removeTab(tab2);
+  yield BrowserTestUtils.removeTab(tab1);
+  finish();
+});
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -1112,16 +1112,26 @@
     "bug_numbers": [1198209],
     "expires_in_version": "never",
     "kind": "exponential",
     "low": 32768,
     "high": 16777216,
     "n_buckets": 100,
     "description": "Total Memory Across All Processes (KB)"
   },
+  "MEMORY_DISTRIBUTION_AMONG_CONTENT": {
+    "alert_emails": ["memshrink-telemetry-alerts@mozilla.com"],
+    "bug_numbers": [1344174],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "keyed": true,
+    "high": 200,
+    "n_buckets": 100,
+    "description": "Absolute difference of each content process' USS and the mean of USS's, normalized by the mean, in percentage. 100 means that the content process uses twice as much memory as the mean of the rest (or theoretically it could also mean that it uses 0 while the mean of the others are non 0.)"
+  },
   "MEMORY_UNIQUE": {
     "alert_emails": ["memshrink-telemetry-alerts@mozilla.com"],
     "bug_numbers": [1198209],
     "expires_in_version": "never",
     "kind": "exponential",
     "low": 32768,
     "high": 16777216,
     "n_buckets": 100,
--- a/toolkit/components/telemetry/TelemetrySession.jsm
+++ b/toolkit/components/telemetry/TelemetrySession.jsm
@@ -709,16 +709,17 @@ var Impl = {
   _totalMemoryTimeout: undefined,
   _testing: false,
   // An accumulator of total memory across all processes. Only valid once the final child reports.
   _totalMemory: null,
   // A Set of outstanding USS report ids
   _childrenToHearFrom: null,
   // monotonically-increasing id for USS reports
   _nextTotalMemoryId: 1,
+  _USSFromChildProcesses: null,
   _lastEnvironmentChangeDate: 0,
 
 
   get _log() {
     if (!this._logger) {
       this._logger = Log.repository.getLoggerWithMessagePrefix(LOGGER_NAME, LOGGER_PREFIX);
     }
     return this._logger;
@@ -1166,16 +1167,17 @@ var Impl = {
       if (ppmm.childCount > 1) {
         // Do not report If we time out waiting for the children to call
         this._totalMemoryTimeout = setTimeout(
           () => {
             this._totalMemoryTimeout = undefined;
             this._childrenToHearFrom.clear();
           },
           TOTAL_MEMORY_COLLECTOR_TIMEOUT);
+        this._USSFromChildProcesses = [];
         this._childrenToHearFrom = new Set();
         for (let i = 1; i < ppmm.childCount; i++) {
           let child = ppmm.getChildAt(i);
           try {
             child.sendAsyncMessage(MESSAGE_TELEMETRY_GET_CHILD_USS, {id: this._nextTotalMemoryId});
             this._childrenToHearFrom.add(this._nextTotalMemoryId);
             this._nextTotalMemoryId++;
           } catch (ex) {
@@ -1190,17 +1192,17 @@ var Impl = {
           Ci.nsIMemoryReporter.UNITS_BYTES,
           this._totalMemory);
       }
     }
 
     histogram.add(new Date() - startTime);
   },
 
-  handleMemoryReport(id, units, amount) {
+  handleMemoryReport(id, units, amount, key) {
     let val;
     if (units == Ci.nsIMemoryReporter.UNITS_BYTES) {
       val = Math.floor(amount / 1024);
     } else if (units == Ci.nsIMemoryReporter.UNITS_PERCENTAGE) {
       // UNITS_PERCENTAGE amounts are 100x greater than their raw value.
       val = Math.floor(amount / 100);
     } else if (units == Ci.nsIMemoryReporter.UNITS_COUNT) {
       val = amount;
@@ -1220,20 +1222,29 @@ var Impl = {
       this._prevValues[id] = amount;
     } else {
       NS_ASSERT(false, "Can't handle memory reporter with units " + units);
       return;
     }
 
     let h = this._histograms[id];
     if (!h) {
-      h = Telemetry.getHistogramById(id);
+      if (key) {
+        h = Telemetry.getKeyedHistogramById(id);
+      } else {
+        h = Telemetry.getHistogramById(id);
+      }
       this._histograms[id] = h;
     }
-    h.add(val);
+
+    if (key) {
+      h.add(key, val);
+    } else {
+      h.add(val);
+    }
   },
 
   getChildPayloads: function getChildPayloads() {
     return this._childTelemetry.map(child => child.payload);
   },
 
   /**
    * Get the current session's payload using the provided
@@ -1552,16 +1563,28 @@ var Impl = {
         this._delayedInitTask = null;
         throw e;
       }
     }.bind(this));
 
     return this._delayedInitTask;
   },
 
+  getOpenTabsCount: function getOpenTabsCount() {
+    let tabCount = 0;
+
+    let browserEnum = Services.wm.getEnumerator("navigator:browser");
+    while (browserEnum.hasMoreElements()) {
+      let win = browserEnum.getNext();
+      tabCount += win.gBrowser.tabs.length;
+    }
+
+    return tabCount;
+  },
+
   /**
    * Initializes telemetry for a content process.
    */
   setupContentProcess: function setupContentProcess(testing) {
     this._log.trace("setupContentProcess");
     this._testing = testing;
 
     if (!Telemetry.canRecordBase) {
@@ -1648,24 +1671,56 @@ var Impl = {
       // In child process, send the requested child thread hangs
       this.sendContentProcessThreadHangs();
       break;
     }
     case MESSAGE_TELEMETRY_USS:
     {
       // In parent process, receive the USS report from the child
       if (this._totalMemoryTimeout && this._childrenToHearFrom.delete(message.data.id)) {
-        this._totalMemory += message.data.bytes;
+        let uss = message.data.bytes;
+        this._totalMemory += uss;
+        this._USSFromChildProcesses.push(uss);
         if (this._childrenToHearFrom.size == 0) {
           clearTimeout(this._totalMemoryTimeout);
           this._totalMemoryTimeout = undefined;
           this.handleMemoryReport(
             "MEMORY_TOTAL",
             Ci.nsIMemoryReporter.UNITS_BYTES,
             this._totalMemory);
+
+          let length = this._USSFromChildProcesses.length;
+          if (length > 1) {
+            // Mean of the USS of all the content processes.
+            let mean = this._USSFromChildProcesses.reduce((a, b) => a + b, 0) / length;
+            // Absolute error of USS for each content process, normalized by the mean (*100 to get it in percentage).
+            // 20% means for a content process that it is using 20% more or 20% less than the mean.
+            let diffs = this._USSFromChildProcesses.map(value => Math.floor(Math.abs(value - mean) * 100 / mean));
+            let tabsCount = this.getOpenTabsCount();
+            let key;
+            if (tabsCount < 11) {
+              key = "0 - 10 tabs";
+            } else if (tabsCount < 501) {
+              key = "11 - 500 tabs";
+            } else {
+              key = "more tabs";
+            }
+
+            diffs.forEach(value => {
+              this.handleMemoryReport(
+              "MEMORY_DISTRIBUTION_AMONG_CONTENT",
+              Ci.nsIMemoryReporter.UNITS_COUNT,
+              value,
+              key);
+            });
+
+            // This notification is for testing only.
+            Services.obs.notifyObservers(null, "gather-memory-telemetry-finished", null);
+          }
+          this._USSFromChildProcesses = undefined;
         }
       } else {
         this._log.trace("Child USS report was missed");
       }
       break;
     }
     case MESSAGE_TELEMETRY_GET_CHILD_USS:
     {