Bug 1139460 - Part 2 - Overhaul telemetry ping submission code. r=vladan
☠☠ backed out by 70068a6d3c7a ☠ ☠
authorGeorg Fritzsche <georg.fritzsche@googlemail.com>
Fri, 20 Mar 2015 14:32:35 +0100
changeset 265044 20d390554f5e9c42e598455f3c151a02183041fc
parent 265043 02623719b09419865790bcf5dad10f253509372f
child 265045 79be0402cbdb800d5e08a5939ec53f54decbdf80
push id4718
push userraliiev@mozilla.com
push dateMon, 11 May 2015 18:39:53 +0000
treeherdermozilla-beta@c20c4ef55f08 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersvladan
bugs1139460
milestone39.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1139460 - Part 2 - Overhaul telemetry ping submission code. r=vladan
toolkit/components/telemetry/TelemetryPing.jsm
toolkit/components/telemetry/docs/pings.rst
--- a/toolkit/components/telemetry/TelemetryPing.jsm
+++ b/toolkit/components/telemetry/TelemetryPing.jsm
@@ -12,16 +12,17 @@ const Cu = Components.utils;
 const myScope = this;
 
 Cu.import("resource://gre/modules/Log.jsm");
 Cu.import("resource://gre/modules/debug.js", this);
 Cu.import("resource://gre/modules/Services.jsm", this);
 Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
 Cu.import("resource://gre/modules/osfile.jsm", this);
 Cu.import("resource://gre/modules/Promise.jsm", this);
+Cu.import("resource://gre/modules/PromiseUtils.jsm", this);
 Cu.import("resource://gre/modules/DeferredTask.jsm", this);
 Cu.import("resource://gre/modules/Preferences.jsm");
 
 const LOGGER_NAME = "Toolkit.Telemetry";
 const LOGGER_PREFIX = "TelemetryPing::";
 
 const PREF_BRANCH = "toolkit.telemetry.";
 const PREF_BRANCH_LOG = PREF_BRANCH + "log.";
@@ -35,16 +36,18 @@ const PREF_FHR_UPLOAD_ENABLED = "datarep
 const PING_FORMAT_VERSION = 4;
 
 // Delay before intializing telemetry (ms)
 const TELEMETRY_DELAY = 60000;
 // Delay before initializing telemetry if we're testing (ms)
 const TELEMETRY_TEST_DELAY = 100;
 // The number of days to keep pings serialised on the disk in case of failures.
 const DEFAULT_RETENTION_DAYS = 14;
+// Timeout after which we consider a ping submission failed.
+const PING_SUBMIT_TIMEOUT_MS = 2 * 60 * 1000;
 
 XPCOMUtils.defineLazyServiceGetter(this, "Telemetry",
                                    "@mozilla.org/base/telemetry;1",
                                    "nsITelemetry");
 XPCOMUtils.defineLazyModuleGetter(this, "AsyncShutdown",
                                   "resource://gre/modules/AsyncShutdown.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "TelemetryFile",
                                   "resource://gre/modules/TelemetryFile.jsm");
@@ -485,18 +488,18 @@ let Impl = {
                               .then(() => { return pingData.id; });
         } else {
           return TelemetryFile.savePing(pingData, aOptions.overwrite)
                               .then(() => { return pingData.id; });
         }
       }, error => this._log.error("savePing - Rejection", error));
   },
 
-  finishPingRequest: function finishPingRequest(success, startTime, ping, isPersisted) {
-    this._log.trace("finishPingRequest - Success " + success + ", Persisted " + isPersisted);
+  onPingRequestFinished: function(success, startTime, ping, isPersisted) {
+    this._log.trace("onPingRequestFinished - success: " + success + ", persisted: " + isPersisted);
 
     let hping = Telemetry.getHistogramById("TELEMETRY_PING");
     let hsuccess = Telemetry.getHistogramById("TELEMETRY_SUCCESS");
 
     hsuccess.add(success);
     hping.add(new Date() - startTime);
 
     if (success && isPersisted) {
@@ -536,63 +539,100 @@ let Impl = {
     }
 
     let slug = pathComponents.join("/");
     return "/submit/telemetry/" + slug;
   },
 
   doPing: function doPing(ping, isPersisted) {
     this._log.trace("doPing - Server " + this._server + ", Persisted " + isPersisted);
-    let deferred = Promise.defer();
-    let isNewPing = isNewPingFormat(ping);
-    let version = isNewPing ? PING_FORMAT_VERSION : 1;
-    let url = this._server + this.submissionPath(ping) + "?v=" + version;
+
+    const isNewPing = isNewPingFormat(ping);
+    const version = isNewPing ? PING_FORMAT_VERSION : 1;
+    const url = this._server + this.submissionPath(ping) + "?v=" + version;
+
     let request = Cc["@mozilla.org/xmlextras/xmlhttprequest;1"]
                   .createInstance(Ci.nsIXMLHttpRequest);
     request.mozBackgroundRequest = true;
+    request.timeout = PING_SUBMIT_TIMEOUT_MS;
+
     request.open("POST", url, true);
     request.overrideMimeType("text/plain");
     request.setRequestHeader("Content-Type", "application/json; charset=UTF-8");
 
     let startTime = new Date();
+    let deferred = PromiseUtils.defer();
 
-    function handler(success) {
-      let handleCompletion = event => {
+    let onRequestFinished = (success, event) => {
+      let onCompletion = () => {
         if (success) {
           deferred.resolve();
         } else {
           deferred.reject(event);
         }
       };
 
-      return function(event) {
-        this.finishPingRequest(success, startTime, ping, isPersisted)
-          .then(() => handleCompletion(event),
-                error => {
-                  this._log.error("doPing - Request Success " + success + ", Error " +
-                                  error);
-                  handleCompletion(event);
-                });
-      };
-    }
-    request.addEventListener("error", handler(false).bind(this), false);
-    request.addEventListener("load", handler(true).bind(this), false);
+      this.onPingRequestFinished(success, startTime, ping, isPersisted)
+        .then(() => onCompletion(),
+              (error) => {
+                this._log.error("doPing - request success: " + success + ", error" + error);
+                onCompletion();
+              });
+    };
+
+    let errorhandler = (event) => {
+      this._log.error("doPing - error making request to " + url + ": " + event.type);
+      onRequestFinished(false, event);
+    };
+    request.onerror = errorhandler;
+    request.ontimeout = errorhandler;
+    request.onabort = errorhandler;
+
+    request.onload = (event) => {
+      let status = request.status;
+      let statusClass = status - (status % 100);
+      let success = false;
+
+      if (statusClass === 200) {
+        // We can treat all 2XX as success.
+        this._log.info("doPing - successfully loaded, status: " + status);
+        success = true;
+      } else if (statusClass === 400) {
+        // 4XX means that something with the request was broken.
+        this._log.error("doPing - error submitting to " + url + ", status: " + status
+                        + " - ping request broken?");
+        // TODO: we should handle this better, but for now we should avoid resubmitting
+        // broken requests by pretending success.
+        success = true;
+      } else if (statusClass === 500) {
+        // 5XX means there was a server-side error and we should try again later.
+        this._log.error("doPing - error submitting to " + url + ", status: " + status
+                        + " - server error, should retry later");
+      } else {
+        // We received an unexpected status codes.
+        this._log.error("doPing - error submitting to " + url + ", status: " + status
+                        + ", type: " + event.type);
+      }
+
+      onRequestFinished(success, event);
+    };
 
     // If that's a legacy ping format, just send its payload.
     let networkPayload = isNewPing ? ping : ping.payload;
     request.setRequestHeader("Content-Encoding", "gzip");
     let converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
                     .createInstance(Ci.nsIScriptableUnicodeConverter);
     converter.charset = "UTF-8";
     let utf8Payload = converter.ConvertFromUnicode(JSON.stringify(networkPayload));
     utf8Payload += converter.Finish();
     let payloadStream = Cc["@mozilla.org/io/string-input-stream;1"]
                         .createInstance(Ci.nsIStringInputStream);
     payloadStream.data = this.gzipCompressString(utf8Payload);
     request.send(payloadStream);
+
     return deferred.promise;
   },
 
   gzipCompressString: function gzipCompressString(string) {
     let observer = {
       buffer: "",
       onStreamComplete: function(loader, context, status, length, result) {
         this.buffer = String.fromCharCode.apply(this, result);
--- a/toolkit/components/telemetry/docs/pings.rst
+++ b/toolkit/components/telemetry/docs/pings.rst
@@ -14,16 +14,22 @@ It contains some basic information share
 Submission
 ==========
 
 Pings are submitted via a common API on ``TelemetryPing``. It allows callers to choose a custom retention period that determines how long pings are kept on disk if submission wasn't successful.
 If a ping failed to submit (e.g. because of missing internet connection), Telemetry will retry to submit it until its retention period is up.
 
 *Note:* the :doc:`main pings <main-ping>` are kept locally even after successful submission to enable the HealthReport and SelfSupport features. They will be deleted after their retention period of 180 days.
 
+The telemetry server team is working towards `the common services status codes <https://wiki.mozilla.org/CloudServices/DataPipeline/HTTPEdgeServerSpecification#Server_Responses>`_, but for now the following logic is sufficient for Telemetry:
+
+* `2XX` - success, don't resubmit
+* `4XX` - there was some problem with the request - the client should not try to resubmit as it would just receive the same response
+* `5XX` - there was a server-side error, the client should try to resubmit later
+
 Ping types
 ==========
 
 * :doc:`main <main-ping>` - contains the information collected by Telemetry (Histograms, hang stacks, ...)
 * :doc:`saved-session <main-ping>` - contains the *"classic"* Telemetry payload with measurements covering the whole browser session. Used to make storage of saved-session easier server-side.
 * ``activation`` - *planned* - sent right after installation or profile creation
 * ``upgrade`` - *planned* - sent right after an upgrade
 * ``deletion`` - *planned* - on opt-out we may have to tell the server to delete user data