Bug 1689438 - Instrument ClientID.jsm for potential failures r=janerik
☠☠ backed out by f6e71a0eec55 ☠ ☠
authorChris H-C <chutten@mozilla.com>
Mon, 01 Feb 2021 11:18:56 +0000
changeset 3480209 cb8d52e34d8019494c348a2e64df6e9d0a89fb4d
parent 3480208 aa8c4b0a672a631cdc7b705ce7da27ce0c36d964
child 3480210 6854bb2548b7b584293ea8ca44053ed6843acf56
child 3481613 f04cc05740287f793e592eba40b387cc8be04824
child 3481618 905df5369176be3b50b6886902770b54b9b78bc9
child 3481620 90b8c0d32115b75ed8e383fc395b2e07106aa233
child 3493925 c52d3b58c0b0e89f01e3eb7db0c0763330daea41
child 3509327 3be2de16f580c0c663707d6c07821c30a65e8517
child 3509330 7a11c851e654e4d0ea52412a19eefabe56ab4a25
push id648815
push userwptsync@mozilla.com
push dateMon, 01 Feb 2021 17:48:27 +0000
treeherdertry@abfb9840bad0 [default view] [failures only]
reviewersjanerik
bugs1689438
milestone87.0a1
Bug 1689438 - Instrument ClientID.jsm for potential failures r=janerik Differential Revision: https://phabricator.services.mozilla.com/D103373
toolkit/components/telemetry/Scalars.yaml
toolkit/components/telemetry/app/ClientID.jsm
toolkit/components/telemetry/tests/unit/test_client_id.js
--- a/toolkit/components/telemetry/Scalars.yaml
+++ b/toolkit/components/telemetry/Scalars.yaml
@@ -3295,16 +3295,62 @@ telemetry:
     notification_emails:
       - sync-team@mozilla.com
     products:
       - 'firefox'
     release_channel_collection: opt-out
     record_in_processes:
       - main
 
+  state_file_save_errors:
+    bug_numbers:
+      - 1689438
+    description: >
+      How many times we errored out trying to save state.json
+    expires: "90"
+    kind: uint
+    notification_emails:
+      - chutten@mozilla.com
+    products:
+      - 'firefox'
+    release_channel_collection: opt-out
+    record_in_processes:
+      - main
+
+  generated_new_client_id:
+    bug_numbers:
+      - 1689438
+    description: >
+      Whether we generated a new client id this subsession.
+    expires: "90"
+    kind: boolean
+    notification_emails:
+      - chutten@mozilla.com
+    products:
+      - 'firefox'
+    release_channel_collection: opt-out
+    record_in_processes:
+      - main
+
+  loaded_client_id_doesnt_match_pref:
+    bug_numbers:
+      - 1689438
+    description: >
+      How often we loaded a client_id from disk that doesn't match
+      what's cached in the pref.
+    expires: "90"
+    kind: uint
+    notification_emails:
+      - chutten@mozilla.com
+    products:
+      - 'firefox'
+    release_channel_collection: opt-out
+    record_in_processes:
+      - main
+
 telemetry.discarded:
   accumulations:
     bug_numbers:
       - 1369041
     description: >
       Number of discarded accumulations to histograms in child processes
     expires: "never"
     kind: uint
--- a/toolkit/components/telemetry/app/ClientID.jsm
+++ b/toolkit/components/telemetry/app/ClientID.jsm
@@ -211,16 +211,32 @@ var ClientIDImpl = {
     let hasCurrentClientID = false;
     let hasCurrentEcosystemClientID = false;
     try {
       let state = await CommonUtils.readJSON(gStateFilePath);
       if (AppConstants.platform == "android" && state && "wasCanary" in state) {
         this._wasCanary = state.wasCanary;
       }
       if (state) {
+        try {
+          if (Services.prefs.prefHasUserValue(PREF_CACHED_CLIENTID)) {
+            let cachedID = Services.prefs.getStringPref(
+              PREF_CACHED_CLIENTID,
+              null
+            );
+            if (cachedID && cachedID != state.clientID) {
+              Services.telemetry.scalarAdd(
+                "telemetry.loaded_client_id_doesnt_match_pref",
+                1
+              );
+            }
+          }
+        } catch (e) {
+          // This data collection's not that important.
+        }
         hasCurrentClientID = this.updateClientID(state.clientID);
         hasCurrentEcosystemClientID = this.updateEcosystemClientID(
           state.ecosystemClientID
         );
         if (hasCurrentClientID && hasCurrentEcosystemClientID) {
           this._log.trace(`_doLoadClientIDs: Client IDs loaded from state.`);
           return {
             clientID: this._clientID,
@@ -229,16 +245,17 @@ var ClientIDImpl = {
         }
       }
     } catch (e) {
       // fall through to next option
     }
 
     // We're missing one or both IDs from the DRS state file, generate new ones.
     if (!hasCurrentClientID) {
+      Services.telemetry.scalarSet("telemetry.generated_new_client_id", true);
       this.updateClientID(CommonUtils.generateUUID());
     }
     if (!hasCurrentEcosystemClientID) {
       this.updateEcosystemClientID(CommonUtils.generateUUID());
     }
     this._saveClientIdsTask = this._saveClientIDs();
 
     // Wait on persisting the id. Otherwise failure to save the ID would result in
@@ -255,34 +272,39 @@ var ClientIDImpl = {
   },
 
   /**
    * Save the client ID to the client ID file.
    *
    * @return {Promise} A promise resolved when the client ID is saved to disk.
    */
   async _saveClientIDs() {
-    this._log.trace(`_saveClientIDs`);
-    let obj = {
-      clientID: this._clientID,
-      ecosystemClientID: this._ecosystemClientID,
-    };
-    // We detected a canary client ID when resetting, storing this as a flag
-    if (AppConstants.platform == "android" && this._wasCanary) {
-      obj.wasCanary = true;
+    try {
+      this._log.trace(`_saveClientIDs`);
+      let obj = {
+        clientID: this._clientID,
+        ecosystemClientID: this._ecosystemClientID,
+      };
+      // We detected a canary client ID when resetting, storing this as a flag
+      if (AppConstants.platform == "android" && this._wasCanary) {
+        obj.wasCanary = true;
+      }
+      try {
+        await IOUtils.makeDirectory(gDatareportingPath);
+      } catch (ex) {
+        if (ex.name != "NotAllowedError") {
+          throw ex;
+        }
+      }
+      await CommonUtils.writeJSON(obj, gStateFilePath);
+      this._saveClientIdsTask = null;
+    } catch (ex) {
+      Services.telemetry.scalarAdd("telemetry.state_file_save_errors", 1);
+      throw ex;
     }
-    try {
-      await IOUtils.makeDirectory(gDatareportingPath);
-    } catch (ex) {
-      if (ex.name != "NotAllowedError") {
-        throw ex;
-      }
-    }
-    await CommonUtils.writeJSON(obj, gStateFilePath);
-    this._saveClientIdsTask = null;
   },
 
   /**
    * This returns a promise resolving to the the stable client ID we use for
    * data reporting (FHR & Telemetry).
    *
    * @return {Promise<string>} The stable client ID.
    */
--- a/toolkit/components/telemetry/tests/unit/test_client_id.js
+++ b/toolkit/components/telemetry/tests/unit/test_client_id.js
@@ -70,29 +70,37 @@ add_task(async function test_client_id()
     [-1, "setIntPref"],
     [0.5, "setIntPref"],
     ["INVALID-UUID", "setStringPref"],
     [true, "setBoolPref"],
     ["", "setStringPref"],
     ["3d1e1560-682a-4043-8cf2-aaaaaaaaaaaZ", "setStringPref"],
   ];
 
+  // Clear the scalar snapshot from previous tests.
+  Services.telemetry.getSnapshotForScalars("main", true);
+
   // If there is no DRS file, we should get a new client ID.
   await ClientID._reset();
+  await OS.File.remove(drsPath, { ignoreAbsent: true });
   let clientID = await ClientID.getClientID();
   Assert.equal(typeof clientID, "string");
   Assert.ok(uuidRegex.test(clientID));
   if (AppConstants.MOZ_GLEAN) {
     Assert.equal(
       Glean.fogValidation.legacyTelemetryClientId.testGetValue(
         "fog-validation"
       ),
       clientID
     );
   }
+  let snapshot = Services.telemetry.getSnapshotForScalars("main", true).parent;
+  Assert.equal(snapshot["telemetry.generated_new_client_id"], true);
+  // No file to read means no value to mismatch with pref.
+  Assert.ok(!("telemetry.loaded_client_id_doesnt_match_pref" in snapshot));
 
   // We should be guarded against invalid DRS json.
   await ClientID._reset();
   await OS.File.writeAtomic(drsPath, "abcd", {
     encoding: "utf-8",
     tmpPath: drsPath + ".tmp",
   });
   clientID = await ClientID.getClientID();
@@ -101,34 +109,77 @@ add_task(async function test_client_id()
   if (AppConstants.MOZ_GLEAN) {
     Assert.equal(
       Glean.fogValidation.legacyTelemetryClientId.testGetValue(
         "fog-validation"
       ),
       clientID
     );
   }
+  snapshot = Services.telemetry.getSnapshotForScalars("main", true).parent;
+  Assert.equal(snapshot["telemetry.generated_new_client_id"], true);
+  // Invalid file means no value to mismatch with pref.
+  Assert.ok(!("telemetry.loaded_client_id_doesnt_match_pref" in snapshot));
 
   // If the DRS data is broken, we should end up with a new client ID.
   for (let [invalidID] of invalidIDs) {
     await ClientID._reset();
     await CommonUtils.writeJSON({ clientID: invalidID }, drsPath);
     clientID = await ClientID.getClientID();
     Assert.equal(typeof clientID, "string");
     Assert.ok(uuidRegex.test(clientID));
     if (AppConstants.MOZ_GLEAN) {
       Assert.equal(
         Glean.fogValidation.legacyTelemetryClientId.testGetValue(
           "fog-validation"
         ),
         clientID
       );
     }
+    snapshot = Services.telemetry.getSnapshotForScalars("main", true).parent;
+    Assert.equal(snapshot["telemetry.generated_new_client_id"], true);
+    Assert.equal(snapshot["telemetry.loaded_client_id_doesnt_match_pref"], 1);
   }
 
+  // Test that valid DRS actually works.
+  const validClientID = "5afebd62-a33c-416c-b519-5c60fb988e8e";
+  await ClientID._reset();
+  await CommonUtils.writeJSON({ clientID: validClientID }, drsPath);
+  clientID = await ClientID.getClientID();
+  Assert.equal(clientID, validClientID);
+  if (AppConstants.MOZ_GLEAN) {
+    Assert.equal(
+      Glean.fogValidation.legacyTelemetryClientId.testGetValue(
+        "fog-validation"
+      ),
+      clientID
+    );
+  }
+  snapshot = Services.telemetry.getSnapshotForScalars("main", true).parent;
+  Assert.ok(!("telemetry.generated_new_client_id" in snapshot));
+  Assert.equal(snapshot["telemetry.loaded_client_id_doesnt_match_pref"], 1);
+
+  // Test that reloading a valid DRS works.
+  await ClientID._reset();
+  clientID = await ClientID.getClientID();
+  Assert.equal(clientID, validClientID);
+  if (AppConstants.MOZ_GLEAN) {
+    Assert.equal(
+      Glean.fogValidation.legacyTelemetryClientId.testGetValue(
+        "fog-validation"
+      ),
+      clientID
+    );
+  }
+  // snapshot may be empty if no other scalars are recorded.
+  snapshot =
+    Services.telemetry.getSnapshotForScalars("main", true).parent || {};
+  Assert.ok(!("telemetry.generated_new_client_id" in snapshot));
+  Assert.ok(!("telemetry.loaded_client_id_doesnt_match_pref" in snapshot));
+
   // Assure that cached IDs are being checked for validity.
   for (let [invalidID, prefFunc] of invalidIDs) {
     await ClientID._reset();
     Services.prefs[prefFunc](PREF_CACHED_CLIENTID, invalidID);
     let cachedID = ClientID.getCachedClientID();
     Assert.strictEqual(
       cachedID,
       null,