Bug 1629826 - Re-enable event telemetry probes for certificate error pages. r=nhnt11
authorJohann Hofmann <jhofmann@mozilla.com>
Sat, 25 Apr 2020 18:12:32 +0000
changeset 526087 f757556c4e2c5754d2fed3c0ee229bf1f2b037f1
parent 526086 1247c4bbb03988aba7329e4676257ec916cf487d
child 526088 c04304e53312694216e694e091e73a176b2f5286
push id37350
push usernbeleuzu@mozilla.com
push dateSun, 26 Apr 2020 09:43:12 +0000
treeherdermozilla-central@21659f178a12 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnhnt11
bugs1629826
milestone77.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1629826 - Re-enable event telemetry probes for certificate error pages. r=nhnt11 Differential Revision: https://phabricator.services.mozilla.com/D72514
browser/actors/NetErrorChild.jsm
browser/base/content/aboutNetError.js
browser/base/content/aboutNetError.xhtml
browser/base/content/test/about/browser.ini
browser/base/content/test/about/browser_aboutCertError_telemetry.js
browser/components/BrowserGlue.jsm
toolkit/components/telemetry/Events.yaml
toolkit/modules/RemotePageAccessManager.jsm
--- a/browser/actors/NetErrorChild.jsm
+++ b/browser/actors/NetErrorChild.jsm
@@ -23,16 +23,17 @@ XPCOMUtils.defineLazyServiceGetter(
 class NetErrorChild extends RemotePageChild {
   actorCreated() {
     super.actorCreated();
 
     const exportableFunctions = [
       "RPMGetAppBuildID",
       "RPMPrefIsLocked",
       "RPMAddToHistogram",
+      "RPMRecordTelemetryEvent",
     ];
     this.exportFunctions(exportableFunctions);
   }
 
   getSerializedSecurityInfo(docShell) {
     let securityInfo =
       docShell.failedChannel && docShell.failedChannel.securityInfo;
     if (!securityInfo) {
@@ -72,9 +73,13 @@ class NetErrorChild extends RemotePageCh
 
   RPMPrefIsLocked(aPref) {
     return Services.prefs.prefIsLocked(aPref);
   }
 
   RPMAddToHistogram(histID, bin) {
     Services.telemetry.getHistogramById(histID).add(bin);
   }
+
+  RPMRecordTelemetryEvent(category, event, object, value, extra) {
+    Services.telemetry.recordEvent(category, event, object, value, extra);
+  }
 }
--- a/browser/base/content/aboutNetError.js
+++ b/browser/base/content/aboutNetError.js
@@ -510,19 +510,58 @@ function initPageCertError() {
   let hideAddExceptionButton = RPMGetBoolPref(
     "security.certerror.hideAddException",
     false
   );
   if (hideAddExceptionButton) {
     document.querySelector(".exceptionDialogButtonContainer").hidden = true;
   }
 
+  let els = document.querySelectorAll("[data-telemetry-id]");
+  for (let el of els) {
+    el.addEventListener("click", recordClickTelemetry);
+  }
+
+  let failedCertInfo = document.getFailedCertSecurityInfo();
+  // Truncate the error code to avoid going over the allowed
+  // string size limit for telemetry events.
+  let errorCode = failedCertInfo.errorCodeString.substring(0, 40);
+  RPMRecordTelemetryEvent(
+    "security.ui.certerror",
+    "load",
+    "aboutcerterror",
+    errorCode,
+    {
+      has_sts: (getCSSClass() == "badStsCert").toString(),
+      is_frame: (window.parent != window).toString(),
+    }
+  );
+
   setCertErrorDetails();
 }
 
+function recordClickTelemetry(e) {
+  let target = e.originalTarget;
+  let telemetryId = target.dataset.telemetryId;
+  let failedCertInfo = document.getFailedCertSecurityInfo();
+  // Truncate the error code to avoid going over the allowed
+  // string size limit for telemetry events.
+  let errorCode = failedCertInfo.errorCodeString.substring(0, 40);
+  RPMRecordTelemetryEvent(
+    "security.ui.certerror",
+    "click",
+    telemetryId,
+    errorCode,
+    {
+      has_sts: (getCSSClass() == "badStsCert").toString(),
+      is_frame: (window.parent != window).toString(),
+    }
+  );
+}
+
 function initCertErrorPageActions() {
   document
     .getElementById("returnButton")
     .addEventListener("click", onReturnButtonClick);
   document
     .getElementById("advancedPanelReturnButton")
     .addEventListener("click", onReturnButtonClick);
   document
@@ -1059,16 +1098,17 @@ async function setTechnicalDetailsOnCert
 
   setL10NLabel(
     "cert-error-code-prefix-link",
     { error: failedCertInfo.errorCodeString },
     {
       title: failedCertInfo.errorCodeString,
       id: "errorCode",
       "data-l10n-name": "error-code-link",
+      "data-telemetry-id": "error_code_link",
     },
     false
   );
   let errorCodeLink = document.getElementById("errorCode");
   if (errorCodeLink) {
     // We're attaching the event listener to the parent element and not on
     // the errorCodeLink itself because event listeners cannot be attached
     // to fluent DOM overlays.
@@ -1082,16 +1122,17 @@ async function setTechnicalDetailsOnCert
 function handleErrorCodeClick(event) {
   if (event.target.id !== "errorCode") {
     return;
   }
 
   let debugInfo = document.getElementById("certificateErrorDebugInformation");
   debugInfo.style.display = "block";
   debugInfo.scrollIntoView({ block: "start", behavior: "smooth" });
+  recordClickTelemetry(event);
 }
 
 /* Only do autofocus if we're the toplevel frame; otherwise we
    don't want to call attention to ourselves!  The key part is
    that autofocus happens on insertion into the tree, so we
    can remove the button, add @autofocus, and reinsert the
    button.
 */
--- a/browser/base/content/aboutNetError.xhtml
+++ b/browser/base/content/aboutNetError.xhtml
@@ -139,17 +139,17 @@
               <p id="errorWhatToDoText2" />
               <p id="badStsCertExplanation" hidden="true">&certerror.whatShouldIDo.badStsCertExplanation1;</p>
           </div>
 
           <!-- Long Description (Note: See netError.dtd for used XHTML tags) -->
           <div id="errorLongDesc" />
 
           <div id="learnMoreContainer">
-            <p><a id="learnMoreLink" target="new">&errorReporting.learnMore;</a></p>
+            <p><a id="learnMoreLink" target="new" data-telemetry-id="learn_more_link">&errorReporting.learnMore;</a></p>
           </div>
         </div>
 
         <!-- UI to temporarily re-enable TLS 1.0 and 1.1.
              This should be removed after March 2020, see bug 1579285.  -->
         <div id="enableTls10Container" class="button-container">
           <p>&enableTls10.longDesc;</p>
           <p>&enableTls10.note;</p>
@@ -159,49 +159,49 @@
         <!-- UI for option to report certificate errors to Mozilla. Removed on
              init for other error types .-->
         <div id="prefChangeContainer" class="button-container">
           <p>&prefReset.longDesc;</p>
           <button id="prefResetButton" class="primary">&prefReset.label;</button>
         </div>
 
         <div id="certErrorAndCaptivePortalButtonContainer" class="button-container">
-          <button id="returnButton" class="primary">&returnToPreviousPage1.label;</button>
+          <button id="returnButton" class="primary" data-telemetry-id="return_button_top">&returnToPreviousPage1.label;</button>
           <button id="openPortalLoginPageButton" class="primary">&openPortalLoginPage.label2;</button>
           <button class="primary try-again">&retry.label;</button>
-          <button id="advancedButton">&advanced2.label;</button>
+          <button id="advancedButton" data-telemetry-id="advanced_button">&advanced2.label;</button>
         </div>
       </div>
 
       <div id="netErrorButtonContainer" class="button-container">
         <button class="primary try-again">&retry.label;</button>
       </div>
 
       <div id="advancedPanelContainer">
         <div id="badCertAdvancedPanel" class="advanced-panel">
           <p id="badCertTechnicalInfo"/>
           <a id="viewCertificate" href="javascript:void(0)">&viewCertificate.label;</a>
           <div id="advancedPanelButtonContainer" class="button-container">
-            <button id="advancedPanelReturnButton" class="primary">&returnToPreviousPage1.label;</button>
+            <button id="advancedPanelReturnButton" class="primary" data-telemetry-id="return_button_adv">&returnToPreviousPage1.label;</button>
             <button class="primary try-again">&retry.label;</button>
             <div class="exceptionDialogButtonContainer">
-              <button id="exceptionDialogButton">&securityOverride.exceptionButton1Label;</button>
+              <button id="exceptionDialogButton" data-telemetry-id="exception_button">&securityOverride.exceptionButton1Label;</button>
             </div>
           </div>
         </div>
 
         <div id="certificateErrorReporting">
             <p class="toggle-container-with-text">
                 <input type="checkbox" id="automaticallyReportInFuture" role="checkbox"/>
-                <label for="automaticallyReportInFuture">&errorReporting.automatic2;</label>
+                <label for="automaticallyReportInFuture" data-telemetry-id="auto_report_cb">&errorReporting.automatic2;</label>
             </p>
         </div>
 
         <div id="certificateErrorDebugInformation">
-          <button id="copyToClipboardTop">&certerror.copyToClipboard.label;</button>
+          <button id="copyToClipboardTop" data-telemetry-id="clipboard_button_top">&certerror.copyToClipboard.label;</button>
           <div id="certificateErrorText"/>
-          <button id="copyToClipboardBottom">&certerror.copyToClipboard.label;</button>
+          <button id="copyToClipboardBottom" data-telemetry-id="clipboard_button_bot">&certerror.copyToClipboard.label;</button>
         </div>
       </div>
     </div>
   </body>
   <script src="chrome://browser/content/aboutNetError.js"/>
 </html>
--- a/browser/base/content/test/about/browser.ini
+++ b/browser/base/content/test/about/browser.ini
@@ -10,16 +10,17 @@ prefs =
   browser.newtabpage.activity-stream.improvesearch.handoffToAwesomebar=false
 
 [browser_aboutCertError.js]
 [browser_aboutCertError_clockSkew.js]
 [browser_aboutCertError_exception.js]
 [browser_aboutCertError_mitm.js]
 skip-if = true #Bug 1603002
 [browser_aboutCertError_noSubjectAltName.js]
+[browser_aboutCertError_telemetry.js]
 [browser_aboutHome_search_POST.js]
 [browser_aboutHome_search_composing.js]
 [browser_aboutHome_search_searchbar.js]
 [browser_aboutHome_search_suggestion.js]
 skip-if = os == "mac" || (os == "linux" && (!debug || bits == 64)) || (os == 'win' && os_version == '10.0' && bits == 64 && !debug) # Bug 1399648, bug 1402502
 [browser_aboutHome_search_telemetry.js]
 [browser_aboutNetError.js]
 [browser_aboutStopReload.js]
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/about/browser_aboutCertError_telemetry.js
@@ -0,0 +1,161 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+requestLongerTimeout(2);
+
+const BAD_CERT = "https://expired.example.com/";
+const BAD_STS_CERT =
+  "https://badchain.include-subdomains.pinning.example.com:443";
+
+add_task(async function checkTelemetryClickEvents() {
+  info("Loading a bad cert page and verifying telemetry click events arrive.");
+
+  let oldCanRecord = Services.telemetry.canRecordExtended;
+  Services.telemetry.canRecordExtended = true;
+
+  registerCleanupFunction(() => {
+    Services.telemetry.canRecordExtended = oldCanRecord;
+  });
+
+  // For obvious reasons event telemetry in the content processes updates with
+  // the main processs asynchronously, so we need to wait for the main process
+  // to catch up through the entire test.
+
+  // There's an arbitrary interval of 2 seconds in which the content
+  // processes sync their event data with the parent process, we wait
+  // this out to ensure that we clear everything that is left over from
+  // previous tests and don't receive random events in the middle of our tests.
+  // eslint-disable-next-line mozilla/no-arbitrary-setTimeout
+  await new Promise(c => setTimeout(c, 2000));
+
+  // Clear everything.
+  Services.telemetry.clearEvents();
+  await TestUtils.waitForCondition(() => {
+    let events = Services.telemetry.snapshotEvents(
+      Ci.nsITelemetry.DATASET_PRERELEASE_CHANNELS,
+      true
+    ).content;
+    return !events || !events.length;
+  });
+
+  // Now enable recording our telemetry. Even if this is disabled, content
+  // processes will send event telemetry to the parent, thus we needed to ensure
+  // we waited and cleared first. Sigh.
+  Services.telemetry.setEventRecordingEnabled("security.ui.certerror", true);
+
+  for (let useFrame of [false, true]) {
+    let recordedObjects = [
+      "advanced_button",
+      "learn_more_link",
+      "auto_report_cb",
+      "error_code_link",
+      "clipboard_button_top",
+      "clipboard_button_bot",
+      "return_button_top",
+    ];
+
+    recordedObjects.push("return_button_adv");
+    if (!useFrame) {
+      recordedObjects.push("exception_button");
+    }
+
+    for (let object of recordedObjects) {
+      let tab = await openErrorPage(BAD_CERT, useFrame);
+      let browser = tab.linkedBrowser;
+
+      let loadEvents = await TestUtils.waitForCondition(() => {
+        let events = Services.telemetry.snapshotEvents(
+          Ci.nsITelemetry.DATASET_PRERELEASE_CHANNELS,
+          true
+        ).content;
+        if (events && events.length) {
+          events = events.filter(
+            e => e[1] == "security.ui.certerror" && e[2] == "load"
+          );
+          if (
+            events.length == 1 &&
+            events[0][5].is_frame == useFrame.toString()
+          ) {
+            return events;
+          }
+        }
+        return null;
+      }, "recorded telemetry for the load");
+
+      is(
+        loadEvents.length,
+        1,
+        `recorded telemetry for the load testing ${object}, useFrame: ${useFrame}`
+      );
+
+      let bc = browser.browsingContext;
+      if (useFrame) {
+        bc = bc.children[0];
+      }
+
+      await SpecialPowers.spawn(bc, [object], async function(objectId) {
+        let doc = content.document;
+
+        await ContentTaskUtils.waitForCondition(
+          () => doc.body.classList.contains("certerror"),
+          "Wait for certerror to be loaded"
+        );
+
+        let domElement = doc.querySelector(`[data-telemetry-id='${objectId}']`);
+        domElement.click();
+      });
+
+      let clickEvents = await TestUtils.waitForCondition(() => {
+        let events = Services.telemetry.snapshotEvents(
+          Ci.nsITelemetry.DATASET_PRERELEASE_CHANNELS,
+          true
+        ).content;
+        if (events && events.length) {
+          events = events.filter(
+            e =>
+              e[1] == "security.ui.certerror" &&
+              e[2] == "click" &&
+              e[3] == object
+          );
+          if (
+            events.length == 1 &&
+            events[0][5].is_frame == useFrame.toString()
+          ) {
+            return events;
+          }
+        }
+        return null;
+      }, "Has captured telemetry events.");
+
+      is(
+        clickEvents.length,
+        1,
+        `recorded telemetry for the click on ${object}, useFrame: ${useFrame}`
+      );
+
+      // We opened an extra tab for the SUMO page, need to close it.
+      if (object == "learn_more_link") {
+        BrowserTestUtils.removeTab(gBrowser.selectedTab);
+      }
+
+      if (object == "exception_button") {
+        let certOverrideService = Cc[
+          "@mozilla.org/security/certoverride;1"
+        ].getService(Ci.nsICertOverrideService);
+        certOverrideService.clearValidityOverride("expired.example.com", -1);
+      }
+
+      BrowserTestUtils.removeTab(gBrowser.selectedTab);
+    }
+  }
+
+  let enableCertErrorUITelemetry = Services.prefs.getBoolPref(
+    "security.certerrors.recordEventTelemetry"
+  );
+  Services.telemetry.setEventRecordingEnabled(
+    "security.ui.certerror",
+    enableCertErrorUITelemetry
+  );
+});
--- a/browser/components/BrowserGlue.jsm
+++ b/browser/components/BrowserGlue.jsm
@@ -2158,16 +2158,29 @@ BrowserGlue.prototype = {
       {
         task: () => {
           this._recordDataSanitizationPrefs();
         },
       },
 
       {
         task: () => {
+          let enableCertErrorUITelemetry = Services.prefs.getBoolPref(
+            "security.certerrors.recordEventTelemetry",
+            true
+          );
+          Services.telemetry.setEventRecordingEnabled(
+            "security.ui.certerror",
+            enableCertErrorUITelemetry
+          );
+        },
+      },
+
+      {
+        task: () => {
           let siteSpecific = Services.prefs.getBoolPref(
             "browser.zoom.siteSpecific",
             false
           );
           Services.telemetry.scalarSet("a11y.sitezoom", siteSpecific);
         },
       },
 
--- a/toolkit/components/telemetry/Events.yaml
+++ b/toolkit/components/telemetry/Events.yaml
@@ -1946,8 +1946,67 @@ security.doh.trrPerformance:
     extra_keys:
       domain: The resolved domain.
       trr: The TRR provider used.
       time: The network time for the resolution.
       status: The DNS status code.
       retryCount: The number of lookup attempts before success.
       networkUnstable: Whether there was network fluctuation while gathering the results.
       captivePortal: Whether there a captive portal was detected during the run.
+
+security.ui.certerror:
+  load:
+    objects: ["aboutcerterror"]
+    bug_numbers:
+      - 1484255
+      - 1505310
+      - 1553181
+      - 1629826
+    description: >
+      The about:certerror page is loaded, keyed by error code, see https://searchfox.org/mozilla-central/source/security/nss/lib/mozpkix/include/pkix/Result.h
+    expiry_version: never
+    notification_emails:
+      - jhofmann@mozilla.com
+      - rtestard@mozilla.com
+      - seceng-telemetry@mozilla.com
+    release_channel_collection: opt-out
+    products:
+      - "firefox"
+    record_in_processes: ["content"]
+    products:
+      - firefox
+    extra_keys:
+      is_frame: If the error page is loaded in an iframe.
+      has_sts: If the error page is for a site with HSTS headers or with a pinned key.
+  click:
+    objects: [
+      "advanced_button",
+      "exception_button",
+      "return_button_top",
+      "return_button_adv",
+      "learn_more_link",
+      "auto_report_cb",
+      "error_code_link",
+      "clipboard_button_top",
+      "clipboard_button_bot",
+    ]
+    bug_numbers:
+      - 1484255
+      - 1505310
+      - 1553181
+      - 1629826
+    description: >
+      User interaction by click events on the cert error page. Keyed by error code, see https://searchfox.org/mozilla-central/source/security/nss/lib/mozpkix/include/pkix/Result.h
+    expiry_version: never
+    notification_emails:
+      - jhofmann@mozilla.com
+      - rtestard@mozilla.com
+      - seceng-telemetry@mozilla.com
+    release_channel_collection: opt-out
+    products:
+      - "firefox"
+    record_in_processes: ["content"]
+    products:
+      - firefox
+    extra_keys:
+      is_frame: If the error page is loaded in an iframe.
+      has_sts: If the error page is for a site with HSTS headers or with a pinned key.
+      panel_open: If the advanced panel was open at the time of the interaction.
--- a/toolkit/modules/RemotePageAccessManager.jsm
+++ b/toolkit/modules/RemotePageAccessManager.jsm
@@ -36,16 +36,17 @@ let RemotePageAccessManager = {
         "Browser:ResetSSLPreferences",
         "GetChangedCertPrefs",
         "ReportTLSError",
         "Browser:OpenCaptivePortalPage",
         "Browser:SSLErrorGoBack",
         "Browser:PrimeMitm",
         "Browser:ResetEnterpriseRootsPref",
       ],
+      RPMRecordTelemetryEvent: ["*"],
       RPMAddMessageListener: ["*"],
       RPMRemoveMessageListener: ["*"],
       RPMGetFormatURLPref: ["app.support.baseURL"],
       RPMGetBoolPref: [
         "security.certerrors.mitm.priming.enabled",
         "security.certerrors.permanentOverride",
         "security.enterprise_roots.auto-enabled",
         "security.certerror.hideAddException",