Bug 1311931 - Add telemetry to measure full match rate for v2 and v4. r=francois
authorDimi Lee <dlee@mozilla.com>
Thu, 23 Feb 2017 23:07:13 +0800
changeset 344521 2aa2e92d394f993715cce84de22ba1bb9fc87047
parent 344520 88a7b28e2b714298493a1471c25925527c6d8409
child 344522 549cbcddd9b3ff4fbb004c94f0ef1a8499821f5d
push id31413
push usercbook@mozilla.com
push dateFri, 24 Feb 2017 10:18:46 +0000
treeherdermozilla-central@c7935d540027 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfrancois
bugs1311931
milestone54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1311931 - Add telemetry to measure full match rate for v2 and v4. r=francois MozReview-Commit-ID: H9jAR82rgDh
toolkit/components/telemetry/Histograms.json
toolkit/components/url-classifier/Classifier.cpp
toolkit/components/url-classifier/LookupCache.h
toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4166,16 +4166,24 @@
   "URLCLASSIFIER_PREFIX_MATCH": {
     "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
     "expires_in_version": "58",
     "kind": "enumerated",
     "n_values": 4,
     "bug_numbers": [1298257],
     "description": "Classifier prefix matching result (0 = no match, 1 = match only V2, 2 = match only V4, 3 = match both V2 and V4)"
   },
+  "URLCLASSIFIER_MATCH_RESULT": {
+    "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
+    "expires_in_version": "60",
+    "kind": "enumerated",
+    "n_values": 16,
+    "bug_numbers": [1311931],
+    "description": "The result of each URL lookup against both google and google4 lists(0 = no match, 1 = V2 prefix only, 2 = V4 prefix only, 3 = V2 and V4 prefixes, 4 = V2 and V4 completions, 5 = V2 completion only, 6 = V4 completion only, 7 = V2 completion and v4 prefix, 8 = V2 prefix and V4 completion, 9 = unexpected result)"
+  },
   "URLCLASSIFIER_POSITIVE_CACHE_DURATION": {
     "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
     "expires_in_version": "60",
     "kind": "exponential",
     "high": 86400000,
     "n_buckets": 50,
     "bug_numbers": [1338082],
     "description": "Positive cache duration (ms) received in fullhash response from any v4 provider"
--- a/toolkit/components/url-classifier/Classifier.cpp
+++ b/toolkit/components/url-classifier/Classifier.cpp
@@ -465,20 +465,22 @@ Classifier::Check(const nsACString& aSpe
       cacheArray.AppendElement(cache);
     } else {
       return NS_ERROR_FAILURE;
     }
   }
 
   // Only record telemetry when both v2 and v4 have data.
   bool isV2Empty = true, isV4Empty = true;
+  bool shouldDoTelemetry = false;
   for (auto&& cache : cacheArray) {
     bool& ref = LookupCache::Cast<LookupCacheV2>(cache) ? isV2Empty : isV4Empty;
     ref = ref ? cache->IsEmpty() : false;
     if (!isV2Empty && !isV4Empty) {
+      shouldDoTelemetry = true;
       break;
     }
   }
 
   PrefixMatch matchingStatistics = PrefixMatch::eNoMatch;
 
   // Now check each lookup fragment against the entries in the DB.
   for (uint32_t i = 0; i < fragments.Length(); i++) {
@@ -516,33 +518,50 @@ Classifier::Check(const nsACString& aSpe
         LOG(("Found a result in %s: %s",
              cache->TableName().get(),
              confirmed ? "confirmed." : "Not confirmed."));
 
         result->hash.complete = lookupHash;
         result->mConfirmed = confirmed;
         result->mTableName.Assign(cache->TableName());
         result->mPartialHashLength = confirmed ? COMPLETE_SIZE : matchLength;
+        result->mProtocolV2 = LookupCache::Cast<LookupCacheV2>(cache);
 
-        if (LookupCache::Cast<LookupCacheV4>(cache)) {
-          matchingStatistics |= PrefixMatch::eMatchV4Prefix;
-          result->mProtocolV2 = false;
+        // There are two cases we are going to ignore the result for telemetry:
+        // 1. shouldDoTelemetry == false(when either v2 or v4 table is empty)
+        // 2. When match was found in the table which is not provided by google.
+        if (!shouldDoTelemetry ||
+            !StringBeginsWith(result->mTableName, NS_LITERAL_CSTRING("goog"))) {
+          continue;
+        }
+
+        if (result->mProtocolV2) {
+          matchingStatistics |= PrefixMatch::eMatchV2Prefix;
+          result->mMatchResult = MatchResult::eV2Prefix;
         } else {
           matchingStatistics |= PrefixMatch::eMatchV2Prefix;
-          result->mProtocolV2 = true;
+          result->mMatchResult = MatchResult::eV4Prefix;
         }
       }
     }
 
-    if (!isV2Empty && !isV4Empty) {
+    // TODO : This will be removed in Bug 1338033.
+    if (shouldDoTelemetry) {
       Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PREFIX_MATCH,
                             static_cast<uint8_t>(matchingStatistics));
     }
   }
 
+  // If we cannot find the prefix in neither the v2 nor the v4 database, record the
+  // telemetry here because we won't reach nsUrlClassifierLookupCallback:::HandleResult.
+  if (shouldDoTelemetry && aResults.Length() == 0) {
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_RESULT,
+                          static_cast<uint8_t>(MatchResult::eNoMatch));
+  }
+
   return NS_OK;
 }
 
 static nsresult
 SwapDirectoryContent(nsIFile* aDir1,
                      nsIFile* aDir2,
                      nsIFile* aParentDir,
                      nsIFile* aTempDir)
--- a/toolkit/components/url-classifier/LookupCache.h
+++ b/toolkit/components/url-classifier/LookupCache.h
@@ -11,28 +11,49 @@
 #include "nsTArray.h"
 #include "nsCOMPtr.h"
 #include "nsIFile.h"
 #include "nsIFileStreams.h"
 #include "mozilla/RefPtr.h"
 #include "nsUrlClassifierPrefixSet.h"
 #include "VariableLengthPrefixSet.h"
 #include "mozilla/Logging.h"
+#include "mozilla/TypedEnumBits.h"
 
 namespace mozilla {
 namespace safebrowsing {
 
 #define MAX_HOST_COMPONENTS 5
 #define MAX_PATH_COMPONENTS 4
 
+enum class MatchResult : uint8_t
+{
+  eNoMatch           = 0x00,
+  eV2Prefix          = 0x01,
+  eV4Prefix          = 0x02,
+  eV2Completion      = 0x04,
+  eV4Completion      = 0x08,
+  eTelemetryDisabled = 0x10,
+
+  eBothPrefix      = eV2Prefix     | eV4Prefix,
+  eBothCompletion  = eV2Completion | eV4Completion,
+  eV2PreAndCom     = eV2Prefix     | eV2Completion,
+  eV4PreAndCom     = eV4Prefix     | eV4Completion,
+  eBothPreAndV2Com = eBothPrefix   | eV2Completion,
+  eBothPreAndV4Com = eBothPrefix   | eV4Completion,
+  eAll             = eBothPrefix   | eBothCompletion,
+};
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(MatchResult)
+
 class LookupResult {
 public:
   LookupResult() : mNoise(false), mProtocolConfirmed(false),
                    mPartialHashLength(0), mConfirmed(false),
-                   mProtocolV2(true) {}
+                   mProtocolV2(true),
+                   mMatchResult(MatchResult::eTelemetryDisabled) {}
 
   // The fragment that matched in the LookupCache
   union {
     Prefix fixedLengthPrefix;
     Completion complete;
   } hash;
 
   const Completion &CompleteHash() {
@@ -70,16 +91,19 @@ public:
   nsCString mTableName;
 
   uint32_t mPartialHashLength;
 
   // True as long as this lookup is complete and hasn't expired.
   bool mConfirmed;
 
   bool mProtocolV2;
+
+  // This is only used by telemetry to record the match result.
+  MatchResult mMatchResult;
 };
 
 typedef nsTArray<LookupResult> LookupResultArray;
 
 struct CacheResult {
   AddComplete entry;
   nsCString table;
 
--- a/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
@@ -1095,16 +1095,37 @@ nsUrlClassifierLookupCallback::Completio
         && result.mTableName.Equals(tableName)) {
       result.mProtocolConfirmed = true;
     }
   }
 
   return NS_OK;
 }
 
+
+static uint8_t
+ConvertMatchResultToUint(const MatchResult& aResult)
+{
+  MOZ_ASSERT(!(aResult & MatchResult::eTelemetryDisabled));
+  switch (aResult) {
+  case MatchResult::eNoMatch:          return 0;
+  case MatchResult::eV2Prefix:         return 1;
+  case MatchResult::eV4Prefix:         return 2;
+  case MatchResult::eBothPrefix:       return 3;
+  case MatchResult::eAll:              return 4;
+  case MatchResult::eV2PreAndCom:      return 5;
+  case MatchResult::eV4PreAndCom:      return 6;
+  case MatchResult::eBothPreAndV2Com:  return 7;
+  case MatchResult::eBothPreAndV4Com:  return 8;
+  default:
+    MOZ_ASSERT_UNREACHABLE("Unexpected match result");
+    return 9;
+  }
+}
+
 nsresult
 nsUrlClassifierLookupCallback::HandleResults()
 {
   if (!mResults) {
     // No results, this URI is clean.
     LOG(("nsUrlClassifierLookupCallback::HandleResults [%p, no results]", this));
     return mCallback->HandleEvent(NS_LITERAL_CSTRING(""));
   }
@@ -1112,29 +1133,48 @@ nsUrlClassifierLookupCallback::HandleRes
              "called while there are pending completions");
 
   LOG(("nsUrlClassifierLookupCallback::HandleResults [%p, %" PRIuSIZE " results]",
        this, mResults->Length()));
 
   nsCOMPtr<nsIUrlClassifierClassifyCallback> classifyCallback =
     do_QueryInterface(mCallback);
 
+  MatchResult matchResult = MatchResult::eTelemetryDisabled;
+
   nsTArray<nsCString> tables;
   // Build a stringified list of result tables.
   for (uint32_t i = 0; i < mResults->Length(); i++) {
     LookupResult& result = mResults->ElementAt(i);
 
     // Leave out results that weren't confirmed, as their existence on
     // the list can't be verified.  Also leave out randomly-generated
     // noise.
     if (result.mNoise) {
       LOG(("Skipping result %s from table %s (noise)",
            result.PartialHashHex().get(), result.mTableName.get()));
       continue;
-    } else if (!result.Confirmed()) {
+    }
+
+    bool confirmed = result.Confirmed();
+
+    // If mMatchResult is set to eTelemetryDisabled, then we don't need to
+    // set |matchResult| for this lookup.
+    if (result.mMatchResult != MatchResult::eTelemetryDisabled) {
+      matchResult &= ~(MatchResult::eTelemetryDisabled);
+      if (result.mProtocolV2) {
+        matchResult |=
+          confirmed ? MatchResult::eV2PreAndCom : MatchResult::eV2Prefix;
+      } else {
+        matchResult |=
+          confirmed ? MatchResult::eV4PreAndCom : MatchResult::eV4Prefix;
+      }
+    }
+
+    if (!confirmed) {
       LOG(("Skipping result %s from table %s (not confirmed)",
            result.PartialHashHex().get(), result.mTableName.get()));
       continue;
     }
 
     LOG(("Confirmed result %s from table %s",
          result.PartialHashHex().get(), result.mTableName.get()));
 
@@ -1144,16 +1184,21 @@ nsUrlClassifierLookupCallback::HandleRes
 
     if (classifyCallback) {
       nsCString prefixString;
       result.hash.fixedLengthPrefix.ToString(prefixString);
       classifyCallback->HandleResult(result.mTableName, prefixString);
     }
   }
 
+  if (matchResult != MatchResult::eTelemetryDisabled) {
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_RESULT,
+                          ConvertMatchResultToUint(matchResult));
+  }
+
   // TODO: Bug 1333328, Refactor cache miss mechanism for v2.
   // Some parts of this gethash request generated no hits at all.
   // Prefixes must have been removed from the database since our last update.
   // Save the prefixes we checked to prevent repeated requests
   // until the next update.
   nsAutoPtr<PrefixArray> cacheMisses(new PrefixArray());
   if (cacheMisses) {
     for (uint32_t i = 0; i < mResults->Length(); i++) {