Bug 1311931 - Add telemetry to measure full match rate for v2 and v4. r=francois
authorDimi Lee <dlee@mozilla.com>
Thu, 23 Feb 2017 23:07:13 +0800
changeset 373548 2aa2e92d394f993715cce84de22ba1bb9fc87047
parent 373547 88a7b28e2b714298493a1471c25925527c6d8409
child 373549 549cbcddd9b3ff4fbb004c94f0ef1a8499821f5d
push id10863
push userjlorenzo@mozilla.com
push dateMon, 06 Mar 2017 23:02:23 +0000
treeherdermozilla-aurora@0931190cd725 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfrancois
bugs1311931
milestone54.0a1
Bug 1311931 - Add telemetry to measure full match rate for v2 and v4. r=francois MozReview-Commit-ID: H9jAR82rgDh
toolkit/components/telemetry/Histograms.json
toolkit/components/url-classifier/Classifier.cpp
toolkit/components/url-classifier/LookupCache.h
toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4166,16 +4166,24 @@
   "URLCLASSIFIER_PREFIX_MATCH": {
     "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
     "expires_in_version": "58",
     "kind": "enumerated",
     "n_values": 4,
     "bug_numbers": [1298257],
     "description": "Classifier prefix matching result (0 = no match, 1 = match only V2, 2 = match only V4, 3 = match both V2 and V4)"
   },
+  "URLCLASSIFIER_MATCH_RESULT": {
+    "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
+    "expires_in_version": "60",
+    "kind": "enumerated",
+    "n_values": 16,
+    "bug_numbers": [1311931],
+    "description": "The result of each URL lookup against both google and google4 lists(0 = no match, 1 = V2 prefix only, 2 = V4 prefix only, 3 = V2 and V4 prefixes, 4 = V2 and V4 completions, 5 = V2 completion only, 6 = V4 completion only, 7 = V2 completion and v4 prefix, 8 = V2 prefix and V4 completion, 9 = unexpected result)"
+  },
   "URLCLASSIFIER_POSITIVE_CACHE_DURATION": {
     "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
     "expires_in_version": "60",
     "kind": "exponential",
     "high": 86400000,
     "n_buckets": 50,
     "bug_numbers": [1338082],
     "description": "Positive cache duration (ms) received in fullhash response from any v4 provider"
--- a/toolkit/components/url-classifier/Classifier.cpp
+++ b/toolkit/components/url-classifier/Classifier.cpp
@@ -465,20 +465,22 @@ Classifier::Check(const nsACString& aSpe
       cacheArray.AppendElement(cache);
     } else {
       return NS_ERROR_FAILURE;
     }
   }
 
   // Only record telemetry when both v2 and v4 have data.
   bool isV2Empty = true, isV4Empty = true;
+  bool shouldDoTelemetry = false;
   for (auto&& cache : cacheArray) {
     bool& ref = LookupCache::Cast<LookupCacheV2>(cache) ? isV2Empty : isV4Empty;
     ref = ref ? cache->IsEmpty() : false;
     if (!isV2Empty && !isV4Empty) {
+      shouldDoTelemetry = true;
       break;
     }
   }
 
   PrefixMatch matchingStatistics = PrefixMatch::eNoMatch;
 
   // Now check each lookup fragment against the entries in the DB.
   for (uint32_t i = 0; i < fragments.Length(); i++) {
@@ -516,33 +518,50 @@ Classifier::Check(const nsACString& aSpe
         LOG(("Found a result in %s: %s",
              cache->TableName().get(),
              confirmed ? "confirmed." : "Not confirmed."));
 
         result->hash.complete = lookupHash;
         result->mConfirmed = confirmed;
         result->mTableName.Assign(cache->TableName());
         result->mPartialHashLength = confirmed ? COMPLETE_SIZE : matchLength;
+        result->mProtocolV2 = LookupCache::Cast<LookupCacheV2>(cache);
 
-        if (LookupCache::Cast<LookupCacheV4>(cache)) {
-          matchingStatistics |= PrefixMatch::eMatchV4Prefix;
-          result->mProtocolV2 = false;
+        // There are two cases we are going to ignore the result for telemetry:
+        // 1. shouldDoTelemetry == false(when either v2 or v4 table is empty)
+        // 2. When match was found in the table which is not provided by google.
+        if (!shouldDoTelemetry ||
+            !StringBeginsWith(result->mTableName, NS_LITERAL_CSTRING("goog"))) {
+          continue;
+        }
+
+        if (result->mProtocolV2) {
+          matchingStatistics |= PrefixMatch::eMatchV2Prefix;
+          result->mMatchResult = MatchResult::eV2Prefix;
         } else {
           matchingStatistics |= PrefixMatch::eMatchV2Prefix;
-          result->mProtocolV2 = true;
+          result->mMatchResult = MatchResult::eV4Prefix;
         }
       }
     }
 
-    if (!isV2Empty && !isV4Empty) {
+    // TODO : This will be removed in Bug 1338033.
+    if (shouldDoTelemetry) {
       Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PREFIX_MATCH,
                             static_cast<uint8_t>(matchingStatistics));
     }
   }
 
+  // If we cannot find the prefix in neither the v2 nor the v4 database, record the
+  // telemetry here because we won't reach nsUrlClassifierLookupCallback:::HandleResult.
+  if (shouldDoTelemetry && aResults.Length() == 0) {
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_RESULT,
+                          static_cast<uint8_t>(MatchResult::eNoMatch));
+  }
+
   return NS_OK;
 }
 
 static nsresult
 SwapDirectoryContent(nsIFile* aDir1,
                      nsIFile* aDir2,
                      nsIFile* aParentDir,
                      nsIFile* aTempDir)
--- a/toolkit/components/url-classifier/LookupCache.h
+++ b/toolkit/components/url-classifier/LookupCache.h
@@ -11,28 +11,49 @@
 #include "nsTArray.h"
 #include "nsCOMPtr.h"
 #include "nsIFile.h"
 #include "nsIFileStreams.h"
 #include "mozilla/RefPtr.h"
 #include "nsUrlClassifierPrefixSet.h"
 #include "VariableLengthPrefixSet.h"
 #include "mozilla/Logging.h"
+#include "mozilla/TypedEnumBits.h"
 
 namespace mozilla {
 namespace safebrowsing {
 
 #define MAX_HOST_COMPONENTS 5
 #define MAX_PATH_COMPONENTS 4
 
+enum class MatchResult : uint8_t
+{
+  eNoMatch           = 0x00,
+  eV2Prefix          = 0x01,
+  eV4Prefix          = 0x02,
+  eV2Completion      = 0x04,
+  eV4Completion      = 0x08,
+  eTelemetryDisabled = 0x10,
+
+  eBothPrefix      = eV2Prefix     | eV4Prefix,
+  eBothCompletion  = eV2Completion | eV4Completion,
+  eV2PreAndCom     = eV2Prefix     | eV2Completion,
+  eV4PreAndCom     = eV4Prefix     | eV4Completion,
+  eBothPreAndV2Com = eBothPrefix   | eV2Completion,
+  eBothPreAndV4Com = eBothPrefix   | eV4Completion,
+  eAll             = eBothPrefix   | eBothCompletion,
+};
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(MatchResult)
+
 class LookupResult {
 public:
   LookupResult() : mNoise(false), mProtocolConfirmed(false),
                    mPartialHashLength(0), mConfirmed(false),
-                   mProtocolV2(true) {}
+                   mProtocolV2(true),
+                   mMatchResult(MatchResult::eTelemetryDisabled) {}
 
   // The fragment that matched in the LookupCache
   union {
     Prefix fixedLengthPrefix;
     Completion complete;
   } hash;
 
   const Completion &CompleteHash() {
@@ -70,16 +91,19 @@ public:
   nsCString mTableName;
 
   uint32_t mPartialHashLength;
 
   // True as long as this lookup is complete and hasn't expired.
   bool mConfirmed;
 
   bool mProtocolV2;
+
+  // This is only used by telemetry to record the match result.
+  MatchResult mMatchResult;
 };
 
 typedef nsTArray<LookupResult> LookupResultArray;
 
 struct CacheResult {
   AddComplete entry;
   nsCString table;
 
--- a/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
@@ -1095,16 +1095,37 @@ nsUrlClassifierLookupCallback::Completio
         && result.mTableName.Equals(tableName)) {
       result.mProtocolConfirmed = true;
     }
   }
 
   return NS_OK;
 }
 
+
+static uint8_t
+ConvertMatchResultToUint(const MatchResult& aResult)
+{
+  MOZ_ASSERT(!(aResult & MatchResult::eTelemetryDisabled));
+  switch (aResult) {
+  case MatchResult::eNoMatch:          return 0;
+  case MatchResult::eV2Prefix:         return 1;
+  case MatchResult::eV4Prefix:         return 2;
+  case MatchResult::eBothPrefix:       return 3;
+  case MatchResult::eAll:              return 4;
+  case MatchResult::eV2PreAndCom:      return 5;
+  case MatchResult::eV4PreAndCom:      return 6;
+  case MatchResult::eBothPreAndV2Com:  return 7;
+  case MatchResult::eBothPreAndV4Com:  return 8;
+  default:
+    MOZ_ASSERT_UNREACHABLE("Unexpected match result");
+    return 9;
+  }
+}
+
 nsresult
 nsUrlClassifierLookupCallback::HandleResults()
 {
   if (!mResults) {
     // No results, this URI is clean.
     LOG(("nsUrlClassifierLookupCallback::HandleResults [%p, no results]", this));
     return mCallback->HandleEvent(NS_LITERAL_CSTRING(""));
   }
@@ -1112,29 +1133,48 @@ nsUrlClassifierLookupCallback::HandleRes
              "called while there are pending completions");
 
   LOG(("nsUrlClassifierLookupCallback::HandleResults [%p, %" PRIuSIZE " results]",
        this, mResults->Length()));
 
   nsCOMPtr<nsIUrlClassifierClassifyCallback> classifyCallback =
     do_QueryInterface(mCallback);
 
+  MatchResult matchResult = MatchResult::eTelemetryDisabled;
+
   nsTArray<nsCString> tables;
   // Build a stringified list of result tables.
   for (uint32_t i = 0; i < mResults->Length(); i++) {
     LookupResult& result = mResults->ElementAt(i);
 
     // Leave out results that weren't confirmed, as their existence on
     // the list can't be verified.  Also leave out randomly-generated
     // noise.
     if (result.mNoise) {
       LOG(("Skipping result %s from table %s (noise)",
            result.PartialHashHex().get(), result.mTableName.get()));
       continue;
-    } else if (!result.Confirmed()) {
+    }
+
+    bool confirmed = result.Confirmed();
+
+    // If mMatchResult is set to eTelemetryDisabled, then we don't need to
+    // set |matchResult| for this lookup.
+    if (result.mMatchResult != MatchResult::eTelemetryDisabled) {
+      matchResult &= ~(MatchResult::eTelemetryDisabled);
+      if (result.mProtocolV2) {
+        matchResult |=
+          confirmed ? MatchResult::eV2PreAndCom : MatchResult::eV2Prefix;
+      } else {
+        matchResult |=
+          confirmed ? MatchResult::eV4PreAndCom : MatchResult::eV4Prefix;
+      }
+    }
+
+    if (!confirmed) {
       LOG(("Skipping result %s from table %s (not confirmed)",
            result.PartialHashHex().get(), result.mTableName.get()));
       continue;
     }
 
     LOG(("Confirmed result %s from table %s",
          result.PartialHashHex().get(), result.mTableName.get()));
 
@@ -1144,16 +1184,21 @@ nsUrlClassifierLookupCallback::HandleRes
 
     if (classifyCallback) {
       nsCString prefixString;
       result.hash.fixedLengthPrefix.ToString(prefixString);
       classifyCallback->HandleResult(result.mTableName, prefixString);
     }
   }
 
+  if (matchResult != MatchResult::eTelemetryDisabled) {
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_MATCH_RESULT,
+                          ConvertMatchResultToUint(matchResult));
+  }
+
   // TODO: Bug 1333328, Refactor cache miss mechanism for v2.
   // Some parts of this gethash request generated no hits at all.
   // Prefixes must have been removed from the database since our last update.
   // Save the prefixes we checked to prevent repeated requests
   // until the next update.
   nsAutoPtr<PrefixArray> cacheMisses(new PrefixArray());
   if (cacheMisses) {
     for (uint32_t i = 0; i < mResults->Length(); i++) {