Bug 1298257 - Implement url matching for variable-length prefix set. r=dimi,gcp
author Thomas Nguyen <tnguyen@mozilla.com>
Fri, 04 Nov 2016 12:00:33 +0800
changeset 321066 0797f7f58f2287952815196dece545faf5af14a1
parent 321065 cb4ab44839fddd59d37cd67c51aa8954998f9435
child 321067 3e2ce6e549e3c43ab11cce77839d7389b3c8a967
push id 30915
push user philringnalda@gmail.com
push date Sat, 05 Nov 2016 03:42:29 +0000
treeherder mozilla-central@a7c654513f2f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers dimi, gcp
bugs 1298257
milestone 52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1298257 - Implement url matching for variable-length prefix set. r=dimi,gcp MozReview-Commit-ID: 8Goh7yyAotN
toolkit/components/telemetry/Histograms.json
toolkit/components/url-classifier/Classifier.cpp
toolkit/components/url-classifier/LookupCacheV4.cpp
toolkit/components/url-classifier/tests/gtest/Common.cpp
toolkit/components/url-classifier/tests/gtest/Common.h
toolkit/components/url-classifier/tests/gtest/TestLookupCacheV4.cpp
toolkit/components/url-classifier/tests/gtest/TestPerProviderDirectory.cpp
toolkit/components/url-classifier/tests/gtest/TestUrlClassifierTableUpdateV4.cpp
toolkit/components/url-classifier/tests/gtest/moz.build
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -3880,16 +3880,24 @@
   "URLCLASSIFIER_UPDATE_ERROR_TYPE": {
     "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
     "expires_in_version": "58",
     "kind": "enumerated",
     "n_values": 10,
     "bug_numbers": [1305801],
     "description": "An error was encountered while parsing a partial update returned by a Safe Browsing V4 server (0 = addition of an already existing prefix, 1 = parser got into an infinite loop, 2 = removal index out of bounds, 3 = checksum mismatch, 4 = missing checksum)"
   },
+  "URLCLASSIFIER_PREFIX_MATCH": {
+    "alert_emails": ["safebrowsing-telemetry@mozilla.org"],
+    "expires_in_version": "58",
+    "kind": "enumerated",
+    "n_values": 4,
+    "bug_numbers": [1298257],
+    "description": "Classifier prefix matching result (0 = no match, 1 = match only V2, 2 = match only V4, 3 = match both V2 and V4)"
+  },
   "CSP_DOCUMENTS_COUNT": {
     "alert_emails": ["seceng@mozilla.com"],
     "bug_numbers": [1252829],
     "expires_in_version": "55",
     "kind": "count",
     "description": "Number of unique pages that contain a CSP"
   },
   "CSP_UNSAFE_INLINE_DOCUMENTS_COUNT": {
--- a/toolkit/components/url-classifier/Classifier.cpp
+++ b/toolkit/components/url-classifier/Classifier.cpp
@@ -15,16 +15,17 @@
 #include "nsNetCID.h"
 #include "nsPrintfCString.h"
 #include "nsThreadUtils.h"
 #include "mozilla/Telemetry.h"
 #include "mozilla/Logging.h"
 #include "mozilla/SyncRunnable.h"
 #include "mozilla/Base64.h"
 #include "mozilla/Unused.h"
+#include "mozilla/TypedEnumBits.h"
 
 // MOZ_LOG=UrlClassifierDbService:5
 extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
 #define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
 #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
 
 #define STORE_DIRECTORY      NS_LITERAL_CSTRING("safebrowsing")
 #define TO_DELETE_DIR_SUFFIX NS_LITERAL_CSTRING("-to_delete")
@@ -471,16 +472,26 @@ Classifier::TableRequest(nsACString& aRe
   // Load meta data from *.metadata files in the root directory.
   // Specifically for v4 tables.
   nsCString metadata;
   nsresult rv = LoadMetadata(mRootStoreDirectory, metadata);
   NS_ENSURE_SUCCESS_VOID(rv);
   aResult.Append(metadata);
 }
 
+// This is used to record the matching statistics for v2 and v4.
+enum class PrefixMatch : uint8_t {
+  eNoMatch = 0x00,
+  eMatchV2Prefix = 0x01,
+  eMatchV4Prefix = 0x02,
+  eMatchBoth = eMatchV2Prefix | eMatchV4Prefix
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(PrefixMatch)
+
 nsresult
 Classifier::Check(const nsACString& aSpec,
                   const nsACString& aTables,
                   uint32_t aFreshnessGuarantee,
                   LookupResultArray& aResults)
 {
   Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_CL_CHECK_TIME> timer;
 
@@ -500,31 +511,49 @@ Classifier::Check(const nsACString& aSpe
     LookupCache *cache = GetLookupCache(activeTables[i]);
     if (cache) {
       cacheArray.AppendElement(cache);
     } else {
       return NS_ERROR_FAILURE;
     }
   }
 
+  PrefixMatch matchingStatistics = PrefixMatch::eNoMatch;
+
   // Now check each lookup fragment against the entries in the DB.
   for (uint32_t i = 0; i < fragments.Length(); i++) {
     Completion lookupHash;
     lookupHash.FromPlaintext(fragments[i], mCryptoHash);
 
     if (LOG_ENABLED()) {
       nsAutoCString checking;
       lookupHash.ToHexString(checking);
       LOG(("Checking fragment %s, hash %s (%X)", fragments[i].get(),
            checking.get(), lookupHash.ToUint32()));
     }
 
     for (uint32_t i = 0; i < cacheArray.Length(); i++) {
       LookupCache *cache = cacheArray[i];
       bool has, complete;
+
+      if (LookupCache::Cast<LookupCacheV4>(cache)) {
+        // TODO Bug 1312339 Return length in LookupCache.Has and support
+        // VariableLengthPrefix in LookupResultArray
+        rv = cache->Has(lookupHash, &has, &complete);
+        if (NS_FAILED(rv)) {
+          LOG(("Failed to lookup fragment %s V4", fragments[i].get()));
+        }
+        if (has) {
+          matchingStatistics |= PrefixMatch::eMatchV4Prefix;
+          // TODO: Bug 1311935 - Implement Safe Browsing v4 caching
+          // Should check cache expired
+        }
+        continue;
+      }
+
       rv = cache->Has(lookupHash, &has, &complete);
       NS_ENSURE_SUCCESS(rv, rv);
       if (has) {
         LookupResult *result = aResults.AppendElement();
         if (!result)
           return NS_ERROR_OUT_OF_MEMORY;
 
         int64_t age;
@@ -540,19 +569,23 @@ Classifier::Check(const nsACString& aSpe
              cache->TableName().get(),
              complete ? "complete." : "Not complete.",
              age));
 
         result->hash.complete = lookupHash;
         result->mComplete = complete;
         result->mFresh = (age < aFreshnessGuarantee);
         result->mTableName.Assign(cache->TableName());
+
+        matchingStatistics |= PrefixMatch::eMatchV2Prefix;
       }
     }
 
+    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PREFIX_MATCH,
+                          static_cast<uint8_t>(matchingStatistics));
   }
 
   return NS_OK;
 }
 
 nsresult
 Classifier::ApplyUpdates(nsTArray<TableUpdate*>* aUpdates)
 {
--- a/toolkit/components/url-classifier/LookupCacheV4.cpp
+++ b/toolkit/components/url-classifier/LookupCacheV4.cpp
@@ -73,22 +73,38 @@ LookupCacheV4::Init()
 {
   mVLPrefixSet = new VariableLengthPrefixSet();
   nsresult rv = mVLPrefixSet->Init(mTableName);
   NS_ENSURE_SUCCESS(rv, rv);
 
   return NS_OK;
 }
 
-// TODO : Bug 1298257, Implement url matching for variable-length prefix set
 nsresult
 LookupCacheV4::Has(const Completion& aCompletion,
                    bool* aHas, bool* aComplete)
 {
   *aHas = false;
+
+  uint32_t length = 0;
+  nsDependentCSubstring fullhash;
+  fullhash.Rebind((const char *)aCompletion.buf, COMPLETE_SIZE);
+
+  nsresult rv = mVLPrefixSet->Matches(fullhash, &length);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  *aHas = length >= PREFIX_SIZE;
+  *aComplete = length == COMPLETE_SIZE;
+
+  if (LOG_ENABLED()) {
+    uint32_t prefix = aCompletion.ToUint32();
+    LOG(("Probe in V4 %s: %X, found %d, complete %d", mTableName.get(),
+          prefix, *aHas, *aComplete));
+  }
+
   return NS_OK;
 }
 
 nsresult
 LookupCacheV4::Build(PrefixStringMap& aPrefixMap)
 {
   return mVLPrefixSet->SetPrefixes(aPrefixMap);
 }
@@ -146,16 +162,27 @@ AppendPrefixToMap(PrefixStringMap& prefi
   if (!prefix.Length()) {
     return;
   }
 
   nsCString* prefixString = prefixes.LookupOrAdd(prefix.Length());
   prefixString->Append(prefix.BeginReading(), prefix.Length());
 }
 
+// Feed the bytes of the given prefix into the hash that
+// keeps track of the checksum.
+static void
+UpdateChecksum(nsICryptoHash* aCrypto, const nsACString& aPrefix)
+{
+  MOZ_ASSERT(aCrypto);
+  aCrypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
+                  aPrefix.BeginReading())),
+                  aPrefix.Length());
+}
+
 // Please see https://bug1287058.bmoattachments.org/attachment.cgi?id=8795366
 // for detail about partial update algorithm.
 nsresult
 LookupCacheV4::ApplyUpdate(TableUpdateV4* aTableUpdate,
                            PrefixStringMap& aInputMap,
                            PrefixStringMap& aOutputMap)
 {
   MOZ_ASSERT(aOutputMap.IsEmpty());
@@ -227,28 +254,22 @@ LookupCacheV4::ApplyUpdate(TableUpdateV4
 
       // If the number of picks from old map matches the removalIndex, then this prefix
       // will be removed by not merging it to new map.
       if (removalIndex < removalArray.Length() &&
           numOldPrefixPicked == removalArray[removalIndex]) {
         removalIndex++;
       } else {
         AppendPrefixToMap(aOutputMap, smallestOldPrefix);
-
-        crypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
-                       smallestOldPrefix.BeginReading())),
-                       smallestOldPrefix.Length());
+        UpdateChecksum(crypto, smallestOldPrefix);
       }
       smallestOldPrefix.SetLength(0);
     } else {
       AppendPrefixToMap(aOutputMap, smallestAddPrefix);
-
-      crypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
-                     smallestAddPrefix.BeginReading())),
-                     smallestAddPrefix.Length());
+      UpdateChecksum(crypto, smallestAddPrefix);
 
       smallestAddPrefix.SetLength(0);
     }
   }
 
   // We expect index will be greater to 0 because max number of runs will be
   // the number of original prefix plus add prefix.
   if (index <= 0) {
@@ -292,17 +313,17 @@ LookupCacheV4::InitCrypto(nsCOMPtr<nsICr
 {
   nsresult rv;
   aCrypto = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
   if (NS_WARN_IF(NS_FAILED(rv))) {
     return rv;
   }
 
   rv = aCrypto->Init(nsICryptoHash::SHA256);
-  Unused << NS_WARN_IF(NS_FAILED(rv));
+  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "InitCrypto failed");
 
   return rv;
 }
 
 nsresult
 LookupCacheV4::VerifyChecksum(const nsACString& aChecksum)
 {
   nsCOMPtr<nsICryptoHash> crypto;
@@ -316,19 +337,17 @@ LookupCacheV4::VerifyChecksum(const nsAC
 
   VLPrefixSet loadPSet(map);
   uint32_t index = loadPSet.Count() + 1;
   for(;index > 0; index--) {
     nsDependentCSubstring prefix;
     if (!loadPSet.GetSmallestPrefix(prefix)) {
       break;
     }
-    crypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>(
-                   prefix.BeginReading())),
-                   prefix.Length());
+    UpdateChecksum(crypto, prefix);
   }
 
   nsAutoCString checksum;
   crypto->Finish(false, checksum);
 
   if (checksum != aChecksum) {
     LOG(("Checksum mismatch when loading prefixes from file."));
     return NS_ERROR_FILE_CORRUPTED;
--- a/toolkit/components/url-classifier/tests/gtest/Common.cpp
+++ b/toolkit/components/url-classifier/tests/gtest/Common.cpp
@@ -46,8 +46,22 @@ void ApplyUpdate(nsTArray<TableUpdate*>&
   });
 }
 
 void ApplyUpdate(TableUpdate* update)
 {
   nsTArray<TableUpdate*> updates = { update };
   ApplyUpdate(updates);
 }
+
+void
+PrefixArrayToPrefixStringMap(const nsTArray<nsCString>& prefixArray,
+                             PrefixStringMap& out)
+{
+  out.Clear();
+
+  for (uint32_t i = 0; i < prefixArray.Length(); i++) {
+    const nsCString& prefix = prefixArray[i];
+    nsCString* prefixString = out.LookupOrAdd(prefix.Length());
+    prefixString->Append(prefix.BeginReading(), prefix.Length());
+  }
+}
+
--- a/toolkit/components/url-classifier/tests/gtest/Common.h
+++ b/toolkit/components/url-classifier/tests/gtest/Common.h
@@ -14,8 +14,13 @@ void RunTestInNewThread(Function&& aFunc
 already_AddRefed<nsIFile>
 GetFile(const nsTArray<nsString>& path);
 
 // ApplyUpdate will call |ApplyUpdates| of Classifier within a new thread
 void ApplyUpdate(nsTArray<TableUpdate*>& updates);
 
 void ApplyUpdate(TableUpdate* update);
 
+// This function converts lexicographically sorted prefixes to a hashtable
+// whose key is the prefix size and whose value is the concatenated prefix string.
+void PrefixArrayToPrefixStringMap(const nsTArray<nsCString>& prefixArray,
+                                  PrefixStringMap& out);
+
new file mode 100644
--- /dev/null
+++ b/toolkit/components/url-classifier/tests/gtest/TestLookupCacheV4.cpp
@@ -0,0 +1,88 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LookupCacheV4.h"
+#include "Common.h"
+
+#define GTEST_SAFEBROWSING_DIR NS_LITERAL_CSTRING("safebrowsing")
+#define GTEST_TABLE NS_LITERAL_CSTRING("gtest-malware-proto")
+
+typedef nsCString _Fragment;
+typedef nsTArray<nsCString> _PrefixArray;
+
+// Generate a hash prefix from string
+static const nsCString
+GeneratePrefix(const _Fragment& aFragment, uint8_t aLength)
+{
+  Completion complete;
+  nsCOMPtr<nsICryptoHash> cryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID);
+  complete.FromPlaintext(aFragment, cryptoHash);
+
+  nsCString hash;
+  hash.Assign((const char *)complete.buf, aLength);
+  return hash;
+}
+
+static UniquePtr<LookupCacheV4>
+SetupLookupCacheV4(const _PrefixArray& prefixArray)
+{
+  nsCOMPtr<nsIFile> file;
+  NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR, getter_AddRefs(file));
+
+  file->AppendNative(GTEST_SAFEBROWSING_DIR);
+
+  UniquePtr<LookupCacheV4> cache = MakeUnique<LookupCacheV4>(GTEST_TABLE, file);
+  nsresult rv = cache->Init();
+  EXPECT_EQ(rv, NS_OK);
+
+  PrefixStringMap map;
+  PrefixArrayToPrefixStringMap(prefixArray, map);
+  rv = cache->Build(map);
+  EXPECT_EQ(rv, NS_OK);
+
+  return Move(cache);
+}
+
+void
+TestHasPrefix(const _Fragment& aFragment, bool aExpectedHas, bool aExpectedComplete)
+{
+  _PrefixArray array = { GeneratePrefix(_Fragment("bravo.com/"), 32),
+                         GeneratePrefix(_Fragment("browsing.com/"), 8),
+                         GeneratePrefix(_Fragment("gound.com/"), 5),
+                         GeneratePrefix(_Fragment("small.com/"), 4)
+                       };
+
+  RunTestInNewThread([&] () -> void {
+    UniquePtr<LookupCache> cache = SetupLookupCacheV4(array);
+
+    Completion lookupHash;
+    nsCOMPtr<nsICryptoHash> cryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID);
+    lookupHash.FromPlaintext(aFragment, cryptoHash);
+
+    bool has, complete;
+    nsresult rv = cache->Has(lookupHash, &has, &complete);
+
+    EXPECT_EQ(rv, NS_OK);
+    EXPECT_EQ(has, aExpectedHas);
+    EXPECT_EQ(complete, aExpectedComplete);
+
+    cache->ClearAll();
+  });
+
+}
+
+TEST(LookupCacheV4, HasComplete)
+{
+  TestHasPrefix(_Fragment("bravo.com/"), true, true);
+}
+
+TEST(LookupCacheV4, HasPrefix)
+{
+  TestHasPrefix(_Fragment("browsing.com/"), true, false);
+}
+
+TEST(LookupCacheV4, Nomatch)
+{
+  TestHasPrefix(_Fragment("nomatch.com/"), false, false);
+}
--- a/toolkit/components/url-classifier/tests/gtest/TestPerProviderDirectory.cpp
+++ b/toolkit/components/url-classifier/tests/gtest/TestPerProviderDirectory.cpp
@@ -1,15 +1,13 @@
 #include "LookupCache.h"
 #include "LookupCacheV4.h"
 #include "HashStore.h"
 #include "gtest/gtest.h"
-#include "nsIThread.h"
 #include "nsAppDirectoryServiceDefs.h"
-#include "nsThreadUtils.h"
 
 namespace mozilla {
 namespace safebrowsing {
 
 class PerProviderDirectoryTestUtils {
 public:
   template<typename T>
   static nsIFile* InspectStoreDirectory(const T& aT)
--- a/toolkit/components/url-classifier/tests/gtest/TestUrlClassifierTableUpdateV4.cpp
+++ b/toolkit/components/url-classifier/tests/gtest/TestUrlClassifierTableUpdateV4.cpp
@@ -1,11 +1,12 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
+#include "Common.h"
 #include "Classifier.h"
 #include "HashStore.h"
 #include "nsAppDirectoryServiceDefs.h"
 #include "nsIFile.h"
 #include "nsIThread.h"
 #include "string.h"
 #include "gtest/gtest.h"
 #include "nsThreadUtils.h"
@@ -50,31 +51,16 @@ MergeAndSortArray(const _PrefixArray& ar
                   _PrefixArray& output)
 {
   output.Clear();
   output.AppendElements(array1);
   output.AppendElements(array2);
   output.Sort();
 }
 
-// This function converts lexigraphic-sorted prefixes to a hashtable
-// which key is prefix size and value is concatenated prefix string.
-static void
-PrefixArrayToPrefixStringMap(const _PrefixArray& prefixArray,
-                             PrefixStringMap& outMap)
-{
-  outMap.Clear();
-
-  for (uint32_t i = 0; i < prefixArray.Length(); i++) {
-    const _Prefix& prefix = prefixArray[i];
-    nsCString* prefixString = outMap.LookupOrAdd(prefix.Length());
-    prefixString->Append(prefix.BeginReading(), prefix.Length());
-  }
-}
-
 static void
 CalculateCheckSum(_PrefixArray& prefixArray, nsCString& checksum)
 {
   prefixArray.Sort();
 
   nsresult rv;
   nsCOMPtr<nsICryptoHash> cryptoHash =
     do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
--- a/toolkit/components/url-classifier/tests/gtest/moz.build
+++ b/toolkit/components/url-classifier/tests/gtest/moz.build
@@ -7,16 +7,17 @@
 LOCAL_INCLUDES += [
     '../..',
 ]
 
 UNIFIED_SOURCES += [
     'Common.cpp',
     'TestChunkSet.cpp',
     'TestFailUpdate.cpp',
+    'TestLookupCacheV4.cpp',
     'TestPerProviderDirectory.cpp',
     'TestProtocolParser.cpp',
     'TestRiceDeltaDecoder.cpp',
     'TestSafebrowsingHash.cpp',
     'TestSafeBrowsingProtobuf.cpp',
     'TestTable.cpp',
     'TestUrlClassifierTableUpdateV4.cpp',
     'TestUrlClassifierUtils.cpp',