Bug 669410 - Use PrefixSet to speed up URL classification. r=tony
authorGian-Carlo Pascutto <gpascutto@mozilla.com>
Thu, 08 Sep 2011 22:15:18 +0200
changeset 76775 a5c9e4c8975f4b69d7f43ab2675b50d21f06ab2f
parent 76774 72a461aae95abfc5ccd9916b192e99575caf1bce
child 76776 5e9bb169d5fc5d2a9f54329818b3bc554b3814e9
push id3
push userfelipc@gmail.com
push dateFri, 30 Sep 2011 20:09:13 +0000
reviewerstony
bugs669410
milestone9.0a1
Bug 669410 - Use PrefixSet to speed up URL classification. r=tony
toolkit/components/build/nsToolkitCompsCID.h
toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl
toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
toolkit/components/url-classifier/nsUrlClassifierDBService.h
--- a/toolkit/components/build/nsToolkitCompsCID.h
+++ b/toolkit/components/build/nsToolkitCompsCID.h
@@ -155,19 +155,19 @@
 // {59648a91-5a60-4122-8ff2-54b839c84aed}
 #define NS_PARENTALCONTROLSSERVICE_CID \
 { 0x580530e5, 0x118c, 0x4bc7, { 0xab, 0x88, 0xbc, 0x2c, 0xd2, 0xb9, 0x72, 0x23 } }
 
 // {e7f70966-9a37-48d7-8aeb-35998f31090e}
 #define NS_TYPEAHEADFIND_CID \
 { 0xe7f70966, 0x9a37, 0x48d7, { 0x8a, 0xeb, 0x35, 0x99, 0x8f, 0x31, 0x09, 0x0e} }
 
-// {61a2318e-0a7a-483e-9105-367d4827f288}
+// {6e9f759a-3f8d-4552-99ed-dbf0ea0a5f67}
 #define NS_URLCLASSIFIERPREFIXSET_CID \
-{ 0x61a2318e, 0x0a7a, 0x483e, { 0x91, 0x05, 0x36, 0x7d, 0x48, 0x27, 0xf2, 0x88} }
+{ 0x6e9f759a, 0x3f8d, 0x4552, { 0x99, 0xed, 0xdb, 0xf0, 0xea, 0x0a, 0x5f, 0x67} }
 
 // {5eb7c3c1-ec1f-4007-87cc-eefb37d68ce6}
 #define NS_URLCLASSIFIERDBSERVICE_CID \
 { 0x5eb7c3c1, 0xec1f, 0x4007, { 0x87, 0xcc, 0xee, 0xfb, 0x37, 0xd6, 0x8c, 0xe6} }
 
 // {c2be6dc0-ef1e-4abd-86a2-4f864ddc57f6}
 #define NS_URLCLASSIFIERSTREAMUPDATER_CID \
 { 0xc2be6dc0, 0xef1e, 0x4abd, { 0x86, 0xa2, 0x4f, 0x86, 0x4d, 0xdc, 0x57, 0xf6} }
--- a/toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl
+++ b/toolkit/components/url-classifier/nsIUrlClassifierPrefixSet.idl
@@ -2,11 +2,13 @@
 
 interface nsIArray;
 
 [scriptable, uuid(6e9f759a-3f8d-4552-99ed-dbf0ea0a5f67)]
 interface nsIUrlClassifierPrefixSet : nsISupports
 {
   void setPrefixes([const, array, size_is(aLength)] in unsigned long aPrefixes,
                    in unsigned long aLength);
-
+  void addPrefixes([const, array, size_is(aLength)] in unsigned long aPrefixes,
+                   in unsigned long aLength);
   boolean contains(in unsigned long aPrefix);
+  PRUint32 estimateSize();
 };
--- a/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.cpp
@@ -61,16 +61,17 @@
 #include "nsIPrefService.h"
 #include "nsIProperties.h"
 #include "nsToolkitCompsCID.h"
 #include "nsIUrlClassifierUtils.h"
 #include "nsIRandomGenerator.h"
 #include "nsUrlClassifierDBService.h"
 #include "nsUrlClassifierUtils.h"
 #include "nsUrlClassifierProxies.h"
+#include "nsIUrlClassifierPrefixSet.h"
 #include "nsURILoader.h"
 #include "nsString.h"
 #include "nsReadableUtils.h"
 #include "nsTArray.h"
 #include "nsNetUtil.h"
 #include "nsNetCID.h"
 #include "nsThreadUtils.h"
 #include "nsXPCOMStrings.h"
@@ -172,16 +173,20 @@ static const PRLogModuleInfo *gUrlClassi
 #define UPDATE_WORKING_TIME         "urlclassifier.workingtime"
 #define UPDATE_WORKING_TIME_DEFAULT 5
 
 // The amount of time to delay after hitting UPDATE_WORKING_TIME, in
 // seconds.
 #define UPDATE_DELAY_TIME           "urlclassifier.updatetime"
 #define UPDATE_DELAY_TIME_DEFAULT   60
 
+// XOR value for encoding domains-present in the prefix tree,
+// to distinguish them from completely blocked domains.
+#define ENCODE_DOMAIN_MAGIC 0xAF154126
+
 class nsUrlClassifierDBServiceWorker;
 
 // Singleton instance.
 static nsUrlClassifierDBService* sUrlClassifierDBService;
 
 nsIThread* nsUrlClassifierDBService::gDbBackgroundThread = nsnull;
 
 // Once we've committed to shutting down, don't do work in the background
@@ -269,16 +274,19 @@ struct nsUrlClassifierHash
   }
   const PRBool operator<(const self_type& hash) const {
     return memcmp(buf, hash.buf, sizeof(self_type)) < 0;
   }
   const PRBool StartsWith(const nsUrlClassifierHash<PARTIAL_LENGTH>& hash) const {
     NS_ASSERTION(sHashSize >= PARTIAL_LENGTH, "nsUrlClassifierHash must be at least PARTIAL_LENGTH bytes long");
     return memcmp(buf, hash.buf, PARTIAL_LENGTH) == 0;
   }
+  PRUint32 ToUint32() const {
+    return *(reinterpret_cast<const PRUint32*>(buf));
+  }
 };
 
 typedef nsUrlClassifierHash<DOMAIN_LENGTH> nsUrlClassifierDomainHash;
 typedef nsUrlClassifierHash<PARTIAL_LENGTH> nsUrlClassifierPartialHash;
 typedef nsUrlClassifierHash<COMPLETE_LENGTH> nsUrlClassifierCompleteHash;
 
 
 // -------------------------------------------------------------------------
@@ -476,16 +484,19 @@ public:
 
   // Read a certain number of rows adjacent to the requested rowid that
   // don't have complete hash data.
   nsresult ReadNoiseEntries(PRInt64 rowID,
                             PRUint32 numRequested,
                             PRBool before,
                             nsTArray<nsUrlClassifierEntry> &entries);
 
+  // Return an array with all Prefixes known
+  nsresult ReadPrefixes(nsTArray<PRUint32>& array);
+
 protected:
   nsresult ReadEntries(mozIStorageStatement *statement,
                        nsTArray<nsUrlClassifierEntry>& entries);
   nsUrlClassifierDBServiceWorker *mWorker;
   nsCOMPtr<mozIStorageConnection> mConnection;
 
   nsCOMPtr<mozIStorageStatement> mLookupWithIDStatement;
 
@@ -493,16 +504,19 @@ protected:
   nsCOMPtr<mozIStorageStatement> mUpdateStatement;
   nsCOMPtr<mozIStorageStatement> mDeleteStatement;
   nsCOMPtr<mozIStorageStatement> mExpireStatement;
 
   nsCOMPtr<mozIStorageStatement> mPartialEntriesStatement;
   nsCOMPtr<mozIStorageStatement> mPartialEntriesAfterStatement;
   nsCOMPtr<mozIStorageStatement> mLastPartialEntriesStatement;
   nsCOMPtr<mozIStorageStatement> mPartialEntriesBeforeStatement;
+
+  nsCOMPtr<mozIStorageStatement> mAllPrefixStatement;
+  nsCOMPtr<mozIStorageStatement> mDomainPrefixStatement;
 };
 
 nsresult
 nsUrlClassifierStore::Init(nsUrlClassifierDBServiceWorker *worker,
                            mozIStorageConnection *connection,
                            const nsACString& entriesName)
 {
   mWorker = worker;
@@ -549,16 +563,28 @@ nsUrlClassifierStore::Init(nsUrlClassifi
 
   rv = mConnection->CreateStatement
     (NS_LITERAL_CSTRING("SELECT * FROM ") + entriesName +
      NS_LITERAL_CSTRING(" WHERE id < ?1 AND complete_data ISNULL"
                         " ORDER BY id DESC LIMIT ?2"),
      getter_AddRefs(mPartialEntriesBeforeStatement));
   NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = mConnection->CreateStatement
+    (NS_LITERAL_CSTRING("SELECT domain, partial_data, complete_data FROM ")
+     + entriesName,
+     getter_AddRefs(mAllPrefixStatement));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mConnection->CreateStatement
+    (NS_LITERAL_CSTRING("SELECT domain FROM ") + entriesName +
+     NS_LITERAL_CSTRING(" GROUP BY domain"),
+     getter_AddRefs(mDomainPrefixStatement));
+  NS_ENSURE_SUCCESS(rv, rv);
+
   return NS_OK;
 }
 
 void
 nsUrlClassifierStore::Close()
 {
   mLookupWithIDStatement = nsnull;
 
@@ -567,16 +593,19 @@ nsUrlClassifierStore::Close()
   mDeleteStatement = nsnull;
   mExpireStatement = nsnull;
 
   mPartialEntriesStatement = nsnull;
   mPartialEntriesAfterStatement = nsnull;
   mPartialEntriesBeforeStatement = nsnull;
   mLastPartialEntriesStatement = nsnull;
 
+  mAllPrefixStatement = nsnull;
+  mDomainPrefixStatement = nsnull;
+
   mConnection = nsnull;
 }
 
 
 PRBool
 nsUrlClassifierStore::ReadStatement(mozIStorageStatement* statement,
                                     nsUrlClassifierEntry& entry)
 {
@@ -1001,17 +1030,18 @@ class nsUrlClassifierDBServiceWorker : p
 public:
   nsUrlClassifierDBServiceWorker();
 
   NS_DECL_ISUPPORTS
   NS_DECL_NSIURLCLASSIFIERDBSERVICE
   NS_DECL_NSIURLCLASSIFIERDBSERVICEWORKER
 
   // Initialize, called in the main thread
-  nsresult Init(PRInt32 gethashNoise);
+  nsresult Init(PRInt32 gethashNoise,
+                nsCOMPtr<nsIUrlClassifierPrefixSet> & prefSet);
 
   // Queue a lookup for the worker to perform, called in the main thread.
   nsresult QueueLookup(const nsACString& lookupKey,
                        nsIUrlClassifierLookupCallback* callback);
 
   // Check if the key is on a known-clean host.
   nsresult CheckCleanHost(const nsACString &lookupKey,
                           PRBool *clean);
@@ -1152,32 +1182,32 @@ private:
   // Similar to GetKey(), but if the domain contains three or more components,
   // two keys will be returned:
   //  hostname.com/foo/bar -> [hostname.com]
   //  mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com]
   //  www.mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com]
   nsresult GetHostKeys(const nsACString &spec,
                        nsTArray<nsCString> &hostKeys);
 
-// Read all relevant entries for the given URI into mCachedEntries.
-  nsresult CacheEntries(const nsCSubstring& spec);
-
   // Look for a given lookup string (www.hostname.com/path/to/resource.html)
   // Returns a list of entries that match.
   nsresult Check(const nsCSubstring& spec,
                  nsTArray<nsUrlClassifierLookupResult>& results);
 
   // Perform a classifier lookup for a given url.
   nsresult DoLookup(const nsACString& spec, nsIUrlClassifierLookupCallback* c);
 
   // Add entries to the results.
   nsresult AddNoise(PRInt64 nearID,
                     PRInt32 count,
                     nsTArray<nsUrlClassifierLookupResult>& results);
 
+  // Construct a Prefix Tree with known prefixes
+  nsresult ConstructPrefixTree();
+
   nsCOMPtr<nsIFile> mDBFile;
 
   nsCOMPtr<nsICryptoHash> mCryptoHash;
 
   // Holds a connection to the Db.  We lazily initialize this because it has
   // to be created in the background thread (currently mozStorageConnection
   // isn't thread safe).
   nsCOMPtr<mozIStorageConnection> mConnection;
@@ -1265,24 +1295,18 @@ private:
   // entry in the db).
   nsUrlClassifierFragmentSet mCleanHostKeys;
 
   // The clean-host-key cache is updated in the worker thread, but
   // checked in the main thread (to avoid posting lookup requests if
   // not necessary).
   Mutex mCleanHostKeysLock;
 
-  // We maintain an MRU cache of clean fragments (fragments with no
-  // entry in the db).
-  nsUrlClassifierFragmentSet mCleanFragments;
-
-  // The host keys from the last host to be checked for malware are
-  // cached for quicker lookup next time through.
-  nsCString mCachedHostKey;
-  nsTArray<nsUrlClassifierEntry> mCachedEntries;
+  // Set of prefixes known to be in the database
+  nsCOMPtr<nsIUrlClassifierPrefixSet> mPrefixSet;
 
   // Pending lookups are stored in a queue for processing.  The queue
   // is protected by mPendingLookupLock.
   Mutex mPendingLookupLock;
 
   class PendingLookup {
   public:
     nsCString mKey;
@@ -1312,31 +1336,34 @@ nsUrlClassifierDBServiceWorker::nsUrlCla
   , mPrimaryStream(PR_FALSE)
   , mHaveCachedLists(PR_FALSE)
   , mCachedListsTable(PR_UINT32_MAX)
   , mHaveCachedAddChunks(PR_FALSE)
   , mHaveCachedSubChunks(PR_FALSE)
   , mUpdateStartTime(0)
   , mGethashNoise(0)
   , mCleanHostKeysLock("nsUrlClassifierDBServerWorker.mCleanHostKeysLock")
+  , mPrefixSet(0)
   , mPendingLookupLock("nsUrlClassifierDBServerWorker.mPendingLookupLock")
 {
 }
 
 nsUrlClassifierDBServiceWorker::~nsUrlClassifierDBServiceWorker()
 {
   NS_ASSERTION(!mConnection,
                "Db connection not closed, leaking memory!  Call CloseDb "
                "to close the connection.");
 }
 
 nsresult
-nsUrlClassifierDBServiceWorker::Init(PRInt32 gethashNoise)
+nsUrlClassifierDBServiceWorker::Init(PRInt32 gethashNoise,
+                                     nsCOMPtr<nsIUrlClassifierPrefixSet> & prefSet)
 {
   mGethashNoise = gethashNoise;
+  mPrefixSet = prefSet;
 
   // Compute database filename
 
   // Because we dump raw integers into the database, this database isn't
   // portable between machine types, so store it in the local profile dir.
   nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
                                        getter_AddRefs(mDBFile));
 
@@ -1348,19 +1375,16 @@ nsUrlClassifierDBServiceWorker::Init(PRI
   if (NS_FAILED(rv)) return NS_ERROR_NOT_AVAILABLE;
 
   rv = mDBFile->Append(NS_LITERAL_STRING(DATABASE_FILENAME));
   NS_ENSURE_SUCCESS(rv, rv);
 
   if (!mCleanHostKeys.Init(CLEAN_HOST_KEYS_SIZE))
     return NS_ERROR_OUT_OF_MEMORY;
 
-  if (!mCleanFragments.Init(CLEAN_FRAGMENTS_SIZE))
-    return NS_ERROR_OUT_OF_MEMORY;
-
   ResetUpdate();
 
   mTableFreshness.Init();
 
   return NS_OK;
 }
 
 nsresult
@@ -1498,143 +1522,111 @@ nsUrlClassifierDBServiceWorker::GetLooku
       fragments.AppendElement(key);
     }
   }
 
   return NS_OK;
 }
 
 nsresult
-nsUrlClassifierDBServiceWorker::CacheEntries(const nsACString& spec)
+nsUrlClassifierDBServiceWorker::Check(const nsACString& spec,
+                                      nsTArray<nsUrlClassifierLookupResult>& results)
 {
   nsAutoTArray<nsCString, 2> lookupHosts;
   nsresult rv = GetHostKeys(spec, lookupHosts);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  // Build a unique string for this set of lookup hosts.
-  nsCAutoString hostKey;
+  // First check if any of the hosts appear in the DB
+  PRBool anyFound = PR_FALSE;
   for (PRUint32 i = 0; i < lookupHosts.Length(); i++) {
-    hostKey.Append(lookupHosts[i]);
-    hostKey.Append("|");
-  }
-
-  if (hostKey == mCachedHostKey) {
-    // mCachedHostKeys is valid for this set of lookup hosts.
-    return NS_OK;
-  }
-
-  mCachedEntries.Clear();
-  mCachedHostKey.Truncate();
-
-  PRUint32 prevLength = 0;
-  for (PRUint32 i = 0; i < lookupHosts.Length(); i++) {
-    // First, if this key has been checked since our last update and
-    // had no entries, we don't need to check the DB here.  We also do
-    // this check before posting the lookup to this thread, but in
-    // case multiple lookups are queued at the same time, it's worth
-    // checking again here.
-    {
-      MutexAutoLock lock(mCleanHostKeysLock);
-      if (mCleanHostKeys.Has(lookupHosts[i]))
-        continue;
-    }
-
-    // Read the entries for this lookup houst
     nsUrlClassifierDomainHash hostKeyHash;
     hostKeyHash.FromPlaintext(lookupHosts[i], mCryptoHash);
-    mMainStore.ReadAddEntries(hostKeyHash, mCachedEntries);
-
-    if (mCachedEntries.Length() == prevLength) {
-      // There were no entries in the db for this host key.  Go
-      // ahead and mark the host key as clean to help short-circuit
-      // future lookups.
+
+    // Probe the Prefix Tree for presence
+    PRUint32 domainkey = hostKeyHash.ToUint32() ^ ENCODE_DOMAIN_MAGIC;
+    PRBool found;
+    rv = mPrefixSet->Contains(domainkey, &found);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    if (!found) {
       MutexAutoLock lock(mCleanHostKeysLock);
       mCleanHostKeys.Put(lookupHosts[i]);
     } else {
-      prevLength = mCachedEntries.Length();
+      anyFound = PR_TRUE;
     }
   }
 
-  mCachedHostKey = hostKey;
-
-  return NS_OK;
-}
-
-nsresult
-nsUrlClassifierDBServiceWorker::Check(const nsACString& spec,
-                                      nsTArray<nsUrlClassifierLookupResult>& results)
-{
-  // Read any entries that might apply to this URI into mCachedEntries
-  nsresult  rv = CacheEntries(spec);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  if (mCachedEntries.Length() == 0) {
+  if (!anyFound) {
     return NS_OK;
   }
 
   // Now get the set of fragments to look up.
   nsTArray<nsCString> fragments;
   rv = GetLookupFragments(spec, fragments);
   NS_ENSURE_SUCCESS(rv, rv);
 
   PRInt64 now = (PR_Now() / PR_USEC_PER_SEC);
 
   // Now check each lookup fragment against the entries in the DB.
   for (PRUint32 i = 0; i < fragments.Length(); i++) {
-    // If this fragment has been previously checked, ignore it.
-    if (mCleanFragments.Has(fragments[i]))
-      continue;
-
     nsUrlClassifierCompleteHash lookupHash;
     lookupHash.FromPlaintext(fragments[i], mCryptoHash);
 
-    PRBool foundMatch = PR_FALSE;
-    for (PRUint32 j = 0; j < mCachedEntries.Length(); j++) {
-      nsUrlClassifierEntry &entry = mCachedEntries[j];
-      if (entry.Match(lookupHash)) {
-        // If the entry doesn't contain a complete hash, we need to
-        // save it here so that it can be compared against the
-        // complete hash.  However, we don't set entry.mHaveComplete
-        // because it isn't a verified part of the entry yet.
-        nsUrlClassifierLookupResult *result = results.AppendElement();
-        if (!result)
-          return NS_ERROR_OUT_OF_MEMORY;
-
-        result->mLookupFragment = lookupHash;
-        result->mEntry = entry;
-
-        // Fill in the table name.
-        GetTableName(entry.mTableId, result->mTableName);
-
-        PRBool fresh;
-        PRInt64 tableUpdateTime;
-        if (mTableFreshness.Get(result->mTableName, &tableUpdateTime)) {
-          LOG(("tableUpdateTime: %lld, now: %lld, freshnessGuarantee: %d\n",
-               tableUpdateTime, now, gFreshnessGuarantee));
-          fresh = ((now - tableUpdateTime) <= gFreshnessGuarantee);
-        } else {
-          LOG(("No expiration time for this table.\n"));
-          fresh = PR_FALSE;
+    PRUint32 fragmentkey = lookupHash.ToUint32();
+    PRBool foundPrefix;
+    rv = mPrefixSet->Contains(fragmentkey, &foundPrefix);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    if (foundPrefix) {
+      // Find the corresponding host key
+      nsUrlClassifierDomainHash hostKey;
+      nsresult rv = GetKey(fragments[i], hostKey);
+      NS_ENSURE_SUCCESS(rv, rv);
+
+      // Read the entries for this fragments host from SQLite
+      nsTArray<nsUrlClassifierEntry> mCachedEntries;
+      mMainStore.ReadAddEntries(hostKey, mCachedEntries);
+
+      for (PRUint32 j = 0; j < mCachedEntries.Length(); j++) {
+        nsUrlClassifierEntry &entry = mCachedEntries[j];
+        if (entry.Match(lookupHash)) {
+          // If the entry doesn't contain a complete hash, we need to
+          // save it here so that it can be compared against the
+          // complete hash.  However, we don't set entry.mHaveComplete
+          // because it isn't a verified part of the entry yet.
+          nsUrlClassifierLookupResult *result = results.AppendElement();
+          if (!result)
+            return NS_ERROR_OUT_OF_MEMORY;
+
+          result->mLookupFragment = lookupHash;
+          result->mEntry = entry;
+
+          // Fill in the table name.
+          GetTableName(entry.mTableId, result->mTableName);
+
+          PRBool fresh;
+          PRInt64 tableUpdateTime;
+          if (mTableFreshness.Get(result->mTableName, &tableUpdateTime)) {
+            LOG(("tableUpdateTime: %lld, now: %lld, freshnessGuarantee: %d\n",
+                 tableUpdateTime, now, gFreshnessGuarantee));
+            fresh = ((now - tableUpdateTime) <= gFreshnessGuarantee);
+          } else {
+            LOG(("No expiration time for this table.\n"));
+            fresh = PR_FALSE;
+          }
+
+          // This is a confirmed result if we match a complete fragment in
+          // an up-to-date table.
+          result->mConfirmed = entry.mHaveComplete && fresh;
+
+          LOG(("Found a result.  complete=%d, fresh=%d",
+               entry.mHaveComplete, fresh));
         }
-
-        // This is a confirmed result if we match a complete fragment in
-        // an up-to-date table.
-        result->mConfirmed = entry.mHaveComplete && fresh;
-
-        foundMatch = PR_TRUE;
-        LOG(("Found a result.  complete=%d, fresh=%d",
-             entry.mHaveComplete, fresh));
       }
     }
-
-    if (!foundMatch) {
-      // This fragment is clean, we don't need to bother checking it
-      // again until the next update.
-      mCleanFragments.Put(fragments[i]);
-    }
   }
 
   return NS_OK;
 }
 
 /**
  * Lookup up a key in the database is a two step process:
  *
@@ -2861,21 +2853,16 @@ nsUrlClassifierDBServiceWorker::ResetUpd
   mUpdateClientKey.Truncate();
   mResetRequested = PR_FALSE;
   mUpdateTables.Clear();
 }
 
 void
 nsUrlClassifierDBServiceWorker::ResetLookupCache()
 {
-    mCachedHostKey.Truncate();
-    mCachedEntries.Clear();
-
-    mCleanFragments.Clear();
-
     MutexAutoLock lock(mCleanHostKeysLock);
     mCleanHostKeys.Clear();
 }
 
 NS_IMETHODIMP
 nsUrlClassifierDBServiceWorker::SetHashCompleter(const nsACString &tableName,
                                                  nsIUrlClassifierHashCompleter *completer)
 {
@@ -3168,16 +3155,19 @@ nsUrlClassifierDBServiceWorker::ApplyUpd
       }
     }
   }
 
   if (NS_SUCCEEDED(mUpdateStatus)) {
     // We have modified the db, we can't trust the set of clean
     // fragments or domains anymore.
     ResetLookupCache();
+    // Reconstruct the prefix tree from the DB
+    nsresult rv = ConstructPrefixTree();
+    NS_ENSURE_SUCCESS(rv, rv);
   }
 
   if (mGrewCache) {
     // During the update we increased the page cache to bigger than we
     // want to keep around.  At the moment, the only reliable way to make
     // sure that the page cache is freed is to reopen the connection.
     mGrewCache = PR_FALSE;
     CloseDb();
@@ -3328,21 +3318,16 @@ nsUrlClassifierDBServiceWorker::CacheCom
   for (PRUint32 i = 0; i < results->Length(); i++) {
     nsUrlClassifierLookupResult& result = results->ElementAt(i);
     // Failing to update here shouldn't be fatal (and might be common,
     // if we're updating entries that were removed since they were
     // returned after a lookup).
     mMainStore.UpdateEntry(result.mEntry);
   }
 
-  // Completions change entries in the DB, the cached set of entries is
-  // no longer valid.
-  mCachedHostKey.Truncate();
-  mCachedEntries.Clear();
-
   return NS_OK;
 }
 
 nsresult
 nsUrlClassifierDBServiceWorker::OpenDb()
 {
   // Connection already open, don't do anything.
   if (mConnection)
@@ -3458,16 +3443,107 @@ nsUrlClassifierDBServiceWorker::OpenDb()
      getter_AddRefs(mGetPageSizeStatement));
   NS_ENSURE_SUCCESS(rv, rv);
 
   mConnection = connection;
 
   mCryptoHash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
 
+  LOG(("SB construcing Prefix Tree\n"));
+
+  rv = ConstructPrefixTree();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  return NS_OK;
+}
+
+nsresult nsUrlClassifierStore::ReadPrefixes(nsTArray<PRUint32>& array)
+{
+  mozStorageStatementScoper scoper(mAllPrefixStatement);
+  mozStorageStatementScoper scopertoo(mDomainPrefixStatement);
+  PRBool hasMoreData;
+  PRUint32 dcnt = 0;
+  PRUint32 pcnt = 0;
+  PRUint32 fcnt = 0;
+
+  while (NS_SUCCEEDED(mDomainPrefixStatement->ExecuteStep(&hasMoreData)) && hasMoreData) {
+    PRUint32 domainval;
+    PRUint32 size;
+
+    const PRUint8 *blobdomain = mDomainPrefixStatement->AsSharedBlob(0, &size);
+    if (!blobdomain || (size != DOMAIN_LENGTH))
+      return PR_FALSE;
+
+    domainval = *(reinterpret_cast<const PRUint32*>(blobdomain));
+
+    // Encode that the domain is present.
+    // We need to encode this so it will not match with
+    // the same entry with an empty path.
+    domainval ^= ENCODE_DOMAIN_MAGIC;
+
+    array.AppendElement(domainval);
+    dcnt++;
+  }
+
+  while (NS_SUCCEEDED(mAllPrefixStatement->ExecuteStep(&hasMoreData)) && hasMoreData) {
+    PRUint32 prefixval;
+    PRUint32 domainval;
+    PRUint32 size;
+
+    const PRUint8 *blobdomain = mAllPrefixStatement->AsSharedBlob(0, &size);
+    if (!blobdomain || (size != DOMAIN_LENGTH))
+      return PR_FALSE;
+
+    domainval = *(reinterpret_cast<const PRUint32*>(blobdomain));
+
+    const PRUint8 *blobprefix = mAllPrefixStatement->AsSharedBlob(1, &size);
+    if (!blobprefix || (size != PARTIAL_LENGTH)) {
+      const PRUint8 *blobfull = mAllPrefixStatement->AsSharedBlob(2, &size);
+      if (!blobfull || (size != COMPLETE_LENGTH)) {
+        prefixval = domainval;
+        fcnt++;
+      } else {
+        prefixval = *(reinterpret_cast<const PRUint32*>(blobfull));
+      }
+    } else {
+      prefixval = *(reinterpret_cast<const PRUint32*>(blobprefix));
+    }
+
+    array.AppendElement(prefixval);
+    pcnt++;
+  }
+
+  LOG(("SB domains: %d prefixes: %d fulldomain: %d\n", dcnt, pcnt, fcnt));
+
+  return NS_OK;
+}
+
+nsresult
+nsUrlClassifierDBServiceWorker::ConstructPrefixTree()
+{
+  nsTArray<PRUint32> array;
+  nsresult rv = mMainStore.ReadPrefixes(array);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  if (array.Length() > 0) {
+    // clear old tree
+    rv = mPrefixSet->SetPrefixes(array.Elements(), 0);
+    NS_ENSURE_SUCCESS(rv, rv);
+    // construct new one
+    rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+#ifdef DEBUG
+  PRUint32 size = 0;
+  rv = mPrefixSet->EstimateSize(&size);
+  LOG(("SB tree done, size = %d bytes\n", size));
+  NS_ENSURE_SUCCESS(rv, rv);
+#endif
+
   return NS_OK;
 }
 
 nsresult
 nsUrlClassifierDBServiceWorker::MaybeCreateTables(mozIStorageConnection* connection)
 {
   LOG(("MaybeCreateTables\n"));
 
@@ -3854,16 +3930,19 @@ nsUrlClassifierDBService::Init()
     do_GetService(MOZ_STORAGE_SERVICE_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
 
   // Force PSM to be loaded on the main thread.
   nsCOMPtr<nsICryptoHash> hash =
     do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
 
+  mPrefixSet = do_CreateInstance("@mozilla.org/url-classifier/prefixset;1", &rv);
+  NS_ENSURE_SUCCESS(rv, rv);
+
   // Should we check document loads for malware URIs?
   nsCOMPtr<nsIPrefBranch2> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID);
 
   PRInt32 gethashNoise = 0;
   if (prefs) {
     PRBool tmpbool;
     rv = prefs->GetBoolPref(CHECK_MALWARE_PREF, &tmpbool);
     mCheckMalware = NS_SUCCEEDED(rv) ? tmpbool : CHECK_MALWARE_DEFAULT;
@@ -3908,17 +3987,17 @@ nsUrlClassifierDBService::Init()
   rv = NS_NewThread(&gDbBackgroundThread);
   if (NS_FAILED(rv))
     return rv;
 
   mWorker = new nsUrlClassifierDBServiceWorker();
   if (!mWorker)
     return NS_ERROR_OUT_OF_MEMORY;
 
-  rv = mWorker->Init(gethashNoise);
+  rv = mWorker->Init(gethashNoise, mPrefixSet);
   if (NS_FAILED(rv)) {
     mWorker = nsnull;
     return rv;
   }
 
   // Proxy for calling the worker on the background thread
   mWorkerProxy = new UrlClassifierDBServiceWorkerProxy(mWorker);
 
--- a/toolkit/components/url-classifier/nsUrlClassifierDBService.h
+++ b/toolkit/components/url-classifier/nsUrlClassifierDBService.h
@@ -40,16 +40,17 @@
 #ifndef nsUrlClassifierDBService_h_
 #define nsUrlClassifierDBService_h_
 
 #include <nsISupportsUtils.h>
 
 #include "nsID.h"
 #include "nsInterfaceHashtable.h"
 #include "nsIObserver.h"
+#include "nsIUrlClassifierPrefixSet.h"
 #include "nsIUrlClassifierHashCompleter.h"
 #include "nsIUrlClassifierDBService.h"
 #include "nsIURIClassifier.h"
 #include "nsToolkitCompsCID.h"
 
 // The hash length for a domain key.
 #define DOMAIN_LENGTH 4
 
@@ -119,15 +120,18 @@ private:
   // CancelUpdate()/FinishUpdate().  This is used to prevent competing
   // updates, not to determine whether an update is still being
   // processed.
   PRBool mInUpdate;
 
   // The list of tables that can use the default hash completer object.
   nsTArray<nsCString> mGethashWhitelist;
 
+  // Set of prefixes known to be in the database
+  nsCOMPtr<nsIUrlClassifierPrefixSet> mPrefixSet;
+
   // Thread that we do the updates on.
   static nsIThread* gDbBackgroundThread;
 };
 
 NS_DEFINE_STATIC_IID_ACCESSOR(nsUrlClassifierDBService, NS_URLCLASSIFIERDBSERVICE_CID)
 
 #endif // nsUrlClassifierDBService_h_