Bug 453723: Short circuit known-clean hosts and fragments in the url-classifier. r=tony
☠☠ backed out by 0b2b3095603a ☠ ☠
authorDave Camp <dcamp@mozilla.com>
Sun, 28 Sep 2008 16:42:19 -0700
changeset 19861 1a971f9406f8036a7255b92950a5d36811a07891
parent 19860 ad3074e580096ffd6a8cb29b1e91effeee176fd6
child 19862 0b2b3095603ac4b5a63273936bda46173fa2d73d
push id2499
push userdcamp@mozilla.com
push dateSun, 28 Sep 2008 23:44:06 +0000
treeherdermozilla-central@1a971f9406f8 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerstony
bugs453723
milestone1.9.1b1pre
Bug 453723: Short circuit known-clean hosts and fragments in the url-classifier. r=tony
toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp
toolkit/components/url-classifier/src/nsUrlClassifierDBService.h
toolkit/components/url-classifier/src/nsUrlClassifierUtils.h
toolkit/components/url-classifier/tests/unit/test_cleankeycache.js
--- a/toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp
+++ b/toolkit/components/url-classifier/src/nsUrlClassifierDBService.cpp
@@ -152,16 +152,20 @@ static const PRLogModuleInfo *gUrlClassi
 #define GETHASH_TABLES_PREF     "urlclassifier.gethashtables"
 
 #define CONFIRM_AGE_PREF        "urlclassifier.confirm-age"
 #define CONFIRM_AGE_DEFAULT_SEC (45 * 60)
 
 #define UPDATE_CACHE_SIZE_PREF    "urlclassifier.updatecachemax"
 #define UPDATE_CACHE_SIZE_DEFAULT -1
 
+// MRU cache sizes for remembering clean lookups
+#define CLEAN_HOST_KEYS_SIZE 16
+#define CLEAN_FRAGMENTS_SIZE 32
+
 // Amount of time to spend updating before committing and delaying, in
 // seconds.  This is checked after each update stream, so the actual
 // time spent can be higher than this, depending on update stream size.
 #define UPDATE_WORKING_TIME         "urlclassifier.workingtime"
 #define UPDATE_WORKING_TIME_DEFAULT 5
 
 // The amount of time to delay after hitting UPDATE_WORKING_TIME, in
 // seconds.
@@ -457,16 +461,20 @@ public:
                        nsTArray<nsUrlClassifierEntry>& entry);
 
   // Read the entries for a given key/table/chunk from the database
   nsresult ReadEntries(const nsUrlClassifierDomainHash& key,
                        PRUint32 tableId,
                        PRUint32 chunkId,
                        nsTArray<nsUrlClassifierEntry>& entry);
 
+  // Read the entries for a given host key from the database.
+  nsresult ReadEntries(const nsUrlClassifierDomainHash& key,
+                       nsTArray<nsUrlClassifierEntry>& entry);
+
   // Read the entry with a given ID from the database
   nsresult ReadEntry(PRInt64 id, nsUrlClassifierEntry& entry, PRBool *exists);
 
   // Remove an entry from the database
   nsresult DeleteEntry(nsUrlClassifierEntry& entry);
 
   // Write an entry to the database
   nsresult WriteEntry(nsUrlClassifierEntry& entry);
@@ -765,16 +773,29 @@ nsUrlClassifierStore::ReadEntries(const 
   NS_ENSURE_SUCCESS(rv, rv);
   rv = mLookupWithChunkStatement->BindInt32Parameter(2, chunkId);
   NS_ENSURE_SUCCESS(rv, rv);
 
   return ReadEntries(mLookupWithChunkStatement, entries);
 }
 
 nsresult
+nsUrlClassifierStore::ReadEntries(const nsUrlClassifierDomainHash& hash,
+                                  nsTArray<nsUrlClassifierEntry>& entries)
+{
+  mozStorageStatementScoper scoper(mLookupStatement);
+
+  nsresult rv = mLookupStatement->BindBlobParameter
+                  (0, hash.buf, DOMAIN_LENGTH);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  return ReadEntries(mLookupStatement, entries);
+}
+
+nsresult
 nsUrlClassifierStore::ReadEntry(PRInt64 id,
                                 nsUrlClassifierEntry& entry,
                                 PRBool *exists)
 {
   entry.Clear();
 
   mozStorageStatementScoper scoper(mLookupWithIDStatement);
 
@@ -1012,16 +1033,20 @@ public:
 
   // Initialize, called in the main thread
   nsresult Init(PRInt32 gethashNoise);
 
   // Queue a lookup for the worker to perform, called in the main thread.
   nsresult QueueLookup(const nsACString& lookupKey,
                        nsIUrlClassifierLookupCallback* callback);
 
+  // Check if the key is on a known-clean host.
+  nsresult CheckCleanHost(const nsACString &lookupKey,
+                          PRBool *clean);
+
   // Handle any queued-up lookups.  We call this function during long-running
   // update operations to prevent lookups from blocking for too long.
   nsresult HandlePendingLookups();
 
 private:
   // No subclassing
   ~nsUrlClassifierDBServiceWorker();
 
@@ -1130,32 +1155,40 @@ private:
 
   // Reset the in-progress update
   void ResetUpdate();
 
   // take a lookup string (www.hostname.com/path/to/resource.html) and
   // expand it into the set of fragments that should be searched for in an
   // entry
   nsresult GetLookupFragments(const nsCSubstring& spec,
-                              nsTArray<nsUrlClassifierCompleteHash>& fragments);
+                              nsTArray<nsCString>& fragments);
 
   // Check for a canonicalized IP address.
   PRBool IsCanonicalizedIP(const nsACString& host);
 
   // Get the database key for a given URI.  This is the top three
   // domain components if they exist, otherwise the top two.
   //  hostname.com/foo/bar -> hostname.com
   //  mail.hostname.com/foo/bar -> mail.hostname.com
   //  www.mail.hostname.com/foo/bar -> mail.hostname.com
   nsresult GetKey(const nsACString& spec, nsUrlClassifierDomainHash& hash);
 
+  // Similar to GetKey(), but if the domain contains three or more components,
+  // two keys will be returned:
+  //  hostname.com/foo/bar -> [hostname.com]
+  //  mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com]
+  //  www.mail.hostname.com/foo/bar -> [hostname.com, mail.hostname.com]
+  nsresult GetHostKeys(const nsACString &spec,
+                       nsTArray<nsCString> &hostKeys);
+
   // Look for a given lookup string (www.hostname.com/path/to/resource.html)
   // in the entries at the given key.  Returns a list of entries that match.
   nsresult CheckKey(const nsCSubstring& spec,
-                    const nsUrlClassifierDomainHash& key,
+                    const nsACString& key,
                     nsTArray<nsUrlClassifierLookupResult>& results);
 
   // Perform a classifier lookup for a given url.
   nsresult DoLookup(const nsACString& spec, nsIUrlClassifierLookupCallback* c);
 
   // Add entries to the results.
   nsresult AddNoise(PRInt64 nearID,
                     PRInt32 count,
@@ -1243,16 +1276,34 @@ private:
   // Start time of the current update interval.  This will be reset
   // every time we apply the update.
   PRIntervalTime mUpdateStartTime;
 
   nsCOMPtr<nsICryptoHMAC> mHMAC;
   // The number of noise entries to add to the set of lookup results.
   PRInt32 mGethashNoise;
 
+  // We maintain an MRU cache of clean host keys (host keys with no
+  // entry in the db).
+  nsUrlClassifierFragmentSet mCleanHostKeys;
+
+  // The clean-host-key cache is updated in the worker thread, but
+  // checked in the main thread (to avoid posting lookup requests if
+  // not necessary).
+  PRLock* mCleanHostKeysLock;
+
+  // We maintain an MRU cache of clean fragments (fragments with no
+  // entry in the db).
+  nsUrlClassifierFragmentSet mCleanFragments;
+
+  // The host keys from the last host to be checked for malware are
+  // cached for quicker lookup next time through.
+  nsCString mCachedHostKey;
+  nsTArray<nsUrlClassifierEntry> mCachedEntries;
+
   // Pending lookups are stored in a queue for processing.  The queue
   // is protected by mPendingLookupLock.
   PRLock* mPendingLookupLock;
 
   class PendingLookup {
   public:
     nsCString mKey;
     nsCOMPtr<nsIUrlClassifierLookupCallback> mCallback;
@@ -1280,25 +1331,30 @@ nsUrlClassifierDBServiceWorker::nsUrlCla
   , mInStream(PR_FALSE)
   , mPrimaryStream(PR_FALSE)
   , mHaveCachedLists(PR_FALSE)
   , mCachedListsTable(PR_UINT32_MAX)
   , mHaveCachedAddChunks(PR_FALSE)
   , mHaveCachedSubChunks(PR_FALSE)
   , mUpdateStartTime(0)
   , mGethashNoise(0)
+  , mCleanHostKeysLock(nsnull)
   , mPendingLookupLock(nsnull)
 {
 }
 
 nsUrlClassifierDBServiceWorker::~nsUrlClassifierDBServiceWorker()
 {
   NS_ASSERTION(!mConnection,
                "Db connection not closed, leaking memory!  Call CloseDb "
                "to close the connection.");
+
+  if (mCleanHostKeysLock)
+    PR_DestroyLock(mCleanHostKeysLock);
+
   if (mPendingLookupLock)
     PR_DestroyLock(mPendingLookupLock);
 }
 
 nsresult
 nsUrlClassifierDBServiceWorker::Init(PRInt32 gethashNoise)
 {
   mGethashNoise = gethashNoise;
@@ -1315,16 +1371,26 @@ nsUrlClassifierDBServiceWorker::Init(PRI
                                 getter_AddRefs(mDBFile));
   }
 
   if (NS_FAILED(rv)) return NS_ERROR_NOT_AVAILABLE;
 
   rv = mDBFile->Append(NS_LITERAL_STRING(DATABASE_FILENAME));
   NS_ENSURE_SUCCESS(rv, rv);
 
+  mCleanHostKeysLock = PR_NewLock();
+  if (!mCleanHostKeysLock)
+    return NS_ERROR_OUT_OF_MEMORY;
+
+  if (!mCleanHostKeys.Init(CLEAN_HOST_KEYS_SIZE))
+    return NS_ERROR_OUT_OF_MEMORY;
+
+  if (!mCleanFragments.Init(CLEAN_FRAGMENTS_SIZE))
+    return NS_ERROR_OUT_OF_MEMORY;
+
   mPendingLookupLock = PR_NewLock();
   if (!mPendingLookupLock)
     return NS_ERROR_OUT_OF_MEMORY;
 
   ResetUpdate();
 
   mTableFreshness.Init();
 
@@ -1342,18 +1408,39 @@ nsUrlClassifierDBServiceWorker::QueueLoo
 
   lookup->mKey = spec;
   lookup->mCallback = callback;
 
   return NS_OK;
 }
 
 nsresult
+nsUrlClassifierDBServiceWorker::CheckCleanHost(const nsACString &spec,
+                                               PRBool *clean)
+{
+  nsAutoTArray<nsCString, 2> lookupHosts;
+  nsresult rv = GetHostKeys(spec, lookupHosts);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsAutoLock lock(mCleanHostKeysLock);
+
+  for (PRUint32 i = 0; i < lookupHosts.Length(); i++) {
+    if (!mCleanHostKeys.Has(lookupHosts[i])) {
+      *clean = PR_FALSE;
+      return NS_OK;
+    }
+  }
+
+  *clean = PR_TRUE;
+  return NS_OK;
+}
+
+nsresult
 nsUrlClassifierDBServiceWorker::GetLookupFragments(const nsACString& spec,
-                                                   nsTArray<nsUrlClassifierCompleteHash>& fragments)
+                                                   nsTArray<nsCString>& fragments)
 {
   fragments.Clear();
 
   nsACString::const_iterator begin, end, iter;
   spec.BeginReading(begin);
   spec.EndReading(end);
 
   iter = begin;
@@ -1425,72 +1512,91 @@ nsUrlClassifierDBServiceWorker::GetLooku
          numComponents < MAX_PATH_COMPONENTS) {
     iter++;
     paths.AppendCString(Substring(begin, iter));
     numComponents++;
   }
 
   for (int hostIndex = 0; hostIndex < hosts.Count(); hostIndex++) {
     for (int pathIndex = 0; pathIndex < paths.Count(); pathIndex++) {
-      nsCAutoString key;
+      nsCString key;
       key.Assign(*hosts[hostIndex]);
       key.Append('/');
       key.Append(*paths[pathIndex]);
       LOG(("Chking %s", key.get()));
 
-      nsUrlClassifierCompleteHash* hash = fragments.AppendElement();
-      if (!hash) return NS_ERROR_OUT_OF_MEMORY;
-      hash->FromPlaintext(key, mCryptoHash);
+      fragments.AppendElement(key);
     }
   }
 
   return NS_OK;
 }
 
 nsresult
 nsUrlClassifierDBServiceWorker::CheckKey(const nsACString& spec,
-                                         const nsUrlClassifierDomainHash& hash,
+                                         const nsACString& hostKey,
                                          nsTArray<nsUrlClassifierLookupResult>& results)
 {
-  mozStorageStatementScoper lookupScoper(mMainStore.LookupStatement());
-
-  nsresult rv = mMainStore.LookupStatement()->BindBlobParameter
-    (0, hash.buf, DOMAIN_LENGTH);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  nsTArray<nsUrlClassifierCompleteHash> fragments;
-  PRBool haveFragments = PR_FALSE;
-
-  PRBool exists;
-  rv = mMainStore.LookupStatement()->ExecuteStep(&exists);
+  // First, if this key has been checked since our last update and had
+  // no entries, we can exit early.  We also do this check before
+  // posting the lookup to this thread, but in case multiple lookups
+  // are queued at the same time, it's worth checking again here.
+  {
+    nsAutoLock lock(mCleanHostKeysLock);
+    if (mCleanHostKeys.Has(hostKey))
+      return NS_OK;
+  }
+
+  // Now read host key entries from the db if necessary.
+  if (hostKey != mCachedHostKey) {
+    mCachedEntries.Clear();
+    nsUrlClassifierDomainHash hostKeyHash;
+    hostKeyHash.FromPlaintext(hostKey, mCryptoHash);
+    mMainStore.ReadEntries(hostKeyHash, mCachedEntries);
+    mCachedHostKey = hostKey;
+  }
+
+  if (mCachedEntries.Length() == 0) {
+    // There were no entries in the db for this host key.  Go ahead
+    // and mark the host key as clean to help short-circuit future
+    // lookups.
+    nsAutoLock lock(mCleanHostKeysLock);
+    mCleanHostKeys.Put(hostKey);
+    return NS_OK;
+  }
+
+  // Now get the set of fragments to look up.
+  nsTArray<nsCString> fragments;
+  nsresult rv = GetLookupFragments(spec, fragments);
   NS_ENSURE_SUCCESS(rv, rv);
-  while (exists) {
-    if (!haveFragments) {
-      rv = GetLookupFragments(spec, fragments);
-      NS_ENSURE_SUCCESS(rv, rv);
-      haveFragments = PR_TRUE;
-    }
-
-    nsUrlClassifierEntry entry;
-    if (!mMainStore.ReadStatement(mMainStore.LookupStatement(), entry))
-      return NS_ERROR_FAILURE;
-
-    PRInt64 now = (PR_Now() / PR_USEC_PER_SEC);
-
-    for (PRUint32 i = 0; i < fragments.Length(); i++) {
-      if (entry.Match(fragments[i])) {
+
+  PRInt64 now = (PR_Now() / PR_USEC_PER_SEC);
+
+  // Now check each lookup fragment against the entries in the DB.
+  for (PRUint32 i = 0; i < fragments.Length(); i++) {
+    // If this fragment has been previously checked, ignore it.
+    if (mCleanFragments.Has(fragments[i]))
+      continue;
+
+    nsUrlClassifierCompleteHash lookupHash;
+    lookupHash.FromPlaintext(fragments[i], mCryptoHash);
+
+    PRBool foundMatch = PR_FALSE;
+    for (PRUint32 j = 0; j < mCachedEntries.Length(); j++) {
+      nsUrlClassifierEntry &entry = mCachedEntries[j];
+      if (entry.Match(lookupHash)) {
         // If the entry doesn't contain a complete hash, we need to
         // save it here so that it can be compared against the
         // complete hash.  However, we don't set entry.mHaveComplete
         // because it isn't a verified part of the entry yet.
         nsUrlClassifierLookupResult *result = results.AppendElement();
         if (!result)
           return NS_ERROR_OUT_OF_MEMORY;
 
-        result->mLookupFragment = fragments[i];
+        result->mLookupFragment = lookupHash;
         result->mEntry = entry;
 
         // Fill in the table name.
         GetTableName(entry.mTableId, result->mTableName);
 
         PRBool fresh;
         PRInt64 tableUpdateTime;
         if (mTableFreshness.Get(result->mTableName, &tableUpdateTime)) {
@@ -1501,24 +1607,27 @@ nsUrlClassifierDBServiceWorker::CheckKey
           LOG(("No expiration time for this table.\n"));
           fresh = PR_FALSE;
         }
 
         // This is a confirmed result if we match a complete fragment in
         // an up-to-date table.
         result->mConfirmed = entry.mHaveComplete && fresh;
 
+        foundMatch = PR_TRUE;
         LOG(("Found a result.  complete=%d, fresh=%d",
              entry.mHaveComplete, fresh));
-        break;
       }
     }
 
-    rv = mMainStore.LookupStatement()->ExecuteStep(&exists);
-    NS_ENSURE_SUCCESS(rv, rv);
+    if (!foundMatch) {
+      // This fragment is clean, we don't need to bother checking it
+      // again until the next update.
+      mCleanFragments.Put(fragments[i]);
+    }
   }
 
   return NS_OK;
 }
 
 /**
  * Lookup up a key in the database is a two step process:
  *
@@ -1547,76 +1656,30 @@ nsUrlClassifierDBServiceWorker::DoLookup
 
 #if defined(PR_LOGGING)
   PRIntervalTime clockStart = 0;
   if (LOG_ENABLED()) {
     clockStart = PR_IntervalNow();
   }
 #endif
 
-  nsACString::const_iterator begin, end, iter;
-  spec.BeginReading(begin);
-  spec.EndReading(end);
-
-  iter = begin;
-  if (!FindCharInReadable('/', iter, end)) {
-    return NS_OK;
-  }
-
-  const nsCSubstring& host = Substring(begin, iter++);
-
   nsAutoPtr<nsTArray<nsUrlClassifierLookupResult> > results;
   results = new nsTArray<nsUrlClassifierLookupResult>();
   if (!results) {
     c->LookupComplete(nsnull);
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
-  nsUrlClassifierDomainHash hash;
-
-  if (IsCanonicalizedIP(host)) {
-    // Don't break up the host into components
-    nsCAutoString lookupHost;
-    lookupHost.Assign(host);
-    lookupHost.Append("/");
-    hash.FromPlaintext(lookupHost, mCryptoHash);
-    CheckKey(spec, hash, *results);
-  } else {
-    nsCStringArray hostComponents;
-    hostComponents.ParseString(PromiseFlatCString(host).get(), ".");
-
-    if (hostComponents.Count() < 2) {
-      // no host or toplevel host, this won't match anything in the db
-      c->LookupComplete(nsnull);
-      return NS_OK;
-    }
-
-    // First check with two domain components
-    PRInt32 last = hostComponents.Count() - 1;
-    nsCAutoString lookupHost;
-    lookupHost.Assign(*hostComponents[last - 1]);
-    lookupHost.Append(".");
-    lookupHost.Append(*hostComponents[last]);
-    lookupHost.Append("/");
-    hash.FromPlaintext(lookupHost, mCryptoHash);
-
-    // we ignore failures from CheckKey because we'd rather try to find
-    // more results than fail.
-    CheckKey(spec, hash, *results);
-
-    // Now check with three domain components
-    if (hostComponents.Count() > 2) {
-      nsCAutoString lookupHost2;
-      lookupHost2.Assign(*hostComponents[last - 2]);
-      lookupHost2.Append(".");
-      lookupHost2.Append(lookupHost);
-      hash.FromPlaintext(lookupHost2, mCryptoHash);
-
-      CheckKey(spec, hash, *results);
-    }
+  nsAutoTArray<nsCString, 2> lookupHosts;
+  rv = GetHostKeys(spec, lookupHosts);
+
+  for (PRUint32 i = 0; i < lookupHosts.Length(); i++) {
+    // we ignore failures from CheckKey because we'd rather try to
+    // find more results than fail.
+    CheckKey(spec, lookupHosts[i], *results);
   }
 
 #if defined(PR_LOGGING)
   if (LOG_ENABLED()) {
     PRIntervalTime clockEnd = PR_IntervalNow();
     LOG(("query took %dms\n",
          PR_IntervalToMilliseconds(clockEnd - clockStart)));
   }
@@ -1977,16 +2040,73 @@ nsUrlClassifierDBServiceWorker::GetKey(c
   lookupHost.Append(".");
   lookupHost.Append(*hostComponents[last]);
   lookupHost.Append("/");
 
   return hash.FromPlaintext(lookupHost, mCryptoHash);
 }
 
 nsresult
+nsUrlClassifierDBServiceWorker::GetHostKeys(const nsACString &spec,
+                                            nsTArray<nsCString> &hostKeys)
+{
+  nsACString::const_iterator begin, end, iter;
+  spec.BeginReading(begin);
+  spec.EndReading(end);
+
+  iter = begin;
+  if (!FindCharInReadable('/', iter, end)) {
+    return NS_OK;
+  }
+
+  const nsCSubstring& host = Substring(begin, iter);
+
+  if (IsCanonicalizedIP(host)) {
+    nsCString *key = hostKeys.AppendElement();
+    if (!key)
+      return NS_ERROR_OUT_OF_MEMORY;
+
+    key->Assign(host);
+    key->Append("/");
+    return NS_OK;
+  }
+
+  nsCStringArray hostComponents;
+  hostComponents.ParseString(PromiseFlatCString(host).get(), ".");
+
+  if (hostComponents.Count() < 2) {
+    // no host or toplevel host, this won't match anything in the db
+    return NS_OK;
+  }
+
+  // First check with two domain components
+  PRInt32 last = hostComponents.Count() - 1;
+  nsCString *lookupHost = hostKeys.AppendElement();
+  if (!lookupHost)
+    return NS_ERROR_OUT_OF_MEMORY;
+
+  lookupHost->Assign(*hostComponents[last - 1]);
+  lookupHost->Append(".");
+  lookupHost->Append(*hostComponents[last]);
+  lookupHost->Append("/");
+
+  // Now check with three domain components
+  if (hostComponents.Count() > 2) {
+    nsCString *lookupHost2 = hostKeys.AppendElement();
+    if (!lookupHost2)
+      return NS_ERROR_OUT_OF_MEMORY;
+    lookupHost2->Assign(*hostComponents[last - 2]);
+    lookupHost2->Append(".");
+    lookupHost2->Append(*lookupHost);
+  }
+
+  return NS_OK;
+}
+
+nsresult
 nsUrlClassifierDBServiceWorker::GetShaEntries(PRUint32 tableId,
                                               PRUint32 chunkType,
                                               PRUint32 chunkNum,
                                               PRUint32 domainSize,
                                               PRUint32 fragmentSize,
                                               nsACString& chunk,
                                               nsTArray<nsUrlClassifierEntry>& entries)
 {
@@ -3030,16 +3150,28 @@ nsUrlClassifierDBServiceWorker::ApplyUpd
     mConnection->RollbackTransaction();
   } else {
     mUpdateStatus = FlushChunkLists();
     if (NS_SUCCEEDED(mUpdateStatus)) {
       mUpdateStatus = mConnection->CommitTransaction();
     }
   }
 
+  if (NS_SUCCEEDED(mUpdateStatus)) {
+    // We have modified the db, we can't trust the set of clean
+    // fragments or domains anymore.
+    mCachedHostKey.Truncate();
+    mCachedEntries.Clear();
+
+    mCleanFragments.Clear();
+
+    nsAutoLock lock(mCleanHostKeysLock);
+    mCleanHostKeys.Clear();
+  }
+
   if (mGrewCache) {
     // During the update we increased the page cache to bigger than we
     // want to keep around.  At the moment, the only reliable way to make
     // sure that the page cache is freed is to reopen the connection.
     mGrewCache = PR_FALSE;
     CloseDb();
     OpenDb();
   }
@@ -3185,16 +3317,21 @@ nsUrlClassifierDBServiceWorker::CacheCom
   for (PRUint32 i = 0; i < results->Length(); i++) {
     nsUrlClassifierLookupResult& result = results->ElementAt(i);
     // Failing to update here shouldn't be fatal (and might be common,
     // if we're updating entries that were removed since they were
     // returned after a lookup).
     mMainStore.UpdateEntry(result.mEntry);
   }
 
+  // Completions change entries in the DB, the cached set of entries is
+  // no longer valid.
+  mCachedHostKey.Truncate();
+  mCachedEntries.Clear();
+
   return NS_OK;
 }
 
 nsresult
 nsUrlClassifierDBServiceWorker::OpenDb()
 {
   // Connection already open, don't do anything.
   if (mConnection)
@@ -3803,25 +3940,24 @@ nsUrlClassifierDBService::Classify(nsIUR
     *result = PR_FALSE;
     return NS_OK;
   }
 
   nsRefPtr<nsUrlClassifierClassifyCallback> callback =
     new nsUrlClassifierClassifyCallback(c, mCheckMalware, mCheckPhishing);
   if (!callback) return NS_ERROR_OUT_OF_MEMORY;
 
-  nsresult rv = LookupURI(uri, callback);
+  nsresult rv = LookupURI(uri, callback, PR_FALSE, result);
   if (rv == NS_ERROR_MALFORMED_URI) {
+    *result = PR_FALSE;
     // The URI had no hostname, don't try to classify it.
-    *result = PR_FALSE;
     return NS_OK;
   }
   NS_ENSURE_SUCCESS(rv, rv);
 
-  *result = PR_TRUE;
   return NS_OK;
 }
 
 NS_IMETHODIMP
 nsUrlClassifierDBService::Lookup(const nsACString& spec,
                                  nsIUrlClassifierCallback* c)
 {
   NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED);
@@ -3831,33 +3967,51 @@ nsUrlClassifierDBService::Lookup(const n
   nsresult rv = NS_NewURI(getter_AddRefs(uri), spec);
   NS_ENSURE_SUCCESS(rv, rv);
 
   uri = NS_GetInnermostURI(uri);
   if (!uri) {
     return NS_ERROR_FAILURE;
   }
 
-  return LookupURI(uri, c);
+  PRBool didLookup;
+  return LookupURI(uri, c, PR_TRUE, &didLookup);
 }
 
 nsresult
 nsUrlClassifierDBService::LookupURI(nsIURI* uri,
-                                    nsIUrlClassifierCallback* c)
+                                    nsIUrlClassifierCallback* c,
+                                    PRBool forceLookup,
+                                    PRBool *didLookup)
 {
   NS_ENSURE_TRUE(gDbBackgroundThread, NS_ERROR_NOT_INITIALIZED);
 
   nsCAutoString key;
   // Canonicalize the url
   nsCOMPtr<nsIUrlClassifierUtils> utilsService =
     do_GetService(NS_URLCLASSIFIERUTILS_CONTRACTID);
   nsresult rv = utilsService->GetKeyForURI(uri, key);
   if (NS_FAILED(rv))
     return rv;
 
+  if (forceLookup) {
+    *didLookup = PR_TRUE;
+  } else {
+    // Check if the URI is on a clean host.  If so, we don't need to
+    // bother queueing up a lookup, we can just return.
+    PRBool clean;
+    rv = mWorker->CheckCleanHost(key, &clean);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    *didLookup = !clean;
+    if (clean) {
+      return NS_OK;
+    }
+  }
+
   // Create an nsUrlClassifierLookupCallback object.  This object will
   // take care of confirming partial hash matches if necessary before
   // calling the client's callback.
   nsCOMPtr<nsIUrlClassifierLookupCallback> callback =
     new nsUrlClassifierLookupCallback(this, c);
   if (!callback)
     return NS_ERROR_OUT_OF_MEMORY;
 
--- a/toolkit/components/url-classifier/src/nsUrlClassifierDBService.h
+++ b/toolkit/components/url-classifier/src/nsUrlClassifierDBService.h
@@ -88,17 +88,18 @@ public:
 
 private:
   // No subclassing
   ~nsUrlClassifierDBService();
 
   // Disallow copy constructor
   nsUrlClassifierDBService(nsUrlClassifierDBService&);
 
-  nsresult LookupURI(nsIURI* uri, nsIUrlClassifierCallback* c);
+  nsresult LookupURI(nsIURI* uri, nsIUrlClassifierCallback* c,
+                     PRBool forceCheck, PRBool *didCheck);
 
   // Close db connection and join the background thread if it exists. 
   nsresult Shutdown();
   
   nsCOMPtr<nsUrlClassifierDBServiceWorker> mWorker;
   nsCOMPtr<nsUrlClassifierDBServiceWorker> mWorkerProxy;
 
   nsInterfaceHashtable<nsCStringHashKey, nsIUrlClassifierHashCompleter> mCompleters;
--- a/toolkit/components/url-classifier/src/nsUrlClassifierUtils.h
+++ b/toolkit/components/url-classifier/src/nsUrlClassifierUtils.h
@@ -34,16 +34,18 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
 #ifndef nsUrlClassifierUtils_h_
 #define nsUrlClassifierUtils_h_
 
 #include "nsAutoPtr.h"
 #include "nsIUrlClassifierUtils.h"
+#include "nsTArray.h"
+#include "nsDataHashtable.h"
 
 class nsUrlClassifierUtils : public nsIUrlClassifierUtils
 {
 private:
   /**
    * A fast, bit-vector map for ascii characters.
    *
    * Internally stores 256 bits in an array of 8 ints.
@@ -117,9 +119,108 @@ private:
   // Function to tell if we should encode a character.
   PRBool ShouldURLEscape(const unsigned char c) const;
 
   void CleanupHostname(const nsACString & host, nsACString & _retval);
 
   nsAutoPtr<Charmap> mEscapeCharmap;
 };
 
+// An MRU list of fragments.  This is used by the DB service to
+// keep a set of known-clean fragments that don't need a database
+// lookup.
+class nsUrlClassifierFragmentSet
+{
+public:
+  nsUrlClassifierFragmentSet() : mFirst(nsnull), mLast(nsnull) {}
+
+  PRBool Init(PRUint32 maxEntries) {
+    if (!mEntryStorage.SetCapacity(maxEntries))
+      return PR_FALSE;
+
+    if (!mEntries.Init())
+      return PR_FALSE;
+
+    return PR_TRUE;
+  }
+
+  PRBool Put(const nsACString &fragment) {
+    Entry *entry;
+    if (mEntries.Get(fragment, &entry)) {
+      // Remove this entry from the list, we'll add it back
+      // to the front.
+      UnlinkEntry(entry);
+    } else {
+      if (mEntryStorage.Length() < mEntryStorage.Capacity()) {
+        entry = mEntryStorage.AppendElement();
+        if (!entry)
+          return PR_FALSE;
+      } else {
+        // Reuse the oldest entry.
+        entry = mLast;
+        UnlinkEntry(entry);
+        mEntries.Remove(entry->mFragment);
+      }
+      entry->mFragment = fragment;
+      mEntries.Put(fragment, entry);
+    }
+
+    // Add the entry to the front of the list
+    entry->mPrev = nsnull;
+    entry->mNext = mFirst;
+    mFirst = entry;
+    if (!mLast) {
+      mLast = entry;
+    }
+
+    return PR_TRUE;
+  }
+
+  PRBool Has(const nsACString &fragment) {
+    return mEntries.Get(fragment, nsnull);
+  }
+
+  void Clear() {
+    mFirst = mLast = nsnull;
+    mEntries.Clear();
+  }
+
+private:
+  // One entry in the set.  We maintain a doubly-linked list, with
+  // the most recently used entry at the front.
+  class Entry {
+  public:
+    Entry() : mNext(nsnull), mPrev(nsnull) {};
+    ~Entry() { }
+
+    Entry *mNext;
+    Entry *mPrev;
+    nsCString mFragment;
+  };
+
+  void UnlinkEntry(Entry *entry)
+  {
+    if (entry->mPrev)
+      entry->mPrev->mNext = entry->mNext;
+    else
+      mFirst = entry->mNext;
+
+    if (entry->mNext)
+      entry->mNext->mPrev = entry->mPrev;
+    else
+      mLast = entry->mPrev;
+
+    entry->mPrev = entry->mNext = nsnull;
+  }
+
+  // The newest entry in the cache.
+  Entry *mFirst;
+  // The oldest entry in the cache.
+  Entry *mLast;
+
+  // Storage for the entries in this set.
+  nsTArray<Entry> mEntryStorage;
+
+  // Entry lookup by fragment.
+  nsDataHashtable<nsCStringHashKey, Entry*> mEntries;
+};
+
 #endif // nsUrlClassifierUtils_h_
new file mode 100644
--- /dev/null
+++ b/toolkit/components/url-classifier/tests/unit/test_cleankeycache.js
@@ -0,0 +1,117 @@
+// Test an add of two urls to a fresh database
+function testCleanHostKeys() {
+  var addUrls = [ "foo.com/a" ];
+  var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }]);
+
+  doStreamUpdate(update, function() {
+      var ios = Components.classes["@mozilla.org/network/io-service;1"].
+        getService(Components.interfaces.nsIIOService);
+
+      // Check with a clean host key
+      var uri = ios.newURI("http://bar.com/a", null, null);
+
+      // Use the nsIURIClassifier interface (the
+      // nsIUrlClassifierDBService will always queue a lookup,
+      // nsIURIClassifier won't if the host key is known to be clean.
+      var classifier = dbservice.QueryInterface(Ci.nsIURIClassifier);
+      var result = classifier.classify(uri, function(errorCode) {
+          var result2 = classifier.classify(uri, function() {
+              do_throw("shouldn't get a callback");
+            });
+          // second call shouldn't result in a callback.
+          do_check_eq(result2, false);
+
+          runNextTest();
+        });
+
+      // The first classifier call should result in a callback.
+      do_check_eq(result, true);
+    }, updateError);
+}
+
+// Test an add of two urls to a fresh database
+function testDirtyHostKeys() {
+  var addUrls = [ "foo.com/a" ];
+  var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }]);
+
+  doStreamUpdate(update, function() {
+      var ios = Components.classes["@mozilla.org/network/io-service;1"].
+        getService(Components.interfaces.nsIIOService);
+
+      // Check with a dirty host key - both checks should happen.
+      var uri = ios.newURI("http://foo.com/b", null, null);
+
+      // Use the nsIURIClassifier interface (the
+      // nsIUrlClassifierDBService will always queue a lookup,
+      // nsIURIClassifier won't if the host key is known to be clean.
+      var classifier = dbservice.QueryInterface(Ci.nsIURIClassifier);
+      var result = classifier.classify(uri, function(errorCode) {
+          var result2 = classifier.classify(uri, function() {
+              runNextTest();
+            });
+          // second call should result in a callback.
+          do_check_eq(result2, true);
+        });
+
+      // The first classifier call should result in a callback.
+      do_check_eq(result, true);
+    }, updateError);
+}
+
+// Make sure that an update properly clears the host key cache
+function testUpdate() {
+  var ios = Components.classes["@mozilla.org/network/io-service;1"].
+    getService(Components.interfaces.nsIIOService);
+
+  // First lookup should happen...
+  var uri = ios.newURI("http://foo.com/a", null, null);
+
+  // Use the nsIURIClassifier interface (the
+  // nsIUrlClassifierDBService will always queue a lookup,
+  // nsIURIClassifier won't if the host key is known to be clean.
+  var classifier = dbservice.QueryInterface(Ci.nsIURIClassifier);
+  var result = classifier.classify(uri, function(errorCode) {
+      // This check will succeed, which will cause the key to
+      // be put in the clean host key cache...
+      do_check_eq(errorCode, Cr.NS_OK);
+
+      // Now add the url to the db...
+      var addUrls = [ "foo.com/a" ];
+      var update = buildPhishingUpdate(
+        [
+          { "chunkNum" : 1,
+            "urls" : addUrls
+          }]);
+      doStreamUpdate(update, function() {
+          // The clean key cache should be blown now that we've
+          // added, and this callback should execute.
+          var result2 = classifier.classify(uri, function(errorCode) {
+              do_check_neq(errorCode, Cr.NS_OK);
+              runNextTest();
+            });
+          // second call should result in a callback.
+          do_check_eq(result2, true);
+        }, updateError);
+    });
+}
+
+
+
+function run_test()
+{
+  runTests([
+             testCleanHostKeys,
+             testDirtyHostKeys,
+             testUpdate,
+  ]);
+}
+
+do_test_pending();