Bug 1173439 P5 Cache should index on a hash instead of the url itself. r=ehsan
author Ben Kelly <ben@wanderview.com>
Tue, 16 Jun 2015 17:39:05 -0700
changeset 249287 eefb95916e40b1066d9df526a3dc29021b74cdeb
parent 249286 12ccbd45dcb5d403481fa5f9441eb6e4f2a41361
child 249288 a2580fee415c60cc5485b7801423491c52ae679b
push id 28923
push user ryanvm@gmail.com
push date Wed, 17 Jun 2015 18:57:11 +0000
treeherder mozilla-central@099d6cd6725e [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ehsan
bugs 1173439
milestone 41.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1173439 P5 Cache should index on a hash instead of the url itself. r=ehsan
dom/cache/DBSchema.cpp
--- a/dom/cache/DBSchema.cpp
+++ b/dom/cache/DBSchema.cpp
@@ -165,16 +165,17 @@ static nsresult MatchByVaryHeader(mozISt
                                   const CacheRequest& aRequest,
                                   EntryId entryId, bool* aSuccessOut);
 static nsresult DeleteEntries(mozIStorageConnection* aConn,
                               const nsTArray<EntryId>& aEntryIdList,
                               nsTArray<nsID>& aDeletedBodyIdListOut,
                               nsTArray<IdCount>& aDeletedSecurityIdListOut,
                               uint32_t aPos=0, int32_t aLen=-1);
 static nsresult InsertSecurityInfo(mozIStorageConnection* aConn,
+                                   nsICryptoHash* aCrypto,
                                    const nsACString& aData, int32_t *aIdOut);
 static nsresult DeleteSecurityInfo(mozIStorageConnection* aConn, int32_t aId,
                                    int32_t aCount);
 static nsresult DeleteSecurityInfoList(mozIStorageConnection* aConn,
                                        const nsTArray<IdCount>& aDeletedStorageIdList);
 static nsresult InsertEntry(mozIStorageConnection* aConn, CacheId aCacheId,
                             const CacheRequest& aRequest,
                             const nsID* aRequestBodyId,
@@ -194,16 +195,18 @@ static nsresult BindListParamsToQuery(mo
 static nsresult BindId(mozIStorageStatement* aState, const nsACString& aName,
                        const nsID* aId);
 static nsresult ExtractId(mozIStorageStatement* aState, uint32_t aPos,
                           nsID* aIdOut);
 static nsresult CreateAndBindKeyStatement(mozIStorageConnection* aConn,
                                           const char* aQueryFormat,
                                           const nsAString& aKey,
                                           mozIStorageStatement** aStateOut);
+static nsresult HashCString(nsICryptoHash* aCrypto, const nsACString& aIn,
+                            nsACString& aOut);
 } // anonymous namespace
 
 nsresult
 CreateSchema(mozIStorageConnection* aConn)
 {
   MOZ_ASSERT(!NS_IsMainThread());
   MOZ_ASSERT(aConn);
 
@@ -257,17 +260,19 @@ CreateSchema(mozIStorageConnection* aCon
     ));
     if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
     rv = aConn->ExecuteSimpleSQL(NS_LITERAL_CSTRING(
       "CREATE TABLE entries ("
         "id INTEGER NOT NULL PRIMARY KEY, "
         "request_method TEXT NOT NULL, "
         "request_url_no_query TEXT NOT NULL, "
+        "request_url_no_query_hash BLOB NOT NULL, " // first 8-bytes of sha1 hash
         "request_url_query TEXT NOT NULL, "
+        "request_url_query_hash BLOB NOT NULL, "    // first 8-bytes of sha1 hash
         "request_referrer TEXT NOT NULL, "
         "request_headers_guard INTEGER NOT NULL, "
         "request_mode INTEGER NOT NULL, "
         "request_credentials INTEGER NOT NULL, "
         "request_contentpolicytype INTEGER NOT NULL, "
         "request_cache INTEGER NOT NULL, "
         "request_body_id TEXT NULL, "
         "response_type INTEGER NOT NULL, "
@@ -281,20 +286,28 @@ CreateSchema(mozIStorageConnection* aCon
         // Note that response_redirected_url is either going to be empty, or
         // it's going to be a URL different than response_url.
         "response_redirected_url TEXT NOT NULL, "
         "cache_id INTEGER NOT NULL REFERENCES caches(id) ON DELETE CASCADE"
       ");"
     ));
     if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
-    // TODO: see if we can remove these indices on TEXT columns (bug 1110458)
+    // Create an index to support the QueryCache() matching algorithm.  This
+    // needs to quickly find entries in a given Cache that match the request
+    // URL.  The url query is separated in order to support the ignoreSearch
+    // option.  Finally, we index hashes of the URL values instead of the
+    // actual strings to avoid excessive disk bloat.  The index will duplicate
+    // the contents of the columns in the index.  The hash index will prune
+    // the vast majority of values from the query result so that normal
+    // scanning only has to be done on a few values to find an exact URL match.
     rv = aConn->ExecuteSimpleSQL(NS_LITERAL_CSTRING(
-      "CREATE INDEX entries_request_url_no_query_index "
-                "ON entries (request_url_no_query);"
+      "CREATE INDEX entries_request_match_index "
+                "ON entries (cache_id, request_url_no_query_hash, "
+                            "request_url_query_hash);"
     ));
     if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
     rv = aConn->ExecuteSimpleSQL(NS_LITERAL_CSTRING(
       "CREATE TABLE request_headers ("
         "name TEXT NOT NULL, "
         "value TEXT NOT NULL, "
         "entry_id INTEGER NOT NULL REFERENCES entries(id) ON DELETE CASCADE"
@@ -925,32 +938,60 @@ QueryCache(mozIStorageConnection* aConn,
   }
 
   nsAutoCString query(
     "SELECT id, COUNT(response_headers.name) AS vary_count "
     "FROM entries "
     "LEFT OUTER JOIN response_headers ON entries.id=response_headers.entry_id "
                                     "AND response_headers.name='vary' "
     "WHERE entries.cache_id=:cache_id "
-      "AND entries.request_url_no_query=:url_no_query "
+      "AND entries.request_url_no_query_hash=:url_no_query_hash "
   );
 
   if (!aParams.ignoreSearch()) {
+    query.AppendLiteral("AND entries.request_url_query_hash=:url_query_hash ");
+  }
+
+  query.AppendLiteral("AND entries.request_url_no_query=:url_no_query ");
+
+  if (!aParams.ignoreSearch()) {
     query.AppendLiteral("AND entries.request_url_query=:url_query ");
   }
 
   query.AppendLiteral("GROUP BY entries.id ORDER BY entries.id;");
 
   nsCOMPtr<mozIStorageStatement> state;
   nsresult rv = aConn->CreateStatement(query, getter_AddRefs(state));
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
   rv = state->BindInt64ByName(NS_LITERAL_CSTRING("cache_id"), aCacheId);
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
+  nsCOMPtr<nsICryptoHash> crypto =
+    do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  nsAutoCString urlWithoutQueryHash;
+  rv = HashCString(crypto, aRequest.urlWithoutQuery(), urlWithoutQueryHash);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  rv = state->BindUTF8StringAsBlobByName(NS_LITERAL_CSTRING("url_no_query_hash"),
+                                         urlWithoutQueryHash);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  if (!aParams.ignoreSearch()) {
+    nsAutoCString urlQueryHash;
+    rv = HashCString(crypto, aRequest.urlQuery(), urlQueryHash);
+    if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+    rv = state->BindUTF8StringAsBlobByName(NS_LITERAL_CSTRING("url_query_hash"),
+                                           urlQueryHash);
+    if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+  }
+
   rv = state->BindUTF8StringByName(NS_LITERAL_CSTRING("url_no_query"),
                                    aRequest.urlWithoutQuery());
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
   if (!aParams.ignoreSearch()) {
     rv = state->BindUTF8StringByName(NS_LITERAL_CSTRING("url_query"),
                                      aRequest.urlQuery());
     if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
@@ -1211,45 +1252,32 @@ DeleteEntries(mozIStorageConnection* aCo
 
   rv = state->Execute();
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
   return rv;
 }
 
 nsresult
-InsertSecurityInfo(mozIStorageConnection* aConn, const nsACString& aData,
-                   int32_t *aIdOut)
+InsertSecurityInfo(mozIStorageConnection* aConn, nsICryptoHash* aCrypto,
+                   const nsACString& aData, int32_t *aIdOut)
 {
   MOZ_ASSERT(aConn);
+  MOZ_ASSERT(aCrypto);
   MOZ_ASSERT(aIdOut);
   MOZ_ASSERT(!aData.IsEmpty());
 
   // We want to use an index to find existing security blobs, but indexing
   // the full blob would be quite expensive.  Instead, we index a small
   // hash value.  Calculate this hash as the first 8 bytes of the SHA1 of
   // the full data.
-  nsresult rv;
-  nsCOMPtr<nsICryptoHash> crypto =
-    do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
-  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
-
-  rv = crypto->Init(nsICryptoHash::SHA1);
+  nsAutoCString hash;
+  nsresult rv = HashCString(aCrypto, aData, hash);
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
-  rv = crypto->Update(reinterpret_cast<const uint8_t*>(aData.BeginReading()),
-                      aData.Length());
-  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
-
-  nsAutoCString fullHash;
-  rv = crypto->Finish(false /* based64 result */, fullHash);
-  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
-
-  nsDependentCSubstring hash(fullHash, 0, 8);
-
   // Next, search for an existing entry for this blob by comparing the hash
   // value first and then the full data.  SQLite is smart enough to use
   // the index on the hash to search the table before doing the expensive
   // comparison of the large data column.  (This was verified with EXPLAIN.)
   nsCOMPtr<mozIStorageStatement> state;
   rv = aConn->CreateStatement(NS_LITERAL_CSTRING(
     // Note that hash and data are blobs, but we can use = here since the
     // columns are NOT NULL.
@@ -1409,31 +1437,37 @@ InsertEntry(mozIStorageConnection* aConn
             const nsID* aRequestBodyId,
             const CacheResponse& aResponse,
             const nsID* aResponseBodyId)
 {
   MOZ_ASSERT(!NS_IsMainThread());
   MOZ_ASSERT(aConn);
 
   nsresult rv = NS_OK;
-  int32_t securityId = -1;
 
+  nsCOMPtr<nsICryptoHash> crypto =
+    do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  int32_t securityId = -1;
   if (!aResponse.channelInfo().securityInfo().IsEmpty()) {
-    rv = InsertSecurityInfo(aConn,
+    rv = InsertSecurityInfo(aConn, crypto,
                             aResponse.channelInfo().securityInfo(),
                             &securityId);
     if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
   }
 
   nsCOMPtr<mozIStorageStatement> state;
   rv = aConn->CreateStatement(NS_LITERAL_CSTRING(
     "INSERT INTO entries ("
       "request_method, "
       "request_url_no_query, "
+      "request_url_no_query_hash, "
       "request_url_query, "
+      "request_url_query_hash, "
       "request_referrer, "
       "request_headers_guard, "
       "request_mode, "
       "request_credentials, "
       "request_contentpolicytype, "
       "request_cache, "
       "request_body_id, "
       "response_type, "
@@ -1444,17 +1478,19 @@ InsertEntry(mozIStorageConnection* aConn
       "response_body_id, "
       "response_security_info_id, "
       "response_redirected, "
       "response_redirected_url, "
       "cache_id "
     ") VALUES ("
       ":request_method, "
       ":request_url_no_query, "
+      ":request_url_no_query_hash, "
       ":request_url_query, "
+      ":request_url_query_hash, "
       ":request_referrer, "
       ":request_headers_guard, "
       ":request_mode, "
       ":request_credentials, "
       ":request_contentpolicytype, "
       ":request_cache, "
       ":request_body_id, "
       ":response_type, "
@@ -1474,20 +1510,36 @@ InsertEntry(mozIStorageConnection* aConn
   rv = state->BindUTF8StringByName(NS_LITERAL_CSTRING("request_method"),
                                    aRequest.method());
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
   rv = state->BindUTF8StringByName(NS_LITERAL_CSTRING("request_url_no_query"),
                                    aRequest.urlWithoutQuery());
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
+  nsAutoCString urlWithoutQueryHash;
+  rv = HashCString(crypto, aRequest.urlWithoutQuery(), urlWithoutQueryHash);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  rv = state->BindUTF8StringAsBlobByName(
+    NS_LITERAL_CSTRING("request_url_no_query_hash"), urlWithoutQueryHash);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
   rv = state->BindUTF8StringByName(NS_LITERAL_CSTRING("request_url_query"),
                                    aRequest.urlQuery());
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
+  nsAutoCString urlQueryHash;
+  rv = HashCString(crypto, aRequest.urlQuery(), urlQueryHash);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  rv = state->BindUTF8StringAsBlobByName(
+    NS_LITERAL_CSTRING("request_url_query_hash"), urlQueryHash);
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
   rv = state->BindStringByName(NS_LITERAL_CSTRING("request_referrer"),
                                aRequest.referrer());
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
   rv = state->BindInt32ByName(NS_LITERAL_CSTRING("request_headers_guard"),
     static_cast<int32_t>(aRequest.headersGuard()));
   if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
 
@@ -1940,16 +1992,36 @@ CreateAndBindKeyStatement(mozIStorageCon
     if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
   }
 
   state.forget(aStateOut);
 
   return rv;
 }
 
+nsresult  // Writes the first 8 bytes of SHA1(aIn) to aOut; returns the first failing nsresult, if any.
+HashCString(nsICryptoHash* aCrypto, const nsACString& aIn, nsACString& aOut)
+{
+  MOZ_ASSERT(aCrypto);  // the hasher is supplied by the caller; this helper never creates one
+
+  nsresult rv = aCrypto->Init(nsICryptoHash::SHA1);  // Init runs on every call; callers in this file pass one hasher to several calls
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  rv = aCrypto->Update(reinterpret_cast<const uint8_t*>(aIn.BeginReading()),  // feed the whole input string as raw bytes
+                       aIn.Length());
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  nsAutoCString fullHash;  // receives the complete digest before truncation
+  rv = aCrypto->Finish(false /* base64 result */, fullHash);  // false: the digest is not base64 encoded
+  if (NS_WARN_IF(NS_FAILED(rv))) { return rv; }
+
+  aOut = Substring(fullHash, 0, 8);  // keep only the leading 8 bytes; the schema's *_hash columns hold this short prefix
+  return rv;
+}
+
 } // anonymouns namespace
 
 nsresult
 IncrementalVacuum(mozIStorageConnection* aConn)
 {
   // Determine how much free space is in the database.
   nsCOMPtr<mozIStorageStatement> state;
   nsresult rv = aConn->CreateStatement(NS_LITERAL_CSTRING(