Backout a01cf079ee0b (Bug 730247). rs=dcamp a=akeybl
authorGian-Carlo Pascutto <gpascutto@mozilla.com>
Tue, 01 May 2012 12:11:46 +0200
changeset 92148 bcf80cb89d21f4a941711553f76171f03b955b73
parent 92147 4cd50468c3e75f6131c00a0a8db49450253790e7
child 92149 0fa60c5f22887dbf040900b265fb4248a6d25fe6
push idunknown
push userunknown
push dateunknown
reviewersdcamp, akeybl
bugs730247
milestone13.0
backs outa01cf079ee0bca3f876c00d5bb8e5fdfa2b22cb3
Backout a01cf079ee0b (Bug 730247). rs=dcamp a=akeybl
toolkit/components/url-classifier/HashStore.cpp
--- a/toolkit/components/url-classifier/HashStore.cpp
+++ b/toolkit/components/url-classifier/HashStore.cpp
@@ -35,79 +35,62 @@
 #include "nsISeekableStream.h"
 #include "nsIStreamConverterService.h"
 #include "nsNetUtil.h"
 #include "nsCheckSummedOutputStream.h"
 #include "prlog.h"
 #include "zlib.h"
 
 // Main store for SafeBrowsing protocol data. We store
-// known add/sub chunks, prefixes and completions in memory
+// known add/sub chunks, prefixe and completions s in memory
 // during an update, and serialize to disk.
 // We do not store the add prefixes, those are retrieved by
 // decompressing the PrefixSet cache whenever we need to apply
 // an update.
-//
-// byte slicing: Many of the 4-byte values stored here are strongly
-// correlated in the upper bytes, and uncorrelated in the lower
-// bytes. Because zlib/DEFLATE requires match lengths of at least
-// 3 to achieve good compression, and we don't get those if only
-// the upper 16-bits are correlated, it is worthwhile to slice 32-bit
-// values into 4 1-byte slices and compress the slices individually.
-// The slices corresponding to MSBs will compress very well, and the
-// slice corresponding to LSB almost nothing. Because of this, we
-// only apply DEFLATE to the 3 most significant bytes, and store the
-// LSB uncompressed.
-//
-// byte sliced (numValues) data format:
-//    uint32 compressed-size
-//    compressed-size bytes    zlib DEFLATE data
-//        0...numValues        byte MSB of 4-byte numValues data
-//    uint32 compressed-size
-//    compressed-size bytes    zlib DEFLATE data
-//        0...numValues        byte 2nd byte of 4-byte numValues data
-//    uint32 compressed-size
-//    compressed-size bytes    zlib DEFLATE data
-//        0...numValues        byte 3rd byte of 4-byte numValues data
-//    0...numValues            byte LSB of 4-byte numValues data
-//
-// Store data format:
+
+// Data format:
 //    uint32 magic
 //    uint32 version
 //    uint32 numAddChunks
 //    uint32 numSubChunks
 //    uint32 numAddPrefixes
 //    uint32 numSubPrefixes
 //    uint32 numAddCompletes
 //    uint32 numSubCompletes
-//    0...numAddChunks               uint32 addChunk
-//    0...numSubChunks               uint32 subChunk
-//    byte sliced (numAddPrefixes)   uint32 add chunk of AddPrefixes
-//    byte sliced (numSubPrefixes)   uint32 sub chunk of SubPrefixes
-//    byte sliced (numSubPrefixes)   uint32 add chunk of SubPrefixes
-//    byte sliced (numSubPrefixes)   uint32 SubPrefixes
-//    0...numAddCompletes           32-byte Completions
-//    0...numSubCompletes           32-byte Completions
+//    0...numAddChunks          uint32 addChunk
+//    0...numSubChunks          uint32 subChunk
+//    uint32 compressed-size
+//        compressed-size bytes zlib inflate data
+//        0...numAddPrefixes    uint32 addChunk
+//    uint32 compressed-size
+//        compressed-size bytes zlib inflate data
+//        0...numSubPrefixes    uint32 addChunk
+//    uint32 compressed-size
+//        compressed-size bytes zlib inflate data
+//        0...numSubPrefixes    uint32 subChunk
+//    0...numSubPrefixes        uint32 subPrefix
+//    0...numAddCompletes       32-byte Completions
+//    0...numSubCompletes       32-byte Completions
 //    16-byte MD5 of all preceding data
 
 // NSPR_LOG_MODULES=UrlClassifierDbService:5
 extern PRLogModuleInfo *gUrlClassifierDbServiceLog;
 #if defined(PR_LOGGING)
 #define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args)
 #define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4)
 #else
 #define LOG(args)
 #define LOG_ENABLED() (PR_FALSE)
 #endif
 
 namespace mozilla {
 namespace safebrowsing {
 
 const uint32 STORE_MAGIC = 0x1231af3b;
-const uint32 CURRENT_VERSION = 2;
+const uint32 CURRENT_VERSION = 1;
 
 void
 TableUpdate::NewAddPrefix(PRUint32 aAddChunk, const Prefix& aHash)
 {
   AddPrefix *add = mAddPrefixes.AppendElement();
   add->addChunk = aAddChunk;
   add->prefix = aHash;
 }
@@ -654,170 +637,106 @@ nsresult InflateReadTArray(nsIInputStrea
   }
   LOG(("InflateReadTArray: %d in %d out", insize, outsize));
 
   NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch");
 
   return NS_OK;
 }
 
-static nsresult
-ByteSliceWrite(nsIOutputStream* aOut, nsTArray<PRUint32>& aData)
-{
-  nsTArray<PRUint8> slice1;
-  nsTArray<PRUint8> slice2;
-  nsTArray<PRUint8> slice3;
-  nsTArray<PRUint8> slice4;
-  PRUint32 count = aData.Length();
-
-  slice1.SetCapacity(count);
-  slice2.SetCapacity(count);
-  slice3.SetCapacity(count);
-  slice4.SetCapacity(count);
-
-  for (PRUint32 i = 0; i < count; i++) {
-    slice1.AppendElement( aData[i] >> 24);
-    slice2.AppendElement((aData[i] >> 16) & 0xFF);
-    slice3.AppendElement((aData[i] >>  8) & 0xFF);
-    slice4.AppendElement( aData[i]        & 0xFF);
-  }
-
-  nsresult rv = DeflateWriteTArray(aOut, slice1);
-  NS_ENSURE_SUCCESS(rv, rv);
-  rv = DeflateWriteTArray(aOut, slice2);
-  NS_ENSURE_SUCCESS(rv, rv);
-  rv = DeflateWriteTArray(aOut, slice3);
-  NS_ENSURE_SUCCESS(rv, rv);
-  // The LSB slice is generally uncompressible, don't bother
-  // compressing it.
-  rv = WriteTArray(aOut, slice4);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  return NS_OK;
-}
-
-static nsresult
-ByteSliceRead(nsIInputStream* aInStream, nsTArray<PRUint32>* aData, PRUint32 count)
-{
-  nsTArray<PRUint8> slice1;
-  nsTArray<PRUint8> slice2;
-  nsTArray<PRUint8> slice3;
-  nsTArray<PRUint8> slice4;
-
-  nsresult rv = InflateReadTArray(aInStream, &slice1, count);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = InflateReadTArray(aInStream, &slice2, count);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = InflateReadTArray(aInStream, &slice3, count);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = ReadTArray(aInStream, &slice4, count);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  aData->SetCapacity(count);
-
-  for (uint32 i = 0; i < count; i++) {
-    aData->AppendElement((slice1[i] << 24) | (slice2[i] << 16)
-                         | (slice3[i] << 8) | (slice4[i]));
-  }
-
-  return NS_OK;
-}
-
 nsresult
 HashStore::ReadAddPrefixes()
 {
-  nsTArray<PRUint32> chunks;
+  nsTArray<uint32> chunks;
   PRUint32 count = mHeader.numAddPrefixes;
 
-  nsresult rv = ByteSliceRead(mInputStream, &chunks, count);
+  nsresult rv = InflateReadTArray(mInputStream, &chunks, count);
   NS_ENSURE_SUCCESS(rv, rv);
 
   mAddPrefixes.SetCapacity(count);
-  for (PRUint32 i = 0; i < count; i++) {
+  for (uint32 i = 0; i < count; i++) {
     AddPrefix *add = mAddPrefixes.AppendElement();
     add->prefix.FromUint32(0);
     add->addChunk = chunks[i];
   }
 
   return NS_OK;
 }
 
 nsresult
 HashStore::ReadSubPrefixes()
 {
   nsTArray<PRUint32> addchunks;
   nsTArray<PRUint32> subchunks;
-  nsTArray<PRUint32> prefixes;
+  nsTArray<Prefix> prefixes;
   PRUint32 count = mHeader.numSubPrefixes;
 
-  nsresult rv = ByteSliceRead(mInputStream, &addchunks, count);
+  nsresult rv = InflateReadTArray(mInputStream, &addchunks, count);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = ByteSliceRead(mInputStream, &subchunks, count);
+  rv = InflateReadTArray(mInputStream, &subchunks, count);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = ByteSliceRead(mInputStream, &prefixes, count);
+  rv = ReadTArray(mInputStream, &prefixes, count);
   NS_ENSURE_SUCCESS(rv, rv);
 
   mSubPrefixes.SetCapacity(count);
   for (uint32 i = 0; i < count; i++) {
     SubPrefix *sub = mSubPrefixes.AppendElement();
     sub->addChunk = addchunks[i];
-    sub->prefix.FromUint32(prefixes[i]);
+    sub->prefix = prefixes[i];
     sub->subChunk = subchunks[i];
   }
 
   return NS_OK;
 }
 
 // Split up PrefixArray back into the constituents
 nsresult
 HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
 {
-  nsTArray<PRUint32> chunks;
+  nsTArray<uint32> chunks;
   PRUint32 count = mAddPrefixes.Length();
   chunks.SetCapacity(count);
 
   for (uint32 i = 0; i < count; i++) {
     chunks.AppendElement(mAddPrefixes[i].Chunk());
   }
 
-  nsresult rv = ByteSliceWrite(aOut, chunks);
+  nsresult rv = DeflateWriteTArray(aOut, chunks);
   NS_ENSURE_SUCCESS(rv, rv);
 
   return NS_OK;
 }
 
 nsresult
 HashStore::WriteSubPrefixes(nsIOutputStream* aOut)
 {
-  nsTArray<PRUint32> addchunks;
-  nsTArray<PRUint32> subchunks;
-  nsTArray<PRUint32> prefixes;
+  nsTArray<uint32> addchunks;
+  nsTArray<uint32> subchunks;
+  nsTArray<Prefix> prefixes;
   PRUint32 count = mSubPrefixes.Length();
   addchunks.SetCapacity(count);
   subchunks.SetCapacity(count);
   prefixes.SetCapacity(count);
 
   for (uint32 i = 0; i < count; i++) {
     addchunks.AppendElement(mSubPrefixes[i].AddChunk());
-    prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
+    prefixes.AppendElement(mSubPrefixes[i].PrefixHash());
     subchunks.AppendElement(mSubPrefixes[i].Chunk());
   }
 
-  nsresult rv = ByteSliceWrite(aOut, addchunks);
+  nsresult rv = DeflateWriteTArray(aOut, addchunks);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = ByteSliceWrite(aOut, subchunks);
+  rv = DeflateWriteTArray(aOut, subchunks);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = ByteSliceWrite(aOut, prefixes);
+  // chunk-ordered prefixes are not compressible
+  rv = WriteTArray(aOut, prefixes);
   NS_ENSURE_SUCCESS(rv, rv);
 
   return NS_OK;
 }
 
 nsresult
 HashStore::WriteFile()
 {