toolkit/components/url-classifier/HashStore.cpp
author Kartikaya Gupta <kgupta@mozilla.com>
Wed, 26 Jul 2017 16:03:57 -0400
changeset 371237 dd104d6d9adf98ce733028dbb698b8f658a69cc8
parent 371148 84e29e660f0cfd4a11d8081cea2d07f93f1ede00
child 386675 4666cde209096d4aca8c6c0f938b5142c7d0ca49
permissions -rw-r--r--
Bug 1384233 - Remove SizePrintfMacros.h. r=froydnj We have a minimum requirement of VS 2015 for Windows builds, which supports the z length modifier for format specifiers. So we don't need SizePrintfMacros.h any more, and can just use %zu and friends directly everywhere. MozReview-Commit-ID: 6s78RvPFMzv

//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
// Originally based on Chrome sources:
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "HashStore.h"
#include "nsICryptoHash.h"
#include "nsISeekableStream.h"
#include "nsIStreamConverterService.h"
#include "nsNetUtil.h"
#include "nsCheckSummedOutputStream.h"
#include "prio.h"
#include "mozilla/Logging.h"
#include "mozilla/IntegerPrintfMacros.h"
#include "zlib.h"
#include "Classifier.h"
#include "nsUrlClassifierDBService.h"
#include "mozilla/Telemetry.h"

// Main store for SafeBrowsing protocol data. We store
// known add/sub chunks, prefixes and completions in memory
// during an update, and serialize to disk.
// We do not store the add prefixes, those are retrieved by
// decompressing the PrefixSet cache whenever we need to apply
// an update.
//
// byte slicing: Many of the 4-byte values stored here are strongly
// correlated in the upper bytes, and uncorrelated in the lower
// bytes. Because zlib/DEFLATE requires match lengths of at least
// 3 to achieve good compression, and we don't get those if only
// the upper 16-bits are correlated, it is worthwhile to slice 32-bit
// values into 4 1-byte slices and compress the slices individually.
// The slices corresponding to MSBs will compress very well, and the
// slice corresponding to LSB almost nothing. Because of this, we
// only apply DEFLATE to the 3 most significant bytes, and store the
// LSB uncompressed.
//
// byte sliced (numValues) data format:
//    uint32_t compressed-size
//    compressed-size bytes    zlib DEFLATE data
//        0...numValues        byte MSB of 4-byte numValues data
//    uint32_t compressed-size
//    compressed-size bytes    zlib DEFLATE data
//        0...numValues        byte 2nd byte of 4-byte numValues data
//    uint32_t compressed-size
//    compressed-size bytes    zlib DEFLATE data
//        0...numValues        byte 3rd byte of 4-byte numValues data
//    0...numValues            byte LSB of 4-byte numValues data
//
// Store data format:
//    uint32_t magic
//    uint32_t version
//    uint32_t numAddChunks
//    uint32_t numSubChunks
//    uint32_t numAddPrefixes
//    uint32_t numSubPrefixes
//    uint32_t numAddCompletes
//    uint32_t numSubCompletes
//    0...numAddChunks               uint32_t addChunk
//    0...numSubChunks               uint32_t subChunk
//    byte sliced (numAddPrefixes)   uint32_t add chunk of AddPrefixes
//    byte sliced (numSubPrefixes)   uint32_t add chunk of SubPrefixes
//    byte sliced (numSubPrefixes)   uint32_t sub chunk of SubPrefixes
//    byte sliced (numSubPrefixes)   uint32_t SubPrefixes
//    0...numAddCompletes            32-byte Completions + uint32_t addChunk
//    0...numSubCompletes            32-byte Completions + uint32_t addChunk
//                                                       + uint32_t subChunk
//    16-byte MD5 of all preceding data

// Name of the SafeBrowsing store
#define STORE_SUFFIX ".sbstore"

// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)

namespace mozilla {
namespace safebrowsing {

const uint32_t STORE_MAGIC = 0x1231af3b;
const uint32_t CURRENT_VERSION = 3;

nsresult
TableUpdateV2::NewAddPrefix(uint32_t aAddChunk, const Prefix& aHash)
{
  AddPrefix *add = mAddPrefixes.AppendElement(fallible);
  if (!add) return NS_ERROR_OUT_OF_MEMORY;
  add->addChunk = aAddChunk;
  add->prefix = aHash;
  return NS_OK;
}

nsresult
TableUpdateV2::NewSubPrefix(uint32_t aAddChunk, const Prefix& aHash, uint32_t aSubChunk)
{
  SubPrefix *sub = mSubPrefixes.AppendElement(fallible);
  if (!sub) return NS_ERROR_OUT_OF_MEMORY;
  sub->addChunk = aAddChunk;
  sub->prefix = aHash;
  sub->subChunk = aSubChunk;
  return NS_OK;
}

nsresult
TableUpdateV2::NewAddComplete(uint32_t aAddChunk, const Completion& aHash)
{
  AddComplete *add = mAddCompletes.AppendElement(fallible);
  if (!add) return NS_ERROR_OUT_OF_MEMORY;
  add->addChunk = aAddChunk;
  add->complete = aHash;
  return NS_OK;
}

nsresult
TableUpdateV2::NewSubComplete(uint32_t aAddChunk, const Completion& aHash, uint32_t aSubChunk)
{
  SubComplete *sub = mSubCompletes.AppendElement(fallible);
  if (!sub) return NS_ERROR_OUT_OF_MEMORY;
  sub->addChunk = aAddChunk;
  sub->complete = aHash;
  sub->subChunk = aSubChunk;
  return NS_OK;
}

nsresult
TableUpdateV2::NewMissPrefix(const Prefix& aPrefix)
{
  Prefix *prefix = mMissPrefixes.AppendElement(aPrefix, fallible);
  if (!prefix) return NS_ERROR_OUT_OF_MEMORY;
  return NS_OK;
}

void
TableUpdateV4::NewPrefixes(int32_t aSize, std::string& aPrefixes)
{
  NS_ENSURE_TRUE_VOID(aSize >= 4 && aSize <= COMPLETE_SIZE);
  NS_ENSURE_TRUE_VOID(aPrefixes.size() % aSize == 0);
  NS_ENSURE_TRUE_VOID(!mPrefixesMap.Get(aSize));

  int numOfPrefixes = aPrefixes.size() / aSize;

  if (aSize > 4) {
    // TODO Bug 1364043 we may have a better API to record multiple samples into
    // histograms with one call
#ifdef NIGHTLY_BUILD
    for (int i = 0; i < std::min(20, numOfPrefixes); i++) {
      Telemetry::Accumulate(Telemetry::URLCLASSIFIER_VLPS_LONG_PREFIXES, aSize);
    }
#endif
  } else if (LOG_ENABLED()) {
    uint32_t* p = (uint32_t*)aPrefixes.c_str();

    // Dump the first/last 10 fixed-length prefixes for debugging.
    LOG(("* The first 10 (maximum) fixed-length prefixes: "));
    for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
      uint8_t* c = (uint8_t*)&p[i];
      LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
    }

    LOG(("* The last 10 (maximum) fixed-length prefixes: "));
    for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
      uint8_t* c = (uint8_t*)&p[i];
      LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
    }

    LOG(("---- %zu fixed-length prefixes in total.", aPrefixes.size() / aSize));
  }

  PrefixStdString* prefix = new PrefixStdString(aPrefixes);
  mPrefixesMap.Put(aSize, prefix);
}

nsresult
TableUpdateV4::NewRemovalIndices(const uint32_t* aIndices, size_t aNumOfIndices)
{
  MOZ_ASSERT(mRemovalIndiceArray.IsEmpty(), "mRemovalIndiceArray must be empty");

  if (!mRemovalIndiceArray.SetCapacity(aNumOfIndices, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (size_t i = 0; i < aNumOfIndices; i++) {
    mRemovalIndiceArray.AppendElement(aIndices[i]);
  }
  return NS_OK;
}

void
TableUpdateV4::NewChecksum(const std::string& aChecksum)
{
  mChecksum.Assign(aChecksum.data(), aChecksum.size());
}

nsresult
TableUpdateV4::NewFullHashResponse(const Prefix& aPrefix,
                                   CachedFullHashResponse& aResponse)
{
  CachedFullHashResponse* response =
    mFullHashResponseMap.LookupOrAdd(aPrefix.ToUint32());
  if (!response) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  *response = aResponse;
  return NS_OK;
}

HashStore::HashStore(const nsACString& aTableName,
                     const nsACString& aProvider,
                     nsIFile* aRootStoreDir)
  : mTableName(aTableName)
  , mInUpdate(false)
  , mFileSize(0)
{
  nsresult rv = Classifier::GetPrivateStoreDirectory(aRootStoreDir,
                                                     aTableName,
                                                     aProvider,
                                                     getter_AddRefs(mStoreDirectory));
  if (NS_FAILED(rv)) {
    LOG(("Failed to get private store directory for %s", mTableName.get()));
    mStoreDirectory = aRootStoreDir;
  }
}

HashStore::~HashStore()
= default;

nsresult
HashStore::Reset()
{
  LOG(("HashStore resetting"));

  nsCOMPtr<nsIFile> storeFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = storeFile->Remove(false);
  NS_ENSURE_SUCCESS(rv, rv);

  mFileSize = 0;

  return NS_OK;
}

nsresult
HashStore::CheckChecksum(uint32_t aFileSize)
{
  if (!mInputStream) {
    return NS_OK;
  }

  // Check for file corruption by
  // comparing the stored checksum to actual checksum of data
  nsAutoCString hash;
  nsAutoCString compareHash;
  char *data;
  uint32_t read;

  nsresult rv = CalculateChecksum(hash, aFileSize, true);
  NS_ENSURE_SUCCESS(rv, rv);

  compareHash.GetMutableData(&data, hash.Length());

  if (hash.Length() > aFileSize) {
    NS_WARNING("SafeBrowing file not long enough to store its hash");
    return NS_ERROR_FAILURE;
  }
  nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream);
  rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, aFileSize - hash.Length());
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mInputStream->Read(data, hash.Length(), &read);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ASSERTION(read == hash.Length(), "Could not read hash bytes");

  if (!hash.Equals(compareHash)) {
    NS_WARNING("Safebrowing file failed checksum.");
    return NS_ERROR_FAILURE;
  }

  return NS_OK;
}

nsresult
HashStore::Open()
{
  nsCOMPtr<nsIFile> storeFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIInputStream> origStream;
  rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile,
                                  PR_RDONLY | nsIFile::OS_READAHEAD);

  if (rv == NS_ERROR_FILE_NOT_FOUND) {
    UpdateHeader();
    return NS_OK;
  }
  NS_ENSURE_SUCCESS(rv, rv);

  int64_t fileSize;
  rv = storeFile->GetFileSize(&fileSize);
  NS_ENSURE_SUCCESS(rv, rv);

  if (fileSize < 0 || fileSize > UINT32_MAX) {
    return NS_ERROR_FAILURE;
  }

  mFileSize = static_cast<uint32_t>(fileSize);
  mInputStream = NS_BufferInputStream(origStream, mFileSize);

  rv = ReadHeader();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = SanityCheck();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::ReadHeader()
{
  if (!mInputStream) {
    UpdateHeader();
    return NS_OK;
  }

  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
  NS_ENSURE_SUCCESS(rv, rv);

  void *buffer = &mHeader;
  rv = NS_ReadInputStreamToBuffer(mInputStream,
                                  &buffer,
                                  sizeof(Header));
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::SanityCheck()
{
  if (mHeader.magic != STORE_MAGIC || mHeader.version != CURRENT_VERSION) {
    NS_WARNING("Unexpected header data in the store.");
    return NS_ERROR_FAILURE;
  }

  return NS_OK;
}

nsresult
HashStore::CalculateChecksum(nsAutoCString& aChecksum,
                             uint32_t aFileSize,
                             bool aChecksumPresent)
{
  aChecksum.Truncate();

  // Reset mInputStream to start
  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);

  nsCOMPtr<nsICryptoHash> hash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  // Size of MD5 hash in bytes
  const uint32_t CHECKSUM_SIZE = 16;

  // MD5 is not a secure hash function, but since this is a filesystem integrity
  // check, this usage is ok.
  rv = hash->Init(nsICryptoHash::MD5);
  NS_ENSURE_SUCCESS(rv, rv);

  if (!aChecksumPresent) {
    // Hash entire file
    rv = hash->UpdateFromStream(mInputStream, UINT32_MAX);
  } else {
    // Hash everything but last checksum bytes
    if (aFileSize < CHECKSUM_SIZE) {
      NS_WARNING("SafeBrowsing file isn't long enough to store its checksum");
      return NS_ERROR_FAILURE;
    }
    rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE);
  }
  NS_ENSURE_SUCCESS(rv, rv);

  rv = hash->Finish(false, aChecksum);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

void
HashStore::UpdateHeader()
{
  mHeader.magic = STORE_MAGIC;
  mHeader.version = CURRENT_VERSION;

  mHeader.numAddChunks = mAddChunks.Length();
  mHeader.numSubChunks = mSubChunks.Length();
  mHeader.numAddPrefixes = mAddPrefixes.Length();
  mHeader.numSubPrefixes = mSubPrefixes.Length();
  mHeader.numAddCompletes = mAddCompletes.Length();
  mHeader.numSubCompletes = mSubCompletes.Length();
}

nsresult
HashStore::ReadChunkNumbers()
{
  if (!mInputStream || AlreadyReadChunkNumbers()) {
    return NS_OK;
  }

  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET,
                               sizeof(Header));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks, "Read the right amount of add chunks.");

  rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks, "Read the right amount of sub chunks.");

  return NS_OK;
}

nsresult
HashStore::ReadHashes()
{
  if (!mInputStream) {
    // BeginUpdate has been called but Open hasn't initialized mInputStream,
    // because the existing HashStore is empty.
    return NS_OK;
  }

  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);

  uint32_t offset = sizeof(Header);
  offset += (mHeader.numAddChunks + mHeader.numSubChunks) * sizeof(uint32_t);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadAddPrefixes();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadSubPrefixes();
  NS_ENSURE_SUCCESS(rv, rv);

  // If completions was read before, then we are done here.
  if (AlreadyReadCompletions()) {
    return NS_OK;
  }

  rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}


nsresult
HashStore::ReadCompletions()
{
  if (!mInputStream || AlreadyReadCompletions()) {
    return NS_OK;
  }

  nsCOMPtr<nsIFile> storeFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  uint32_t offset = mFileSize -
                    sizeof(struct AddComplete) * mHeader.numAddCompletes -
                    sizeof(struct SubComplete) * mHeader.numSubCompletes -
                    nsCheckSummedOutputStream::CHECKSUM_SIZE;

  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);

  rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::PrepareForUpdate()
{
  nsresult rv = CheckChecksum(mFileSize);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadChunkNumbers();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadHashes();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::BeginUpdate()
{
  // Check wether the file is corrupted and read the rest of the store
  // in memory.
  nsresult rv = PrepareForUpdate();
  NS_ENSURE_SUCCESS(rv, rv);

  // Close input stream, won't be needed any more and
  // we will rewrite ourselves.
  if (mInputStream) {
    rv = mInputStream->Close();
    NS_ENSURE_SUCCESS(rv, rv);
  }

  mInUpdate = true;

  return NS_OK;
}

template<class T>
static nsresult
Merge(ChunkSet* aStoreChunks,
      FallibleTArray<T>* aStorePrefixes,
      ChunkSet& aUpdateChunks,
      FallibleTArray<T>& aUpdatePrefixes,
      bool aAllowMerging = false)
{
  EntrySort(aUpdatePrefixes);

  auto storeIter = aStorePrefixes->begin();
  auto storeEnd = aStorePrefixes->end();

  // use a separate array so we can keep the iterators valid
  // if the nsTArray grows
  nsTArray<T> adds;

  for (const auto& updatePrefix : aUpdatePrefixes) {
    // skip this chunk if we already have it, unless we're
    // merging completions, in which case we'll always already
    // have the chunk from the original prefix
    if (aStoreChunks->Has(updatePrefix.Chunk()))
      if (!aAllowMerging)
        continue;
    // XXX: binary search for insertion point might be faster in common
    // case?
    while (storeIter < storeEnd && (storeIter->Compare(updatePrefix) < 0)) {
      // skip forward to matching element (or not...)
      storeIter++;
    }
    // no match, add
    if (storeIter == storeEnd
        || storeIter->Compare(updatePrefix) != 0) {
      if (!adds.AppendElement(updatePrefix, fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
    }
  }

  // Chunks can be empty, but we should still report we have them
  // to make the chunkranges continuous.
  aStoreChunks->Merge(aUpdateChunks);

  if (!aStorePrefixes->AppendElements(adds, fallible))
    return NS_ERROR_OUT_OF_MEMORY;

  EntrySort(*aStorePrefixes);

  return NS_OK;
}

nsresult
HashStore::ApplyUpdate(TableUpdate &aUpdate)
{
  auto updateV2 = TableUpdate::Cast<TableUpdateV2>(&aUpdate);
  NS_ENSURE_TRUE(updateV2, NS_ERROR_FAILURE);

  TableUpdateV2& update = *updateV2;

  nsresult rv = mAddExpirations.Merge(update.AddExpirations());
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mSubExpirations.Merge(update.SubExpirations());
  NS_ENSURE_SUCCESS(rv, rv);

  rv = Expire();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = Merge(&mAddChunks, &mAddPrefixes,
             update.AddChunks(), update.AddPrefixes());
  NS_ENSURE_SUCCESS(rv, rv);

  rv = Merge(&mAddChunks, &mAddCompletes,
             update.AddChunks(), update.AddCompletes(), true);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = Merge(&mSubChunks, &mSubPrefixes,
             update.SubChunks(), update.SubPrefixes());
  NS_ENSURE_SUCCESS(rv, rv);

  rv = Merge(&mSubChunks, &mSubCompletes,
             update.SubChunks(), update.SubCompletes(), true);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::Rebuild()
{
  NS_ASSERTION(mInUpdate, "Must be in update to rebuild.");

  nsresult rv = ProcessSubs();
  NS_ENSURE_SUCCESS(rv, rv);

  UpdateHeader();

  return NS_OK;
}

void
HashStore::ClearCompletes()
{
  NS_ASSERTION(mInUpdate, "Must be in update to clear completes.");

  mAddCompletes.Clear();
  mSubCompletes.Clear();

  UpdateHeader();
}

template<class T>
static void
ExpireEntries(FallibleTArray<T>* aEntries, ChunkSet& aExpirations)
{
  auto addIter = aEntries->begin();

  for (const auto& entry : *aEntries) {
    if (!aExpirations.Has(entry.Chunk())) {
      *addIter = entry;
      addIter++;
    }
  }

  aEntries->TruncateLength(addIter - aEntries->begin());
}

nsresult
HashStore::Expire()
{
  ExpireEntries(&mAddPrefixes, mAddExpirations);
  ExpireEntries(&mAddCompletes, mAddExpirations);
  ExpireEntries(&mSubPrefixes, mSubExpirations);
  ExpireEntries(&mSubCompletes, mSubExpirations);

  mAddChunks.Remove(mAddExpirations);
  mSubChunks.Remove(mSubExpirations);

  mAddExpirations.Clear();
  mSubExpirations.Clear();

  return NS_OK;
}

template<class T>
nsresult DeflateWriteTArray(nsIOutputStream* aStream, nsTArray<T>& aIn)
{
  uLongf insize = aIn.Length() * sizeof(T);
  uLongf outsize = compressBound(insize);
  FallibleTArray<char> outBuff;
  if (!outBuff.SetLength(outsize, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  int zerr = compress(reinterpret_cast<Bytef*>(outBuff.Elements()),
                      &outsize,
                      reinterpret_cast<const Bytef*>(aIn.Elements()),
                      insize);
  if (zerr != Z_OK) {
    return NS_ERROR_FAILURE;
  }
  LOG(("DeflateWriteTArray: %lu in %lu out", insize, outsize));

  outBuff.TruncateLength(outsize);

  // Length of compressed data stream
  uint32_t dataLen = outBuff.Length();
  uint32_t written;
  nsresult rv = aStream->Write(reinterpret_cast<char*>(&dataLen), sizeof(dataLen), &written);
  NS_ENSURE_SUCCESS(rv, rv);

  NS_ASSERTION(written == sizeof(dataLen), "Error writing deflate length");

  // Store to stream
  rv = WriteTArray(aStream, outBuff);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

template<class T>
nsresult InflateReadTArray(nsIInputStream* aStream, FallibleTArray<T>* aOut,
                           uint32_t aExpectedSize)
{

  uint32_t inLen;
  uint32_t read;
  nsresult rv = aStream->Read(reinterpret_cast<char*>(&inLen), sizeof(inLen), &read);
  NS_ENSURE_SUCCESS(rv, rv);

  NS_ASSERTION(read == sizeof(inLen), "Error reading inflate length");

  FallibleTArray<char> inBuff;
  if (!inBuff.SetLength(inLen, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  rv = ReadTArray(aStream, &inBuff, inLen);
  NS_ENSURE_SUCCESS(rv, rv);

  uLongf insize = inLen;
  uLongf outsize = aExpectedSize * sizeof(T);
  if (!aOut->SetLength(aExpectedSize, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  int zerr = uncompress(reinterpret_cast<Bytef*>(aOut->Elements()),
                        &outsize,
                        reinterpret_cast<const Bytef*>(inBuff.Elements()),
                        insize);
  if (zerr != Z_OK) {
    return NS_ERROR_FAILURE;
  }
  LOG(("InflateReadTArray: %lu in %lu out", insize, outsize));

  NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch");

  return NS_OK;
}

static nsresult
ByteSliceWrite(nsIOutputStream* aOut, nsTArray<uint32_t>& aData)
{
  nsTArray<uint8_t> slice;
  uint32_t count = aData.Length();

  // Only process one slice at a time to avoid using too much memory.
  if (!slice.SetLength(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // Process slice 1.
  for (uint32_t i = 0; i < count; i++) {
    slice[i] = (aData[i] >> 24);
  }

  nsresult rv = DeflateWriteTArray(aOut, slice);
  NS_ENSURE_SUCCESS(rv, rv);

  // Process slice 2.
  for (uint32_t i = 0; i < count; i++) {
    slice[i] = ((aData[i] >> 16) & 0xFF);
  }

  rv = DeflateWriteTArray(aOut, slice);
  NS_ENSURE_SUCCESS(rv, rv);

  // Process slice 3.
  for (uint32_t i = 0; i < count; i++) {
    slice[i] = ((aData[i] >> 8) & 0xFF);
  }

  rv = DeflateWriteTArray(aOut, slice);
  NS_ENSURE_SUCCESS(rv, rv);

  // Process slice 4.
  for (uint32_t i = 0; i < count; i++) {
    slice[i] = (aData[i] & 0xFF);
  }

  // The LSB slice is generally uncompressible, don't bother
  // compressing it.
  rv = WriteTArray(aOut, slice);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

static nsresult
ByteSliceRead(nsIInputStream* aInStream, FallibleTArray<uint32_t>* aData, uint32_t count)
{
  FallibleTArray<uint8_t> slice1;
  FallibleTArray<uint8_t> slice2;
  FallibleTArray<uint8_t> slice3;
  FallibleTArray<uint8_t> slice4;

  nsresult rv = InflateReadTArray(aInStream, &slice1, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = InflateReadTArray(aInStream, &slice2, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = InflateReadTArray(aInStream, &slice3, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadTArray(aInStream, &slice4, count);
  NS_ENSURE_SUCCESS(rv, rv);

  if (!aData->SetCapacity(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (uint32_t i = 0; i < count; i++) {
    aData->AppendElement((slice1[i] << 24) |
                           (slice2[i] << 16) |
                           (slice3[i] << 8) |
                           (slice4[i]),
                         fallible);
  }

  return NS_OK;
}

nsresult
HashStore::ReadAddPrefixes()
{
  FallibleTArray<uint32_t> chunks;
  uint32_t count = mHeader.numAddPrefixes;

  nsresult rv = ByteSliceRead(mInputStream, &chunks, count);
  NS_ENSURE_SUCCESS(rv, rv);

  if (!mAddPrefixes.SetCapacity(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  for (uint32_t i = 0; i < count; i++) {
    AddPrefix *add = mAddPrefixes.AppendElement(fallible);
    add->prefix.FromUint32(0);
    add->addChunk = chunks[i];
  }

  return NS_OK;
}

nsresult
HashStore::ReadSubPrefixes()
{
  FallibleTArray<uint32_t> addchunks;
  FallibleTArray<uint32_t> subchunks;
  FallibleTArray<uint32_t> prefixes;
  uint32_t count = mHeader.numSubPrefixes;

  nsresult rv = ByteSliceRead(mInputStream, &addchunks, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ByteSliceRead(mInputStream, &subchunks, count);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ByteSliceRead(mInputStream, &prefixes, count);
  NS_ENSURE_SUCCESS(rv, rv);

  if (!mSubPrefixes.SetCapacity(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  for (uint32_t i = 0; i < count; i++) {
    SubPrefix *sub = mSubPrefixes.AppendElement(fallible);
    sub->addChunk = addchunks[i];
    sub->prefix.FromUint32(prefixes[i]);
    sub->subChunk = subchunks[i];
  }

  return NS_OK;
}

// Split up PrefixArray back into the constituents
nsresult
HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
{
  nsTArray<uint32_t> chunks;
  uint32_t count = mAddPrefixes.Length();
  if (!chunks.SetCapacity(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (uint32_t i = 0; i < count; i++) {
    chunks.AppendElement(mAddPrefixes[i].Chunk());
  }

  nsresult rv = ByteSliceWrite(aOut, chunks);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::WriteSubPrefixes(nsIOutputStream* aOut)
{
  nsTArray<uint32_t> addchunks;
  nsTArray<uint32_t> subchunks;
  nsTArray<uint32_t> prefixes;
  uint32_t count = mSubPrefixes.Length();
  if (!addchunks.SetCapacity(count, fallible) ||
      !subchunks.SetCapacity(count, fallible) ||
      !prefixes.SetCapacity(count, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (uint32_t i = 0; i < count; i++) {
    addchunks.AppendElement(mSubPrefixes[i].AddChunk());
    prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
    subchunks.AppendElement(mSubPrefixes[i].Chunk());
  }

  nsresult rv = ByteSliceWrite(aOut, addchunks);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ByteSliceWrite(aOut, subchunks);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ByteSliceWrite(aOut, prefixes);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
HashStore::WriteFile()
{
  NS_ASSERTION(mInUpdate, "Must be in update to write database.");
  if (nsUrlClassifierDBService::ShutdownHasStarted()) {
    return NS_ERROR_ABORT;
  }

  nsCOMPtr<nsIFile> storeFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
  NS_ENSURE_SUCCESS(rv, rv);
  rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIOutputStream> out;
  rv = NS_NewCheckSummedOutputStream(getter_AddRefs(out), storeFile);
  NS_ENSURE_SUCCESS(rv, rv);

  uint32_t written;
  rv = out->Write(reinterpret_cast<char*>(&mHeader), sizeof(mHeader), &written);
  NS_ENSURE_SUCCESS(rv, rv);

  // Write chunk numbers.
  rv = mAddChunks.Write(out);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mSubChunks.Write(out);
  NS_ENSURE_SUCCESS(rv, rv);

  // Write hashes.
  rv = WriteAddPrefixes(out);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = WriteSubPrefixes(out);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = WriteTArray(out, mAddCompletes);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = WriteTArray(out, mSubCompletes);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = safeOut->Finish();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

template <class T>
static void
Erase(FallibleTArray<T>* array,
      typename nsTArray<T>::iterator& iterStart,
      typename nsTArray<T>::iterator& iterEnd)
{
  uint32_t start = iterStart - array->begin();
  uint32_t count = iterEnd - iterStart;

  if (count > 0) {
    array->RemoveElementsAt(start, count);
  }
}

// Find items matching between |subs| and |adds|, and remove them,
// recording the item from |adds| in |adds_removed|.  To minimize
// copies, the inputs are processing in parallel, so |subs| and |adds|
// should be compatibly ordered (either by SBAddPrefixLess or
// SBAddPrefixHashLess).
//
// |predAS| provides add < sub, |predSA| provides sub < add, for the
// tightest compare appropriate (see calls in SBProcessSubs).
template<class TSub, class TAdd>
static void
KnockoutSubs(FallibleTArray<TSub>* aSubs, FallibleTArray<TAdd>* aAdds)
{
  // Keep a pair of output iterators for writing kept items.  Due to
  // deletions, these may lag the main iterators.  Using erase() on
  // individual items would result in O(N^2) copies.  Using a list
  // would work around that, at double or triple the memory cost.
  auto addOut = aAdds->begin();
  auto addIter = aAdds->begin();

  auto subOut = aSubs->begin();
  auto subIter = aSubs->begin();

  auto addEnd = aAdds->end();
  auto subEnd = aSubs->end();

  while (addIter != addEnd && subIter != subEnd) {
    // additer compare, so it compares on add chunk
    int32_t cmp = addIter->Compare(*subIter);
    if (cmp > 0) {
      // If |*sub_iter| < |*add_iter|, retain the sub.
      *subOut = *subIter;
      ++subOut;
      ++subIter;
    } else if (cmp < 0) {
      // If |*add_iter| < |*sub_iter|, retain the add.
      *addOut = *addIter;
      ++addOut;
      ++addIter;
    } else {
      // Drop equal items
      ++addIter;
      ++subIter;
    }
  }

  Erase(aAdds, addOut, addIter);
  Erase(aSubs, subOut, subIter);
}

// Remove items in |removes| from |fullHashes|.  |fullHashes| and
// |removes| should be ordered by SBAddPrefix component.
template <class T>
static void
RemoveMatchingPrefixes(const SubPrefixArray& aSubs, FallibleTArray<T>* aFullHashes)
{
  // Where to store kept items.
  auto out = aFullHashes->begin();
  auto hashIter = aFullHashes->begin();
  auto hashEnd = aFullHashes->end();

  auto removeIter = aSubs.begin();
  auto removeEnd = aSubs.end();

  while (hashIter != hashEnd && removeIter != removeEnd) {
    int32_t cmp = removeIter->CompareAlt(*hashIter);
    if (cmp > 0) {
      // Keep items less than |*removeIter|.
      *out = *hashIter;
      ++out;
      ++hashIter;
    } else if (cmp < 0) {
      // No hit for |*removeIter|, bump it forward.
      ++removeIter;
    } else {
      // Drop equal items, there may be multiple hits.
      do {
        ++hashIter;
      } while (hashIter != hashEnd &&
               !(removeIter->CompareAlt(*hashIter) < 0));
      ++removeIter;
    }
  }
  Erase(aFullHashes, out, hashIter);
}

static void
RemoveDeadSubPrefixes(SubPrefixArray& aSubs, ChunkSet& aAddChunks)
{
  auto subIter = aSubs.begin();

  for (const auto& sub : aSubs) {
    bool hasChunk = aAddChunks.Has(sub.AddChunk());
    // Keep the subprefix if the chunk it refers to is one
    // we haven't seen it yet.
    if (!hasChunk) {
      *subIter = sub;
      subIter++;
    }
  }

  LOG(("Removed %" PRId64 " dead SubPrefix entries.",
       static_cast<int64_t>(aSubs.end() - subIter)));
  aSubs.TruncateLength(subIter - aSubs.begin());
}

#ifdef DEBUG
template <class T>
static void EnsureSorted(FallibleTArray<T>* aArray)
{
  auto start = aArray->begin();
  auto end = aArray->end();
  auto iter = start;
  auto previous = start;

  while (iter != end) {
    previous = iter;
    ++iter;
    if (iter != end) {
      MOZ_ASSERT(iter->Compare(*previous) >= 0);
    }
  }

  return;
}
#endif

nsresult
HashStore::ProcessSubs()
{
#ifdef DEBUG
  EnsureSorted(&mAddPrefixes);
  EnsureSorted(&mSubPrefixes);
  EnsureSorted(&mAddCompletes);
  EnsureSorted(&mSubCompletes);
  LOG(("All databases seem to have a consistent sort order."));
#endif

  RemoveMatchingPrefixes(mSubPrefixes, &mAddCompletes);
  RemoveMatchingPrefixes(mSubPrefixes, &mSubCompletes);

  // Remove any remaining subbed prefixes from both addprefixes
  // and addcompletes.
  KnockoutSubs(&mSubPrefixes, &mAddPrefixes);
  KnockoutSubs(&mSubCompletes, &mAddCompletes);

  // Remove any remaining subprefixes referring to addchunks that
  // we have (and hence have been processed above).
  RemoveDeadSubPrefixes(mSubPrefixes, mAddChunks);

#ifdef DEBUG
  EnsureSorted(&mAddPrefixes);
  EnsureSorted(&mSubPrefixes);
  EnsureSorted(&mAddCompletes);
  EnsureSorted(&mSubCompletes);
  LOG(("All databases seem to have a consistent sort order."));
#endif

  return NS_OK;
}

nsresult
HashStore::AugmentAdds(const nsTArray<uint32_t>& aPrefixes)
{
  uint32_t cnt = aPrefixes.Length();
  if (cnt != mAddPrefixes.Length()) {
    LOG(("Amount of prefixes in cache not consistent with store (%zu vs %zu)",
         aPrefixes.Length(), mAddPrefixes.Length()));
    return NS_ERROR_FAILURE;
  }
  for (uint32_t i = 0; i < cnt; i++) {
    mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]);
  }
  return NS_OK;
}

ChunkSet&
HashStore::AddChunks()
{
  ReadChunkNumbers();

  return mAddChunks;
}

ChunkSet&
HashStore::SubChunks()
{
  ReadChunkNumbers();

  return mSubChunks;
}

AddCompleteArray&
HashStore::AddCompletes()
{
  ReadCompletions();

  return mAddCompletes;
}

SubCompleteArray&
HashStore::SubCompletes()
{
  ReadCompletions();

  return mSubCompletes;
}

bool
HashStore::AlreadyReadChunkNumbers()
{
  // If there are chunks but chunk set not yet contains any data
  // Then we haven't read chunk numbers.
  if ((mHeader.numAddChunks != 0 && mAddChunks.Length() == 0) ||
      (mHeader.numSubChunks != 0 && mSubChunks.Length() == 0)) {
    return false;
  }
  return true;
}

bool
HashStore::AlreadyReadCompletions()
{
  // If there are completions but completion set not yet contains any data
  // Then we haven't read completions.
  if ((mHeader.numAddCompletes != 0 && mAddCompletes.Length() == 0) ||
      (mHeader.numSubCompletes != 0 && mSubCompletes.Length() == 0)) {
    return false;
  }
  return true;
}

} // namespace safebrowsing
} // namespace mozilla