author Dorel Luca <dluca@mozilla.com>
Tue, 28 Aug 2018 23:56:54 +0300
changeset 488602 56e7b9127e8939a1c1a964fd67ebab8caddf8394
parent 484638 a75c5a2a6bea6dfb924c04a350af97c7690e08b9
child 493260 3d099fce8fa41d9d93981b743761c88c282a3ce9
permissions -rw-r--r--
Merge mozilla-central to mozilla-beta. a=merge l10n=pre-version-bump-merge CLOSED TREE

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// See the comment at the top of mfbt/HashTable.h for a comparison between
// PLDHashTable and mozilla::HashTable.

#ifndef PLDHashTable_h
#define PLDHashTable_h

#include "mozilla/Atomics.h"
#include "mozilla/Attributes.h" // for MOZ_ALWAYS_INLINE
#include "mozilla/fallible.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/Move.h"
#include "mozilla/Types.h"
#include "nscore.h"

using PLDHashNumber = mozilla::HashNumber;
static const uint32_t kPLDHashNumberBits = mozilla::kHashNumberBits;

class PLDHashTable;
struct PLDHashTableOps;

// Table entry header structure.
// In order to allow in-line allocation of key and value, we do not declare
// either here. Instead, the API uses const void *key as a formal parameter.
// The key need not be stored in the entry; it may be part of the value, but
// need not be stored at all.
// Callback types are defined below and grouped into the PLDHashTableOps
// structure, for single static initialization per hash table sub-type.
// Each hash table sub-type should make its entry type a subclass of
// PLDHashEntryHdr. The mKeyHash member contains the result of suitably
// scrambling the hash code returned from the hashKey callback (see below),
// then constraining the result to avoid the magic 0 and 1 values. The stored
// mKeyHash value is table size invariant, and it is maintained automatically
// -- users need never access it.
struct PLDHashEntryHdr
  friend class PLDHashTable;

  PLDHashNumber mKeyHash;

#ifdef DEBUG

// This class does three kinds of checking:
// - that calls to one of |mOps| or to an enumerator do not cause re-entry into
//   the table in an unsafe way;
// - that multiple threads do not access the table in an unsafe way;
// - that a table marked as immutable is not modified.
// "Safe" here means that multiple concurrent read operations are ok, but a
// write operation (i.e. one that can cause the entry storage to be reallocated
// or destroyed) cannot safely run concurrently with another read or write
// operation. This meaning of "safe" is only partial; for example, it does not
// cover whether a single entry in the table is modified by two separate
// threads. (Doing such checking would be much harder.)
// It does this with two variables:
// - mState, which embodies a tri-stage tagged union with the following
//   variants:
//   - Idle
//   - Read(n), where 'n' is the number of concurrent read operations
//   - Write
// - mIsWritable, which indicates if the table is mutable.
class Checker
  constexpr Checker() : mState(kIdle), mIsWritable(1) {}

  Checker& operator=(Checker&& aOther) {
    // Atomic<> doesn't have an |operator=(Atomic<>&&)|.
    mState = uint32_t(aOther.mState);
    mIsWritable = uint32_t(aOther.mIsWritable);

    aOther.mState = kIdle;

    return *this;

  static bool IsIdle(uint32_t aState)  { return aState == kIdle; }
  static bool IsRead(uint32_t aState)  { return kRead1 <= aState &&
                                                aState <= kReadMax; }
  static bool IsRead1(uint32_t aState) { return aState == kRead1; }
  static bool IsWrite(uint32_t aState) { return aState == kWrite; }

  bool IsIdle() const { return mState == kIdle; }

  bool IsWritable() const { return !!mIsWritable; }

  void SetNonWritable() { mIsWritable = 0; }

  // NOTE: the obvious way to implement these functions is to (a) check
  // |mState| is reasonable, and then (b) update |mState|. But the lack of
  // atomicity in such an implementation can cause problems if we get unlucky
  // thread interleaving between (a) and (b).
  // So instead for |mState| we are careful to (a) first get |mState|'s old
  // value and assign it a new value in single atomic operation, and only then
  // (b) check the old value was reasonable. This ensures we don't have
  // interleaving problems.
  // For |mIsWritable| we don't need to be as careful because it can only in
  // transition in one direction (from writable to non-writable).

  void StartReadOp()
    uint32_t oldState = mState++;     // this is an atomic increment
    MOZ_ASSERT(IsIdle(oldState) || IsRead(oldState));
    MOZ_ASSERT(oldState < kReadMax);  // check for overflow

  void EndReadOp()
    uint32_t oldState = mState--;     // this is an atomic decrement

  void StartWriteOp()
    uint32_t oldState = mState.exchange(kWrite);

  void EndWriteOp()
    // Check again that the table is writable, in case it was marked as
    // non-writable just after the IsWritable() assertion in StartWriteOp()
    // occurred.
    uint32_t oldState = mState.exchange(kIdle);

  void StartIteratorRemovalOp()
    // When doing removals at the end of iteration, we go from Read1 state to
    // Write and then back.
    uint32_t oldState = mState.exchange(kWrite);

  void EndIteratorRemovalOp()
    // Check again that the table is writable, in case it was marked as
    // non-writable just after the IsWritable() assertion in
    // StartIteratorRemovalOp() occurred.
    uint32_t oldState = mState.exchange(kRead1);

  void StartDestructorOp()
    // A destructor op is like a write, but the table doesn't need to be
    // writable.
    uint32_t oldState = mState.exchange(kWrite);

  void EndDestructorOp()
    uint32_t oldState = mState.exchange(kIdle);

  // Things of note about the representation of |mState|.
  // - The values between kRead1..kReadMax represent valid Read(n) values.
  // - kIdle and kRead1 are deliberately chosen so that incrementing the -
  //   former gives the latter.
  // - 9999 concurrent readers should be enough for anybody.
  static const uint32_t kIdle    = 0;
  static const uint32_t kRead1   = 1;
  static const uint32_t kReadMax = 9999;
  static const uint32_t kWrite   = 10000;

  mutable mozilla::Atomic<uint32_t,
                          mozilla::recordreplay::Behavior::DontPreserve> mState;
  mutable mozilla::Atomic<uint32_t,
                          mozilla::recordreplay::Behavior::DontPreserve> mIsWritable;

// A PLDHashTable may be allocated on the stack or within another structure or
// class. No entry storage is allocated until the first element is added. This
// means that empty hash tables are cheap, which is good because they are
// common.
// There used to be a long, math-heavy comment here about the merits of
// double hashing vs. chaining; it was removed in bug 1058335. In short, double
// hashing is more space-efficient unless the element size gets large (in which
// case you should keep using double hashing but switch to using pointer
// elements). Also, with double hashing, you can't safely hold an entry pointer
// and use it after an add or remove operation, unless you sample Generation()
// before adding or removing, and compare the sample after, dereferencing the
// entry pointer only if Generation() has not changed.
class PLDHashTable
  // This class maintains the invariant that every time the entry store is
  // changed, the generation is updated.
  // Note: It would be natural to store the generation within this class, but
  // we can't do that without bloating sizeof(PLDHashTable) on 64-bit machines.
  // So instead we store it outside this class, and Set() takes a pointer to it
  // and ensures it is updated as necessary.
  class EntryStore
    char* mEntryStore;

    EntryStore() : mEntryStore(nullptr) {}

      mEntryStore = nullptr;

    char* Get() { return mEntryStore; }
    const char* Get() const { return mEntryStore; }

    void Set(char* aEntryStore, uint16_t* aGeneration)
      mEntryStore = aEntryStore;
      *aGeneration += 1;

  // These fields are packed carefully. On 32-bit platforms,
  // sizeof(PLDHashTable) is 20. On 64-bit platforms, sizeof(PLDHashTable) is
  // 32; 28 bytes of data followed by 4 bytes of padding for alignment.
  const PLDHashTableOps* const mOps;  // Virtual operations; see below.
  EntryStore          mEntryStore;    // (Lazy) entry storage and generation.
  uint16_t            mGeneration;    // The storage generation.
  uint8_t             mHashShift;     // Multiplicative hash shift.
  const uint8_t       mEntrySize;     // Number of bytes in an entry.
  uint32_t            mEntryCount;    // Number of entries in table.
  uint32_t            mRemovedCount;  // Removed entry sentinels in table.

#ifdef DEBUG
  mutable Checker mChecker;

  // Table capacity limit; do not exceed. The max capacity used to be 1<<23 but
  // that occasionally that wasn't enough. Making it much bigger than 1<<26
  // probably isn't worthwhile -- tables that big are kind of ridiculous.
  // Also, the growth operation will (deliberately) fail if |capacity *
  // mEntrySize| overflows a uint32_t, and mEntrySize is always at least 8
  // bytes.
  static const uint32_t kMaxCapacity = ((uint32_t)1 << 26);

  static const uint32_t kMinCapacity = 8;

  // Making this half of kMaxCapacity ensures it'll fit. Nobody should need an
  // initial length anywhere nearly this large, anyway.
  static const uint32_t kMaxInitialLength = kMaxCapacity / 2;

  // This gives a default initial capacity of 8.
  static const uint32_t kDefaultInitialLength = 4;

  // Initialize the table with |aOps| and |aEntrySize|. The table's initial
  // capacity is chosen such that |aLength| elements can be inserted without
  // rehashing; if |aLength| is a power-of-two, this capacity will be
  // |2*length|. However, because entry storage is allocated lazily, this
  // initial capacity won't be relevant until the first element is added; prior
  // to that the capacity will be zero.
  // This will crash if |aEntrySize| and/or |aLength| are too large.
  PLDHashTable(const PLDHashTableOps* aOps, uint32_t aEntrySize,
               uint32_t aLength = kDefaultInitialLength);

  PLDHashTable(PLDHashTable&& aOther)
      // Initialize fields which are checked by the move assignment operator
      // and the destructor (which the move assignment operator calls).
    : mOps(nullptr)
    , mEntryStore()
    , mGeneration(0)
    , mEntrySize(0)
#ifdef DEBUG
    , mChecker()
    *this = std::move(aOther);

  PLDHashTable& operator=(PLDHashTable&& aOther);


  // This should be used rarely.
  const PLDHashTableOps* Ops() const
    return mozilla::recordreplay::UnwrapPLDHashTableCallbacks(mOps);

  // Size in entries (gross, not net of free and removed sentinels) for table.
  // This can be zero if no elements have been added yet, in which case the
  // entry storage will not have yet been allocated.
  uint32_t Capacity() const
    return mEntryStore.Get() ? CapacityFromHashShift() : 0;

  uint32_t EntrySize()  const { return mEntrySize; }
  uint32_t EntryCount() const { return mEntryCount; }
  uint32_t Generation() const { return mGeneration; }

  // To search for a |key| in |table|, call:
  //   entry = table.Search(key);
  // If |entry| is non-null, |key| was found. If |entry| is null, key was not
  // found.
  PLDHashEntryHdr* Search(const void* aKey) const;

  // To add an entry identified by |key| to table, call:
  //   entry = table.Add(key, mozilla::fallible);
  // If |entry| is null upon return, then the table is severely overloaded and
  // memory can't be allocated for entry storage.
  // Otherwise, |aEntry->mKeyHash| has been set so that
  // PLDHashTable::EntryIsFree(entry) is false, and it is up to the caller to
  // initialize the key and value parts of the entry sub-type, if they have not
  // been set already (i.e. if entry was not already in the table, and if the
  // optional initEntry hook was not used).
  PLDHashEntryHdr* Add(const void* aKey, const mozilla::fallible_t&);

  // This is like the other Add() function, but infallible, and so never
  // returns null.
  PLDHashEntryHdr* Add(const void* aKey);

  // To remove an entry identified by |key| from table, call:
  //   table.Remove(key);
  // If |key|'s entry is found, it is cleared (via table->mOps->clearEntry).
  // The table's capacity may be reduced afterwards.
  void Remove(const void* aKey);

  // To remove an entry found by a prior search, call:
  //   table.RemoveEntry(entry);
  // The entry, which must be present and in use, is cleared (via
  // table->mOps->clearEntry). The table's capacity may be reduced afterwards.
  void RemoveEntry(PLDHashEntryHdr* aEntry);

  // Remove an entry already accessed via Search() or Add().
  // NB: this is a "raw" or low-level method. It does not shrink the table if
  // it is underloaded. Don't use it unless necessary and you know what you are
  // doing, and if so, please explain in a comment why it is necessary instead
  // of RemoveEntry().
  void RawRemove(PLDHashEntryHdr* aEntry);

  // This function is equivalent to
  // ClearAndPrepareForLength(kDefaultInitialLength).
  void Clear();

  // This function clears the table's contents and frees its entry storage,
  // leaving it in a empty state ready to be used again. Afterwards, when the
  // first element is added the entry storage that gets allocated will have a
  // capacity large enough to fit |aLength| elements without rehashing.
  // It's conceptually the same as calling the destructor and then re-calling
  // the constructor with the original |aOps| and |aEntrySize| arguments, and
  // a new |aLength| argument.
  void ClearAndPrepareForLength(uint32_t aLength);

  // Measure the size of the table's entry storage. If the entries contain
  // pointers to other heap blocks, you have to iterate over the table and
  // measure those separately; hence the "Shallow" prefix.
  size_t ShallowSizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;

  // Like ShallowSizeOfExcludingThis(), but includes sizeof(*this).
  size_t ShallowSizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;

#ifdef DEBUG
  // Mark a table as immutable for the remainder of its lifetime. This
  // changes the implementation from asserting one set of invariants to
  // asserting a different set.
  void MarkImmutable();

  // If you use PLDHashEntryStub or a subclass of it as your entry struct, and
  // if your entries move via memcpy and clear via memset(0), you can use these
  // stub operations.
  static const PLDHashTableOps* StubOps();

  // The individual stub operations in StubOps().
  static PLDHashNumber HashVoidPtrKeyStub(const void* aKey);
  static bool MatchEntryStub(const PLDHashEntryHdr* aEntry, const void* aKey);
  static void MoveEntryStub(PLDHashTable* aTable, const PLDHashEntryHdr* aFrom,
                            PLDHashEntryHdr* aTo);
  static void ClearEntryStub(PLDHashTable* aTable, PLDHashEntryHdr* aEntry);

  // Hash/match operations for tables holding C strings.
  static PLDHashNumber HashStringKey(const void* aKey);
  static bool MatchStringKey(const PLDHashEntryHdr* aEntry, const void* aKey);

  // This is an iterator for PLDHashtable. Assertions will detect some, but not
  // all, mid-iteration table modifications that might invalidate (e.g.
  // reallocate) the entry storage.
  // Any element can be removed during iteration using Remove(). If any
  // elements are removed, the table may be resized once iteration ends.
  // Example usage:
  //   for (auto iter = table.Iter(); !iter.Done(); iter.Next()) {
  //     auto entry = static_cast<FooEntry*>(iter.Get());
  //     // ... do stuff with |entry| ...
  //     // ... possibly call iter.Remove() once ...
  //   }
  // or:
  //   for (PLDHashTable::Iterator iter(&table); !iter.Done(); iter.Next()) {
  //     auto entry = static_cast<FooEntry*>(iter.Get());
  //     // ... do stuff with |entry| ...
  //     // ... possibly call iter.Remove() once ...
  //   }
  // The latter form is more verbose but is easier to work with when
  // making subclasses of Iterator.
  class Iterator
    explicit Iterator(PLDHashTable* aTable);
    Iterator(Iterator&& aOther);

    // Have we finished?
    bool Done() const { return mNexts == mNextsLimit; }

    // Get the current entry.
    PLDHashEntryHdr* Get() const

      PLDHashEntryHdr* entry = reinterpret_cast<PLDHashEntryHdr*>(mCurrent);
      return entry;

    // Advance to the next entry.
    void Next();

    // Remove the current entry. Must only be called once per entry, and Get()
    // must not be called on that entry afterwards.
    void Remove();

    PLDHashTable* mTable;             // Main table pointer.

    char* mStart;                     // The first entry.
    char* mLimit;                     // One past the last entry.
    char* mCurrent;                   // Pointer to the current entry.
    uint32_t mNexts;                  // Number of Next() calls.
    uint32_t mNextsLimit;             // Next() call limit.

    bool mHaveRemoved;                // Have any elements been removed?

    bool IsOnNonLiveEntry() const;
    void MoveToNextEntry();

    Iterator() = delete;
    Iterator(const Iterator&) = delete;
    Iterator& operator=(const Iterator&) = delete;
    Iterator& operator=(const Iterator&&) = delete;

  Iterator Iter() { return Iterator(this); }

  // Use this if you need to initialize an Iterator in a const method. If you
  // use this case, you should not call Remove() on the iterator.
  Iterator ConstIter() const
    return Iterator(const_cast<PLDHashTable*>(this));

  static uint32_t HashShift(uint32_t aEntrySize, uint32_t aLength);

  static const PLDHashNumber kCollisionFlag = 1;

  static bool EntryIsFree(const PLDHashEntryHdr* aEntry)
    return aEntry->mKeyHash == 0;
  static bool EntryIsRemoved(const PLDHashEntryHdr* aEntry)
    return aEntry->mKeyHash == 1;
  static bool EntryIsLive(const PLDHashEntryHdr* aEntry)
    return aEntry->mKeyHash >= 2;

  static void MarkEntryFree(PLDHashEntryHdr* aEntry)
    aEntry->mKeyHash = 0;
  static void MarkEntryRemoved(PLDHashEntryHdr* aEntry)
    aEntry->mKeyHash = 1;

  PLDHashNumber Hash1(PLDHashNumber aHash0) const;
  void Hash2(PLDHashNumber aHash,
             uint32_t& aHash2Out, uint32_t& aSizeMaskOut) const;

  static bool MatchEntryKeyhash(const PLDHashEntryHdr* aEntry,
                                const PLDHashNumber aHash);
  PLDHashEntryHdr* AddressEntry(uint32_t aIndex) const;

  // We store mHashShift rather than sizeLog2 to optimize the collision-free
  // case in SearchTable.
  uint32_t CapacityFromHashShift() const
    return ((uint32_t)1 << (kPLDHashNumberBits - mHashShift));

  PLDHashNumber ComputeKeyHash(const void* aKey) const;

  enum SearchReason { ForSearchOrRemove, ForAdd };

  template <SearchReason Reason>
    SearchTable(const void* aKey, PLDHashNumber aKeyHash) const;

  PLDHashEntryHdr* FindFreeEntry(PLDHashNumber aKeyHash) const;

  bool ChangeTable(int aDeltaLog2);

  void ShrinkIfAppropriate();

  PLDHashTable(const PLDHashTable& aOther) = delete;
  PLDHashTable& operator=(const PLDHashTable& aOther) = delete;

// Compute the hash code for a given key to be looked up, added, or removed.
// A hash code may have any PLDHashNumber value.
typedef PLDHashNumber (*PLDHashHashKey)(const void* aKey);

// Compare the key identifying aEntry with the provided key parameter. Return
// true if keys match, false otherwise.
typedef bool (*PLDHashMatchEntry)(const PLDHashEntryHdr* aEntry,
                                  const void* aKey);

// Copy the data starting at aFrom to the new entry storage at aTo. Do not add
// reference counts for any strong references in the entry, however, as this
// is a "move" operation: the old entry storage at from will be freed without
// any reference-decrementing callback shortly.
typedef void (*PLDHashMoveEntry)(PLDHashTable* aTable,
                                 const PLDHashEntryHdr* aFrom,
                                 PLDHashEntryHdr* aTo);

// Clear the entry and drop any strong references it holds. This callback is
// invoked by Remove(), but only if the given key is found in the table.
typedef void (*PLDHashClearEntry)(PLDHashTable* aTable,
                                  PLDHashEntryHdr* aEntry);

// Initialize a new entry, apart from mKeyHash. This function is called when
// Add() finds no existing entry for the given key, and must add a new one. At
// that point, |aEntry->mKeyHash| is not set yet, to avoid claiming the last
// free entry in a severely overloaded table.
typedef void (*PLDHashInitEntry)(PLDHashEntryHdr* aEntry, const void* aKey);

// Finally, the "vtable" structure for PLDHashTable. The first four hooks
// must be provided by implementations; they're called unconditionally by the
// generic PLDHashTable.cpp code. Hooks after these may be null.
// Summary of allocation-related hook usage with C++ placement new emphasis:
//  initEntry           Call placement new using default key-based ctor.
//  moveEntry           Call placement new using copy ctor, run dtor on old
//                      entry storage.
//  clearEntry          Run dtor on entry.
// Note the reason why initEntry is optional: the default hooks (stubs) clear
// entry storage:  On successful Add(tbl, key), the returned entry pointer
// addresses an entry struct whose mKeyHash member has been set non-zero, but
// all other entry members are still clear (null). Add() callers can test such
// members to see whether the entry was newly created by the Add() call that
// just succeeded. If placement new or similar initialization is required,
// define an |initEntry| hook. Of course, the |clearEntry| hook must zero or
// null appropriately.
// XXX assumes 0 is null for pointer types.
struct PLDHashTableOps
  // Mandatory hooks. All implementations must provide these.
  PLDHashHashKey      hashKey;
  PLDHashMatchEntry   matchEntry;
  PLDHashMoveEntry    moveEntry;
  PLDHashClearEntry   clearEntry;

  // Optional hooks start here. If null, these are not called.
  PLDHashInitEntry    initEntry;

// A minimal entry is a subclass of PLDHashEntryHdr and has a void* key pointer.
struct PLDHashEntryStub : public PLDHashEntryHdr
  const void* key;

#endif /* PLDHashTable_h */