xpcom/ds/PLDHashTable.h
author Kris Maglione <maglione.k@gmail.com>
Thu, 10 May 2018 10:36:53 -0700
changeset 473641 1becc594554c1a273535ed175895e801667735e9
parent 433135 4dce29de45e3c5ecf8476bd77195c11a4ebe64ad
child 475080 b54db66223586b4e04f5cb926fccdacf8a176b91
permissions -rw-r--r--
Bug 1460600: Remove unsupported --enable-system-hunspell flag. r=glandium Our bundled Hunspell now significantly differs from upstream Hunspell. Most importantly, it supports loading dictionaries from jar: URIs, which is now a requirement for loading bundled and extension dictionaries. This means that system Hunspell libraries are no longer compatible with our spell checker code. We should remove the option to use them so that users don't fall into the trap of trying to use them. MozReview-Commit-ID: 2ihJe6YOnGf

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef PLDHashTable_h
#define PLDHashTable_h

#include "mozilla/Atomics.h"
#include "mozilla/Attributes.h" // for MOZ_ALWAYS_INLINE
#include "mozilla/fallible.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/Move.h"
#include "mozilla/Types.h"
#include "nscore.h"

// The integer type used for hash codes: it is what the hashKey callback
// returns and what each entry header stores in mKeyHash.
typedef uint32_t PLDHashNumber;

// Forward declarations; both types are defined later in this header.
class PLDHashTable;
struct PLDHashTableOps;

// Table entry header structure.
//
// In order to allow in-line allocation of key and value, we do not declare
// either here. Instead, the API uses const void *key as a formal parameter.
// The key need not be stored in the entry; it may be part of the value, but
// need not be stored at all.
//
// Callback types are defined below and grouped into the PLDHashTableOps
// structure, for single static initialization per hash table sub-type.
//
// Each hash table sub-type should make its entry type a subclass of
// PLDHashEntryHdr. The mKeyHash member contains the result of multiplying the
// hash code returned from the hashKey callback (see below) by kGoldenRatio,
// then constraining the result to avoid the magic 0 and 1 values. The stored
// mKeyHash value is table size invariant, and it is maintained automatically
// -- users need never access it.
struct PLDHashEntryHdr
{
private:
  // mKeyHash is private; this grants PLDHashTable access to it.
  friend class PLDHashTable;

  // 0 marks a free entry and 1 a removed entry; any value >= 2 denotes a live
  // entry (see PLDHashTable::EntryIsFree/EntryIsRemoved/EntryIsLive).
  PLDHashNumber mKeyHash;
};

#ifdef DEBUG

// This class does three kinds of checking:
//
// - that calls to one of |mOps| or to an enumerator do not cause re-entry into
//   the table in an unsafe way;
//
// - that multiple threads do not access the table in an unsafe way;
//
// - that a table marked as immutable is not modified.
//
// "Safe" here means that multiple concurrent read operations are ok, but a
// write operation (i.e. one that can cause the entry storage to be reallocated
// or destroyed) cannot safely run concurrently with another read or write
// operation. This meaning of "safe" is only partial; for example, it does not
// cover whether a single entry in the table is modified by two separate
// threads. (Doing such checking would be much harder.)
//
// It does this with two variables:
//
// - mState, which embodies a three-state tagged union with the following
//   variants:
//   - Idle
//   - Read(n), where 'n' is the number of concurrent read operations
//   - Write
//
// - mIsWritable, which indicates if the table is mutable.
//
class Checker
{
public:
  // A fresh checker is idle and regards its table as writable.
  constexpr Checker()
    : mState(kIdle)
    , mIsWritable(1)
  {
  }

  // Adopt |aOther|'s state and writability, then reset |aOther|'s state to
  // idle. |aOther|'s writability flag is left as it was.
  Checker& operator=(Checker&& aOther)
  {
    // Atomic<> offers no |operator=(Atomic<>&&)|, so transfer plain values.
    mState = static_cast<uint32_t>(aOther.mState);
    mIsWritable = static_cast<uint32_t>(aOther.mIsWritable);

    aOther.mState = kIdle;

    return *this;
  }

  // Predicates that classify a raw |mState| value.
  static bool IsIdle(uint32_t aState)
  {
    return aState == kIdle;
  }

  static bool IsRead(uint32_t aState)
  {
    return aState >= kRead1 && aState <= kReadMax;
  }

  static bool IsRead1(uint32_t aState)
  {
    return aState == kRead1;
  }

  static bool IsWrite(uint32_t aState)
  {
    return aState == kWrite;
  }

  // Is this checker currently in the Idle state?
  bool IsIdle() const
  {
    return IsIdle(mState);
  }

  // Has the table not (yet) been marked immutable?
  bool IsWritable() const
  {
    return mIsWritable != 0;
  }

  // Mark the table as immutable. There is no way to undo this.
  void SetNonWritable()
  {
    mIsWritable = 0;
  }

  // NOTE: the obvious implementation of the Start*/End* functions would be to
  // (a) check that |mState| is reasonable, and then (b) update |mState|. That
  // is not atomic, so an unlucky thread interleaving between (a) and (b) could
  // cause problems.
  //
  // Instead, for |mState| we (a) fetch the old value and store the new value
  // in a single atomic operation, and only then (b) check that the old value
  // was reasonable. This avoids interleaving problems.
  //
  // |mIsWritable| does not need the same care because it can only transition
  // in one direction (from writable to non-writable).

  // Idle -> Read(1), or Read(n) -> Read(n+1).
  void StartReadOp()
  {
    const uint32_t previous = mState++;   // atomic post-increment
    MOZ_ASSERT(IsIdle(previous) || IsRead(previous));
    MOZ_ASSERT(previous < kReadMax);      // guard against reader overflow
  }

  // Read(n) -> Read(n-1), or Read(1) -> Idle.
  void EndReadOp()
  {
    const uint32_t previous = mState--;   // atomic post-decrement
    MOZ_ASSERT(IsRead(previous));
  }

  // Idle -> Write. The table must be writable.
  void StartWriteOp()
  {
    MOZ_ASSERT(IsWritable());
    const uint32_t previous = mState.exchange(kWrite);
    MOZ_ASSERT(IsIdle(previous));
  }

  // Write -> Idle.
  void EndWriteOp()
  {
    // Re-check writability: the table may have been marked non-writable just
    // after the IsWritable() assertion in StartWriteOp() ran.
    MOZ_ASSERT(IsWritable());
    const uint32_t previous = mState.exchange(kIdle);
    MOZ_ASSERT(IsWrite(previous));
  }

  // Read(1) -> Write. Removals performed at the end of an iteration move from
  // the Read1 state to Write and then back again.
  void StartIteratorRemovalOp()
  {
    MOZ_ASSERT(IsWritable());
    const uint32_t previous = mState.exchange(kWrite);
    MOZ_ASSERT(IsRead1(previous));
  }

  // Write -> Read(1).
  void EndIteratorRemovalOp()
  {
    // Re-check writability: the table may have been marked non-writable just
    // after the IsWritable() assertion in StartIteratorRemovalOp() ran.
    MOZ_ASSERT(IsWritable());
    const uint32_t previous = mState.exchange(kRead1);
    MOZ_ASSERT(IsWrite(previous));
  }

  // Idle -> Write. A destructor op behaves like a write, except that the
  // table is not required to be writable.
  void StartDestructorOp()
  {
    const uint32_t previous = mState.exchange(kWrite);
    MOZ_ASSERT(IsIdle(previous));
  }

  // Write -> Idle.
  void EndDestructorOp()
  {
    const uint32_t previous = mState.exchange(kIdle);
    MOZ_ASSERT(IsWrite(previous));
  }

private:
  // Notes on how |mState| is encoded:
  // - Values in kRead1..kReadMax are the valid Read(n) states.
  // - kIdle and kRead1 are deliberately adjacent, so that incrementing the
  //   former yields the latter.
  // - 9999 concurrent readers should be enough for anybody.
  static const uint32_t kIdle = 0;
  static const uint32_t kRead1 = 1;
  static const uint32_t kReadMax = 9999;
  static const uint32_t kWrite = 10000;

  mutable mozilla::Atomic<uint32_t> mState;
  mutable mozilla::Atomic<uint32_t> mIsWritable;
};
#endif

// A PLDHashTable may be allocated on the stack or within another structure or
// class. No entry storage is allocated until the first element is added. This
// means that empty hash tables are cheap, which is good because they are
// common.
//
// There used to be a long, math-heavy comment here about the merits of
// double hashing vs. chaining; it was removed in bug 1058335. In short, double
// hashing is more space-efficient unless the element size gets large (in which
// case you should keep using double hashing but switch to using pointer
// elements). Also, with double hashing, you can't safely hold an entry pointer
// and use it after an add or remove operation, unless you sample Generation()
// before adding or removing, and compare the sample after, dereferencing the
// entry pointer only if Generation() has not changed.
class PLDHashTable
{
private:
  // This class maintains the invariant that every time the entry store is
  // changed, the generation is updated.
  //
  // Note: It would be natural to store the generation within this class, but
  // we can't do that without bloating sizeof(PLDHashTable) on 64-bit machines.
  // So instead we store it outside this class, and Set() takes a pointer to it
  // and ensures it is updated as necessary.
  class EntryStore
  {
  private:
    // The raw storage block, or null if none has been set.
    char* mEntryStore;

  public:
    EntryStore() : mEntryStore(nullptr) {}

    // Releases the storage block with free().
    ~EntryStore()
    {
      free(mEntryStore);
      mEntryStore = nullptr;
    }

    char* Get() { return mEntryStore; }
    const char* Get() const { return mEntryStore; }

    // Replace the stored pointer with |aEntryStore| and increment
    // |*aGeneration|. The previous pointer is simply overwritten, not freed.
    void Set(char* aEntryStore, uint16_t* aGeneration)
    {
      mEntryStore = aEntryStore;
      *aGeneration += 1;
    }
  };

  // These fields are packed carefully. On 32-bit platforms,
  // sizeof(PLDHashTable) is 20. On 64-bit platforms, sizeof(PLDHashTable) is
  // 32; 28 bytes of data followed by 4 bytes of padding for alignment.
  const PLDHashTableOps* const mOps;  // Virtual operations; see below.
  EntryStore          mEntryStore;    // (Lazy) entry storage.
  uint16_t            mGeneration;    // The storage generation.
  uint8_t             mHashShift;     // Multiplicative hash shift.
  const uint8_t       mEntrySize;     // Number of bytes in an entry.
  uint32_t            mEntryCount;    // Number of entries in table.
  uint32_t            mRemovedCount;  // Removed entry sentinels in table.

#ifdef DEBUG
  // Debug-only checker for unsafe re-entry, unsafe concurrent access, and
  // modification of a table marked immutable.
  mutable Checker mChecker;
#endif

public:
  // Table capacity limit; do not exceed. The max capacity used to be 1<<23 but
  // occasionally that wasn't enough. Making it much bigger than 1<<26
  // probably isn't worthwhile -- tables that big are kind of ridiculous.
  // Also, the growth operation will (deliberately) fail if |capacity *
  // mEntrySize| overflows a uint32_t, and mEntrySize is always at least 8
  // bytes.
  static const uint32_t kMaxCapacity = ((uint32_t)1 << 26);

  static const uint32_t kMinCapacity = 8;

  // Making this half of kMaxCapacity ensures it'll fit. Nobody should need an
  // initial length anywhere nearly this large, anyway.
  static const uint32_t kMaxInitialLength = kMaxCapacity / 2;

  // This gives a default initial capacity of 8.
  static const uint32_t kDefaultInitialLength = 4;

  // Initialize the table with |aOps| and |aEntrySize|. The table's initial
  // capacity is chosen such that |aLength| elements can be inserted without
  // rehashing; if |aLength| is a power-of-two, this capacity will be
  // |2*length|. However, because entry storage is allocated lazily, this
  // initial capacity won't be relevant until the first element is added; prior
  // to that the capacity will be zero.
  //
  // This will crash if |aEntrySize| and/or |aLength| are too large.
  PLDHashTable(const PLDHashTableOps* aOps, uint32_t aEntrySize,
               uint32_t aLength = kDefaultInitialLength);

  // Move constructor. It initializes only the members listed below and then
  // delegates to the move assignment operator for everything else.
  PLDHashTable(PLDHashTable&& aOther)
      // We initialize mOps and mEntrySize here because they are |const|, and
      // the move assignment operator cannot modify them.
      // We initialize mEntryStore because it is required for a safe call to
      // the destructor, which the move assignment operator does.
      // We initialize mGeneration because it is modified by the move
      // assignment operator.
    : mOps(aOther.mOps)
    , mEntryStore()
    , mGeneration(0)
    , mEntrySize(aOther.mEntrySize)
#ifdef DEBUG
    , mChecker()
#endif
  {
    *this = mozilla::Move(aOther);
  }

  PLDHashTable& operator=(PLDHashTable&& aOther);

  ~PLDHashTable();

  // This should be used rarely.
  const PLDHashTableOps* Ops() const { return mOps; }

  // Size in entries (gross, not net of free and removed sentinels) for table.
  // This can be zero if no elements have been added yet, in which case the
  // entry storage will not have yet been allocated.
  uint32_t Capacity() const
  {
    return mEntryStore.Get() ? CapacityFromHashShift() : 0;
  }

  // Simple accessors. Note that mEntrySize and mGeneration are stored in
  // narrower types (uint8_t and uint16_t) and are widened on return.
  uint32_t EntrySize()  const { return mEntrySize; }
  uint32_t EntryCount() const { return mEntryCount; }
  uint32_t Generation() const { return mGeneration; }

  // To search for a |key| in |table|, call:
  //
  //   entry = table.Search(key);
  //
  // If |entry| is non-null, |key| was found. If |entry| is null, key was not
  // found.
  PLDHashEntryHdr* Search(const void* aKey);

  // To add an entry identified by |key| to table, call:
  //
  //   entry = table.Add(key, mozilla::fallible);
  //
  // If |entry| is null upon return, then the table is severely overloaded and
  // memory can't be allocated for entry storage.
  //
  // Otherwise, |entry->mKeyHash| has been set so that
  // PLDHashTable::EntryIsFree(entry) is false, and it is up to the caller to
  // initialize the key and value parts of the entry sub-type, if they have not
  // been set already (i.e. if entry was not already in the table, and if the
  // optional initEntry hook was not used).
  PLDHashEntryHdr* Add(const void* aKey, const mozilla::fallible_t&);

  // This is like the other Add() function, but infallible, and so never
  // returns null.
  PLDHashEntryHdr* Add(const void* aKey);

  // To remove an entry identified by |key| from table, call:
  //
  //   table.Remove(key);
  //
  // If |key|'s entry is found, it is cleared (via table->mOps->clearEntry).
  // The table's capacity may be reduced afterwards.
  void Remove(const void* aKey);

  // To remove an entry found by a prior search, call:
  //
  //   table.RemoveEntry(entry);
  //
  // The entry, which must be present and in use, is cleared (via
  // table->mOps->clearEntry). The table's capacity may be reduced afterwards.
  void RemoveEntry(PLDHashEntryHdr* aEntry);

  // Remove an entry already accessed via Search() or Add().
  //
  // NB: this is a "raw" or low-level method. It does not shrink the table if
  // it is underloaded. Don't use it unless necessary and you know what you are
  // doing, and if so, please explain in a comment why it is necessary instead
  // of RemoveEntry().
  void RawRemove(PLDHashEntryHdr* aEntry);

  // This function is equivalent to
  // ClearAndPrepareForLength(kDefaultInitialLength).
  void Clear();

  // This function clears the table's contents and frees its entry storage,
  // leaving it in an empty state ready to be used again. Afterwards, when the
  // first element is added the entry storage that gets allocated will have a
  // capacity large enough to fit |aLength| elements without rehashing.
  //
  // It's conceptually the same as calling the destructor and then re-calling
  // the constructor with the original |aOps| and |aEntrySize| arguments, and
  // a new |aLength| argument.
  void ClearAndPrepareForLength(uint32_t aLength);

  // Like ShallowSizeOfExcludingThis() (declared just below), but also
  // includes sizeof(*this).
  size_t ShallowSizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;

  // Measure the size of the table's entry storage. If the entries contain
  // pointers to other heap blocks, you have to iterate over the table and
  // measure those separately; hence the "Shallow" prefix.
  size_t ShallowSizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;

#ifdef DEBUG
  // Mark a table as immutable for the remainder of its lifetime. This
  // changes the implementation from asserting one set of invariants to
  // asserting a different set.
  void MarkImmutable();
#endif

  // If you use PLDHashEntryStub or a subclass of it as your entry struct, and
  // if your entries move via memcpy and clear via memset(0), you can use these
  // stub operations.
  static const PLDHashTableOps* StubOps();

  // The individual stub operations in StubOps().
  static PLDHashNumber HashVoidPtrKeyStub(const void* aKey);
  static bool MatchEntryStub(const PLDHashEntryHdr* aEntry, const void* aKey);
  static void MoveEntryStub(PLDHashTable* aTable, const PLDHashEntryHdr* aFrom,
                            PLDHashEntryHdr* aTo);
  static void ClearEntryStub(PLDHashTable* aTable, PLDHashEntryHdr* aEntry);

  // Hash/match operations for tables holding C strings.
  static PLDHashNumber HashStringKey(const void* aKey);
  static bool MatchStringKey(const PLDHashEntryHdr* aEntry, const void* aKey);

  // This is an iterator for PLDHashTable. Assertions will detect some, but not
  // all, mid-iteration table modifications that might invalidate (e.g.
  // reallocate) the entry storage.
  //
  // Any element can be removed during iteration using the iterator's Remove()
  // method. If any elements are removed, the table may be resized once
  // iteration ends.
  //
  // Example usage:
  //
  //   for (auto iter = table.Iter(); !iter.Done(); iter.Next()) {
  //     auto entry = static_cast<FooEntry*>(iter.Get());
  //     // ... do stuff with |entry| ...
  //     // ... possibly call iter.Remove() once ...
  //   }
  //
  // or:
  //
  //   for (PLDHashTable::Iterator iter(&table); !iter.Done(); iter.Next()) {
  //     auto entry = static_cast<FooEntry*>(iter.Get());
  //     // ... do stuff with |entry| ...
  //     // ... possibly call iter.Remove() once ...
  //   }
  //
  // The latter form is more verbose but is easier to work with when
  // making subclasses of Iterator.
  //
  class Iterator
  {
  public:
    explicit Iterator(PLDHashTable* aTable);
    Iterator(Iterator&& aOther);
    ~Iterator();

    // Have we finished? True once the number of Next() calls has reached its
    // limit.
    bool Done() const { return mNexts == mNextsLimit; }

    // Get the current entry. Must not be called once Done() is true; the
    // current entry is asserted to be live.
    PLDHashEntryHdr* Get() const
    {
      MOZ_ASSERT(!Done());

      PLDHashEntryHdr* entry = reinterpret_cast<PLDHashEntryHdr*>(mCurrent);
      MOZ_ASSERT(EntryIsLive(entry));
      return entry;
    }

    // Advance to the next entry.
    void Next();

    // Remove the current entry. Must only be called once per entry, and Get()
    // must not be called on that entry afterwards.
    void Remove();

  protected:
    PLDHashTable* mTable;             // Main table pointer.

  private:
    char* mStart;                     // The first entry.
    char* mLimit;                     // One past the last entry.
    char* mCurrent;                   // Pointer to the current entry.
    uint32_t mNexts;                  // Number of Next() calls.
    uint32_t mNextsLimit;             // Next() call limit.

    bool mHaveRemoved;                // Have any elements been removed?

    bool IsOnNonLiveEntry() const;
    void MoveToNextEntry();

    // Iterators can only be created from a table or by move construction:
    // default construction, copying and assignment are all disabled.
    Iterator() = delete;
    Iterator(const Iterator&) = delete;
    Iterator& operator=(const Iterator&) = delete;
    Iterator& operator=(const Iterator&&) = delete;
  };

  // Create an iterator over this table.
  Iterator Iter() { return Iterator(this); }

  // Use this if you need to initialize an Iterator in a const method. If you
  // use this case, you should not call Remove() on the iterator.
  Iterator ConstIter() const
  {
    // Iterator takes a non-const table pointer, so constness is cast away.
    return Iterator(const_cast<PLDHashTable*>(this));
  }

private:
  // Multiplicative hash uses an unsigned 32 bit integer and the golden ratio,
  // expressed as a fixed-point 32-bit fraction.
  static const uint32_t kHashBits = 32;
  static const uint32_t kGoldenRatio = 0x9E3779B9U;

  static uint32_t HashShift(uint32_t aEntrySize, uint32_t aLength);

  static const PLDHashNumber kCollisionFlag = 1;

  // Entry state is encoded in mKeyHash: 0 means free, 1 means removed, and
  // any value >= 2 means live.
  static bool EntryIsFree(PLDHashEntryHdr* aEntry)
  {
    return aEntry->mKeyHash == 0;
  }
  static bool EntryIsRemoved(PLDHashEntryHdr* aEntry)
  {
    return aEntry->mKeyHash == 1;
  }
  static bool EntryIsLive(PLDHashEntryHdr* aEntry)
  {
    return aEntry->mKeyHash >= 2;
  }

  // Set mKeyHash to the free (0) or removed (1) sentinel value.
  static void MarkEntryFree(PLDHashEntryHdr* aEntry)
  {
    aEntry->mKeyHash = 0;
  }
  static void MarkEntryRemoved(PLDHashEntryHdr* aEntry)
  {
    aEntry->mKeyHash = 1;
  }

  PLDHashNumber Hash1(PLDHashNumber aHash0);
  void Hash2(PLDHashNumber aHash, uint32_t& aHash2Out, uint32_t& aSizeMaskOut);

  static bool MatchEntryKeyhash(PLDHashEntryHdr* aEntry, PLDHashNumber aHash);
  PLDHashEntryHdr* AddressEntry(uint32_t aIndex);

  // We store mHashShift rather than sizeLog2 to optimize the collision-free
  // case in SearchTable. The capacity is 2^(kHashBits - mHashShift).
  uint32_t CapacityFromHashShift() const
  {
    return ((uint32_t)1 << (kHashBits - mHashShift));
  }

  PLDHashNumber ComputeKeyHash(const void* aKey);

  // Selects which SearchTable() instantiation is used.
  enum SearchReason { ForSearchOrRemove, ForAdd };

  template <SearchReason Reason>
  PLDHashEntryHdr* NS_FASTCALL
    SearchTable(const void* aKey, PLDHashNumber aKeyHash);

  PLDHashEntryHdr* FindFreeEntry(PLDHashNumber aKeyHash);

  bool ChangeTable(int aDeltaLog2);

  void ShrinkIfAppropriate();

  // Tables are move-only: copy construction and copy assignment are disabled.
  PLDHashTable(const PLDHashTable& aOther) = delete;
  PLDHashTable& operator=(const PLDHashTable& aOther) = delete;
};

// Compute the hash code for a given key to be looked up, added, or removed.
// A hash code may have any PLDHashNumber value.
typedef PLDHashNumber (*PLDHashHashKey)(const void* aKey);

// Compare the key identifying |aEntry| with the provided |aKey| parameter.
// Return true if keys match, false otherwise.
typedef bool (*PLDHashMatchEntry)(const PLDHashEntryHdr* aEntry,
                                  const void* aKey);

// Copy the data starting at |aFrom| to the new entry storage at |aTo|. Do not
// add reference counts for any strong references in the entry, however, as
// this is a "move" operation: the old entry storage at |aFrom| will be freed
// without any reference-decrementing callback shortly.
typedef void (*PLDHashMoveEntry)(PLDHashTable* aTable,
                                 const PLDHashEntryHdr* aFrom,
                                 PLDHashEntryHdr* aTo);

// Clear the entry and drop any strong references it holds. This callback is
// invoked by Remove(), but only if the given key is found in the table.
typedef void (*PLDHashClearEntry)(PLDHashTable* aTable,
                                  PLDHashEntryHdr* aEntry);

// Initialize a new entry, apart from mKeyHash. This function is called when
// Add() finds no existing entry for the given key, and must add a new one. At
// that point, |aEntry->mKeyHash| is not set yet, to avoid claiming the last
// free entry in a severely overloaded table.
typedef void (*PLDHashInitEntry)(PLDHashEntryHdr* aEntry, const void* aKey);

// Finally, the "vtable" structure for PLDHashTable. The first four hooks
// must be provided by implementations; they're called unconditionally by the
// generic PLDHashTable.cpp code. Hooks after these may be null.
//
// Summary of allocation-related hook usage with C++ placement new emphasis:
//  initEntry           Call placement new using default key-based ctor.
//  moveEntry           Call placement new using copy ctor, run dtor on old
//                      entry storage.
//  clearEntry          Run dtor on entry.
//
// Note the reason why initEntry is optional: the default hooks (stubs) clear
// entry storage. On a successful table.Add(key), the returned entry pointer
// addresses an entry struct whose mKeyHash member has been set non-zero, but
// all other entry members are still clear (null). Add() callers can test such
// members to see whether the entry was newly created by the Add() call that
// just succeeded. If placement new or similar initialization is required,
// define an |initEntry| hook. Of course, the |clearEntry| hook must zero or
// null appropriately.
//
// XXX assumes 0 is null for pointer types.
struct PLDHashTableOps
{
  // Mandatory hooks. All implementations must provide these.
  PLDHashHashKey      hashKey;     // Computes the hash code for a key.
  PLDHashMatchEntry   matchEntry;  // Tests whether an entry matches a key.
  PLDHashMoveEntry    moveEntry;   // Moves an entry to new entry storage.
  PLDHashClearEntry   clearEntry;  // Clears an entry being removed.

  // Optional hooks start here. If null, these are not called.
  PLDHashInitEntry    initEntry;   // Initializes a newly added entry.
};

// A minimal entry is a subclass of PLDHashEntryHdr and has a void* key pointer.
// It is the entry type that the stub operations from PLDHashTable::StubOps()
// are documented to work with.
struct PLDHashEntryStub : public PLDHashEntryHdr
{
  // The entry's key, held as an opaque pointer-to-const.
  const void* key;
};

#endif /* PLDHashTable_h */