Bug 1440824 - Overhaul the atom infrastructure to support multiple subtables. r=froydnj
author Bobby Holley <bobbyholley@gmail.com>
Thu, 22 Feb 2018 14:02:48 -0800
changeset 405442 5d83e440bbb0d9c9039db6d19cced9c8952d8501
parent 405441 973afb5d4909e524dca5e8faabc455836fad84eb
child 405443 ef3ac3531192f937ba2e4758bf137503f0c5d916
reviewers froydnj
bugs 1440824
milestone 60.0a1
Bug 1440824 - Overhaul the atom infrastructure to support multiple subtables. r=froydnj MozReview-Commit-ID: E1bcchzuMOu
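The core technique here is lock striping: split one lock-protected hash table into N independently locked subtables, and pick a subtable from the key's hash. A minimal standalone sketch of the idea, using hypothetical names and std containers rather than anything from this patch:

    #include <array>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    // N mutex-protected submaps; the low bits of the hash pick the submap, so
    // threads touching different keys usually contend on different locks.
    template <size_t N>
    class StripedMap {
      static_assert((N & (N - 1)) == 0, "N must be a power of two");
      struct Stripe {
        std::mutex lock;
        std::unordered_map<std::string, int> map;
      };
      std::array<Stripe, N> mStripes;

    public:
      void Put(const std::string& aKey, int aValue) {
        size_t hash = std::hash<std::string>{}(aKey);
        Stripe& s = mStripes[hash & (N - 1)];  // low bits select the stripe
        std::lock_guard<std::mutex> guard(s.lock);
        s.map[aKey] = aValue;
      }
    };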
xpcom/ds/nsAtom.h
xpcom/ds/nsAtomTable.cpp
--- a/xpcom/ds/nsAtom.h
+++ b/xpcom/ds/nsAtom.h
@@ -74,17 +74,18 @@ public:
   // We can't use NS_INLINE_DECL_THREADSAFE_REFCOUNTING because the refcounting
   // of this type is special.
   MozExternalRefCountType AddRef();
   MozExternalRefCountType Release();
 
   typedef mozilla::TrueType HasThreadSafeRefCnt;
 
 private:
-  friend class nsAtomFriend;
+  friend class nsAtomTable;
+  friend class nsAtomSubTable;
   friend class nsHtml5AtomEntry;
 
   // Dynamic atom construction is done by |friend|s.
   nsAtom(AtomKind aKind, const nsAString& aString, uint32_t aHash);
 
 protected:
   nsAtom(const char16_t* aString, uint32_t aLength, uint32_t aHash);
 
@@ -116,17 +117,17 @@ public:
   MozExternalRefCountType AddRef() = delete;
   MozExternalRefCountType Release() = delete;
 
   already_AddRefed<nsAtom> ToAddRefed() {
     return already_AddRefed<nsAtom>(static_cast<nsAtom*>(this));
   }
 
 private:
-  friend class nsAtomFriend;
+  friend class nsAtomTable;
 
   // Construction is done entirely by |friend|s.
   nsStaticAtom(const char16_t* aString, uint32_t aLength, uint32_t aHash)
     : nsAtom(aString, aLength, aHash)
   {}
 };
 
 // The four forms of NS_Atomize (for use with |RefPtr<nsAtom>|) return the
@@ -148,16 +149,21 @@ already_AddRefed<nsAtom> NS_Atomize(cons
 
 // Find an atom that matches the given UTF-16 string. Never returns null.
 already_AddRefed<nsAtom> NS_Atomize(const nsAString& aUTF16String);
 
 // An optimized version of the method above for the main thread.
 already_AddRefed<nsAtom> NS_AtomizeMainThread(const nsAString& aUTF16String);
 
 // Return a count of the total number of atoms currently alive in the system.
+//
+// Note that the result is imprecise and racy if other threads are currently
+// operating on atoms. It's also slow, since it triggers a GC before counting.
+// Currently this function is only used in tests, which should probably remain
+// the case.
 nsrefcnt NS_GetNumberOfAtoms();
 
 // Return a pointer for a static atom for the string or null if there's no
 // static atom for this string.
 nsStaticAtom* NS_GetStaticAtom(const nsAString& aUTF16String);
 
 // Seal the static atom table.
 void NS_SealStaticAtomTable();
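A usage sketch for the entry points declared above (assuming Gecko's RefPtr and string literal helpers; not part of the patch):

    RefPtr<nsAtom> a = NS_Atomize(NS_LITERAL_STRING("div"));
    RefPtr<nsAtom> b = NS_AtomizeMainThread(NS_LITERAL_STRING("div"));  // main thread only
    MOZ_ASSERT(a == b);  // equal strings always intern to the same atom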
--- a/xpcom/ds/nsAtomTable.cpp
+++ b/xpcom/ds/nsAtomTable.cpp
@@ -36,47 +36,28 @@
 //   table, removing and deleting dynamic atoms with refcount zero. This allows
 //   us to avoid acquiring the atom table lock during normal refcounting.
 //
 // - Static: the atom itself is heap allocated, but it points to a static
 //   nsStringBuffer. |gAtomTable| effectively owns static atoms, because such
 //   atoms ignore all AddRef/Release calls, which ensures they stay alive until
 //   |gAtomTable| itself is destroyed whereupon they are explicitly deleted.
 //
-// Note that gAtomTable is used on multiple threads, and callers must acquire
-// gAtomTableLock before touching it.
+// Note that gAtomTable is used on multiple threads, and has internal
+// synchronization.
 
 using namespace mozilla;
 
 //----------------------------------------------------------------------
 
 enum class GCKind {
   RegularOperation,
   Shutdown,
 };
 
-// This class encapsulates the functions that need access to nsAtom's private
-// members.
-class nsAtomFriend
-{
-public:
-  static void RegisterStaticAtoms(const nsStaticAtomSetup* aSetup,
-                                  uint32_t aCount);
-
-  static void AtomTableClearEntry(PLDHashTable* aTable,
-                                  PLDHashEntryHdr* aEntry);
-
-  static void GCAtomTableLocked(const MutexAutoLock& aProofOfLock,
-                                GCKind aKind);
-
-  static already_AddRefed<nsAtom> Atomize(const nsACString& aUTF8String);
-  static already_AddRefed<nsAtom> Atomize(const nsAString& aUTF16String);
-  static already_AddRefed<nsAtom> AtomizeMainThread(const nsAString& aUTF16Str);
-};
-
 //----------------------------------------------------------------------
 
 // gUnusedAtomCount is incremented when an atom loses its last reference
 // (and thus turned into unused state), and decremented when an unused
 // atom gets a reference again. The atom table relies on this value to
 // schedule GC. This value can temporarily go below zero when multiple
 // threads are operating the same atom, so it has to be signed so that
 // we wouldn't use overflow value for comparison.
@@ -173,24 +154,16 @@ nsAtom::SizeOfIncludingThis(MallocSizeOf
   } else {
     MOZ_ASSERT(IsStaticAtom());
   }
   return n;
 }
 
 //----------------------------------------------------------------------
 
-/**
- * The shared hash table for atom lookups.
- *
- * Callers must hold gAtomTableLock before manipulating the table.
- */
-static PLDHashTable* gAtomTable;
-static Mutex* gAtomTableLock;
-
 struct AtomTableKey
 {
   AtomTableKey(const char16_t* aUTF16String, uint32_t aLength, uint32_t aHash)
     : mUTF16String(aUTF16String)
     , mUTF8String(nullptr)
     , mLength(aLength)
     , mHash(aHash)
   {
@@ -245,16 +218,70 @@ struct AtomTableEntry : public PLDHashEn
   // See the comment at the top of this file for more details.
   nsAtom* MOZ_NON_OWNING_REF mAtom;
 };
 
 #define RECENTLY_USED_MAIN_THREAD_ATOM_CACHE_SIZE 31
 static nsAtom*
   sRecentlyUsedMainThreadAtoms[RECENTLY_USED_MAIN_THREAD_ATOM_CACHE_SIZE] = {};
 
+// In order to reduce locking contention for concurrent atomization, we segment
+// the atom table into N subtables, each with a separate lock. If the hash
+// values we use to select the subtable are evenly distributed, this reduces the
+// probability of contention by a factor of N. See bug 1440824.
+//
+// NB: This is somewhat similar to the technique used by Java's ConcurrentHashMap.
+class nsAtomSubTable
+{
+  friend class nsAtomTable;
+  Mutex mLock;
+  PLDHashTable mTable;
+  nsAtomSubTable();
+  void GCLocked(GCKind aKind);
+  size_t SizeOfExcludingThisLocked(MallocSizeOf aMallocSizeOf);
+
+  AtomTableEntry* Add(AtomTableKey& aKey)
+  {
+    mLock.AssertCurrentThreadOwns();
+    return static_cast<AtomTableEntry*>(mTable.Add(&aKey)); // Infallible
+  }
+};
+
+// The outer atom table, which coordinates access to the inner array of
+// subtables.
+class nsAtomTable
+{
+public:
+  nsAtomSubTable& SelectSubTable(AtomTableKey& aKey);
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf);
+  void GC(GCKind aKind);
+  already_AddRefed<nsAtom> Atomize(const nsAString& aUTF16String);
+  already_AddRefed<nsAtom> Atomize(const nsACString& aUTF8String);
+  already_AddRefed<nsAtom> AtomizeMainThread(const nsAString& aUTF16String);
+  void RegisterStaticAtoms(const nsStaticAtomSetup* aSetup, uint32_t aCount);
+
+  // The result of this function may be imprecise if other threads are operating
+  // on atoms concurrently. It's also slow, since it triggers a GC before
+  // counting.
+  size_t RacySlowCount();
+
+  // This hash table op is a static member of this class so that it can take
+  // advantage of |friend| declarations.
+  static void AtomTableClearEntry(PLDHashTable* aTable, PLDHashEntryHdr* aEntry);
+
+  // XXXbholley: We enable multiple subtables in the next patch.
+  const static size_t kNumSubTables = 1; // Must be power of two.
+
+private:
+  nsAtomSubTable mSubTables[kNumSubTables];
+};
+
+// Static singleton instance for the atom table.
+static nsAtomTable* gAtomTable;
+
 static PLDHashNumber
 AtomTableGetHash(const void* aKey)
 {
   const AtomTableKey* k = static_cast<const AtomTableKey*>(aKey);
   return k->mHash;
 }
 
 static bool
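The nsAtomTable/nsAtomSubTable classes above establish the locking pattern the rest of this file follows: compute the hash once while building the key, select a subtable from it, lock only that subtable, then perform an infallible add. Condensed, the shape (which appears verbatim in the Atomize functions below) is:

    uint32_t hash;
    AtomTableKey key(aString, aLength, &hash);  // computes hash as a side effect
    nsAtomSubTable& table = gAtomTable->SelectSubTable(key);
    MutexAutoLock lock(table.mLock);
    AtomTableEntry* he = table.Add(key);        // infallible; he->mAtom may be null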
@@ -269,17 +296,17 @@ AtomTableMatchKey(const PLDHashEntryHdr*
                                                k->mUTF8String + k->mLength),
                          nsDependentAtomString(he->mAtom)) == 0;
   }
 
   return he->mAtom->Equals(k->mUTF16String, k->mLength);
 }
 
 void
-nsAtomFriend::AtomTableClearEntry(PLDHashTable* aTable, PLDHashEntryHdr* aEntry)
+nsAtomTable::AtomTableClearEntry(PLDHashTable* aTable, PLDHashEntryHdr* aEntry)
 {
   auto entry = static_cast<AtomTableEntry*>(aEntry);
   nsAtom* atom = entry->mAtom;
   if (atom->IsStaticAtom()) {
     // This case -- when the entry being cleared holds a static atom -- only
     // occurs when gAtomTable is destroyed, whereupon all static atoms within it
     // must be explicitly deleted.
     delete atom;
@@ -291,45 +318,142 @@ AtomTableInitEntry(PLDHashEntryHdr* aEnt
 {
   static_cast<AtomTableEntry*>(aEntry)->mAtom = nullptr;
 }
 
 static const PLDHashTableOps AtomTableOps = {
   AtomTableGetHash,
   AtomTableMatchKey,
   PLDHashTable::MoveEntryStub,
-  nsAtomFriend::AtomTableClearEntry,
+  nsAtomTable::AtomTableClearEntry,
   AtomTableInitEntry
 };
 
 // The atom table very quickly gets 10,000+ entries in it (or even 100,000+).
-// But choosing the best initial length has some subtleties: we add ~2700
-// static atoms to the table at start-up, and then we start adding and removing
-// dynamic atoms. If we make the table too big to start with, when the first
-// dynamic atom gets removed the load factor will be < 25% and so we will
-// shrink it to 4096 entries.
+// But choosing the best initial subtable length has some subtleties: we add
+// ~2700 static atoms at start-up, and then we start adding and removing
+// dynamic atoms. If we make the tables too big to start with, when the first
+// dynamic atom gets removed from a given table the load factor will be < 25%
+// and we will shrink it.
+//
+// So we first make the simplifying assumption that the atoms are more or less
+// evenly-distributed across the subtables (which is the case empirically).
+// Then, we take the total atom count when the first dynamic atom is removed
+// (~2700), divide that across the N subtables, and choose the largest
+// capacity that will keep each subtable > 25% full with that count.
 //
-// By choosing an initial length of 4096, we get an initial capacity of 8192.
-// That's the biggest initial capacity that will let us be > 25% full when the
-// first dynamic atom is removed (when the count is ~2700), thus avoiding any
-// shrinking.
-#define ATOM_HASHTABLE_INITIAL_LENGTH  4096
+// So we want an initial subtable capacity less than (2700 / N) * 4 = 10800 / N.
+// Rounding down to the nearest power of two gives us 8192 / N. Since the
+// capacity is double the initial length, we end up with an initial length of
+// (4096 / N) per subtable.
+#define INITIAL_SUBTABLE_LENGTH (4096 / nsAtomTable::kNumSubTables)
 
-void
-nsAtomFriend::GCAtomTableLocked(const MutexAutoLock& aProofOfLock, GCKind aKind)
+nsAtomSubTable&
+nsAtomTable::SelectSubTable(AtomTableKey& aKey)
+{
+  // There are a few considerations around how we select subtables.
+  //
+  // First, we want entries to be evenly distributed across the subtables. This
+  // can be achieved by using any bits in the hash key, assuming the key itself
+  // is evenly-distributed. Empirical measurements indicate that this method
+  // produces a roughly-even distribution across subtables.
+  //
+  // Second, we want to use the hash bits that are least likely to influence an
+  // entry's position within the subtable. If we used the exact same bits used
+  // by the subtables, then each subtable would compute the same position for
+  // every entry it observes, leading to pessimal performance. In this case,
+  // we're using PLDHashTable, whose primary hash function uses the N leftmost
+  // bits of the hash value (where N is the log2 capacity of the table). This
+  // means we should prefer the rightmost bits here.
+  //
+  // Note that the below is equivalent to mHash % kNumSubTables, a replacement
+  // an optimizing compiler should make anyway, but let's avoid any doubt.
+  static_assert((kNumSubTables & (kNumSubTables - 1)) == 0, "must be power of two");
+  return mSubTables[aKey.mHash & (kNumSubTables - 1)];
+}
+
+size_t
+nsAtomTable::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  size_t size = aMallocSizeOf(this);
+  for (auto& table : mSubTables) {
+    MutexAutoLock lock(table.mLock);
+    size += table.SizeOfExcludingThisLocked(aMallocSizeOf);
+  }
+
+  return size;
+}
+
+void nsAtomTable::GC(GCKind aKind)
 {
   MOZ_ASSERT(NS_IsMainThread());
   for (uint32_t i = 0; i < RECENTLY_USED_MAIN_THREAD_ATOM_CACHE_SIZE; ++i) {
     sRecentlyUsedMainThreadAtoms[i] = nullptr;
   }
 
+  // Note that this is effectively an incremental GC, since only one subtable
+  // is locked at a time.
+  for (auto& table: mSubTables) {
+    MutexAutoLock lock(table.mLock);
+    table.GCLocked(aKind);
+  }
+
+  // We would like to assert that gUnusedAtomCount matches the number of
+  // atoms we removed from the subtables. However, there are two problems
+  // with this:
+  // * We have multiple subtables, each with its own lock. For optimal
+  //   performance we only want to hold one lock at a time, but this means
+  //   that atoms can be added and removed between GC slices.
+  // * Even if we held all the locks and performed all GC slices atomically,
+  //   the locks are not acquired for AddRef() and Release() calls. This means
+  //   we might see a gUnusedAtomCount value in between, say, AddRef()
+  //   incrementing mRefCnt and it decrementing gUnusedAtomCount.
+  //
+  // So, we don't bother asserting that there are no unused atoms at the end of
+  // a regular GC. But we can (and do) assert this just after the last GC at
+  // shutdown.
+  //
+  // Note that, barring refcounting bugs, an atom can only go from a zero
+  // refcount to a non-zero refcount while the relevant subtable's lock is
+  // held, so we won't try to resurrect a zero refcount atom while trying to
+  // delete it.
+
+  MOZ_ASSERT_IF(aKind == GCKind::Shutdown, gUnusedAtomCount == 0);
+}
+
+size_t
+nsAtomTable::RacySlowCount()
+{
+  // Trigger a GC so that the result is deterministic modulo other threads.
+  GC(GCKind::RegularOperation);
+  size_t count = 0;
+  for (auto& table: mSubTables) {
+    MutexAutoLock lock(table.mLock);
+    count += table.mTable.EntryCount();
+  }
+
+  return count;
+}
+
+nsAtomSubTable::nsAtomSubTable()
+  : mLock("Atom Sub-Table Lock")
+  , mTable(&AtomTableOps, sizeof(AtomTableEntry), INITIAL_SUBTABLE_LENGTH)
+{
+}
+
+void
+nsAtomSubTable::GCLocked(GCKind aKind)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  mLock.AssertCurrentThreadOwns();
+
   int32_t removedCount = 0; // Use a non-atomic temporary for cheaper increments.
   nsAutoCString nonZeroRefcountAtoms;
   uint32_t nonZeroRefcountAtomsCount = 0;
-  for (auto i = gAtomTable->Iter(); !i.Done(); i.Next()) {
+  for (auto i = mTable.Iter(); !i.Done(); i.Next()) {
     auto entry = static_cast<AtomTableEntry*>(i.Get());
     if (entry->mAtom->IsStaticAtom()) {
       continue;
     }
 
     nsAtom* atom = entry->mAtom;
     if (atom->mRefCnt == 0) {
       i.Remove();
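The INITIAL_SUBTABLE_LENGTH arithmetic above can be restated as compile-time checks; an illustrative sketch (constants taken from the comment, not code in the patch):

    constexpr size_t kAtomsAtFirstRemoval = 2700;  // approximate, per the comment
    constexpr size_t kN = 1;                       // kNumSubTables in this patch
    constexpr size_t kLength = 4096 / kN;          // INITIAL_SUBTABLE_LENGTH
    constexpr size_t kCapacity = 2 * kLength;      // PLDHashTable doubles the length
    // Each subtable holds ~kAtomsAtFirstRemoval / kN entries, which must keep
    // the load factor above 25% so the subtables never shrink at startup:
    static_assert(kAtomsAtFirstRemoval / kN > kCapacity / 4,
                  "initial subtable capacity would trigger a shrink");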
@@ -357,41 +481,25 @@ nsAtomFriend::GCAtomTableLocked(const Mu
 
   }
   if (nonZeroRefcountAtomsCount) {
     nsPrintfCString msg("%d dynamic atom(s) with non-zero refcount: %s",
                         nonZeroRefcountAtomsCount, nonZeroRefcountAtoms.get());
     NS_ASSERTION(nonZeroRefcountAtomsCount == 0, msg.get());
   }
 
-  // We would like to assert that gUnusedAtomCount matches the number of atoms
-  // we found in the table which we removed. During the course of this function,
-  // the atom table is locked, but this lock is not acquired for AddRef() and
-  // Release() calls. This means we might see a gUnusedAtomCount value in
-  // between, say, AddRef() incrementing mRefCnt and it decrementing
-  // gUnusedAtomCount. So, we don't bother asserting that there are no unused
-  // atoms at the end of a regular GC. But we can (and do) assert thist just
-  // after the last GC at shutdown.
-  //
-  // Note that, barring refcounting bugs, an atom can only go from a zero
-  // refcount to a non-zero refcount while the atom table lock is held, so
-  // so we won't try to resurrect a zero refcount atom while trying to delete
-  // it.
-
-  MOZ_ASSERT_IF(aKind == GCKind::Shutdown, removedCount == gUnusedAtomCount);
-
   gUnusedAtomCount -= removedCount;
 }
 
 static void
 GCAtomTable()
 {
+  MOZ_ASSERT(gAtomTable);
   if (NS_IsMainThread()) {
-    MutexAutoLock lock(*gAtomTableLock);
-    nsAtomFriend::GCAtomTableLocked(lock, GCKind::RegularOperation);
+    gAtomTable->GC(GCKind::RegularOperation);
   }
 }
 
 MozExternalRefCountType
 nsAtom::AddRef()
 {
   MOZ_ASSERT(!IsHTML5Atom(), "Attempt to AddRef an HTML5 atom");
   if (!IsDynamicAtom()) {
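For context on the gUnusedAtomCount bookkeeping discussed in the GC above, the Release side of the scheme looks roughly like this (a simplified sketch; the real implementation also handles static and HTML5 atoms and schedules the GC once enough unused atoms accumulate):

    MozExternalRefCountType
    nsAtom::Release()  // sketch only
    {
      MOZ_ASSERT(mRefCnt > 0);
      uint32_t count = --mRefCnt;  // atomic decrement; no table lock held
      if (count == 0) {
        // The atom stays in its subtable as a zero-refcount entry; a later
        // GC slice locks that subtable and removes it.
        ++gUnusedAtomCount;
      }
      return count;
    }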
@@ -494,109 +602,100 @@ NS_STATIC_ATOM_BUFFER(empty, "")
 static const nsStaticAtomSetup sDefaultAtomSetup[] = {
   NS_STATIC_ATOM_SETUP(DefaultAtoms, empty)
 };
 
 void
 NS_InitAtomTable()
 {
   MOZ_ASSERT(!gAtomTable);
-  gAtomTable = new PLDHashTable(&AtomTableOps, sizeof(AtomTableEntry),
-                                ATOM_HASHTABLE_INITIAL_LENGTH);
-  gAtomTableLock = new Mutex("Atom Table Lock");
+  gAtomTable = new nsAtomTable();
 
   // Bug 1340710 has caused us to generate an empty atom at arbitrary times
   // after startup.  If we end up creating one before nsGkAtoms::_empty is
   // registered, we get an assertion about transmuting a dynamic atom into a
   // static atom.  In order to avoid that, we register an empty string static
   // atom as soon as we initialize the atom table to guarantee that the empty
   // string atom will always be static.
   NS_RegisterStaticAtoms(sDefaultAtomSetup);
 }
 
 void
 NS_ShutdownAtomTable()
 {
+  MOZ_ASSERT(NS_IsMainThread());
+  MOZ_ASSERT(gAtomTable);
   delete gStaticAtomTable;
   gStaticAtomTable = nullptr;
 
 #ifdef NS_FREE_PERMANENT_DATA
   // Do a final GC to satisfy leak checking. We skip this step in release
   // builds.
-  {
-    MutexAutoLock lock(*gAtomTableLock);
-    nsAtomFriend::GCAtomTableLocked(lock, GCKind::Shutdown);
-  }
+  gAtomTable->GC(GCKind::Shutdown);
 #endif
 
   delete gAtomTable;
   gAtomTable = nullptr;
-  delete gAtomTableLock;
-  gAtomTableLock = nullptr;
 }
 
 void
 NS_SizeOfAtomTablesIncludingThis(MallocSizeOf aMallocSizeOf,
                                  size_t* aMain, size_t* aStatic)
 {
-  MutexAutoLock lock(*gAtomTableLock);
-  *aMain = gAtomTable->ShallowSizeOfIncludingThis(aMallocSizeOf);
-  for (auto iter = gAtomTable->Iter(); !iter.Done(); iter.Next()) {
-    auto entry = static_cast<AtomTableEntry*>(iter.Get());
-    *aMain += entry->mAtom->SizeOfIncludingThis(aMallocSizeOf);
-  }
+  MOZ_ASSERT(NS_IsMainThread());
+  MOZ_ASSERT(gAtomTable);
+  *aMain = gAtomTable->SizeOfIncludingThis(aMallocSizeOf);
 
   // The atoms pointed to by gStaticAtomTable are also pointed to by gAtomTable,
-  // and they're measured by the loop above. So no need to measure them here.
+  // and they're measured by the call above. So no need to measure them here.
   *aStatic = gStaticAtomTable
            ? gStaticAtomTable->ShallowSizeOfIncludingThis(aMallocSizeOf)
            : 0;
 }
 
-static inline AtomTableEntry*
-GetAtomHashEntry(const char* aString, uint32_t aLength, uint32_t* aHashOut)
+size_t
+nsAtomSubTable::SizeOfExcludingThisLocked(MallocSizeOf aMallocSizeOf)
 {
-  gAtomTableLock->AssertCurrentThreadOwns();
-  AtomTableKey key(aString, aLength, aHashOut);
-  // This is an infallible add.
-  return static_cast<AtomTableEntry*>(gAtomTable->Add(&key));
-}
+  mLock.AssertCurrentThreadOwns();
+  size_t size = mTable.ShallowSizeOfExcludingThis(aMallocSizeOf);
+  for (auto iter = mTable.Iter(); !iter.Done(); iter.Next()) {
+    auto entry = static_cast<AtomTableEntry*>(iter.Get());
+    size += entry->mAtom->SizeOfIncludingThis(aMallocSizeOf);
+  }
 
-static inline AtomTableEntry*
-GetAtomHashEntry(const char16_t* aString, uint32_t aLength, uint32_t* aHashOut)
-{
-  gAtomTableLock->AssertCurrentThreadOwns();
-  AtomTableKey key(aString, aLength, aHashOut);
-  // This is an infallible add.
-  return static_cast<AtomTableEntry*>(gAtomTable->Add(&key));
+  return size;
 }
 
 void
-nsAtomFriend::RegisterStaticAtoms(const nsStaticAtomSetup* aSetup,
-                                  uint32_t aCount)
+nsAtomTable::RegisterStaticAtoms(const nsStaticAtomSetup* aSetup,
+                                 uint32_t aCount)
 {
-  MutexAutoLock lock(*gAtomTableLock);
-
+  // Note: gStaticAtomTable is main-thread-only until the table is sealed,
+  // after which it is immutable. So there is no lock protecting it.
+  MOZ_ASSERT(NS_IsMainThread());
   MOZ_RELEASE_ASSERT(!gStaticAtomTableSealed,
                      "Atom table has already been sealed!");
 
   if (!gStaticAtomTable) {
     gStaticAtomTable = new StaticAtomTable();
   }
 
   for (uint32_t i = 0; i < aCount; ++i) {
     const char16_t* string = aSetup[i].mString;
     nsStaticAtom** atomp = aSetup[i].mAtom;
 
     MOZ_ASSERT(nsCRT::IsAscii(string));
 
     uint32_t stringLen = NS_strlen(string);
 
     uint32_t hash;
-    AtomTableEntry* he = GetAtomHashEntry(string, stringLen, &hash);
+    AtomTableKey key(string, stringLen, &hash);
+    nsAtomSubTable& table = SelectSubTable(key);
+    MutexAutoLock lock(table.mLock);
+    AtomTableEntry* he = table.Add(key);
 
     nsStaticAtom* atom;
     if (he->mAtom) {
       // Disallow creating a dynamic atom, and then later, while the
       // dynamic atom is still alive, registering that same atom as a
       // static atom.  It causes subtle bugs, and we're programming in
       // C++ here, not Smalltalk.
       if (!he->mAtom->IsStaticAtom()) {
@@ -619,33 +718,35 @@ nsAtomFriend::RegisterStaticAtoms(const 
       entry->mAtom = atom;
     }
   }
 }
 
 void
 RegisterStaticAtoms(const nsStaticAtomSetup* aSetup, uint32_t aCount)
 {
-  nsAtomFriend::RegisterStaticAtoms(aSetup, aCount);
+  MOZ_ASSERT(gAtomTable);
+  gAtomTable->RegisterStaticAtoms(aSetup, aCount);
 }
 
 already_AddRefed<nsAtom>
 NS_Atomize(const char* aUTF8String)
 {
-  return nsAtomFriend::Atomize(nsDependentCString(aUTF8String));
+  MOZ_ASSERT(gAtomTable);
+  return gAtomTable->Atomize(nsDependentCString(aUTF8String));
 }
 
 already_AddRefed<nsAtom>
-nsAtomFriend::Atomize(const nsACString& aUTF8String)
+nsAtomTable::Atomize(const nsACString& aUTF8String)
 {
-  MutexAutoLock lock(*gAtomTableLock);
   uint32_t hash;
-  AtomTableEntry* he = GetAtomHashEntry(aUTF8String.Data(),
-                                        aUTF8String.Length(),
-                                        &hash);
+  AtomTableKey key(aUTF8String.Data(), aUTF8String.Length(), &hash);
+  nsAtomSubTable& table = SelectSubTable(key);
+  MutexAutoLock lock(table.mLock);
+  AtomTableEntry* he = table.Add(key);
 
   if (he->mAtom) {
     RefPtr<nsAtom> atom = he->mAtom;
 
     return atom.forget();
   }
 
   // This results in an extra addref/release of the nsStringBuffer.
@@ -659,33 +760,35 @@ nsAtomFriend::Atomize(const nsACString& 
   he->mAtom = atom;
 
   return atom.forget();
 }
 
 already_AddRefed<nsAtom>
 NS_Atomize(const nsACString& aUTF8String)
 {
-  return nsAtomFriend::Atomize(aUTF8String);
+  MOZ_ASSERT(gAtomTable);
+  return gAtomTable->Atomize(aUTF8String);
 }
 
 already_AddRefed<nsAtom>
 NS_Atomize(const char16_t* aUTF16String)
 {
-  return nsAtomFriend::Atomize(nsDependentString(aUTF16String));
+  MOZ_ASSERT(gAtomTable);
+  return gAtomTable->Atomize(nsDependentString(aUTF16String));
 }
 
 already_AddRefed<nsAtom>
-nsAtomFriend::Atomize(const nsAString& aUTF16String)
+nsAtomTable::Atomize(const nsAString& aUTF16String)
 {
-  MutexAutoLock lock(*gAtomTableLock);
   uint32_t hash;
-  AtomTableEntry* he = GetAtomHashEntry(aUTF16String.Data(),
-                                        aUTF16String.Length(),
-                                        &hash);
+  AtomTableKey key(aUTF16String.Data(), aUTF16String.Length(), &hash);
+  nsAtomSubTable& table = SelectSubTable(key);
+  MutexAutoLock lock(table.mLock);
+  AtomTableEntry* he = table.Add(key);
 
   if (he->mAtom) {
     RefPtr<nsAtom> atom = he->mAtom;
 
     return atom.forget();
   }
 
   RefPtr<nsAtom> atom =
@@ -693,21 +796,22 @@ nsAtomFriend::Atomize(const nsAString& a
   he->mAtom = atom;
 
   return atom.forget();
 }
 
 already_AddRefed<nsAtom>
 NS_Atomize(const nsAString& aUTF16String)
 {
-  return nsAtomFriend::Atomize(aUTF16String);
+  MOZ_ASSERT(gAtomTable);
+  return gAtomTable->Atomize(aUTF16String);
 }
 
 already_AddRefed<nsAtom>
-nsAtomFriend::AtomizeMainThread(const nsAString& aUTF16String)
+nsAtomTable::AtomizeMainThread(const nsAString& aUTF16String)
 {
   MOZ_ASSERT(NS_IsMainThread());
   RefPtr<nsAtom> retVal;
   uint32_t hash;
   AtomTableKey key(aUTF16String.Data(), aUTF16String.Length(), &hash);
   uint32_t index = hash % RECENTLY_USED_MAIN_THREAD_ATOM_CACHE_SIZE;
   nsAtom* atom = sRecentlyUsedMainThreadAtoms[index];
   if (atom) {
@@ -715,18 +819,19 @@ nsAtomFriend::AtomizeMainThread(const ns
     if (length == key.mLength &&
         (memcmp(atom->GetUTF16String(),
                 key.mUTF16String, length * sizeof(char16_t)) == 0)) {
       retVal = atom;
       return retVal.forget();
     }
   }
 
-  MutexAutoLock lock(*gAtomTableLock);
-  AtomTableEntry* he = static_cast<AtomTableEntry*>(gAtomTable->Add(&key));
+  nsAtomSubTable& table = SelectSubTable(key);
+  MutexAutoLock lock(table.mLock);
+  AtomTableEntry* he = table.Add(key);
 
   if (he->mAtom) {
     retVal = he->mAtom;
   } else {
     RefPtr<nsAtom> newAtom = dont_AddRef(
       new nsAtom(nsAtom::AtomKind::DynamicAtom, aUTF16String, hash));
     he->mAtom = newAtom;
     retVal = newAtom.forget();
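The sRecentlyUsedMainThreadAtoms array manipulated here is a direct-mapped, main-thread-only cache: hash % 31 picks a single slot, a hit returns without taking any subtable lock, and a miss overwrites the slot after the locked lookup. The same shape in isolation (simplified, with a hypothetical helper; the real code above compares the full string, since different atoms can map to one slot):

    static const uint32_t kCacheSize = 31;   // prime, so hash % 31 spreads slots
    static nsAtom* sCache[kCacheSize] = {};  // main-thread only: no locking

    nsAtom* CacheLookup(uint32_t aHash)      // hypothetical helper
    {
      // The caller must still compare the stored atom's string against the
      // key, because unrelated atoms can collide into the same slot.
      return sCache[aHash % kCacheSize];
    }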
@@ -734,25 +839,25 @@ nsAtomFriend::AtomizeMainThread(const ns
 
   sRecentlyUsedMainThreadAtoms[index] = he->mAtom;
   return retVal.forget();
 }
 
 already_AddRefed<nsAtom>
 NS_AtomizeMainThread(const nsAString& aUTF16String)
 {
-  return nsAtomFriend::AtomizeMainThread(aUTF16String);
+  MOZ_ASSERT(gAtomTable);
+  return gAtomTable->AtomizeMainThread(aUTF16String);
 }
 
 nsrefcnt
 NS_GetNumberOfAtoms(void)
 {
-  GCAtomTable(); // Trigger a GC so we return a deterministic result.
-  MutexAutoLock lock(*gAtomTableLock);
-  return gAtomTable->EntryCount();
+  MOZ_ASSERT(gAtomTable);
+  return gAtomTable->RacySlowCount();
 }
 
 int32_t
 NS_GetUnusedAtomCount(void)
 {
   return gUnusedAtomCount;
 }
 
@@ -763,10 +868,11 @@ NS_GetStaticAtom(const nsAString& aUTF16
   NS_PRECONDITION(gStaticAtomTableSealed, "Static atom table not sealed yet.");
   StaticAtomEntry* entry = gStaticAtomTable->GetEntry(aUTF16String);
   return entry ? entry->mAtom : nullptr;
 }
 
 void
 NS_SealStaticAtomTable()
 {
+  MOZ_ASSERT(NS_IsMainThread());
   gStaticAtomTableSealed = true;
 }