Bug 1483121 - Generate static atom hash in StaticAtoms.py. r=njn,emilio
author: Cameron McCormack <cam@mcc.id.au>
Wed, 15 Aug 2018 15:52:42 +1000
changeset 486780 e6a44943b17774c94e3c22e85e5260b0a62121ac
parent 486779 8d6f6eef3c76805d95cde66f354f150b75fbe9e4
child 486781 0d04a3f89940536d56cd5415a78316f0960e9f0a
push id: 9719
push user: ffxbld-merge
push date: Fri, 24 Aug 2018 17:49:46 +0000
treeherder: mozilla-beta@719ec98fba77 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: njn, emilio
bugs: 1483121
milestone: 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1483121 - Generate static atom hash in StaticAtoms.py. r=njn,emilio
Summary: Depends On D3286
Reviewers: njn!, emilio!
Tags: #secure-revision
Bug #: 1483121
Differential Revision: https://phabricator.services.mozilla.com/D3295
servo/components/style/gecko/regen_atoms.py
xpcom/ds/Atom.py
xpcom/ds/StaticAtoms.py
xpcom/ds/nsAtom.h
xpcom/ds/nsAtomTable.cpp
xpcom/ds/nsGkAtoms.cpp
xpcom/ds/nsGkAtoms.h
--- a/servo/components/style/gecko/regen_atoms.py
+++ b/servo/components/style/gecko/regen_atoms.py
@@ -19,17 +19,18 @@ PRELUDE = """
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /* Autogenerated file created by components/style/gecko/regen_atoms.py, DO NOT EDIT DIRECTLY */
 """[1:]   # NOQA: E501
 
 
-PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*([^,]*),\s*([^)]*)\)',
+# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, nsStaticAtom, PseudoElementAtom)`.
+PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*([^,]*),\s*([^)]*)\)',
                      re.MULTILINE)
 FILE = "include/nsGkAtomList.h"
 CLASS = "nsGkAtoms"
 
 
 def gnu_symbolify(ident):
     return "_ZN{}{}{}{}E".format(len(CLASS), CLASS, len(ident), ident)
 
@@ -47,20 +48,21 @@ def msvc32_symbolify(ident, ty):
 def map_atom(ident):
     if ident in {"box", "loop", "match", "mod", "ref",
                  "self", "type", "use", "where", "in"}:
         return ident + "_"
     return ident
 
 
 class Atom:
-    def __init__(self, ident, value, ty, atom_type):
+    def __init__(self, ident, value, hash, ty, atom_type):
         self.ident = "{}_{}".format(CLASS, ident)
         self.original_ident = ident
         self.value = value
+        self.hash = hash
         # The Gecko type: "nsStaticAtom", "nsICSSPseudoElement", or "nsIAnonBoxPseudo"
         self.ty = ty
         # The type of atom: "Atom", "PseudoElement", "NonInheritingAnonBox",
         # or "InheritingAnonBox"
         self.atom_type = atom_type
         if self.is_pseudo() or self.is_anon_box():
             self.pseudo_ident = (ident.split("_", 1))[1]
         if self.is_anon_box():
@@ -99,17 +101,18 @@ class Atom:
 
 def collect_atoms(objdir):
     atoms = []
     path = os.path.abspath(os.path.join(objdir, FILE))
     print("cargo:rerun-if-changed={}".format(path))
     with open(path) as f:
         content = f.read()
         for result in PATTERN.finditer(content):
-            atoms.append(Atom(result.group(1), result.group(2), result.group(3), result.group(4)))
+            atoms.append(Atom(result.group(1), result.group(2), result.group(3),
+                              result.group(4), result.group(5)))
     return atoms
 
 
 class FileAvoidWrite(BytesIO):
     """File-like object that buffers output and only writes if content changed."""
     def __init__(self, filename):
         BytesIO.__init__(self)
         self.name = filename
--- a/xpcom/ds/Atom.py
+++ b/xpcom/ds/Atom.py
@@ -4,16 +4,17 @@
 
 
 class Atom():
     def __init__(self, ident, string, ty="nsStaticAtom"):
         self.ident = ident
         self.string = string
         self.ty = ty
         self.atom_type = self.__class__.__name__
+        self.hash = hash_string(string)
 
 
 class PseudoElementAtom(Atom):
     def __init__(self, ident, string):
         Atom.__init__(self, ident, string, ty="nsICSSPseudoElement")
 
 
 class AnonBoxAtom(Atom):
@@ -24,8 +25,30 @@ class AnonBoxAtom(Atom):
 class NonInheritingAnonBoxAtom(AnonBoxAtom):
     def __init__(self, ident, string):
         AnonBoxAtom.__init__(self, ident, string)
 
 
 class InheritingAnonBoxAtom(AnonBoxAtom):
     def __init__(self, ident, string):
         AnonBoxAtom.__init__(self, ident, string)
+
+
+GOLDEN_RATIO_U32 = 0x9E3779B9
+
+
+def rotate_left_5(value):
+    return ((value << 5) | (value >> 27)) & 0xFFFFFFFF
+
+
+def wrapping_multiply(x, y):
+    return (x * y) & 0xFFFFFFFF
+
+
+# Calculate the precomputed hash of the static atom. This is a port of
+# mozilla::HashString(const char16_t*), which is what we use for atomizing
+# strings. An assertion in nsAtomTable::RegisterStaticAtoms ensures that
+# the value we compute here matches what HashString() would produce.
+def hash_string(s):
+    h = 0
+    for c in s:
+        h = wrapping_multiply(GOLDEN_RATIO_U32, rotate_left_5(h) ^ ord(c))
+    return h
--- a/xpcom/ds/StaticAtoms.py
+++ b/xpcom/ds/StaticAtoms.py
@@ -2349,19 +2349,19 @@ def verify():
 
 
 def generate_nsgkatomlist_h(output, *ignore):
     verify()
     output.write("/* THIS FILE IS AUTOGENERATED BY StaticAtoms.py.  DO NOT EDIT */\n\n"
                  "#ifdef small\n"
                  "#undef small\n"
                  "#endif\n\n"
-                 "// GK_ATOM(identifier, string, gecko_type, atom_type)\n" +
-                 "".join(["GK_ATOM(%s, \"%s\", %s, %s)\n" %
-                            (a.ident, a.string, a.ty, a.atom_type)
+                 "// GK_ATOM(identifier, string, hash, gecko_type, atom_type)\n" +
+                 "".join(["GK_ATOM(%s, \"%s\", 0x%08x, %s, %s)\n" %
+                            (a.ident, a.string, a.hash, a.ty, a.atom_type)
                           for a in STATIC_ATOMS]))
 
 
 def generate_nsgkatomconsts_h(output, *ignore):
     pseudo_index = None
     anon_box_index = None
     pseudo_count = 0
     anon_box_count = 0
--- a/xpcom/ds/nsAtom.h
+++ b/xpcom/ds/nsAtom.h
@@ -4,17 +4,16 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef nsAtom_h
 #define nsAtom_h
 
 #include "nsISupportsImpl.h"
 #include "nsString.h"
-#include "mozilla/HashFunctions.h"
 #include "mozilla/UniquePtr.h"
 
 namespace mozilla {
 struct AtomsSizes;
 }
 
 class nsStaticAtom;
 class nsDynamicAtom;
@@ -99,20 +98,20 @@ public:
   // of this type is special.
   MozExternalRefCountType AddRef();
   MozExternalRefCountType Release();
 
   typedef mozilla::TrueType HasThreadSafeRefCnt;
 
 protected:
   // Used by nsStaticAtom.
-  constexpr nsAtom(const char16_t* aStr, uint32_t aLength)
+  constexpr nsAtom(const char16_t* aStr, uint32_t aLength, uint32_t aHash)
     : mLength(aLength)
     , mKind(static_cast<uint32_t>(nsAtom::AtomKind::Static))
-    , mHash(mozilla::HashString(aStr))
+    , mHash(aHash)
   {}
 
   // Used by nsDynamicAtom.
   nsAtom(AtomKind aKind, const nsAString& aString, uint32_t aHash)
     : mLength(aString.Length())
     , mKind(static_cast<uint32_t>(aKind))
     , mHash(aHash)
   {
@@ -133,19 +132,24 @@ protected:
 class nsStaticAtom : public nsAtom
 {
 public:
   // These are deleted so it's impossible to RefPtr<nsStaticAtom>. Raw
   // nsStaticAtom pointers should be used instead.
   MozExternalRefCountType AddRef() = delete;
   MozExternalRefCountType Release() = delete;
 
+  // The static atom's precomputed hash value is an argument here, but it
+  // must be the same as would be computed by mozilla::HashString(aStr),
+  // which is what we use when atomizing strings. We compute this hash in
+  // Atom.py and assert in nsAtomTable::RegisterStaticAtoms that the two
+  // hashes match.
   constexpr nsStaticAtom(const char16_t* aStr, uint32_t aLength,
-                         uint32_t aStringOffset)
-    : nsAtom(aStr, aLength)
+                         uint32_t aHash, uint32_t aStringOffset)
+    : nsAtom(aStr, aLength, aHash)
     , mStringOffset(aStringOffset)
   {}
 
   const char16_t* String() const
   {
     return reinterpret_cast<const char16_t*>(uintptr_t(this) - mStringOffset);
   }
 
--- a/xpcom/ds/nsAtomTable.cpp
+++ b/xpcom/ds/nsAtomTable.cpp
@@ -642,16 +642,21 @@ nsAtomTable::RegisterStaticAtoms(const n
   MOZ_ASSERT(NS_IsMainThread());
   MOZ_RELEASE_ASSERT(!gStaticAtomsDone, "Static atom insertion is finished!");
 
   for (uint32_t i = 0; i < aAtomsLen; ++i) {
     const nsStaticAtom* atom = &aAtoms[i];
     MOZ_ASSERT(nsCRT::IsAscii(atom->String()));
     MOZ_ASSERT(NS_strlen(atom->String()) == atom->GetLength());
 
+    // This assertion ensures the static atom's precomputed hash value matches
+    // what would be computed by mozilla::HashString(aStr), which is what we use
+    // when atomizing strings. We compute this hash in Atom.py.
+    MOZ_ASSERT(HashString(atom->String()) == atom->hash());
+
     AtomTableKey key(atom);
     nsAtomSubTable& table = SelectSubTable(key);
     MutexAutoLock lock(table.mLock);
     AtomTableEntry* he = table.Add(key);
 
     if (he->mAtom) {
       // There are two ways we could get here.
       // - Register two static atoms with the same string.
--- a/xpcom/ds/nsGkAtoms.cpp
+++ b/xpcom/ds/nsGkAtoms.cpp
@@ -8,48 +8,47 @@
 
 // Register an array of static atoms with the atom table.
 void
 NS_RegisterStaticAtoms(const nsStaticAtom* aAtoms, size_t aAtomsLen);
 
 namespace mozilla {
 namespace detail {
 
-MOZ_PUSH_DISABLE_INTEGRAL_CONSTANT_OVERFLOW_WARNING
 extern constexpr GkAtoms gGkAtoms = {
   // The initialization of each atom's string.
-  #define GK_ATOM(name_, value_, type_, atom_type_) \
+  #define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
     u"" value_,
   #include "nsGkAtomList.h"
   #undef GK_ATOM
   {
     // The initialization of the atoms themselves.
     //
     // Note that |value_| is an 8-bit string, and so |sizeof(value_)| is equal
     // to the number of chars (including the terminating '\0'). The |u""| prefix
     // converts |value_| to a 16-bit string.
-    #define GK_ATOM(name_, value_, type_, atom_type_)                     \
+    #define GK_ATOM(name_, value_, hash_, type_, atom_type_)              \
       nsStaticAtom(u"" value_,                                            \
           sizeof(value_) - 1,                                             \
+          hash_,                                                          \
           offsetof(GkAtoms,                                               \
                    mAtoms[static_cast<size_t>(GkAtoms::Atoms::name_)]) -  \
           offsetof(GkAtoms, name_##_string)),
     #include "nsGkAtomList.h"
     #undef GK_ATOM
   }
 };
-MOZ_POP_DISABLE_INTEGRAL_CONSTANT_OVERFLOW_WARNING
 
 } // namespace detail
 } // namespace mozilla
 
 const nsStaticAtom* const nsGkAtoms::sAtoms = mozilla::detail::gGkAtoms.mAtoms;
 
 // Definition of the pointer to the static atom.
-#define GK_ATOM(name_, value_, type_, atom_type_)                          \
+#define GK_ATOM(name_, value_, hash_, type_, atom_type_)                   \
   type_* nsGkAtoms::name_ = const_cast<type_*>(static_cast<const type_*>(  \
     &mozilla::detail::gGkAtoms.mAtoms[                                     \
       static_cast<size_t>(mozilla::detail::GkAtoms::Atoms::name_)]));
 #include "nsGkAtomList.h"
 #undef GK_ATOM
 
 void nsGkAtoms::RegisterStaticAtoms()
 {
--- a/xpcom/ds/nsGkAtoms.h
+++ b/xpcom/ds/nsGkAtoms.h
@@ -43,19 +43,19 @@
 //
 // nsGkAtoms below defines static atoms in a way that satisfies these
 // constraints. It uses nsGkAtomList.h, which defines the names and values of
 // the atoms.
 //
 // nsGkAtomList.h is generated by StaticAtoms.py and has entries that look
 // like this:
 //
-//   GK_ATOM(one, "one", nsStaticAtom, Atom)
-//   GK_ATOM(two, "two", nsICSSPseudoElement, PseudoElementAtom)
-//   GK_ATOM(three, "three", nsICSSAnonBoxPseudo, InheritingAnonBoxAtom)
+//   GK_ATOM(one, "one", 0x01234567, nsStaticAtom, Atom)
+//   GK_ATOM(two, "two", 0x12345678, nsICSSPseudoElement, PseudoElementAtom)
+//   GK_ATOM(three, "three", 0x23456789, nsICSSAnonBoxPseudo, InheritingAnonBoxAtom)
 //
 // After macro expansion, the atom definitions look like the following:
 //
 //   ====> nsGkAtoms.h <====
 //
 //   namespace mozilla {
 //   namespace detail {
 //
@@ -115,24 +115,27 @@
 //     // The initialization of each atom's string.
 //     u"one",
 //     u"two",
 //     u"three",
 //     {
 //       // The initialization of the atoms themselves.
 //       nsStaticAtom(
 //         u"one", 3,
+//         0x01234567,
 //         offsetof(GkAtoms, mAtoms[static_cast<size_t>(GkAtoms::Atoms::one)]) -
 //         offsetof(GkAtoms, one_string)),
 //       nsStaticAtom(
 //         u"two", 3,
+//         0x12345678,
 //         offsetof(GkAtoms, mAtoms[static_cast<size_t>(GkAtoms::Atoms::two)]) -
 //         offsetof(GkAtoms, two_string)),
 //       nsStaticAtom(
 //         u"three", 3,
+//         0x23456789,
 //         offsetof(GkAtoms, mAtoms[static_cast<size_t>(GkAtoms::Atoms::three)]) -
 //         offsetof(GkAtoms, three_string)),
 //     }
 //   };
 //   MOZ_POP_DISABLE_INTEGRAL_CONSTANT_OVERFLOW_WARNING
 //
 //   } // namespace detail
 //   } // namespace mozilla
@@ -151,22 +154,23 @@
 //         static_cast<size_t>(detail::GkAtoms::Atoms::two)]);
 //   nsICSSAnonBoxPseudo* nsGkAtoms::three =
 //     const_cast<nsICSSAnonBoxPseudo*>(static_cast<const nsICSSAnonBoxPseudo*>(
 //       &detail::gGkAtoms.mAtoms[
 //         static_cast<size_t>(detail::GkAtoms::Atoms::three)]);
 
 // Trivial subclasses of nsStaticAtom so that function signatures can require
 // an atom from a specific atom list.
-#define DEFINE_STATIC_ATOM_SUBCLASS(name_)                                    \
-  class name_ : public nsStaticAtom                                           \
-  {                                                                           \
-  public:                                                                     \
-    constexpr name_(const char16_t* aStr, uint32_t aLength, uint32_t aOffset) \
-      : nsStaticAtom(aStr, aLength, aOffset) {}                               \
+#define DEFINE_STATIC_ATOM_SUBCLASS(name_)                   \
+  class name_ : public nsStaticAtom                          \
+  {                                                          \
+  public:                                                    \
+    constexpr name_(const char16_t* aStr, uint32_t aLength,  \
+                    uint32_t aHash, uint32_t aOffset)        \
+      : nsStaticAtom(aStr, aLength, aHash, aOffset) {}       \
   };
 
 DEFINE_STATIC_ATOM_SUBCLASS(nsICSSAnonBoxPseudo)
 DEFINE_STATIC_ATOM_SUBCLASS(nsICSSPseudoElement)
 
 #undef DEFINE_STATIC_ATOM_SUBCLASS
 
 namespace mozilla {
@@ -176,24 +180,24 @@ namespace detail {
 // Because they are together in a class, the mStringOffset field of the
 // atoms will be small and can be initialized at compile time.
 //
 // A `detail` namespace is used because the things within it aren't directly
 // referenced by external users of these static atoms.
 struct GkAtoms
 {
   // The declaration of each atom's string.
-  #define GK_ATOM(name_, value_, type_, atom_type_) \
+  #define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
     const char16_t name_##_string[sizeof(value_)];
   #include "nsGkAtomList.h"
   #undef GK_ATOM
 
   // The enum value for each atom.
   enum class Atoms {
-    #define GK_ATOM(name_, value_, type_, atom_type_) \
+    #define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
       name_,
     #include "nsGkAtomList.h"
     #undef GK_ATOM
     AtomsCount
   };
 
   const nsStaticAtom mAtoms[static_cast<size_t>(Atoms::AtomsCount)];
 };
@@ -216,15 +220,15 @@ public:
     MOZ_ASSERT(aIndex < sAtomsLen);
     return const_cast<nsStaticAtom*>(&sAtoms[aIndex]);
   }
 
   // The declaration of the pointer to each static atom.
   //
   // XXX: Eventually this should be combined with its definition and the
   // pointer should be made `constexpr`. See bug 1449787.
-  #define GK_ATOM(name_, value_, type_, atom_type_) \
+  #define GK_ATOM(name_, value_, hash_, type_, atom_type_) \
     static type_* name_;
   #include "nsGkAtomList.h"
   #undef GK_ATOM
 };
 
 #endif /* nsGkAtoms_h___ */