Bug 984783 - Remove all invalidation tracking from the WebGL element array cache - r=jgilbert
authorBenoit Jacob <bjacob@mozilla.com>
Fri, 09 May 2014 13:49:27 -0400
changeset 182517 24ff9dbc3f20bff7a1e866863aff70c1f073ae2b
parent 182516 417962f378332366b56526afed215acf3b5632fa
child 182518 77b1f329c17401c8a1aff8bf98c2fc34b5b14e84
push id26764
push usercbook@mozilla.com
push dateMon, 12 May 2014 11:35:17 +0000
treeherdermozilla-central@a64ed5aba131 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjgilbert
bugs984783
milestone32.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 984783 - Remove all invalidation tracking from the WebGL element array cache - r=jgilbert
content/canvas/compiledtest/TestWebGLElementArrayCache.cpp
content/canvas/src/WebGLElementArrayCache.cpp
content/canvas/src/WebGLElementArrayCache.h
--- a/content/canvas/compiledtest/TestWebGLElementArrayCache.cpp
+++ b/content/canvas/compiledtest/TestWebGLElementArrayCache.cpp
@@ -169,17 +169,17 @@ int main(int argc, char *argv[])
   for (int maxBufferSize = 1; maxBufferSize <= 4096; maxBufferSize *= 2) {
     // See bug 800612. We originally had | repeat = min(maxBufferSize, 20) |
     // and a real bug was only caught on Windows and not on Linux due to rand()
     // producing different values. In that case, the minimum value by which to replace
     // this 20 to reproduce the bug on Linux, was 25. Replacing it with 64 should give
     // us some comfort margin.
     int repeat = std::min(maxBufferSize, 64);
     for (int i = 0; i < repeat; i++) {
-      size_t size = RandomInteger<size_t>(1, maxBufferSize);
+      size_t size = RandomInteger<size_t>(0, maxBufferSize);
       MakeRandomVector(v, size);
       b.BufferData(v.Elements(), size);
       CheckValidate(b, 0, size);
 
       for (int j = 0; j < 16; j++) {
         for (int bufferSubDataCalls = 1; bufferSubDataCalls <= 8; bufferSubDataCalls *= 2) {
           for (int validateCalls = 1; validateCalls <= 8; validateCalls *= 2) {
 
--- a/content/canvas/src/WebGLElementArrayCache.cpp
+++ b/content/canvas/src/WebGLElementArrayCache.cpp
@@ -119,63 +119,52 @@ UpdateUpperBound(uint32_t* out_upperBoun
  * by the time WebGL bufferData/bufferSubData methods are called. The type of elements is only
  * specified in the drawElements call. This means that we may potentially have to store caches for
  * multiple element types, for the same element array buffer. Since we don't know yet how many
  * element types we'll eventually support (extensions add more), the concern about memory usage is serious.
  * This is addressed by sSkippedBottomTreeLevels as explained above. Of course, in the typical
  * case where each element array buffer is only ever used with one type, this is also addressed
  * by having WebGLElementArrayCache lazily create trees for each type only upon first use.
  *
- * Another consequence of this constraint is that when invalidating the trees, we have to invalidate
+ * Another consequence of this constraint is that when updating the trees, we have to update
  * all existing trees. So if trees for types uint8_t, uint16_t and uint32_t have ever been constructed for this buffer,
- * every subsequent invalidation will have to invalidate all trees even if one of the types is never
- * used again. This implies that it is important to minimize the cost of invalidation i.e.
- * do lazy updates upon use as opposed to immediately updating invalidated trees. This poses a problem:
- * it is nontrivial to keep track of the part of the tree that's invalidated. The current solution
- * can only keep track of an invalidated interval, from |mFirstInvalidatedLeaf| to |mLastInvalidatedLeaf|.
- * The problem is that if one does two small, far-apart partial buffer updates, the resulting invalidated
- * area is very large even though only a small part of the array really needed to be invalidated.
- * The real solution to this problem would be to use a smarter data structure to keep track of the
- * invalidated area, probably an interval tree. Meanwhile, we can probably live with the current situation
- * as the unfavorable case seems to be a small corner case: in order to run into performance issues,
- * the number of bufferSubData in between two consecutive draws must be small but greater than 1, and
- * the partial buffer updates must be small and far apart. Anything else than this corner case
- * should run fast in the current setting.
+ * every subsequent update will have to update all trees even if one of the types is never
+ * used again. That's inefficient, but content should not put indices of different types in the
+ * same element array buffer anyways. Different index types can only be consumed in separate
+ * drawElements calls, so nothing particular is to be achieved by lumping them in the same
+ * buffer object.
  */
 template<typename T>
 struct WebGLElementArrayCacheTree
 {
   // A too-high sSkippedBottomTreeLevels would harm the performance of small drawElements calls
   // A too-low sSkippedBottomTreeLevels would cause undue memory usage.
   // The current value has been validated by some benchmarking. See bug 732660.
   static const size_t sSkippedBottomTreeLevels = 3;
   static const size_t sElementsPerLeaf = 1 << sSkippedBottomTreeLevels;
   static const size_t sElementsPerLeafMask = sElementsPerLeaf - 1; // sElementsPerLeaf is POT
 
 private:
   WebGLElementArrayCache& mParent;
   FallibleTArray<T> mTreeData;
   size_t mNumLeaves;
-  bool mInvalidated;
-  size_t mFirstInvalidatedLeaf;
-  size_t mLastInvalidatedLeaf;
+  size_t mParentByteSize;
 
 public:
   WebGLElementArrayCacheTree(WebGLElementArrayCache& p)
     : mParent(p)
     , mNumLeaves(0)
-    , mInvalidated(false)
-    , mFirstInvalidatedLeaf(0)
-    , mLastInvalidatedLeaf(0)
+    , mParentByteSize(0)
   {
-    ResizeToParentSize();
+    if (mParent.ByteSize()) {
+      Update(0, mParent.ByteSize() - 1);
+    }
   }
 
   T GlobalMaximum() const {
-    MOZ_ASSERT(!mInvalidated);
     return mTreeData[1];
   }
 
   // returns the index of the parent node; if treeIndex=1 (the root node),
   // the return value is 0.
   static size_t ParentNode(size_t treeIndex) {
     MOZ_ASSERT(treeIndex > 1);
     return treeIndex >> 1;
@@ -237,24 +226,23 @@ public:
     return element | sElementsPerLeafMask;
   }
 
   static size_t FirstElementUnderSameLeaf(size_t element) {
     return element & ~sElementsPerLeafMask;
   }
 
   static size_t NextMultipleOfElementsPerLeaf(size_t numElements) {
+    MOZ_ASSERT(numElements >= 1);
     return ((numElements - 1) | sElementsPerLeafMask) + 1;
   }
 
   bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf,
                 uint32_t* out_upperBound)
   {
-    MOZ_ASSERT(!mInvalidated);
-
     size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
     size_t lastTreeIndex  = TreeIndexForLeaf(lastLeaf);
 
     while (true) {
       // given that we tweak these values in nontrivial ways, it doesn't hurt to do
       // this sanity check
       MOZ_ASSERT(firstTreeIndex <= lastTreeIndex);
 
@@ -305,38 +293,17 @@ public:
     U result = 1;
     while (result < x)
       result <<= 1;
     MOZ_ASSERT(result >= x);
     MOZ_ASSERT((result & (result - 1)) == 0);
     return result;
   }
 
-  bool ResizeToParentSize()
-  {
-    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
-    size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf;
-
-    size_t oldNumLeaves = mNumLeaves;
-    mNumLeaves = NextPowerOfTwo(requiredNumLeaves);
-    Invalidate(0, mParent.ByteSize() - 1);
-
-    // see class comment for why we the tree storage size is 2 * mNumLeaves
-    if (!mTreeData.SetLength(2 * mNumLeaves)) {
-      return false;
-    }
-    if (mNumLeaves != oldNumLeaves) {
-      memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(mTreeData[0]));
-    }
-    return true;
-  }
-
-  void Invalidate(size_t firstByte, size_t lastByte);
-
-  void Update();
+  bool Update(size_t firstByte, size_t lastByte);
 
   size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
   {
     return aMallocSizeOf(this) + mTreeData.SizeOfExcludingThis(aMallocSizeOf);
   }
 };
 
 // TreeForType: just a template helper to select the right tree object for a given
@@ -357,86 +324,94 @@ struct TreeForType<uint16_t>
 };
 
 template<>
 struct TreeForType<uint32_t>
 {
   static WebGLElementArrayCacheTree<uint32_t>*& Run(WebGLElementArrayCache *b) { return b->mUint32Tree; }
 };
 
-// When the buffer gets updated from firstByte to lastByte,
-// calling this method will notify the tree accordingly
+// Calling this method will 1) update the leaves in this interval
+// from the raw buffer data, and 2) propagate this update up the tree
 template<typename T>
-void WebGLElementArrayCacheTree<T>::Invalidate(size_t firstByte, size_t lastByte)
+bool WebGLElementArrayCacheTree<T>::Update(size_t firstByte, size_t lastByte)
 {
+  MOZ_ASSERT(firstByte <= lastByte);
+  MOZ_ASSERT(lastByte < mParent.ByteSize());
+
+  // Step #0: if needed, resize our tree data storage.
+  if (mParentByteSize != mParent.ByteSize())
+  {
+    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
+    if (numberOfElements == 0) {
+      mParentByteSize = mParent.ByteSize();
+      return true;
+    }
+
+    size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf;
+    size_t oldNumLeaves = mNumLeaves;
+    mNumLeaves = NextPowerOfTwo(requiredNumLeaves);
+    if (mNumLeaves != oldNumLeaves) {
+      // see class comment for why we the tree storage size is 2 * mNumLeaves
+      if (!mTreeData.SetLength(2 * mNumLeaves)) {
+        return false;
+      }
+      // when resizing, update the whole tree, not just the subset corresponding
+      // to the part of the buffer being updated.
+      memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(T));
+      firstByte = 0;
+      lastByte = mParent.ByteSize() - 1;
+    }
+
+    mParentByteSize = mParent.ByteSize();
+  }
+
   lastByte = std::min(lastByte, mNumLeaves * sElementsPerLeaf * sizeof(T) - 1);
   if (firstByte > lastByte) {
-    return;
+    return true;
   }
 
   size_t firstLeaf = LeafForByte(firstByte);
   size_t lastLeaf = LeafForByte(lastByte);
 
-  if (mInvalidated) {
-    mFirstInvalidatedLeaf = std::min(firstLeaf, mFirstInvalidatedLeaf);
-    mLastInvalidatedLeaf = std::max(lastLeaf, mLastInvalidatedLeaf);
-  } else {
-    mInvalidated = true;
-    mFirstInvalidatedLeaf = firstLeaf;
-    mLastInvalidatedLeaf = lastLeaf;
-  }
-}
-
+  MOZ_ASSERT(firstLeaf <= lastLeaf && lastLeaf < mNumLeaves);
 
-// When tree has been partially invalidated, from mFirstInvalidatedLeaf to
-// mLastInvalidatedLeaf, calling this method will 1) update the leaves in this interval
-// from the raw buffer data, and 2) propagate this update up the tree
-template<typename T>
-void WebGLElementArrayCacheTree<T>::Update()
-{
-  if (!mInvalidated) {
-    return;
-  }
-
-  MOZ_ASSERT(mLastInvalidatedLeaf < mNumLeaves);
-
-  size_t firstTreeIndex = TreeIndexForLeaf(mFirstInvalidatedLeaf);
-  size_t lastTreeIndex = TreeIndexForLeaf(mLastInvalidatedLeaf);
+  size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
+  size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
 
   // Step #1: initialize the tree leaves from plain buffer data.
   // That is, each tree leaf must be set to the max of the |sElementsPerLeaf| corresponding
   // buffer entries.
   // condition-less scope to prevent leaking this scope's variables into the code below
   {
     // treeIndex is the index of the tree leaf we're writing, i.e. the destination index
     size_t treeIndex = firstTreeIndex;
     // srcIndex is the index in the source buffer
-    size_t srcIndex = mFirstInvalidatedLeaf * sElementsPerLeaf;
-    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
+    size_t srcIndex = firstLeaf * sElementsPerLeaf;
+    size_t numberOfElements = mParentByteSize / sizeof(T);
     while (treeIndex <= lastTreeIndex) {
       T m = 0;
       size_t a = srcIndex;
       size_t srcIndexNextLeaf = std::min(a + sElementsPerLeaf, numberOfElements);
       for (; srcIndex < srcIndexNextLeaf; srcIndex++) {
         m = std::max(m, mParent.Element<T>(srcIndex));
       }
       mTreeData[treeIndex] = m;
       treeIndex++;
     }
   }
 
   // Step #2: propagate the values up the tree. This is simply a matter of walking up
   // the tree and setting each node to the max of its two children.
   while (firstTreeIndex > 1) {
-
     // move up 1 level
     firstTreeIndex = ParentNode(firstTreeIndex);
     lastTreeIndex = ParentNode(lastTreeIndex);
 
-    // fast-exit case where only one node is invalidated at the current level
+    // fast-exit case where only one node is updated at the current level
     if (firstTreeIndex == lastTreeIndex) {
       mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]);
       continue;
     }
 
     // initialize local iteration variables: child and parent.
     size_t child = LeftChildNode(firstTreeIndex);
     size_t parent = firstTreeIndex;
@@ -463,61 +438,64 @@ void WebGLElementArrayCacheTree<T>::Upda
       child = RightNeighborNode(child);
       T b = mTreeData[child];
       child = RightNeighborNode(child);
       mTreeData[parent] = std::max(a, b);
       parent = RightNeighborNode(parent);
     }
   }
 
-  mInvalidated = false;
+  return true;
 }
 
 WebGLElementArrayCache::~WebGLElementArrayCache() {
   delete mUint8Tree;
   delete mUint16Tree;
   delete mUint32Tree;
   free(mUntypedData);
 }
 
 bool WebGLElementArrayCache::BufferData(const void* ptr, size_t byteSize) {
-  mByteSize = byteSize;
-  if (mUint8Tree)
-    if (!mUint8Tree->ResizeToParentSize())
-      return false;
-  if (mUint16Tree)
-    if (!mUint16Tree->ResizeToParentSize())
+  if (byteSize == 0) {
+    mByteSize = 0;
+    free(mUntypedData);
+    mUntypedData = nullptr;
+    return true;
+  }
+  if (byteSize != mByteSize) {
+    void* newUntypedData = realloc(mUntypedData, byteSize);
+    if (!newUntypedData)
       return false;
-  if (mUint32Tree)
-    if (!mUint32Tree->ResizeToParentSize())
-      return false;
-  mUntypedData = realloc(mUntypedData, byteSize);
-  if (!mUntypedData)
-    return false;
+    mByteSize = byteSize;
+    mUntypedData = newUntypedData;
+  }
+
   BufferSubData(0, ptr, byteSize);
   return true;
 }
 
-void WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) {
-  if (!updateByteSize) return;
+bool WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) {
+  if (!updateByteSize) return true;
   if (ptr)
       memcpy(static_cast<uint8_t*>(mUntypedData) + pos, ptr, updateByteSize);
   else
       memset(static_cast<uint8_t*>(mUntypedData) + pos, 0, updateByteSize);
-  InvalidateTrees(pos, pos + updateByteSize - 1);
+  return UpdateTrees(pos, pos + updateByteSize - 1);
 }
 
-void WebGLElementArrayCache::InvalidateTrees(size_t firstByte, size_t lastByte)
+bool WebGLElementArrayCache::UpdateTrees(size_t firstByte, size_t lastByte)
 {
+  bool result = true;
   if (mUint8Tree)
-    mUint8Tree->Invalidate(firstByte, lastByte);
+    result &= mUint8Tree->Update(firstByte, lastByte);
   if (mUint16Tree)
-    mUint16Tree->Invalidate(firstByte, lastByte);
+    result &= mUint16Tree->Update(firstByte, lastByte);
   if (mUint32Tree)
-    mUint32Tree->Invalidate(firstByte, lastByte);
+    result &= mUint32Tree->Update(firstByte, lastByte);
+  return result;
 }
 
 template<typename T>
 bool
 WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement,
                                  size_t countElements, uint32_t* out_upperBound)
 {
   SetUpperBound(out_upperBound, 0);
@@ -540,18 +518,16 @@ WebGLElementArrayCache::Validate(uint32_
 
   WebGLElementArrayCacheTree<T>*& tree = TreeForType<T>::Run(this);
   if (!tree) {
     tree = new WebGLElementArrayCacheTree<T>(*this);
   }
 
   size_t lastElement = firstElement + countElements - 1;
 
-  tree->Update();
-
   // fast exit path when the global maximum for the whole element array buffer
   // falls in the allowed range
   T globalMax = tree->GlobalMaximum();
   if (globalMax <= maxAllowedT)
   {
     SetUpperBound(out_upperBound, globalMax);
     return true;
   }
--- a/content/canvas/src/WebGLElementArrayCache.h
+++ b/content/canvas/src/WebGLElementArrayCache.h
@@ -26,17 +26,17 @@ struct WebGLElementArrayCacheTree;
  *
  * Most of the implementation is hidden in the auxilary class template, WebGLElementArrayCacheTree.
  * Refer to its code for design comments.
  */
 class WebGLElementArrayCache {
 
 public:
   bool BufferData(const void* ptr, size_t byteSize);
-  void BufferSubData(size_t pos, const void* ptr, size_t updateByteSize);
+  bool BufferSubData(size_t pos, const void* ptr, size_t updateByteSize);
 
   bool Validate(GLenum type, uint32_t maxAllowed, size_t first, size_t count,
                 uint32_t* out_upperBound = nullptr);
 
   template<typename T>
   T Element(size_t i) const { return Elements<T>()[i]; }
 
   WebGLElementArrayCache()
@@ -61,17 +61,17 @@ private:
     return mByteSize;
   }
 
   template<typename T>
   const T* Elements() const { return static_cast<const T*>(mUntypedData); }
   template<typename T>
   T* Elements() { return static_cast<T*>(mUntypedData); }
 
-  void InvalidateTrees(size_t firstByte, size_t lastByte);
+  bool UpdateTrees(size_t firstByte, size_t lastByte);
 
   template<typename T>
   friend struct WebGLElementArrayCacheTree;
   template<typename T>
   friend struct TreeForType;
 
   void* mUntypedData;
   size_t mByteSize;