☠☠ backed out by ec7113f437f3 ☠ ☠ | |
author | Benoit Jacob <bjacob@mozilla.com> |
Fri, 09 May 2014 13:49:27 -0400 | |
changeset 182506 | d2242d05c93c54a7e7bbc40e8157f54c3ccfa064 |
parent 182505 | 9fda399b4e88f707c976c79081e1de829078be4b |
child 182507 | 73108558281deed47004be77bf9fe8dacd49a1e2 |
push id | 26764 |
push user | cbook@mozilla.com |
push date | Mon, 12 May 2014 11:35:17 +0000 |
treeherder | mozilla-central@a64ed5aba131 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | jgilbert |
bugs | 984783 |
milestone | 32.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/content/canvas/compiledtest/TestWebGLElementArrayCache.cpp +++ b/content/canvas/compiledtest/TestWebGLElementArrayCache.cpp @@ -169,17 +169,17 @@ int main(int argc, char *argv[]) for (int maxBufferSize = 1; maxBufferSize <= 4096; maxBufferSize *= 2) { // See bug 800612. We originally had | repeat = min(maxBufferSize, 20) | // and a real bug was only caught on Windows and not on Linux due to rand() // producing different values. In that case, the minimum value by which to replace // this 20 to reproduce the bug on Linux, was 25. Replacing it with 64 should give // us some comfort margin. int repeat = std::min(maxBufferSize, 64); for (int i = 0; i < repeat; i++) { - size_t size = RandomInteger<size_t>(1, maxBufferSize); + size_t size = RandomInteger<size_t>(0, maxBufferSize); MakeRandomVector(v, size); b.BufferData(v.Elements(), size); CheckValidate(b, 0, size); for (int j = 0; j < 16; j++) { for (int bufferSubDataCalls = 1; bufferSubDataCalls <= 8; bufferSubDataCalls *= 2) { for (int validateCalls = 1; validateCalls <= 8; validateCalls *= 2) {
--- a/content/canvas/src/WebGLElementArrayCache.cpp +++ b/content/canvas/src/WebGLElementArrayCache.cpp @@ -119,63 +119,52 @@ UpdateUpperBound(uint32_t* out_upperBoun * by the time WebGL bufferData/bufferSubData methods are called. The type of elements is only * specified in the drawElements call. This means that we may potentially have to store caches for * multiple element types, for the same element array buffer. Since we don't know yet how many * element types we'll eventually support (extensions add more), the concern about memory usage is serious. * This is addressed by sSkippedBottomTreeLevels as explained above. Of course, in the typical * case where each element array buffer is only ever used with one type, this is also addressed * by having WebGLElementArrayCache lazily create trees for each type only upon first use. * - * Another consequence of this constraint is that when invalidating the trees, we have to invalidate + * Another consequence of this constraint is that when updating the trees, we have to update * all existing trees. So if trees for types uint8_t, uint16_t and uint32_t have ever been constructed for this buffer, - * every subsequent invalidation will have to invalidate all trees even if one of the types is never - * used again. This implies that it is important to minimize the cost of invalidation i.e. - * do lazy updates upon use as opposed to immediately updating invalidated trees. This poses a problem: - * it is nontrivial to keep track of the part of the tree that's invalidated. The current solution - * can only keep track of an invalidated interval, from |mFirstInvalidatedLeaf| to |mLastInvalidatedLeaf|. - * The problem is that if one does two small, far-apart partial buffer updates, the resulting invalidated - * area is very large even though only a small part of the array really needed to be invalidated. - * The real solution to this problem would be to use a smarter data structure to keep track of the - * invalidated area, probably an interval tree. Meanwhile, we can probably live with the current situation - * as the unfavorable case seems to be a small corner case: in order to run into performance issues, - * the number of bufferSubData in between two consecutive draws must be small but greater than 1, and - * the partial buffer updates must be small and far apart. Anything else than this corner case - * should run fast in the current setting. + * every subsequent update will have to update all trees even if one of the types is never + * used again. That's inefficient, but content should not put indices of different types in the + * same element array buffer anyways. Different index types can only be consumed in separate + * drawElements calls, so nothing particular is to be achieved by lumping them in the same + * buffer object. */ template<typename T> struct WebGLElementArrayCacheTree { // A too-high sSkippedBottomTreeLevels would harm the performance of small drawElements calls // A too-low sSkippedBottomTreeLevels would cause undue memory usage. // The current value has been validated by some benchmarking. See bug 732660. static const size_t sSkippedBottomTreeLevels = 3; static const size_t sElementsPerLeaf = 1 << sSkippedBottomTreeLevels; static const size_t sElementsPerLeafMask = sElementsPerLeaf - 1; // sElementsPerLeaf is POT private: WebGLElementArrayCache& mParent; FallibleTArray<T> mTreeData; size_t mNumLeaves; - bool mInvalidated; - size_t mFirstInvalidatedLeaf; - size_t mLastInvalidatedLeaf; + size_t mParentByteSize; public: WebGLElementArrayCacheTree(WebGLElementArrayCache& p) : mParent(p) , mNumLeaves(0) - , mInvalidated(false) - , mFirstInvalidatedLeaf(0) - , mLastInvalidatedLeaf(0) + , mParentByteSize(0) { - ResizeToParentSize(); + if (mParent.ByteSize()) { + Update(0, mParent.ByteSize() - 1); + } } T GlobalMaximum() const { - MOZ_ASSERT(!mInvalidated); return mTreeData[1]; } // returns the index of the parent node; if treeIndex=1 (the root node), // the return value is 0. static size_t ParentNode(size_t treeIndex) { MOZ_ASSERT(treeIndex > 1); return treeIndex >> 1; @@ -237,24 +226,23 @@ public: return element | sElementsPerLeafMask; } static size_t FirstElementUnderSameLeaf(size_t element) { return element & ~sElementsPerLeafMask; } static size_t NextMultipleOfElementsPerLeaf(size_t numElements) { + MOZ_ASSERT(numElements >= 1); return ((numElements - 1) | sElementsPerLeafMask) + 1; } bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf, uint32_t* out_upperBound) { - MOZ_ASSERT(!mInvalidated); - size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf); size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf); while (true) { // given that we tweak these values in nontrivial ways, it doesn't hurt to do // this sanity check MOZ_ASSERT(firstTreeIndex <= lastTreeIndex); @@ -305,38 +293,17 @@ public: U result = 1; while (result < x) result <<= 1; MOZ_ASSERT(result >= x); MOZ_ASSERT((result & (result - 1)) == 0); return result; } - bool ResizeToParentSize() - { - size_t numberOfElements = mParent.ByteSize() / sizeof(T); - size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf; - - size_t oldNumLeaves = mNumLeaves; - mNumLeaves = NextPowerOfTwo(requiredNumLeaves); - Invalidate(0, mParent.ByteSize() - 1); - - // see class comment for why we the tree storage size is 2 * mNumLeaves - if (!mTreeData.SetLength(2 * mNumLeaves)) { - return false; - } - if (mNumLeaves != oldNumLeaves) { - memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(mTreeData[0])); - } - return true; - } - - void Invalidate(size_t firstByte, size_t lastByte); - - void Update(); + bool Update(size_t firstByte, size_t lastByte); size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const { return aMallocSizeOf(this) + mTreeData.SizeOfExcludingThis(aMallocSizeOf); } }; // TreeForType: just a template helper to select the right tree object for a given @@ -357,65 +324,69 @@ struct TreeForType<uint16_t> }; template<> struct TreeForType<uint32_t> { static WebGLElementArrayCacheTree<uint32_t>*& Run(WebGLElementArrayCache *b) { return b->mUint32Tree; } }; -// When the buffer gets updated from firstByte to lastByte, -// calling this method will notify the tree accordingly +// Calling this method will 1) update the leaves in this interval +// from the raw buffer data, and 2) propagate this update up the tree template<typename T> -void WebGLElementArrayCacheTree<T>::Invalidate(size_t firstByte, size_t lastByte) +bool WebGLElementArrayCacheTree<T>::Update(size_t firstByte, size_t lastByte) { + MOZ_ASSERT(firstByte <= lastByte); + MOZ_ASSERT(lastByte < mParent.ByteSize()); + + // Step #0: if needed, resize our tree data storage. + if (mParentByteSize != mParent.ByteSize()) + { + mParentByteSize = mParent.ByteSize(); + + size_t numberOfElements = mParent.ByteSize() / sizeof(T); + if (numberOfElements == 0) { + return true; + } + + size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf; + size_t oldNumLeaves = mNumLeaves; + mNumLeaves = NextPowerOfTwo(requiredNumLeaves); + if (mNumLeaves != oldNumLeaves) { + // see class comment for why we the tree storage size is 2 * mNumLeaves + if (!mTreeData.SetLength(2 * mNumLeaves)) { + return false; + } + } + + } + lastByte = std::min(lastByte, mNumLeaves * sElementsPerLeaf * sizeof(T) - 1); if (firstByte > lastByte) { - return; + return true; } size_t firstLeaf = LeafForByte(firstByte); size_t lastLeaf = LeafForByte(lastByte); - if (mInvalidated) { - mFirstInvalidatedLeaf = std::min(firstLeaf, mFirstInvalidatedLeaf); - mLastInvalidatedLeaf = std::max(lastLeaf, mLastInvalidatedLeaf); - } else { - mInvalidated = true; - mFirstInvalidatedLeaf = firstLeaf; - mLastInvalidatedLeaf = lastLeaf; - } -} - + MOZ_ASSERT(firstLeaf <= lastLeaf && lastLeaf < mNumLeaves); -// When tree has been partially invalidated, from mFirstInvalidatedLeaf to -// mLastInvalidatedLeaf, calling this method will 1) update the leaves in this interval -// from the raw buffer data, and 2) propagate this update up the tree -template<typename T> -void WebGLElementArrayCacheTree<T>::Update() -{ - if (!mInvalidated) { - return; - } - - MOZ_ASSERT(mLastInvalidatedLeaf < mNumLeaves); - - size_t firstTreeIndex = TreeIndexForLeaf(mFirstInvalidatedLeaf); - size_t lastTreeIndex = TreeIndexForLeaf(mLastInvalidatedLeaf); + size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf); + size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf); // Step #1: initialize the tree leaves from plain buffer data. // That is, each tree leaf must be set to the max of the |sElementsPerLeaf| corresponding // buffer entries. // condition-less scope to prevent leaking this scope's variables into the code below { // treeIndex is the index of the tree leaf we're writing, i.e. the destination index size_t treeIndex = firstTreeIndex; // srcIndex is the index in the source buffer - size_t srcIndex = mFirstInvalidatedLeaf * sElementsPerLeaf; - size_t numberOfElements = mParent.ByteSize() / sizeof(T); + size_t srcIndex = firstLeaf * sElementsPerLeaf; + size_t numberOfElements = mParentByteSize / sizeof(T); while (treeIndex <= lastTreeIndex) { T m = 0; size_t a = srcIndex; size_t srcIndexNextLeaf = std::min(a + sElementsPerLeaf, numberOfElements); for (; srcIndex < srcIndexNextLeaf; srcIndex++) { m = std::max(m, mParent.Element<T>(srcIndex)); } mTreeData[treeIndex] = m; @@ -426,17 +397,17 @@ void WebGLElementArrayCacheTree<T>::Upda // Step #2: propagate the values up the tree. This is simply a matter of walking up // the tree and setting each node to the max of its two children. while (firstTreeIndex > 1) { // move up 1 level firstTreeIndex = ParentNode(firstTreeIndex); lastTreeIndex = ParentNode(lastTreeIndex); - // fast-exit case where only one node is invalidated at the current level + // fast-exit case where only one node is updated at the current level if (firstTreeIndex == lastTreeIndex) { mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]); continue; } // initialize local iteration variables: child and parent. size_t child = LeftChildNode(firstTreeIndex); size_t parent = firstTreeIndex; @@ -463,61 +434,56 @@ void WebGLElementArrayCacheTree<T>::Upda child = RightNeighborNode(child); T b = mTreeData[child]; child = RightNeighborNode(child); mTreeData[parent] = std::max(a, b); parent = RightNeighborNode(parent); } } - mInvalidated = false; + return true; } WebGLElementArrayCache::~WebGLElementArrayCache() { delete mUint8Tree; delete mUint16Tree; delete mUint32Tree; free(mUntypedData); } bool WebGLElementArrayCache::BufferData(const void* ptr, size_t byteSize) { + void* newUntypedData = realloc(mUntypedData, byteSize); + if (!newUntypedData) + return false; mByteSize = byteSize; - if (mUint8Tree) - if (!mUint8Tree->ResizeToParentSize()) - return false; - if (mUint16Tree) - if (!mUint16Tree->ResizeToParentSize()) - return false; - if (mUint32Tree) - if (!mUint32Tree->ResizeToParentSize()) - return false; - mUntypedData = realloc(mUntypedData, byteSize); - if (!mUntypedData) - return false; + mUntypedData = newUntypedData; + BufferSubData(0, ptr, byteSize); return true; } -void WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) { - if (!updateByteSize) return; +bool WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) { + if (!updateByteSize) return true; if (ptr) memcpy(static_cast<uint8_t*>(mUntypedData) + pos, ptr, updateByteSize); else memset(static_cast<uint8_t*>(mUntypedData) + pos, 0, updateByteSize); - InvalidateTrees(pos, pos + updateByteSize - 1); + return UpdateTrees(pos, pos + updateByteSize - 1); } -void WebGLElementArrayCache::InvalidateTrees(size_t firstByte, size_t lastByte) +bool WebGLElementArrayCache::UpdateTrees(size_t firstByte, size_t lastByte) { + bool result = true; if (mUint8Tree) - mUint8Tree->Invalidate(firstByte, lastByte); + result &= mUint8Tree->Update(firstByte, lastByte); if (mUint16Tree) - mUint16Tree->Invalidate(firstByte, lastByte); + result &= mUint16Tree->Update(firstByte, lastByte); if (mUint32Tree) - mUint32Tree->Invalidate(firstByte, lastByte); + result &= mUint32Tree->Update(firstByte, lastByte); + return result; } template<typename T> bool WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement, size_t countElements, uint32_t* out_upperBound) { SetUpperBound(out_upperBound, 0); @@ -540,18 +506,16 @@ WebGLElementArrayCache::Validate(uint32_ WebGLElementArrayCacheTree<T>*& tree = TreeForType<T>::Run(this); if (!tree) { tree = new WebGLElementArrayCacheTree<T>(*this); } size_t lastElement = firstElement + countElements - 1; - tree->Update(); - // fast exit path when the global maximum for the whole element array buffer // falls in the allowed range T globalMax = tree->GlobalMaximum(); if (globalMax <= maxAllowedT) { SetUpperBound(out_upperBound, globalMax); return true; }
--- a/content/canvas/src/WebGLElementArrayCache.h +++ b/content/canvas/src/WebGLElementArrayCache.h @@ -26,17 +26,17 @@ struct WebGLElementArrayCacheTree; * * Most of the implementation is hidden in the auxilary class template, WebGLElementArrayCacheTree. * Refer to its code for design comments. */ class WebGLElementArrayCache { public: bool BufferData(const void* ptr, size_t byteSize); - void BufferSubData(size_t pos, const void* ptr, size_t updateByteSize); + bool BufferSubData(size_t pos, const void* ptr, size_t updateByteSize); bool Validate(GLenum type, uint32_t maxAllowed, size_t first, size_t count, uint32_t* out_upperBound = nullptr); template<typename T> T Element(size_t i) const { return Elements<T>()[i]; } WebGLElementArrayCache() @@ -61,17 +61,17 @@ private: return mByteSize; } template<typename T> const T* Elements() const { return static_cast<const T*>(mUntypedData); } template<typename T> T* Elements() { return static_cast<T*>(mUntypedData); } - void InvalidateTrees(size_t firstByte, size_t lastByte); + bool UpdateTrees(size_t firstByte, size_t lastByte); template<typename T> friend struct WebGLElementArrayCacheTree; template<typename T> friend struct TreeForType; void* mUntypedData; size_t mByteSize;