Bug 732660 - Efficient drawElements validation on subarray and/or dynamically updated array - r=jgilbert
authorBenoit Jacob <bjacob@mozilla.com>
Fri, 21 Sep 2012 13:44:35 -0400
changeset 114033 ff0e65d0ee17ee217113b03217557d94eefae025
parent 114032 b8ee83073a77b650649a8f8154be845c9737b20f
child 114034 224ba5e9fb70caa8a22e5d8bc2e604b5abe54619
push id1708
push userakeybl@mozilla.com
push dateMon, 19 Nov 2012 21:10:21 +0000
treeherdermozilla-beta@27b14fe50103 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjgilbert
bugs732660
milestone18.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 732660 - Efficient drawElements validation on subarray and/or dynamically updated array - r=jgilbert
content/canvas/Makefile.in
content/canvas/src/Makefile.in
content/canvas/src/WebGLContext.h
content/canvas/src/WebGLContextGL.cpp
content/canvas/src/WebGLElementArrayCache.cpp
content/canvas/src/WebGLElementArrayCache.h
content/canvas/test/Makefile.in
content/canvas/test/compiled/Makefile.in
content/canvas/test/compiled/TestWebGLElementArrayCache.cpp
--- a/content/canvas/Makefile.in
+++ b/content/canvas/Makefile.in
@@ -7,12 +7,12 @@ DEPTH		= @DEPTH@
 topsrcdir	= @top_srcdir@
 srcdir		= @srcdir@
 VPATH		= @srcdir@
 
 include $(DEPTH)/config/autoconf.mk
 
 PARALLEL_DIRS	= public src
 
-TEST_DIRS += test
+TOOLS_DIRS += test
 
 include $(topsrcdir)/config/rules.mk
 
--- a/content/canvas/src/Makefile.in
+++ b/content/canvas/src/Makefile.in
@@ -15,16 +15,17 @@ include $(DEPTH)/config/autoconf.mk
 MODULE		= content
 LIBRARY_NAME	= gkconcvs_s
 LIBXUL_LIBRARY  = 1
 
 EXPORTS = \
 	CustomQS_Canvas.h \
 	CustomQS_Canvas2D.h \
 	WebGLContext.h \
+	WebGLElementArrayCache.h \
 	$(NULL)
 
 EXPORTS_NAMESPACES = mozilla/dom
 
 EXPORTS_mozilla/dom = \
   ImageData.h \
   $(NULL)
 
@@ -47,16 +48,17 @@ CPPSRCS += \
 	WebGLContextReporter.cpp \
 	WebGLContextValidate.cpp \
 	WebGLExtensionStandardDerivatives.cpp \
 	WebGLExtensionTextureFilterAnisotropic.cpp \
 	WebGLExtensionLoseContext.cpp \
 	WebGLTexelConversions.cpp \
 	WebGLExtensionCompressedTextureS3TC.cpp \
 	WebGLExtensionDepthTexture.cpp \
+	WebGLElementArrayCache.cpp \
 	$(NULL)
 
 DEFINES += -DUSE_ANGLE
 USE_ANGLE=1
 
 else
 
 CPPSRCS += WebGLContextNotSupported.cpp
--- a/content/canvas/src/WebGLContext.h
+++ b/content/canvas/src/WebGLContext.h
@@ -36,16 +36,18 @@
 
 #include "angle/ShaderLang.h"
 
 #include "mozilla/dom/TypedArray.h"
 #include "mozilla/dom/Nullable.h"
 #include "mozilla/ErrorResult.h"
 #include "mozilla/dom/BindingUtils.h"
 
+#include "WebGLElementArrayCache.h"
+
 /* 
  * Minimum value constants defined in 6.2 State Tables of OpenGL ES - 2.0.25
  *   https://bugzilla.mozilla.org/show_bug.cgi?id=686732
  * 
  * Exceptions: some of the following values are set to higher values than in the spec because
  * the values in the spec are ridiculously low. They are explicitly marked below
 */
 #define MINVALUE_GL_MAX_TEXTURE_SIZE                  1024  // Different from the spec, which sets it to 64 on page 162
@@ -1491,140 +1493,79 @@ class WebGLBuffer MOZ_FINAL
     , public WebGLContextBoundObject
 {
 public:
     WebGLBuffer(WebGLContext *context)
         : WebGLContextBoundObject(context)
         , mHasEverBeenBound(false)
         , mByteLength(0)
         , mTarget(LOCAL_GL_NONE)
-        , mData(nullptr)
     {
         mContext->MakeContextCurrent();
         mContext->gl->fGenBuffers(1, &mGLName);
         mContext->mBuffers.insertBack(this);
     }
 
     ~WebGLBuffer() {
         DeleteOnce();
     }
 
     void Delete() {
         mContext->MakeContextCurrent();
         mContext->gl->fDeleteBuffers(1, &mGLName);
-        free(mData);
-        mData = nullptr;
         mByteLength = 0;
+        mCache = nullptr;
         LinkedListElement<WebGLBuffer>::remove(); // remove from mContext->mBuffers
     }
 
     size_t SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf) const {
-        return aMallocSizeOf(this) + aMallocSizeOf(mData);
+        size_t sizeOfCache = mCache ? mCache->SizeOfIncludingThis(aMallocSizeOf) : 0;
+        return aMallocSizeOf(this) + sizeOfCache;
     }
 
     bool HasEverBeenBound() { return mHasEverBeenBound; }
     void SetHasEverBeenBound(bool x) { mHasEverBeenBound = x; }
     GLuint GLName() const { return mGLName; }
     GLuint ByteLength() const { return mByteLength; }
     GLenum Target() const { return mTarget; }
-    const void *Data() const { return mData; }
 
     void SetByteLength(GLuint byteLength) { mByteLength = byteLength; }
-    void SetTarget(GLenum target) { mTarget = target; }
-
-    // element array buffers are the only buffers for which we need to keep a copy of the data.
-    // this method assumes that the byte length has previously been set by calling SetByteLength.
-    bool CopyDataIfElementArray(const void* data) {
-        if (mTarget == LOCAL_GL_ELEMENT_ARRAY_BUFFER) {
-            mData = realloc(mData, mByteLength);
-            if (!mData) {
-                mByteLength = 0;
-                return false;
-            }
-            memcpy(mData, data, mByteLength);
-        }
-        return true;
-    }
-
-    // same comments as for CopyElementArrayData
-    bool ZeroDataIfElementArray() {
-        if (mTarget == LOCAL_GL_ELEMENT_ARRAY_BUFFER) {
-            mData = realloc(mData, mByteLength);
-            if (!mData) {
-                mByteLength = 0;
-                return false;
-            }
-            memset(mData, 0, mByteLength);
-        }
+
+    void SetTarget(GLenum target) {
+        mTarget = target;
+        if (!mCache && mTarget == LOCAL_GL_ELEMENT_ARRAY_BUFFER)
+            mCache = new WebGLElementArrayCache;
+     }
+
+    bool ElementArrayCacheBufferData(const void* ptr, size_t buffer_size_in_bytes) {
+        if (mTarget == LOCAL_GL_ELEMENT_ARRAY_BUFFER)
+            return mCache->BufferData(ptr, buffer_size_in_bytes);
         return true;
     }
 
-    // same comments as for CopyElementArrayData
-    void CopySubDataIfElementArray(GLuint byteOffset, GLuint byteLength, const void* data) {
-        if (mTarget == LOCAL_GL_ELEMENT_ARRAY_BUFFER && mByteLength) {
-            memcpy((void*) (size_t(mData)+byteOffset), data, byteLength);
-        }
-    }
-
-    // this method too is only for element array buffers. It returns the maximum value in the part of
-    // the buffer starting at given offset, consisting of given count of elements. The type T is the type
-    // to interpret the array elements as, must be GLushort or GLubyte.
-    template<typename T>
-    T FindMaxElementInSubArray(GLuint count, GLuint byteOffset)
-    {
-        const T* start = reinterpret_cast<T*>(reinterpret_cast<size_t>(mData) + byteOffset);
-        const T* stop = start + count;
-        T result = 0;
-        for(const T* ptr = start; ptr != stop; ++ptr) {
-            if (*ptr > result) result = *ptr;
-        }
-        return result;
+    void ElementArrayCacheBufferSubData(size_t pos, const void* ptr, size_t update_size_in_bytes) {
+        if (mTarget == LOCAL_GL_ELEMENT_ARRAY_BUFFER)
+            mCache->BufferSubData(pos, ptr, update_size_in_bytes);
     }
 
-    void InvalidateCachedMaxElements() {
-      mHasCachedMaxUbyteElement = false;
-      mHasCachedMaxUshortElement = false;
-    }
-
-    int32_t FindMaxUbyteElement() {
-      if (mHasCachedMaxUbyteElement) {
-        return mCachedMaxUbyteElement;
-      } else {
-        mHasCachedMaxUbyteElement = true;
-        mCachedMaxUbyteElement = FindMaxElementInSubArray<GLubyte>(mByteLength, 0);
-        return mCachedMaxUbyteElement;
-      }
-    }
-
-    int32_t FindMaxUshortElement() {
-      if (mHasCachedMaxUshortElement) {
-        return mCachedMaxUshortElement;
-      } else {
-        mHasCachedMaxUshortElement = true;
-        mCachedMaxUshortElement = FindMaxElementInSubArray<GLushort>(mByteLength>>1, 0);
-        return mCachedMaxUshortElement;
-      }
+    bool Validate(WebGLenum type, uint32_t max_allowed, size_t first, size_t count) {
+        return mCache->Validate(type, max_allowed, first, count);
     }
 
     NS_DECL_ISUPPORTS
     NS_DECL_NSIWEBGLBUFFER
 
 protected:
 
     WebGLuint mGLName;
     bool mHasEverBeenBound;
     GLuint mByteLength;
     GLenum mTarget;
 
-    uint8_t mCachedMaxUbyteElement;
-    bool mHasCachedMaxUbyteElement;
-    uint16_t mCachedMaxUshortElement;
-    bool mHasCachedMaxUshortElement;
-
-    void* mData; // in the case of an Element Array Buffer, we keep a copy.
+    nsAutoPtr<WebGLElementArrayCache> mCache;
 };
 
 // NOTE: When this class is switched to new DOM bindings, update the (then-slow)
 // WrapObject calls in GetParameter and GetFramebufferAttachmentParameter.
 class WebGLTexture MOZ_FINAL
     : public nsIWebGLTexture
     , public WebGLRefCountedObject<WebGLTexture>
     , public LinkedListElement<WebGLTexture>
--- a/content/canvas/src/WebGLContextGL.cpp
+++ b/content/canvas/src/WebGLContextGL.cpp
@@ -355,19 +355,19 @@ WebGLContext::BufferData(WebGLenum targe
 
     GLenum error = CheckedBufferData(target, size, 0, usage);
     if (error) {
         GenerateWarning("bufferData generated error %s", ErrorName(error));
         return;
     }
 
     boundBuffer->SetByteLength(size);
-    boundBuffer->InvalidateCachedMaxElements();
-    if (!boundBuffer->ZeroDataIfElementArray())
+    if (!boundBuffer->ElementArrayCacheBufferData(nullptr, size)) {
         return ErrorOutOfMemory("bufferData: out of memory");
+    }
 }
 
 void
 WebGLContext::BufferData(WebGLenum target, ArrayBuffer *data, WebGLenum usage)
 {
     if (!IsContextStable())
         return;
 
@@ -398,19 +398,19 @@ WebGLContext::BufferData(WebGLenum targe
     GLenum error = CheckedBufferData(target, data->Length(), data->Data(), usage);
 
     if (error) {
         GenerateWarning("bufferData generated error %s", ErrorName(error));
         return;
     }
 
     boundBuffer->SetByteLength(data->Length());
-    boundBuffer->InvalidateCachedMaxElements();
-    if (!boundBuffer->CopyDataIfElementArray(data->Data()))
+    if (!boundBuffer->ElementArrayCacheBufferData(data->Data(), data->Length())) {
         return ErrorOutOfMemory("bufferData: out of memory");
+    }
 }
 
 void
 WebGLContext::BufferData(WebGLenum target, ArrayBufferView& data, WebGLenum usage)
 {
     if (!IsContextStable())
         return;
 
@@ -435,19 +435,19 @@ WebGLContext::BufferData(WebGLenum targe
 
     GLenum error = CheckedBufferData(target, data.Length(), data.Data(), usage);
     if (error) {
         GenerateWarning("bufferData generated error %s", ErrorName(error));
         return;
     }
 
     boundBuffer->SetByteLength(data.Length());
-    boundBuffer->InvalidateCachedMaxElements();
-    if (!boundBuffer->CopyDataIfElementArray(data.Data()))
+    if (!boundBuffer->ElementArrayCacheBufferData(data.Data(), data.Length())) {
         return ErrorOutOfMemory("bufferData: out of memory");
+    }
 }
 
 void
 WebGLContext::BufferSubData(GLenum target, WebGLsizeiptr byteOffset,
                             ArrayBuffer *data)
 {
     if (!IsContextStable())
         return;
@@ -478,18 +478,17 @@ WebGLContext::BufferSubData(GLenum targe
         return ErrorInvalidOperation("bufferSubData: integer overflow computing the needed byte length");
 
     if (checked_neededByteLength.value() > boundBuffer->ByteLength())
         return ErrorInvalidOperation("bufferSubData: not enough data - operation requires %d bytes, but buffer only has %d bytes",
                                      checked_neededByteLength.value(), boundBuffer->ByteLength());
 
     MakeContextCurrent();
 
-    boundBuffer->CopySubDataIfElementArray(byteOffset, data->Length(), data->Data());
-    boundBuffer->InvalidateCachedMaxElements();
+    boundBuffer->ElementArrayCacheBufferSubData(byteOffset, data->Data(), data->Length());
 
     gl->fBufferSubData(target, byteOffset, data->Length(), data->Data());
 }
 
 void
 WebGLContext::BufferSubData(WebGLenum target, WebGLsizeiptr byteOffset,
                             ArrayBufferView& data)
 {
@@ -515,21 +514,19 @@ WebGLContext::BufferSubData(WebGLenum ta
     CheckedUint32 checked_neededByteLength = CheckedUint32(byteOffset) + data.Length();
     if (!checked_neededByteLength.isValid())
         return ErrorInvalidOperation("bufferSubData: integer overflow computing the needed byte length");
 
     if (checked_neededByteLength.value() > boundBuffer->ByteLength())
         return ErrorInvalidOperation("bufferSubData: not enough data -- operation requires %d bytes, but buffer only has %d bytes",
                                      checked_neededByteLength.value(), boundBuffer->ByteLength());
 
+    boundBuffer->ElementArrayCacheBufferSubData(byteOffset, data.Data(), data.Length());
+
     MakeContextCurrent();
-
-    boundBuffer->CopySubDataIfElementArray(byteOffset, data.Length(), data.Data());
-    boundBuffer->InvalidateCachedMaxElements();
-
     gl->fBufferSubData(target, byteOffset, data.Length(), data.Data());
 }
 
 WebGLenum
 WebGLContext::CheckFramebufferStatus(WebGLenum target)
 {
     if (!IsContextStable())
     {
@@ -1454,92 +1451,75 @@ WebGLContext::DrawElements(WebGLenum mod
         return;
 
     // If count is 0, there's nothing to do.
     if (count == 0)
         return;
 
     CheckedUint32 checked_byteCount;
 
+    WebGLsizei first = 0;
+
     if (type == LOCAL_GL_UNSIGNED_SHORT) {
         checked_byteCount = 2 * CheckedUint32(count);
         if (byteOffset % 2 != 0)
             return ErrorInvalidOperation("drawElements: invalid byteOffset for UNSIGNED_SHORT (must be a multiple of 2)");
+        first = byteOffset / 2;
     } else if (type == LOCAL_GL_UNSIGNED_BYTE) {
         checked_byteCount = count;
+        first = byteOffset;
     } else {
         return ErrorInvalidEnum("drawElements: type must be UNSIGNED_SHORT or UNSIGNED_BYTE");
     }
 
     if (!checked_byteCount.isValid())
         return ErrorInvalidValue("drawElements: overflow in byteCount");
 
     // If there is no current program, this is silently ignored.
     // Any checks below this depend on a program being available.
     if (!mCurrentProgram)
         return;
 
     if (!mBoundElementArrayBuffer)
         return ErrorInvalidOperation("drawElements: must have element array buffer binding");
 
-    if (!mBoundElementArrayBuffer->Data())
+    if (!mBoundElementArrayBuffer->ByteLength())
         return ErrorInvalidOperation("drawElements: bound element array buffer doesn't have any data");
 
     CheckedUint32 checked_neededByteCount = checked_byteCount + byteOffset;
 
     if (!checked_neededByteCount.isValid())
         return ErrorInvalidOperation("drawElements: overflow in byteOffset+byteCount");
 
     if (checked_neededByteCount.value() > mBoundElementArrayBuffer->ByteLength())
         return ErrorInvalidOperation("drawElements: bound element array buffer is too small for given count and offset");
 
     int32_t maxAllowedCount = 0;
     if (!ValidateBuffers(&maxAllowedCount, "drawElements"))
       return;
 
-    int32_t maxIndex
-      = type == LOCAL_GL_UNSIGNED_SHORT
-        ? mBoundElementArrayBuffer->FindMaxUshortElement()
-        : mBoundElementArrayBuffer->FindMaxUbyteElement();
-
-    CheckedInt32 checked_maxIndexPlusOne = CheckedInt32(maxIndex) + 1;
-
-    if (!checked_maxIndexPlusOne.isValid() ||
-        checked_maxIndexPlusOne.value() > maxAllowedCount)
-    {
-        // the index array contains invalid indices for the current drawing state, but they
-        // might not be used by the present drawElements call, depending on first and count.
-
-        int32_t maxIndexInSubArray
-          = type == LOCAL_GL_UNSIGNED_SHORT
-            ? mBoundElementArrayBuffer->FindMaxElementInSubArray<GLushort>(count, byteOffset)
-            : mBoundElementArrayBuffer->FindMaxElementInSubArray<GLubyte>(count, byteOffset);
-
-        CheckedInt32 checked_maxIndexInSubArrayPlusOne = CheckedInt32(maxIndexInSubArray) + 1;
-
-        if (!checked_maxIndexInSubArrayPlusOne.isValid() ||
-            checked_maxIndexInSubArrayPlusOne.value() > maxAllowedCount)
-        {
-            return ErrorInvalidOperation(
-                "DrawElements: bound vertex attribute buffers do not have sufficient "
-                "size for given indices from the bound element array");
-        }
+    int32_t maxAllowedIndex = NS_MAX(maxAllowedCount - 1, 0);
+
+    if (!mBoundElementArrayBuffer->Validate(type, maxAllowedIndex, first, count)) {
+        return ErrorInvalidOperation(
+            "DrawElements: bound vertex attribute buffers do not have sufficient "
+            "size for given indices from the bound element array");
     }
 
     MakeContextCurrent();
 
     if (mBoundFramebuffer) {
         if (!mBoundFramebuffer->CheckAndInitializeRenderbuffers())
             return ErrorInvalidFramebufferOperation("drawElements: incomplete framebuffer");
     } else {
         EnsureBackbufferClearedAsNeeded();
     }
 
     BindFakeBlackTextures();
-    if (!DoFakeVertexAttrib0(checked_maxIndexPlusOne.value()))
+    if (!DoFakeVertexAttrib0(maxAllowedCount))
         return;
 
     SetupContextLossTimer();
     gl->fDrawElements(mode, count, type, reinterpret_cast<GLvoid*>(byteOffset));
 
     UndoFakeVertexAttrib0();
     UnbindFakeBlackTextures();
 
new file mode 100644
--- /dev/null
+++ b/content/canvas/src/WebGLElementArrayCache.cpp
@@ -0,0 +1,539 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "WebGLElementArrayCache.h"
+
+#include "nsTArray.h"
+#include "mozilla/Assertions.h"
+
+#include <cstdlib>
+#include <cstring>
+
+namespace mozilla {
+
+/*
+ * WebGLElementArrayCacheTree contains most of the implementation of WebGLElementArrayCache,
+ * which performs WebGL element array buffer validation for drawElements.
+ *
+ * Attention: Here lie nontrivial data structures, bug-prone algorithms, and non-canonical tweaks!
+ * Whence the explanatory comments, and compiled unit test.
+ *
+ * *** What problem are we solving here? ***
+ *
+ * WebGL::DrawElements has to validate that the elements are in range wrt the current vertex attribs.
+ * This boils down to the problem, given an array of integers, of computing the maximum in an arbitrary
+ * sub-array. The naive algorithm has linear complexity; this has been a major performance problem,
+ * see bug 569431. In that bug, we took the approach of caching the max for the whole array, which
+ * does cover most cases (DrawElements typically consumes the whole element array buffer) but doesn't
+ * help in other use cases:
+ *  - when doing "partial DrawElements" i.e. consuming only part of the element array buffer
+ *  - when doing frequent "partial buffer updates" i.e. bufferSubData calls updating parts of the
+ *    element array buffer
+ *
+ * *** The solution: a binary tree ***
+ *
+ * The solution implemented here is to use a binary tree as the cache data structure. Each tree node
+ * contains the max of its two children nodes. In this way, finding the maximum in any contiguous sub-array
+ * has log complexity instead of linear complexity.
+ *
+ * Simplistically, if the element array is
+ *
+ *     1   4   3   2
+ *
+ * then the corresponding tree is
+ *
+ *           4
+ *         _/ \_
+ *       4       3
+ *      / \     / \
+ *     1   4   3   2
+ *
+ * In practice, the bottom-most levels of the tree are both the largest to store (because they
+ * have more nodes), and the least useful performance-wise (because each node in the bottom
+ * levels concerns only few entries in the elements array buffer, it is cheap to compute).
+ *
+ * For this reason, we stop the tree a few levels above, so that each tree leaf actually corresponds
+ * to more than one element array entry.
+ *
+ * The number of levels that we "drop" is |sSkippedBottomTreeLevels| and the number of element array entries
+ * that each leaf corresponds to, is |sElementsPerLeaf|. This being a binary tree, we have
+ *
+ *   sElementsPerLeaf = 2 ^ sSkippedBottomTreeLevels.
+ *
+ * *** Storage layout of the binary tree ***
+ *
+ * We take advantage of the specifics of the situation to avoid generalist tree storage and instead
+ * store the tree entries in a vector, mTreeData.
+ *
+ * The number of leaves is given by mNumLeaves, and mTreeData is always a vector of length
+ *
+ *    2 * mNumLeaves.
+ *
+ * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the root node,
+ * then at offsets 2..3 is the tree level immediately below the root node, then at offsets 4..7
+ * is the tree level below that, etc.
+ *
+ * The figure below illustrates this by writing at each tree node the offset into mTreeData at
+ * which it is stored:
+ *
+ *           1
+ *         _/ \_
+ *       2       3
+ *      / \     / \
+ *     4   5   6   7
+ *    ...
+ *
+ * Thus, under the convention that the root level is level 0, we see that level N is stored at offsets
+ *
+ *    [ 2^n .. 2^(n+1) - 1 ]
+ *
+ * in mTreeData. Likewise, all the usual tree operations have simple mathematical expressions in
+ * terms of mTreeData offsets, see all the methods such as ParentNode, LeftChildNode, etc.
+ *
+ * *** Design constraint: element types aren't known at buffer-update time ***
+ *
+ * Note that a key constraint that we're operating under, is that we don't know the types of the elements
+ * by the time WebGL bufferData/bufferSubData methods are called. The type of elements is only
+ * specified in the drawElements call. This means that we may potentially have to store caches for
+ * multiple element types, for the same element array buffer. Since we don't know yet how many
+ * element types we'll eventually support (extensions add more), the concern about memory usage is serious.
+ * This is addressed by sSkippedBottomTreeLevels as explained above. Of course, in the typical
+ * case where each element array buffer is only ever used with one type, this is also addressed
+ * by having WebGLElementArrayCache lazily create trees for each type only upon first use.
+ *
+ * Another consequence of this constraint is that when invalidating the trees, we have to invalidate
+ * all existing trees. So if trees for types uint8_t and uint16_t have ever been constructed for this buffer,
+ * every subsequent invalidation will have to invalidate both trees even if one of the two types is never
+ * used again. This implies that it is important to minimize the cost of invalidation i.e.
+ * do lazy updates upon use as opposed to immediately updating invalidated trees. This poses a problem:
+ * it is nontrivial to keep track of the part of the tree that's invalidated. The current solution
+ * can only keep track of an invalidated interval, from |mFirstInvalidatedLeaf| to |mLastInvalidatedLeaf|.
+ * The problem is that if one does two small, far-apart partial buffer updates, the resulting invalidated
+ * area is very large even though only a small part of the array really needed to be invalidated.
+ * The real solution to this problem would be to use a smarter data structure to keep track of the
+ * invalidated area, probably an interval tree. Meanwhile, we can probably live with the current situation
+ * as the unfavorable case seems to be a small corner case: in order to run into performance issues,
+ * the number of bufferSubData in between two consecutive draws must be small but greater than 1, and
+ * the partial buffer updates must be small and far apart. Anything else than this corner case
+ * should run fast in the current setting.
+ */
+template<typename T>
+struct WebGLElementArrayCacheTree
+{
+  // A too-high sSkippedBottomTreeLevels would harm the performance of small drawElements calls
+  // A too-low sSkippedBottomTreeLevels would cause undue memory usage.
+  // The current value has been validated by some benchmarking. See bug 732660.
+  static const size_t sSkippedBottomTreeLevels = 3;
+  static const size_t sElementsPerLeaf = 1 << sSkippedBottomTreeLevels;
+  static const size_t sElementsPerLeafMask = sElementsPerLeaf - 1; // sElementsPerLeaf is POT
+
+private:
+  WebGLElementArrayCache& mParent;
+  nsTArray<T> mTreeData;
+  size_t mNumLeaves;
+  bool mInvalidated;
+  size_t mFirstInvalidatedLeaf;
+  size_t mLastInvalidatedLeaf;
+
+public:
+  WebGLElementArrayCacheTree(WebGLElementArrayCache& p)
+    : mParent(p)
+    , mNumLeaves(0)
+    , mInvalidated(false)
+    , mFirstInvalidatedLeaf(0)
+    , mLastInvalidatedLeaf(0)
+  {
+    ResizeToParentSize();
+    Invalidate(0, mParent.ByteSize() - 1);
+  }
+
+  T GlobalMaximum() const {
+    MOZ_ASSERT(!mInvalidated);
+    return mTreeData[1];
+  }
+
+  // returns the index of the parent node; if treeIndex=1 (the root node),
+  // the return value is 0.
+  static size_t ParentNode(size_t treeIndex) {
+    MOZ_ASSERT(treeIndex);
+    return treeIndex >> 1;
+  }
+
+  static bool IsRightNode(size_t treeIndex) {
+    MOZ_ASSERT(treeIndex);
+    return treeIndex & 1;
+  }
+
+  static bool IsLeftNode(size_t treeIndex) {
+    MOZ_ASSERT(treeIndex);
+    return !IsRightNode(treeIndex);
+  }
+
+  static size_t SiblingNode(size_t treeIndex) {
+    MOZ_ASSERT(treeIndex);
+    return treeIndex ^ 1;
+  }
+
+  static size_t LeftChildNode(size_t treeIndex) {
+    MOZ_ASSERT(treeIndex);
+    return treeIndex << 1;
+  }
+
+  static size_t RightChildNode(size_t treeIndex) {
+    MOZ_ASSERT(treeIndex);
+    return SiblingNode(LeftChildNode(treeIndex));
+  }
+
+  static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) {
+    MOZ_ASSERT(treeIndex);
+    return treeIndex - distance;
+  }
+
+  static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) {
+    MOZ_ASSERT(treeIndex);
+    return treeIndex + distance;
+  }
+
+  size_t LeafForElement(size_t element) {
+    size_t leaf = element / sElementsPerLeaf;
+    MOZ_ASSERT(leaf < mNumLeaves);
+    return leaf;
+  }
+
+  size_t LeafForByte(size_t byte) {
+    return LeafForElement(byte / sizeof(T));
+  }
+
+  // Returns the index, into the tree storage, where a given leaf is stored
+  size_t TreeIndexForLeaf(size_t leaf) {
+    // See above class comment. The tree storage is an array of length 2*mNumLeaves.
+    // The leaves are stored in its second half.
+    return leaf + mNumLeaves;
+  }
+
+  static size_t LastElementUnderSameLeaf(size_t element) {
+    return element | sElementsPerLeafMask;
+  }
+
+  static size_t FirstElementUnderSameLeaf(size_t element) {
+    return element & ~sElementsPerLeafMask;
+  }
+
+  static size_t NextMultipleOfElementsPerLeaf(size_t numElements) {
+    return ((numElements - 1) | sElementsPerLeafMask) + 1;
+  }
+
+  bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf) {
+    MOZ_ASSERT(!mInvalidated);
+
+    size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
+    size_t lastTreeIndex  = TreeIndexForLeaf(lastLeaf);
+
+    while (true) {
+      // given that we tweak these values in nontrivial ways, it doesn't hurt to do
+      // this sanity check
+      MOZ_ASSERT(firstTreeIndex <= lastTreeIndex);
+
+      // final case where there is only 1 node to validate at the current tree level
+      if (lastTreeIndex == firstTreeIndex) {
+        return mTreeData[firstTreeIndex] <= maxAllowed;
+      }
+
+      // if the first node at current tree level is a right node, handle it individually
+      // and replace it with its right neighbor, which is a left node
+      if (IsRightNode(firstTreeIndex)) {
+        if (mTreeData[firstTreeIndex] > maxAllowed)
+          return false;
+        firstTreeIndex = RightNeighborNode(firstTreeIndex);
+      }
+
+      // if the last node at current tree level is a left node, handle it individually
+      // and replace it with its left neighbor, which is a right node
+      if (IsLeftNode(lastTreeIndex)) {
+        if (mTreeData[lastTreeIndex] > maxAllowed)
+          return false;
+        lastTreeIndex = LeftNeighborNode(lastTreeIndex);
+      }
+
+      // at this point it can happen that firstTreeIndex and lastTreeIndex "crossed" each
+      // other. That happens if firstTreeIndex was a right node and lastTreeIndex was its
+      // right neighor: in that case, both above tweaks happened and as a result, they ended
+      // up being swapped: lastTreeIndex is now the _left_ neighbor of firstTreeIndex.
+      // When that happens, there is nothing left to validate.
+      if (lastTreeIndex == LeftNeighborNode(firstTreeIndex)) {
+        return true;
+      }
+
+      // walk up 1 level
+      firstTreeIndex = ParentNode(firstTreeIndex);
+      lastTreeIndex = ParentNode(lastTreeIndex);
+    }
+  }
+
+  template<typename U>
+  static U NextPowerOfTwo(U x) {
+    U result = 1;
+    while (result < x)
+      result <<= 1;
+    MOZ_ASSERT(result >= x);
+    MOZ_ASSERT((result & (result - 1)) == 0);
+    return result;
+  }
+
+  bool ResizeToParentSize()
+  {
+    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
+    size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf;
+
+    mNumLeaves = NextPowerOfTwo(requiredNumLeaves);
+
+    // see class comment for why we the tree storage size is 2 * mNumLeaves
+    return mTreeData.SetLength(2 * mNumLeaves);
+  }
+
+  void Invalidate(size_t firstByte, size_t lastByte);
+
+  void Update();
+
+  size_t SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf) const
+  {
+    return aMallocSizeOf(this) + aMallocSizeOf(mTreeData.Elements());
+  }
+};
+
+// TreeForType: just a template helper to select the right tree object for a given
+// element type.
+template<typename T>
+struct TreeForType {};
+
+template<>
+struct TreeForType<uint8_t>
+{
+  static WebGLElementArrayCacheTree<uint8_t>*& Run(WebGLElementArrayCache *b) { return b->mUint8Tree; }
+};
+
+template<>
+struct TreeForType<uint16_t>
+{
+  static WebGLElementArrayCacheTree<uint16_t>*& Run(WebGLElementArrayCache *b) { return b->mUint16Tree; }
+};
+
+// When the buffer gets updated from firstByte to lastByte,
+// calling this method will notify the tree accordingly
+template<typename T>
+void WebGLElementArrayCacheTree<T>::Invalidate(size_t firstByte, size_t lastByte)
+{
+  lastByte = NS_MIN(lastByte, mNumLeaves * sElementsPerLeaf * sizeof(T) - 1);
+
+  size_t firstLeaf = LeafForByte(firstByte);
+  size_t lastLeaf = LeafForByte(lastByte);
+
+  if (mInvalidated) {
+    mFirstInvalidatedLeaf = NS_MIN(firstLeaf, mFirstInvalidatedLeaf);
+    mLastInvalidatedLeaf = NS_MAX(lastLeaf, mLastInvalidatedLeaf);
+  } else {
+    mInvalidated = true;
+    mFirstInvalidatedLeaf = firstLeaf;
+    mLastInvalidatedLeaf = lastLeaf;
+  }
+}
+
+
+// When tree has been partially invalidated, from mFirstInvalidatedLeaf to
+// mLastInvalidatedLeaf, calling this method will 1) update the leaves in this interval
+// from the raw buffer data, and 2) propagate this update up the tree
+template<typename T>
+void WebGLElementArrayCacheTree<T>::Update()
+{
+  if (!mInvalidated) {
+    return;
+  }
+
+  MOZ_ASSERT(mLastInvalidatedLeaf < mNumLeaves);
+
+  size_t firstTreeIndex = TreeIndexForLeaf(mFirstInvalidatedLeaf);
+  size_t lastTreeIndex = TreeIndexForLeaf(mLastInvalidatedLeaf);
+
+  // Step #1: initialize the tree leaves from plain buffer data.
+  // That is, each tree leaf must be set to the max of the |sElementsPerLeaf| corresponding
+  // buffer entries.
+  // condition-less scope to prevent leaking this scope's variables into the code below
+  {
+    // treeIndex is the index of the tree leaf we're writing, i.e. the destination index
+    size_t treeIndex = firstTreeIndex;
+    // srcIndex is the index in the source buffer
+    size_t srcIndex = mFirstInvalidatedLeaf * sElementsPerLeaf;
+    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
+    while (treeIndex <= lastTreeIndex) {
+      T m = 0;
+      size_t a = srcIndex;
+      size_t srcIndexNextLeaf = NS_MIN(a + sElementsPerLeaf, numberOfElements);
+      for (; srcIndex < srcIndexNextLeaf; srcIndex++) {
+        m = NS_MAX(m, mParent.Element<T>(srcIndex));
+      }
+      mTreeData[treeIndex] = m;
+      treeIndex++;
+    }
+  }
+
+  // Step #2: propagate the values up the tree. This is simply a matter of walking up
+  // the tree and setting each node to the max of its two children.
+  while (true) {
+
+    // fast-exit case where only one node is invalidated at the current level
+    if (firstTreeIndex == lastTreeIndex) {
+      size_t firstTreeIndexParent = ParentNode(firstTreeIndex);
+      while (firstTreeIndexParent) {
+        mTreeData[firstTreeIndexParent] = NS_MAX(mTreeData[firstTreeIndex], mTreeData[SiblingNode(firstTreeIndex)]);
+        firstTreeIndex = firstTreeIndexParent;
+        firstTreeIndexParent = ParentNode(firstTreeIndex);
+      }
+      break;
+    }
+
+    // move up 1 level
+    firstTreeIndex = ParentNode(firstTreeIndex);
+    lastTreeIndex = ParentNode(lastTreeIndex);
+
+    // initialize local iteration variables: child and parent.
+    size_t child = LeftChildNode(firstTreeIndex);
+    size_t parent = firstTreeIndex;
+
+    // the unrolling makes this look more complicated than it is; the plain non-unrolled
+    // version is in the second while loop below
+    const int unrollSize = 8;
+    while (RightNeighborNode(parent, unrollSize - 1) <= lastTreeIndex)
+    {
+      for (int unroll = 0; unroll < unrollSize; unroll++)
+      {
+        T a = mTreeData[child];
+        child = RightNeighborNode(child);
+        T b = mTreeData[child];
+        child = RightNeighborNode(child);
+        mTreeData[parent] = NS_MAX(a, b);
+        parent = RightNeighborNode(parent);
+      }
+    }
+    // plain non-unrolled version, used to terminate the job after the last unrolled iteration
+    while (parent <= lastTreeIndex)
+    {
+      T a = mTreeData[child];
+      child = RightNeighborNode(child);
+      T b = mTreeData[child];
+      child = RightNeighborNode(child);
+      mTreeData[parent] = NS_MAX(a, b);
+      parent = RightNeighborNode(parent);
+    }
+  }
+
+  mInvalidated = false;
+}
+
+WebGLElementArrayCache::~WebGLElementArrayCache() {
+  delete mUint8Tree;
+  delete mUint16Tree;
+  free(mUntypedData);
+}
+
+bool WebGLElementArrayCache::BufferData(const void* ptr, size_t byteSize) {
+  mByteSize = byteSize;
+  if (mUint8Tree)
+    if (!mUint8Tree->ResizeToParentSize())
+      return false;
+  if (mUint16Tree)
+    if (!mUint16Tree->ResizeToParentSize())
+      return false;
+  mUntypedData = realloc(mUntypedData, byteSize);
+  if (!mUntypedData)
+    return false;
+  BufferSubData(0, ptr, byteSize);
+  return true;
+}
+
+void WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) {
+  if (!updateByteSize) return;
+  if (ptr)
+      memcpy(static_cast<uint8_t*>(mUntypedData) + pos, ptr, updateByteSize);
+  else
+      memset(static_cast<uint8_t*>(mUntypedData) + pos, 0, updateByteSize);
+  InvalidateTrees(pos, pos + updateByteSize - 1);
+}
+
+void WebGLElementArrayCache::InvalidateTrees(size_t firstByte, size_t lastByte)
+{
+  if (mUint8Tree)
+    mUint8Tree->Invalidate(firstByte, lastByte);
+  if (mUint16Tree)
+    mUint16Tree->Invalidate(firstByte, lastByte);
+}
+
+template<typename T>
+bool WebGLElementArrayCache::Validate(T maxAllowed, size_t firstElement, size_t countElements) {
+  if (!mByteSize || !countElements)
+    return true;
+
+  WebGLElementArrayCacheTree<T>*& tree = TreeForType<T>::Run(this);
+  if (!tree) {
+    tree = new WebGLElementArrayCacheTree<T>(*this);
+  }
+
+  size_t lastElement = firstElement + countElements - 1;
+
+  tree->Update();
+
+  // fast exit path when the global maximum for the whole element array buffer
+  // falls in the allowed range
+  if (tree->GlobalMaximum() <= maxAllowed)
+  {
+    return true;
+  }
+
+  const T* elements = Elements<T>();
+
+  // before calling tree->Validate, we have to validate ourselves the boundaries of the elements span,
+  // to round them to the nearest multiple of sElementsPerLeaf.
+  size_t firstElementAdjustmentEnd = NS_MIN(lastElement,
+                                            tree->LastElementUnderSameLeaf(firstElement));
+  while (firstElement <= firstElementAdjustmentEnd) {
+    if (elements[firstElement] > maxAllowed)
+      return false;
+    firstElement++;
+  }
+  size_t lastElementAdjustmentEnd = NS_MAX(firstElement,
+                                           tree->FirstElementUnderSameLeaf(lastElement));
+  while (lastElement >= lastElementAdjustmentEnd) {
+    if (elements[lastElement] > maxAllowed)
+      return false;
+    lastElement--;
+  }
+
+  // at this point, for many tiny validations, we're already done.
+  if (firstElement > lastElement)
+    return true;
+
+  // general case
+  return tree->Validate(maxAllowed,
+                        tree->LeafForElement(firstElement),
+                        tree->LeafForElement(lastElement));
+}
+
+bool WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed, size_t firstElement, size_t countElements) {
+  if (type == LOCAL_GL_UNSIGNED_BYTE)
+    return Validate<uint8_t>(uint8_t(maxAllowed), firstElement, countElements);
+  if (type == LOCAL_GL_UNSIGNED_SHORT)
+    return Validate<uint16_t>(uint16_t(maxAllowed), firstElement, countElements);
+  return false;
+}
+
+size_t WebGLElementArrayCache::SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf) const {
+  size_t uint8TreeSize  = mUint8Tree  ? mUint8Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
+  size_t uint16TreeSize = mUint16Tree ? mUint16Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
+  return aMallocSizeOf(this) +
+          mByteSize +
+          uint8TreeSize +
+          uint16TreeSize;
+}
+
+} // end namespace mozilla
new file mode 100644
--- /dev/null
+++ b/content/canvas/src/WebGLElementArrayCache.h
@@ -0,0 +1,81 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef WEBGLELEMENTARRAYCACHE_H
+#define WEBGLELEMENTARRAYCACHE_H
+
+#include "mozilla/StandardInteger.h"
+#include "nscore.h"
+#include "GLDefs.h"
+
+namespace mozilla {
+
+template<typename T>
+struct WebGLElementArrayCacheTree;
+
+/*
+ * WebGLElementArrayCache implements WebGL element array buffer validation for drawElements.
+ *
+ * Its exposes methods meant to be called by WebGL method implementations:
+ *  - Validate, to be called by WebGLContext::DrawElements, is where we use the cache
+ *  - BufferData and BufferSubData, to be called by eponymous WebGL methods, are how
+ *    data is fed into the cache
+ *
+ * Most of the implementation is hidden in the auxilary class template, WebGLElementArrayCacheTree.
+ * Refer to its code for design comments.
+ */
+class WebGLElementArrayCache {
+
+public:
+  bool BufferData(const void* ptr, size_t byteSize);
+  void BufferSubData(size_t pos, const void* ptr, size_t updateByteSize);
+
+  bool Validate(GLenum type, uint32_t maxAllowed, size_t first, size_t count);
+
+  template<typename T>
+  T Element(size_t i) const { return Elements<T>()[i]; }
+
+  WebGLElementArrayCache()
+    : mUntypedData(nullptr)
+    , mByteSize(0)
+    , mUint8Tree(nullptr)
+    , mUint16Tree(nullptr)
+  {}
+
+  ~WebGLElementArrayCache();
+
+  size_t SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf) const;
+
+private:
+
+  template<typename T>
+  bool Validate(T maxAllowed, size_t first, size_t count);
+
+  size_t ByteSize() const {
+    return mByteSize;
+  }
+
+  template<typename T>
+  const T* Elements() const { return static_cast<const T*>(mUntypedData); }
+  template<typename T>
+  T* Elements() { return static_cast<T*>(mUntypedData); }
+
+  void InvalidateTrees(size_t firstByte, size_t lastByte);
+
+  template<typename T>
+  friend class WebGLElementArrayCacheTree;
+  template<typename T>
+  friend struct TreeForType;
+
+  void* mUntypedData;
+  size_t mByteSize;
+  WebGLElementArrayCacheTree<uint8_t>* mUint8Tree;
+  WebGLElementArrayCacheTree<uint16_t>* mUint16Tree;
+};
+
+
+} // end namespace mozilla
+
+#endif // WEBGLELEMENTARRAYCACHE_H
--- a/content/canvas/test/Makefile.in
+++ b/content/canvas/test/Makefile.in
@@ -5,16 +5,18 @@
 
 DEPTH		= @DEPTH@
 topsrcdir	= @top_srcdir@
 srcdir		= @srcdir@
 VPATH		= @srcdir@
 relativesrcdir  = @relativesrcdir@
 DIRS		+= webgl crossorigin
 
+TOOLS_DIRS += compiled
+
 include $(DEPTH)/config/autoconf.mk
 MOCHITEST_FILES = \
 	test_canvas.html \
 	image_transparent50.png \
 	image_redtransparent.png \
 	image_yellow.png \
 	image_anim-poster-gr.png \
 	image_green-16x16.png \
new file mode 100644
--- /dev/null
+++ b/content/canvas/test/compiled/Makefile.in
@@ -0,0 +1,22 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DEPTH := @DEPTH@
+topsrcdir := @top_srcdir@
+srcdir := @srcdir@
+VPATH := @srcdir@
+
+FAIL_ON_WARNINGS = 1
+
+include $(DEPTH)/config/autoconf.mk
+
+LOCAL_INCLUDES := \
+    -I$(srcdir)/../../src \
+    $(NULL)
+
+CPP_UNIT_TESTS := \
+  TestWebGLElementArrayCache.cpp \
+  $(NULL)
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/content/canvas/test/compiled/TestWebGLElementArrayCache.cpp
@@ -0,0 +1,167 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+
+#include "WebGLElementArrayCache.cpp"
+
+#include <cstdlib>
+#include <iostream>
+#include "nscore.h"
+#include "nsTArray.h"
+
+using namespace mozilla;
+
+int gTestsPassed = 0;
+
+void VerifyImplFunction(bool condition, const char* file, int line)
+{
+  if (condition) {
+    gTestsPassed++;
+  } else {
+    std::cerr << "Test failed at " << file << ":" << line << std::endl;
+    abort();
+  }
+}
+
+#define VERIFY(condition) \
+    VerifyImplFunction((condition), __FILE__, __LINE__)
+
+void MakeRandomVector(nsTArray<uint8_t>& a, size_t size) {
+  a.SetLength(size);
+  // only the most-significant bits of rand() are reasonably random
+  // 16 here is arbitrary, may fail on platforms where RAND_MAX is low,
+  // but guarded by an assertion.
+  enum { bitsToIgnore = 16 };
+  MOZ_STATIC_ASSERT((unsigned int)(RAND_MAX) >> (8 + bitsToIgnore),
+                    "Didn't expect RAND_MAX to be so low");
+  for (size_t i = 0; i < size; i++)
+    a[i] = static_cast<uint8_t>((unsigned int)(rand()) >> bitsToIgnore);
+}
+
+template<typename T>
+T RandomInteger(T a, T b)
+{
+  T result(a + rand() % (b - a + 1));
+  return result;
+}
+
+template<typename T>
+GLenum GLType()
+{
+  return sizeof(T) == 1
+             ? LOCAL_GL_UNSIGNED_BYTE
+             : LOCAL_GL_UNSIGNED_SHORT;
+}
+
+template<typename T>
+void CheckValidateOneType(WebGLElementArrayCache& c, size_t firstByte, size_t countBytes)
+{
+  size_t first = firstByte / sizeof(T);
+  size_t count = countBytes / sizeof(T);
+
+  GLenum type = GLType<T>();
+
+  T max = 0;
+  for (size_t i = 0; i < count; i++)
+    if (c.Element<T>(first + i) > max)
+      max = c.Element<T>(first + i);
+
+  VERIFY(c.Validate(type, max, first, count));
+  VERIFY(c.Validate(type, T(-1), first, count));
+  if (T(max + 1)) VERIFY(c.Validate(type, T(max + 1), first, count));
+  if (max > 0) {
+    VERIFY(!c.Validate(type, max - 1, first, count));
+    VERIFY(!c.Validate(type, 0, first, count));
+  }
+}
+
+void CheckValidate(WebGLElementArrayCache& c, size_t firstByte, size_t countBytes)
+{
+  CheckValidateOneType<uint8_t>(c, firstByte, countBytes);
+  CheckValidateOneType<uint16_t>(c, firstByte, countBytes);
+}
+
+template<typename T>
+void CheckSanity()
+{
+  const size_t numElems = 64; // should be significantly larger than tree leaf size to
+                        // ensure we exercise some nontrivial tree-walking
+  T data[numElems] = {1,0,3,1,2,6,5,4}; // intentionally specify only 8 elements for now
+  size_t numBytes = numElems * sizeof(T);
+  MOZ_ASSERT(numBytes == sizeof(data));
+
+  GLenum type = GLType<T>();
+
+  WebGLElementArrayCache c;
+  c.BufferData(data, numBytes);
+  VERIFY( c.Validate(type, 6, 0, 8));
+  VERIFY(!c.Validate(type, 5, 0, 8));
+  VERIFY( c.Validate(type, 3, 0, 3));
+  VERIFY(!c.Validate(type, 2, 0, 3));
+  VERIFY( c.Validate(type, 6, 2, 4));
+  VERIFY(!c.Validate(type, 5, 2, 4));
+
+  c.BufferSubData(5*sizeof(T), data, sizeof(T));
+  VERIFY( c.Validate(type, 5, 0, 8));
+  VERIFY(!c.Validate(type, 4, 0, 8));
+
+  // now test a somewhat larger size to ensure we exceed the size of a tree leaf
+  for(size_t i = 0; i < numElems; i++)
+    data[i] = numElems - i;
+  c.BufferData(data, numBytes);
+  VERIFY( c.Validate(type, numElems,     0, numElems));
+  VERIFY(!c.Validate(type, numElems - 1, 0, numElems));
+
+  MOZ_ASSERT(numElems > 10);
+  VERIFY( c.Validate(type, numElems - 10, 10, numElems - 10));
+  VERIFY(!c.Validate(type, numElems - 11, 10, numElems - 10));
+}
+
+int main(int argc, char *argv[])
+{
+  srand(0); // do not want a random seed here.
+
+  CheckSanity<uint8_t>();
+  CheckSanity<uint16_t>();
+
+  nsTArray<uint8_t> v, vsub;
+  WebGLElementArrayCache b;
+
+  for (int maxBufferSize = 1; maxBufferSize <= 4096; maxBufferSize *= 2) {
+    int repeat = std::min(maxBufferSize, 20);
+    for (int i = 0; i < repeat; i++) {
+      size_t size = RandomInteger<size_t>(1, maxBufferSize);
+      MakeRandomVector(v, size);
+      b.BufferData(v.Elements(), size);
+      CheckValidate(b, 0, size);
+
+      for (int j = 0; j < 16; j++) {
+        for (int bufferSubDataCalls = 1; bufferSubDataCalls <= 8; bufferSubDataCalls *= 2) {
+          for (int validateCalls = 1; validateCalls <= 8; validateCalls *= 2) {
+
+            size_t offset = 0, subsize = 0;
+
+            for (int k = 0; k < bufferSubDataCalls; k++) {
+              offset = RandomInteger<size_t>(0, size);
+              subsize = RandomInteger<size_t>(0, size - offset);
+              MakeRandomVector(vsub, subsize);
+              b.BufferSubData(offset, vsub.Elements(), subsize);
+            }
+
+            for (int k = 0; k < validateCalls; k++) {
+              offset = RandomInteger<size_t>(0, size);
+              subsize = RandomInteger<size_t>(0, size - offset);
+              CheckValidate(b, offset, subsize);
+            }
+          } // validateCalls
+        } // bufferSubDataCalls
+      } // j
+    } // i
+  } // maxBufferSize
+
+  std::cerr << argv[0] << ": all " << gTestsPassed << " tests passed" << std::endl;
+  return 0;
+}