Bug 1280499 - Support paranoid uploading for nVidia. - r=jrmuizel
author Jeff Gilbert <jgilbert@mozilla.com>
Thu, 21 Jul 2016 20:03:16 -0700
changeset 348682 eaf7778fef47cfe476e1b9f7da88d29de398b05b
parent 348681 07259f1b5eb7f0b69643d115a57ec3418d841d92
child 348683 9e90e83343b6d2c380b9e08ffaf05c7e343083fc
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1280499
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1280499 - Support paranoid uploading for nVidia. - r=jrmuizel MozReview-Commit-ID: 3CMNoHiBACT
dom/canvas/TexUnpackBlob.cpp
dom/canvas/TexUnpackBlob.h
dom/canvas/WebGLContext.h
dom/canvas/WebGLTextureUpload.cpp
--- a/dom/canvas/TexUnpackBlob.cpp
+++ b/dom/canvas/TexUnpackBlob.cpp
@@ -7,16 +7,17 @@
 
 #include "GLBlitHelper.h"
 #include "GLContext.h"
 #include "GLDefs.h"
 #include "mozilla/dom/Element.h"
 #include "mozilla/dom/HTMLCanvasElement.h"
 #include "mozilla/RefPtr.h"
 #include "nsLayoutUtils.h"
+#include "WebGLBuffer.h"
 #include "WebGLContext.h"
 #include "WebGLTexelConversions.h"
 #include "WebGLTexture.h"
 
 namespace mozilla {
 namespace webgl {
 
 static bool
@@ -148,26 +149,28 @@ TexUnpackBlob::TexUnpackBlob(const WebGL
     , mSkipRows(webgl->mPixelStore_UnpackSkipRows)
     , mSkipImages(IsTarget3D(target) ? webgl->mPixelStore_UnpackSkipImages : 0)
 
     , mWidth(width)
     , mHeight(height)
     , mDepth(depth)
 
     , mIsSrcPremult(isSrcPremult)
+
+    , mNeedsExactUpload(false)
 {
     MOZ_ASSERT_IF(!IsTarget3D(target), mDepth == 1);
 }
 
 bool
 TexUnpackBlob::ConvertIfNeeded(WebGLContext* webgl, const char* funcName,
-                               const void* srcBytes, uint32_t srcStride, uint8_t srcBPP,
-                               WebGLTexelFormat srcFormat,
+                               const uint8_t* srcBytes, uint32_t srcStride,
+                               uint8_t srcBPP, WebGLTexelFormat srcFormat,
                                const webgl::DriverUnpackInfo* dstDUI,
-                               const void** const out_bytes,
+                               const uint8_t** const out_bytes,
                                UniqueBuffer* const out_anchoredBuffer) const
 {
     *out_bytes = srcBytes;
 
     if (!HasData() || !mWidth || !mHeight || !mDepth)
         return true;
 
     //////
@@ -176,17 +179,17 @@ TexUnpackBlob::ConvertIfNeeded(WebGLCont
     const auto offset = mSkipPixels * CheckedUint32(srcBPP) + totalSkipRows * srcStride;
     if (!offset.isValid()) {
         webgl->ErrorOutOfMemory("%s: Invalid offset calculation during conversion.",
                                 funcName);
         return false;
     }
     const uint32_t skipBytes = offset.value();
 
-    auto const srcBegin = (const uint8_t*)srcBytes + skipBytes;
+    auto const srcBegin = srcBytes + skipBytes;
 
     //////
 
     const auto srcOrigin = (webgl->mPixelStore_FlipY ? gl::OriginPos::TopLeft
                                                      : gl::OriginPos::BottomLeft);
     const auto dstOrigin = gl::OriginPos::BottomLeft;
     const bool isDstPremult = webgl->mPixelStore_PremultiplyAlpha;
 
@@ -232,23 +235,25 @@ TexUnpackBlob::ConvertIfNeeded(WebGLCont
         // No conversion needed!
         return true;
     }
 
     //////
     // We need some sort of conversion, so create the dest buffer.
 
     *out_anchoredBuffer = calloc(1, dstSize.value());
-    *out_bytes = out_anchoredBuffer->get();
-    if (!*out_bytes) {
+    const auto dstBytes = (uint8_t*)out_anchoredBuffer->get();
+
+    if (!dstBytes) {
         webgl->ErrorOutOfMemory("%s: Unable to allocate buffer during conversion.",
                                 funcName);
         return false;
     }
-    const auto dstBegin = (uint8_t*)(*out_bytes) + skipBytes;
+    *out_bytes = dstBytes;
+    const auto dstBegin = dstBytes + skipBytes;
 
     //////
     // Row conversion
 
     if (!needsPixelConversion) {
         webgl->GenerateWarning("%s: Incurred CPU row conversion, which is slow.",
                                funcName);
 
@@ -314,17 +319,17 @@ DoTexOrSubImage(bool isSubImage, gl::GLC
     }
 }
 
 //////////////////////////////////////////////////////////////////////////////////////////
 // TexUnpackBytes
 
 TexUnpackBytes::TexUnpackBytes(const WebGLContext* webgl, TexImageTarget target,
                                uint32_t width, uint32_t height, uint32_t depth,
-                               bool isClientData, const void* ptr)
+                               bool isClientData, const uint8_t* ptr)
     : TexUnpackBlob(webgl, target,
                     FallbackOnZero(webgl->mPixelStore_UnpackRowLength, width), width,
                     height, depth, false)
     , mIsClientData(isClientData)
     , mPtr(ptr)
 { }
 
 bool
@@ -341,27 +346,96 @@ TexUnpackBytes::TexOrSubImage(bool isSub
     const auto bytesPerRow = CheckedUint32(mRowLength) * bytesPerPixel;
     const auto rowStride = RoundUpToMultipleOf(bytesPerRow, mAlignment);
     if (!rowStride.isValid()) {
         MOZ_CRASH("Should be checked earlier.");
     }
 
     const auto format = FormatForPackingInfo(pi);
 
-    const void* uploadBytes;
+    auto uploadPtr = mPtr;
     UniqueBuffer tempBuffer;
     if (mIsClientData &&
         !ConvertIfNeeded(webgl, funcName, mPtr, rowStride.value(), bytesPerPixel, format,
-                         dui, &uploadBytes, &tempBuffer))
+                         dui, &uploadPtr, &tempBuffer))
     {
         return false;
     }
 
-    *out_error = DoTexOrSubImage(isSubImage, webgl->gl, target, level, dui, xOffset,
-                                 yOffset, zOffset, mWidth, mHeight, mDepth, uploadBytes);
+    const auto& gl = webgl->gl;
+
+    //////
+
+    bool useParanoidHandling = false;
+    if (mNeedsExactUpload && webgl->mBoundPixelUnpackBuffer) {
+        webgl->GenerateWarning("%s: Uploads from a buffer with a final row with a byte"
+                               " count smaller than the row stride can incur extra"
+                               " overhead.",
+                               funcName);
+
+        if (gl->WorkAroundDriverBugs()) {
+            useParanoidHandling |= (gl->Vendor() == gl::GLVendor::NVIDIA);
+        }
+    }
+
+    if (!useParanoidHandling) {
+        *out_error = DoTexOrSubImage(isSubImage, gl, target, level, dui, xOffset, yOffset,
+                                     zOffset, mWidth, mHeight, mDepth, uploadPtr);
+        return true;
+    }
+
+    //////
+
+    MOZ_ASSERT(webgl->mBoundPixelUnpackBuffer);
+
+    if (!isSubImage) {
+        // Alloc first to catch OOMs.
+        gl->fBindBuffer(LOCAL_GL_PIXEL_UNPACK_BUFFER, 0);
+        *out_error = DoTexOrSubImage(false, gl, target, level, dui, xOffset, yOffset,
+                                     zOffset, mWidth, mHeight, mDepth, nullptr);
+        gl->fBindBuffer(LOCAL_GL_PIXEL_UNPACK_BUFFER,
+                        webgl->mBoundPixelUnpackBuffer->mGLName);
+    }
+
+    //////
+
+    if (mDepth > 1) {
+        *out_error = DoTexOrSubImage(true, gl, target, level, dui, xOffset, yOffset,
+                                     zOffset, mWidth, mHeight, mDepth-1, uploadPtr);
+    }
+
+    if (mHeight > 1) {
+        *out_error = DoTexOrSubImage(true, gl, target, level, dui, xOffset, yOffset,
+                                     zOffset+mDepth-1, mWidth, mHeight-1, 1, uploadPtr);
+    }
+
+    const uint32_t imageStride = rowStride.value() * mImageHeight;
+
+    const uint32_t usedImages = mSkipImages + mDepth;
+    const uint32_t usedRows = mSkipRows + mHeight;
+
+    uploadPtr += (usedImages - 1) * imageStride;
+    uploadPtr += (usedRows - 1) * rowStride.value();
+
+    //////
+
+    gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, 1);
+    gl->fPixelStorei(LOCAL_GL_UNPACK_IMAGE_HEIGHT, 0);
+    gl->fPixelStorei(LOCAL_GL_UNPACK_SKIP_IMAGES, 0);
+    gl->fPixelStorei(LOCAL_GL_UNPACK_SKIP_ROWS, 0);
+
+    *out_error = DoTexOrSubImage(true, gl, target, level, dui, xOffset,
+                                 yOffset+mHeight-1, zOffset+mDepth-1, mWidth, 1, 1,
+                                 uploadPtr);
+
+    gl->fPixelStorei(LOCAL_GL_UNPACK_ALIGNMENT, webgl->mPixelStore_UnpackAlignment);
+    gl->fPixelStorei(LOCAL_GL_UNPACK_IMAGE_HEIGHT, webgl->mPixelStore_UnpackImageHeight);
+    gl->fPixelStorei(LOCAL_GL_UNPACK_SKIP_IMAGES, webgl->mPixelStore_UnpackSkipImages);
+    gl->fPixelStorei(LOCAL_GL_UNPACK_SKIP_ROWS, webgl->mPixelStore_UnpackSkipRows);
+
     return true;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 // TexUnpackImage
 
 TexUnpackImage::TexUnpackImage(const WebGLContext* webgl, TexImageTarget target,
@@ -552,17 +626,17 @@ TexUnpackSurface::TexOrSubImage(bool isS
     const auto srcBytes = map.GetData();
     const auto srcStride = map.GetStride();
 
     // CPU conversion. (++numCopies)
 
     webgl->GenerateWarning("%s: Incurred CPU-side conversion, which is very slow.",
                            funcName);
 
-    const void* uploadBytes;
+    const uint8_t* uploadBytes;
     UniqueBuffer tempBuffer;
     if (!ConvertIfNeeded(webgl, funcName, srcBytes, srcStride, srcBPP, srcFormat,
                          dstDUI, &uploadBytes, &tempBuffer))
     {
         return false;
     }
 
     //////
--- a/dom/canvas/TexUnpackBlob.h
+++ b/dom/canvas/TexUnpackBlob.h
@@ -54,48 +54,51 @@ public:
     const uint32_t mSkipPixels;
     const uint32_t mSkipRows;
     const uint32_t mSkipImages;
     const uint32_t mWidth;
     const uint32_t mHeight;
     const uint32_t mDepth;
     const bool mIsSrcPremult;
 
+    bool mNeedsExactUpload;
+
 protected:
     TexUnpackBlob(const WebGLContext* webgl, TexImageTarget target, uint32_t rowLength,
                   uint32_t width, uint32_t height, uint32_t depth, bool isSrcPremult);
 
 public:
     virtual ~TexUnpackBlob() { }
 
 protected:
-    bool ConvertIfNeeded(WebGLContext* webgl, const char* funcName, const void* srcBytes,
-                         uint32_t srcStride, uint8_t srcBPP, WebGLTexelFormat srcFormat,
+    bool ConvertIfNeeded(WebGLContext* webgl, const char* funcName,
+                         const uint8_t* srcBytes, uint32_t srcStride, uint8_t srcBPP,
+                         WebGLTexelFormat srcFormat,
                          const webgl::DriverUnpackInfo* dstDUI,
-                         const void** const out_bytes,
+                         const uint8_t** const out_bytes,
                          UniqueBuffer* const out_anchoredBuffer) const;
 
 public:
     virtual bool HasData() const { return true; }
 
     virtual bool TexOrSubImage(bool isSubImage, bool needsRespec, const char* funcName,
                                WebGLTexture* tex, TexImageTarget target, GLint level,
                                const webgl::DriverUnpackInfo* dui, GLint xOffset,
                                GLint yOffset, GLint zOffset,
                                GLenum* const out_error) const = 0;
 };
 
 class TexUnpackBytes final : public TexUnpackBlob
 {
 public:
     const bool mIsClientData;
-    const void* const mPtr;
+    const uint8_t* const mPtr;
 
     TexUnpackBytes(const WebGLContext* webgl, TexImageTarget target, uint32_t width,
-                   uint32_t height, uint32_t depth, bool isClientData, const void* ptr);
+                   uint32_t height, uint32_t depth, bool isClientData, const uint8_t* ptr);
 
     virtual bool HasData() const override { return !mIsClientData || bool(mPtr); }
 
     virtual bool TexOrSubImage(bool isSubImage, bool needsRespec, const char* funcName,
                                WebGLTexture* tex, TexImageTarget target, GLint level,
                                const webgl::DriverUnpackInfo* dui, GLint xOffset,
                                GLint yOffset, GLint zOffset,
                                GLenum* const out_error) const override;
--- a/dom/canvas/WebGLContext.h
+++ b/dom/canvas/WebGLContext.h
@@ -891,17 +891,17 @@ public:
         TexSubImage2D(texImageTarget, level, xOffset, yOffset, unpackFormat, unpackType,
                       &elem, &out_error);
     }
 
     //////
     // WebGLTextureUpload.cpp
 public:
     bool ValidateUnpackPixels(const char* funcName, uint32_t fullRows,
-                              uint32_t tailPixels, const webgl::TexUnpackBlob* blob);
+                              uint32_t tailPixels, webgl::TexUnpackBlob* blob);
 
 protected:
     bool ValidateTexImageSpecification(const char* funcName, uint8_t funcDims,
                                        GLenum texImageTarget, GLint level,
                                        GLsizei width, GLsizei height, GLsizei depth,
                                        GLint border,
                                        TexImageTarget* const out_target,
                                        WebGLTexture** const out_texture,
--- a/dom/canvas/WebGLTextureUpload.cpp
+++ b/dom/canvas/WebGLTextureUpload.cpp
@@ -122,17 +122,17 @@ DoesJSTypeMatchUnpackType(GLenum unpackT
 
     default:
         return false;
     }
 }
 
 bool
 WebGLContext::ValidateUnpackPixels(const char* funcName, uint32_t fullRows,
-                                   uint32_t tailPixels, const webgl::TexUnpackBlob* blob)
+                                   uint32_t tailPixels, webgl::TexUnpackBlob* blob)
 {
     const auto usedPixelsPerRow = CheckedUint32(blob->mSkipPixels) + blob->mWidth;
     const auto usedRowsPerImage = CheckedUint32(blob->mSkipRows) + blob->mHeight;
     const auto usedImages = CheckedUint32(blob->mSkipImages) + blob->mDepth;
 
     if (!usedPixelsPerRow.isValid() ||
         !usedRowsPerImage.isValid() ||
         !usedImages.isValid())
@@ -161,31 +161,33 @@ WebGLContext::ValidateUnpackPixels(const
         ErrorOutOfMemory("%s: Invalid calculation for required row count.",
                          funcName);
         return false;
     }
 
     if (fullRows > fullRowsNeeded.value())
         return true;
 
-    if (fullRows == fullRowsNeeded.value() && tailPixels >= usedPixelsPerRow.value())
+    if (fullRows == fullRowsNeeded.value() && tailPixels >= usedPixelsPerRow.value()) {
+        blob->mNeedsExactUpload = true;
         return true;
+    }
 
     ErrorInvalidOperation("%s: Desired upload requires more data than is available: (%u"
                           " rows plus %u pixels needed, %u rows plus %u pixels"
                           " available)",
                           funcName, fullRowsNeeded.value(), usedPixelsPerRow.value(),
                           fullRows, tailPixels);
     return false;
 }
 
 static bool
 ValidateUnpackBytes(WebGLContext* webgl, const char* funcName, uint32_t width,
                     uint32_t height, uint32_t depth, const webgl::PackingInfo& pi,
-                    uint32_t byteCount, const webgl::TexUnpackBlob* blob)
+                    uint32_t byteCount, webgl::TexUnpackBlob* blob)
 {
     const auto bytesPerPixel = webgl::BytesPerPixel(pi);
     const auto bytesPerRow = CheckedUint32(blob->mRowLength) * bytesPerPixel;
     const auto rowStride = RoundUpToMultipleOf(bytesPerRow, blob->mAlignment);
 
     const auto fullRows = byteCount / rowStride;
     if (!fullRows.isValid()) {
         webgl->ErrorOutOfMemory("%s: Unacceptable upload size calculated.");
@@ -236,17 +238,17 @@ WebGLTexture::TexOrSubImage(bool isSubIm
 
     const bool usePBOs = false;
     webgl::PackingInfo pi;
     if (!mContext->ValidateUnpackInfo(funcName, usePBOs, unpackFormat, unpackType, &pi))
         return;
 
     ////
 
-    const void* bytes = nullptr;
+    const uint8_t* bytes = nullptr;
     uint32_t byteCount = 0;
 
     if (!maybeView.IsNull()) {
         const auto& view = maybeView.Value();
 
         const auto jsType = JS_GetArrayBufferViewType(view.Obj());
         if (!DoesJSTypeMatchUnpackType(pi.type, jsType)) {
             mContext->ErrorInvalidOperation("%s: `pixels` not compatible with `type`.",
@@ -258,18 +260,18 @@ WebGLTexture::TexOrSubImage(bool isSubIm
             view.ComputeLengthAndData();
 
             bytes = view.DataAllowShared();
             byteCount = view.LengthAllowShared();
         }
     }
 
     const bool isClientData = true;
-    const webgl::TexUnpackBytes blob(mContext, target, width, height, depth, isClientData,
-                                     bytes);
+    webgl::TexUnpackBytes blob(mContext, target, width, height, depth, isClientData,
+                               bytes);
 
     if (bytes &&
         !ValidateUnpackBytes(mContext, funcName, width, height, depth, pi, byteCount,
                              &blob))
     {
         return;
     }
 
@@ -300,19 +302,18 @@ WebGLTexture::TexOrSubImage(bool isSubIm
     ////
 
     if (offset < 0) {
         mContext->ErrorInvalidValue("%s: offset cannot be negative.", funcName);
         return;
     }
 
     const bool isClientData = false;
-    const auto ptr = (const void*)offset;
-    const webgl::TexUnpackBytes blob(mContext, target, width, height, depth, isClientData,
-                                     ptr);
+    const auto ptr = (const uint8_t*)offset;
+    webgl::TexUnpackBytes blob(mContext, target, width, height, depth, isClientData, ptr);
 
     const auto& packBuffer = mContext->mBoundPixelUnpackBuffer;
     const auto bufferByteCount = packBuffer->ByteLength();
 
     uint32_t byteCount = 0;
     if (bufferByteCount >= offset) {
         byteCount = bufferByteCount - offset;
     }
@@ -391,18 +392,18 @@ WebGLTexture::TexOrSubImage(bool isSubIm
     if (!surf)
         return;
 
     // WhatWG "HTML Living Standard" (30 October 2015):
     // "The getImageData(sx, sy, sw, sh) method [...] Pixels must be returned as
     //  non-premultiplied alpha values."
     const bool isAlphaPremult = false;
 
-    const webgl::TexUnpackSurface blob(mContext, target, width, height, depth, surf,
-                                       isAlphaPremult);
+    webgl::TexUnpackSurface blob(mContext, target, width, height, depth, surf,
+                                 isAlphaPremult);
 
     const uint32_t fullRows = imageData->Height();
     const uint32_t tailPixels = 0;
     if (!mContext->ValidateUnpackPixels(funcName, fullRows, tailPixels, &blob))
         return;
 
     TexOrSubImageBlob(isSubImage, funcName, target, level, internalFormat, xOffset,
                       yOffset, zOffset, pi, &blob);
@@ -501,17 +502,17 @@ WebGLTexture::TexOrSubImage(bool isSubIm
                                   funcName);
         out_error->Throw(NS_ERROR_DOM_SECURITY_ERR);
         return;
     }
 
     //////
     // Ok, we're good!
 
-    UniquePtr<const webgl::TexUnpackBlob> blob;
+    UniquePtr<webgl::TexUnpackBlob> blob;
     const bool isAlphaPremult = sfer.mIsPremultiplied;
 
     if (layersImage) {
         blob.reset(new webgl::TexUnpackImage(mContext, target, width, height, depth,
                                              layersImage, isAlphaPremult));
     } else {
         MOZ_ASSERT(dataSurf);
         blob.reset(new webgl::TexUnpackSurface(mContext, target, width, height, depth,