Bug 626979 - Handle WebM frame size changes. r=kinetik a=blocking2.0
author Chris Pearce <chris@pearce.org.nz>
Fri, 28 Jan 2011 19:36:03 +1300
changeset 61490 408332b0a8342ec2cb2ee4c9a5f4164645785495
parent 61489 822a61861a3831b5d56ded46cc0d138e06281cbb
child 61491 6963333a74d1f7a8460d034f374acbc0b5c92e56
push id unknown
push user unknown
push date unknown
reviewers kinetik, blocking2
bugs 626979
milestone 2.0b11pre
Bug 626979 - Handle WebM frame size changes. r=kinetik a=blocking2.0
content/media/nsBuiltinDecoderReader.cpp
content/media/nsBuiltinDecoderReader.h
content/media/nsBuiltinDecoderStateMachine.cpp
content/media/ogg/nsOggCodecState.cpp
content/media/ogg/nsOggReader.cpp
content/media/webm/nsWebMReader.cpp
--- a/content/media/nsBuiltinDecoderReader.cpp
+++ b/content/media/nsBuiltinDecoderReader.cpp
@@ -45,16 +45,31 @@
 #include "nsBuiltinDecoderStateMachine.h"
 #include "mozilla/mozalloc.h"
 #include "VideoUtils.h"
 
 using namespace mozilla;
 using mozilla::layers::ImageContainer;
 using mozilla::layers::PlanarYCbCrImage;
 
+// The maximum height and width of the video. Used for
+// sanitizing the memory allocation of the RGB buffer.
+// The maximum resolution we anticipate encountering in the
+// wild is 2160p - 3840x2160 pixels.
+#define MAX_VIDEO_WIDTH  4000
+#define MAX_VIDEO_HEIGHT 3000
+
+// Verify these values are sane. Once we've checked the frame sizes, we then
+// can do less integer overflow checking. The product of two dimensions is
+// bounded by PR_INT32_MAX rather than PR_UINT32_MAX because the validated
+// sizes appear to be held in signed fields (NOTE(review): confirm nsIntSize).
+PR_STATIC_ASSERT(MAX_VIDEO_WIDTH < PlanarYCbCrImage::MAX_DIMENSION);
+PR_STATIC_ASSERT(MAX_VIDEO_HEIGHT < PlanarYCbCrImage::MAX_DIMENSION);
+PR_STATIC_ASSERT(PlanarYCbCrImage::MAX_DIMENSION < PR_INT32_MAX / PlanarYCbCrImage::MAX_DIMENSION);
+
 // Un-comment to enable logging of seek bisections.
 //#define SEEK_LOGGING
 
 #ifdef PR_LOGGING
 extern PRLogModuleInfo* gBuiltinDecoderLog;
 #define LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
 #ifdef SEEK_LOGGING
 #define SEEK_LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
@@ -66,19 +81,44 @@ extern PRLogModuleInfo* gBuiltinDecoderL
 #define SEEK_LOG(type, msg)
 #endif
 
 static PRBool
 ValidatePlane(const VideoData::YCbCrBuffer::Plane& aPlane)
 {
   return aPlane.mWidth <= PlanarYCbCrImage::MAX_DIMENSION &&
          aPlane.mHeight <= PlanarYCbCrImage::MAX_DIMENSION &&
+         aPlane.mWidth * aPlane.mHeight < MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT &&
          aPlane.mStride > 0;
 }
 
+// Returns PR_TRUE only if the frame, picture and display regions each have
+// strictly positive dimensions no larger than PlanarYCbCrImage::MAX_DIMENSION
+// and an area of at most MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT pixels, and the
+// picture offset is non-negative with its right/bottom edge below
+// PlanarYCbCrImage::MAX_DIMENSION.
+//
+// Every value is range checked (including its sign) before it takes part in
+// a multiplication or addition. Without the sign checks, two negative
+// dimensions would multiply to a positive area and be accepted, and extreme
+// negative values could overflow the product.
+PRBool
+nsVideoInfo::ValidateVideoRegion(const nsIntSize& aFrame,
+                                 const nsIntRect& aPicture,
+                                 const nsIntSize& aDisplay)
+{
+  return
+    // Frame region.
+    aFrame.width > 0 &&
+    aFrame.height > 0 &&
+    aFrame.width <= PlanarYCbCrImage::MAX_DIMENSION &&
+    aFrame.height <= PlanarYCbCrImage::MAX_DIMENSION &&
+    aFrame.width * aFrame.height <= MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT &&
+    // Picture region: offset and size.
+    aPicture.x >= 0 &&
+    aPicture.y >= 0 &&
+    aPicture.width > 0 &&
+    aPicture.height > 0 &&
+    aPicture.width <= PlanarYCbCrImage::MAX_DIMENSION &&
+    aPicture.x < PlanarYCbCrImage::MAX_DIMENSION &&
+    aPicture.x + aPicture.width < PlanarYCbCrImage::MAX_DIMENSION &&
+    aPicture.height <= PlanarYCbCrImage::MAX_DIMENSION &&
+    aPicture.y < PlanarYCbCrImage::MAX_DIMENSION &&
+    aPicture.y + aPicture.height < PlanarYCbCrImage::MAX_DIMENSION &&
+    aPicture.width * aPicture.height <= MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT &&
+    // Display region.
+    aDisplay.width > 0 &&
+    aDisplay.height > 0 &&
+    aDisplay.width <= PlanarYCbCrImage::MAX_DIMENSION &&
+    aDisplay.height <= PlanarYCbCrImage::MAX_DIMENSION &&
+    aDisplay.width * aDisplay.height <= MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT;
+}
+
 VideoData* VideoData::Create(nsVideoInfo& aInfo,
                              ImageContainer* aContainer,
                              PRInt64 aOffset,
                              PRInt64 aTime,
                              PRInt64 aEndTime,
                              const YCbCrBuffer& aBuffer,
                              PRBool aKeyframe,
                              PRInt64 aTimecode)
@@ -95,33 +135,45 @@ VideoData* VideoData::Create(nsVideoInfo
     return nsnull;
   }
 
   // The following situations could be triggered by invalid input
   if (aInfo.mPicture.width <= 0 || aInfo.mPicture.height <= 0) {
     NS_WARNING("Empty picture rect");
     return nsnull;
   }
-  if (aBuffer.mPlanes[0].mWidth != PRUint32(aInfo.mFrame.width) ||
-      aBuffer.mPlanes[0].mHeight != PRUint32(aInfo.mFrame.height)) {
-    NS_WARNING("Unexpected frame size");
-    return nsnull;
-  }
   if (!ValidatePlane(aBuffer.mPlanes[0]) || !ValidatePlane(aBuffer.mPlanes[1]) ||
       !ValidatePlane(aBuffer.mPlanes[2])) {
     NS_WARNING("Invalid plane size");
     return nsnull;
   }
+
+  PRUint32 picX = aInfo.mPicture.x;
+  PRUint32 picY = aInfo.mPicture.y;
+  gfxIntSize picSize = gfxIntSize(aInfo.mPicture.width, aInfo.mPicture.height);
+
+  if (aInfo.mFrame.width != aBuffer.mPlanes[0].mWidth ||
+      aInfo.mFrame.height != aBuffer.mPlanes[0].mHeight)
+  {
+    // Frame size is different from what the container reports. This is legal
+    // in WebM, and we will preserve the ratio of the crop rectangle as it
+    // was reported relative to the frame size reported by the container.
+    picX = (aInfo.mPicture.x * aBuffer.mPlanes[0].mWidth) / aInfo.mFrame.width;
+    picY = (aInfo.mPicture.y * aBuffer.mPlanes[0].mHeight) / aInfo.mFrame.height;
+    picSize = gfxIntSize((aBuffer.mPlanes[0].mWidth * aInfo.mPicture.width) / aInfo.mFrame.width,
+                         (aBuffer.mPlanes[0].mHeight * aInfo.mPicture.height) / aInfo.mFrame.height);
+  }
+
   // Ensure the picture size specified in the headers can be extracted out of
   // the frame we've been supplied without indexing out of bounds.
   PRUint32 picXLimit;
   PRUint32 picYLimit;
-  if (!AddOverflow32(aInfo.mPicture.x, aInfo.mPicture.width, picXLimit) ||
+  if (!AddOverflow32(picX, picSize.width, picXLimit) ||
       picXLimit > aBuffer.mPlanes[0].mStride ||
-      !AddOverflow32(aInfo.mPicture.y, aInfo.mPicture.height, picYLimit) ||
+      !AddOverflow32(picY, picSize.height, picYLimit) ||
       picYLimit > aBuffer.mPlanes[0].mHeight)
   {
     // The specified picture dimensions can't be contained inside the video
     // frame, we'll stomp memory if we try to copy it. Fail.
     NS_WARNING("Overflowing picture rect");
     return nsnull;
   }
 
@@ -140,19 +192,19 @@ VideoData* VideoData::Create(nsVideoInfo
   PlanarYCbCrImage::Data data;
   data.mYChannel = aBuffer.mPlanes[0].mData;
   data.mYSize = gfxIntSize(aBuffer.mPlanes[0].mWidth, aBuffer.mPlanes[0].mHeight);
   data.mYStride = aBuffer.mPlanes[0].mStride;
   data.mCbChannel = aBuffer.mPlanes[1].mData;
   data.mCrChannel = aBuffer.mPlanes[2].mData;
   data.mCbCrSize = gfxIntSize(aBuffer.mPlanes[1].mWidth, aBuffer.mPlanes[1].mHeight);
   data.mCbCrStride = aBuffer.mPlanes[1].mStride;
-  data.mPicX = aInfo.mPicture.x;
-  data.mPicY = aInfo.mPicture.y;
-  data.mPicSize = gfxIntSize(aInfo.mPicture.width, aInfo.mPicture.height);
+  data.mPicX = picX;
+  data.mPicY = picY;
+  data.mPicSize = picSize;
   data.mStereoMode = aInfo.mStereoMode;
 
   videoImage->SetData(data); // Copies buffer
   return v.forget();
 }
 
 nsBuiltinDecoderReader::nsBuiltinDecoderReader(nsBuiltinDecoder* aDecoder)
   : mMonitor("media.decoderreader"),
--- a/content/media/nsBuiltinDecoderReader.h
+++ b/content/media/nsBuiltinDecoderReader.h
@@ -54,36 +54,50 @@ class nsBuiltinDecoderStateMachine;
 // Stores info relevant to presenting media samples.
 class nsVideoInfo {
 public:
   nsVideoInfo()
     : mPixelAspectRatio(1.0),
       mAudioRate(0),
       mAudioChannels(0),
       mFrame(0,0),
+      mDisplay(0,0),
       mStereoMode(mozilla::layers::STEREO_MODE_MONO),
       mHasAudio(PR_FALSE),
       mHasVideo(PR_FALSE)
   {}
 
+  // Returns PR_TRUE if it's safe to use aPicture as the picture to be
+  // extracted inside a frame of size aFrame, and scaled up to and displayed
+  // at a size of aDisplay. You should validate the frame, picture, and
+  // display regions before setting them into the mFrame, mPicture and
+  // mDisplay fields of nsVideoInfo.
+  static PRBool ValidateVideoRegion(const nsIntSize& aFrame,
+                                    const nsIntRect& aPicture,
+                                    const nsIntSize& aDisplay);
+
   // Pixel aspect ratio, as stored in the metadata.
   float mPixelAspectRatio;
 
   // Samples per second.
   PRUint32 mAudioRate;
 
   // Number of audio channels.
   PRUint32 mAudioChannels;
 
   // Dimensions of the video frame.
   nsIntSize mFrame;
 
   // The picture region inside the video frame to be displayed.
   nsIntRect mPicture;
 
+  // Display size of the video frame. The picture region will be scaled
+  // to and displayed at this size.
+  nsIntSize mDisplay;
+
   // The offset of the first non-header page in the file, in bytes.
   // Used to seek to the start of the media.
   PRInt64 mDataOffset;
 
   // Indicates the frame layout for single track stereo videos.
   mozilla::layers::StereoMode mStereoMode;
 
   // PR_TRUE if we have an active audio bitstream.
--- a/content/media/nsBuiltinDecoderStateMachine.cpp
+++ b/content/media/nsBuiltinDecoderStateMachine.cpp
@@ -1238,17 +1238,17 @@ void nsBuiltinDecoderStateMachine::Rende
 
   if (aData->mDuplicate) {
     return;
   }
 
   nsRefPtr<Image> image = aData->mImage;
   if (image) {
     const nsVideoInfo& info = mReader->GetInfo();
-    mDecoder->SetVideoData(gfxIntSize(info.mPicture.width, info.mPicture.height), info.mPixelAspectRatio, image);
+    mDecoder->SetVideoData(gfxIntSize(info.mDisplay.width, info.mDisplay.height), info.mPixelAspectRatio, image);
   }
 }
 
 PRInt64
 nsBuiltinDecoderStateMachine::GetAudioClock()
 {
   NS_ASSERTION(IsCurrentThread(mDecoder->mStateMachineThread), "Should be on state machine thread.");
   if (!mAudioStream || !HasAudio())
--- a/content/media/ogg/nsOggCodecState.cpp
+++ b/content/media/ogg/nsOggCodecState.cpp
@@ -37,33 +37,25 @@
  *
  * ***** END LICENSE BLOCK ***** */
 #include "nsDebug.h"
 #include "nsOggCodecState.h"
 #include "nsOggDecoder.h"
 #include <string.h>
 #include "nsTraceRefcnt.h"
 #include "VideoUtils.h"
+#include "nsBuiltinDecoderReader.h"
 
 #ifdef PR_LOGGING
 extern PRLogModuleInfo* gBuiltinDecoderLog;
 #define LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
 #else
 #define LOG(type, msg)
 #endif
 
-/*
-   The maximum height and width of the video. Used for
-   sanitizing the memory allocation of the RGB buffer.
-   The maximum resolution we anticipate encountering in the
-   wild is 2160p - 3840x2160 pixels.
-*/
-#define MAX_VIDEO_WIDTH  4000
-#define MAX_VIDEO_HEIGHT 3000
-
 nsOggCodecState*
 nsOggCodecState::Create(ogg_page* aPage)
 {
   nsAutoPtr<nsOggCodecState> codecState;
   if (aPage->body_len > 6 && memcmp(aPage->body+1, "theora", 6) == 0) {
     codecState = new nsTheoraState(aPage);
   } else if (aPage->body_len > 6 && memcmp(aPage->body+1, "vorbis", 6) == 0) {
     codecState = new nsVorbisState(aPage);
@@ -165,30 +157,21 @@ PRBool nsTheoraState::Init() {
   mFrameDuration = static_cast<PRUint32>(f);
 
   n = mInfo.aspect_numerator;
 
   d = mInfo.aspect_denominator;
   mPixelAspectRatio = (n == 0 || d == 0) ?
     1.0f : static_cast<float>(n) / static_cast<float>(d);
 
-  // Ensure the frame region isn't larger than our prescribed maximum.
-  PRUint32 pixels;
-  if (!MulOverflow32(mInfo.frame_width, mInfo.frame_height, pixels) ||
-      pixels > MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT ||
-      pixels == 0)
-  {
-    return mActive = PR_FALSE;
-  }
-
-  // Ensure the picture region isn't larger than our prescribed maximum.
-  if (!MulOverflow32(mInfo.pic_width, mInfo.pic_height, pixels) ||
-      pixels > MAX_VIDEO_WIDTH * MAX_VIDEO_HEIGHT ||
-      pixels == 0)
-  {
+  // Ensure the frame and picture regions aren't larger than our prescribed
+  // maximum, or zero sized.
+  nsIntSize frame(mInfo.frame_width, mInfo.frame_height);
+  nsIntRect picture(mInfo.pic_x, mInfo.pic_y, mInfo.pic_width, mInfo.pic_height);
+  if (!nsVideoInfo::ValidateVideoRegion(frame, picture, frame)) {
     return mActive = PR_FALSE;
   }
 
   mCtx = th_decode_alloc(&mInfo, mSetup);
   if (mCtx == NULL) {
     return mActive = PR_FALSE;
   }
 
--- a/content/media/ogg/nsOggReader.cpp
+++ b/content/media/ogg/nsOggReader.cpp
@@ -305,22 +305,24 @@ nsresult nsOggReader::ReadMetadata()
   mInfo.mHasAudio = HasAudio();
   mInfo.mHasVideo = HasVideo();
   if (HasAudio()) {
     mInfo.mAudioRate = mVorbisState->mInfo.rate;
     mInfo.mAudioChannels = mVorbisState->mInfo.channels;
   }
   if (HasVideo()) {
     mInfo.mPixelAspectRatio = mTheoraState->mPixelAspectRatio;
-    mInfo.mPicture.width = mTheoraState->mInfo.pic_width;
-    mInfo.mPicture.height = mTheoraState->mInfo.pic_height;
-    mInfo.mPicture.x = mTheoraState->mInfo.pic_x;
-    mInfo.mPicture.y = mTheoraState->mInfo.pic_y;
-    mInfo.mFrame.width = mTheoraState->mInfo.frame_width;
-    mInfo.mFrame.height = mTheoraState->mInfo.frame_height;
+    mInfo.mPicture = nsIntRect(mTheoraState->mInfo.pic_x,
+                               mTheoraState->mInfo.pic_y,
+                               mTheoraState->mInfo.pic_width,
+                               mTheoraState->mInfo.pic_height);
+    mInfo.mFrame = nsIntSize(mTheoraState->mInfo.frame_width,
+                             mTheoraState->mInfo.frame_height);
+    mInfo.mDisplay = nsIntSize(mInfo.mPicture.width,
+                               mInfo.mPicture.height);
   }
   mInfo.mDataOffset = mDataOffset;
 
   if (mSkeletonState && mSkeletonState->HasIndex()) {
     // Extract the duration info out of the index, so we don't need to seek to
     // the end of stream to get it.
     nsAutoTArray<PRUint32, 2> tracks;
     if (HasVideo()) {
--- a/content/media/webm/nsWebMReader.cpp
+++ b/content/media/webm/nsWebMReader.cpp
@@ -251,35 +251,52 @@ nsresult nsWebMReader::ReadMetadata()
     if (!mHasVideo && type == NESTEGG_TRACK_VIDEO) {
       nestegg_video_params params;
       r = nestegg_track_video_params(mContext, track, &params);
       if (r == -1) {
         Cleanup();
         return NS_ERROR_FAILURE;
       }
 
+      // Picture region, taking into account cropping, before scaling
+      // to the display size.
+      nsIntRect pictureRect(params.crop_left,
+                        params.crop_top,
+                        params.width - (params.crop_right + params.crop_left),
+                        params.height - (params.crop_bottom + params.crop_top));
+
+      // If the cropping data appears invalid then use the frame data
+      if (pictureRect.width <= 0 ||
+          pictureRect.height <= 0 ||
+          pictureRect.x < 0 ||
+          pictureRect.y < 0)
+      {
+        pictureRect.x = 0;
+        pictureRect.y = 0;
+        pictureRect.width = params.width;
+        pictureRect.height = params.height;
+      }
+
+      // Validate the container-reported frame and pictureRect sizes. This ensures
+      // that our video frame creation code doesn't overflow.
+      nsIntSize displaySize(params.display_width, params.display_height);
+      nsIntSize frameSize(params.width, params.height);
+      if (!nsVideoInfo::ValidateVideoRegion(frameSize, pictureRect, displaySize)) {
+        // Video track's frame sizes will overflow. Ignore the video track.
+        continue;
+      }
+          
       mVideoTrack = track;
       mHasVideo = PR_TRUE;
       mInfo.mHasVideo = PR_TRUE;
-      mInfo.mPicture.x = params.crop_left;
-      mInfo.mPicture.y = params.crop_top;
-      mInfo.mPicture.width = params.width - (params.crop_right - params.crop_left);
-      mInfo.mPicture.height = params.height - (params.crop_bottom - params.crop_top);
-      mInfo.mFrame.width = params.width;
-      mInfo.mFrame.height = params.height;
-      mInfo.mPixelAspectRatio = (float(params.display_width) / params.width) /
-                                (float(params.display_height) / params.height);
-
-      // If the cropping data appears invalid then use the frame data
-      if (mInfo.mPicture.width <= 0 || mInfo.mPicture.height <= 0) {
-        mInfo.mPicture.x = 0;
-        mInfo.mPicture.y = 0;
-        mInfo.mPicture.width = params.width;
-        mInfo.mPicture.height = params.height;
-      }
+      mInfo.mPicture = pictureRect;
+      mInfo.mDisplay = displaySize;
+      mInfo.mFrame = frameSize;
+      mInfo.mPixelAspectRatio = (static_cast<float>(params.display_width) / mInfo.mPicture.width) /
+                                (static_cast<float>(params.display_height) / mInfo.mPicture.height);
 
       switch (params.stereo_mode) {
       case NESTEGG_VIDEO_MONO:
         mInfo.mStereoMode = STEREO_MODE_MONO;
         break;
       case NESTEGG_VIDEO_STEREO_LEFT_RIGHT:
         mInfo.mStereoMode = STEREO_MODE_LEFT_RIGHT;
         break;
@@ -680,20 +697,16 @@ PRBool nsWebMReader::DecodeVideoFrame(PR
     if (tstamp_ms < aTimeThreshold) {
       continue;
     }
 
     vpx_codec_iter_t  iter = NULL;
     vpx_image_t      *img;
 
     while((img = vpx_codec_get_frame(&mVP8, &iter))) {
-      NS_ASSERTION(mInfo.mPicture.width == static_cast<PRInt32>(img->d_w), 
-                   "WebM picture width from header does not match decoded frame");
-      NS_ASSERTION(mInfo.mPicture.height == static_cast<PRInt32>(img->d_h),
-                   "WebM picture height from header does not match decoded frame");
       NS_ASSERTION(img->fmt == IMG_FMT_I420, "WebM image format is not I420");
 
       // Chroma shifts are rounded down as per the decoding examples in the VP8 SDK
       VideoData::YCbCrBuffer b;
       b.mPlanes[0].mData = img->planes[0];
       b.mPlanes[0].mStride = img->stride[0];
       b.mPlanes[0].mHeight = img->d_h;
       b.mPlanes[0].mWidth = img->d_w;