Bug 661456 - Scale WebM video frames to display size. r=kinetik
authorChris Pearce <chris@pearce.org.nz>
Fri, 24 Jun 2011 10:08:54 +1200
changeset 71912 6d87b94b1b12c08ee6d2733065267a59eebd90c0
parent 71911 a45545549061381c5292aac030888e2cd8b82cad
child 71913 2c55cb8097d200b7cb6bab617e35056ccf7233b2
push id: 209
push user: bzbarsky@mozilla.com
push date: Tue, 05 Jul 2011 17:42:16 +0000
treeherder: mozilla-aurora@cc6e30cce8af [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: kinetik
bugs: 661456
milestone7.0a1
Bug 661456 - Scale WebM video frames to display size. r=kinetik
content/media/VideoUtils.cpp
content/media/VideoUtils.h
content/media/nsBuiltinDecoderReader.cpp
content/media/nsBuiltinDecoderReader.h
content/media/nsBuiltinDecoderStateMachine.cpp
content/media/nsBuiltinDecoderStateMachine.h
content/media/nsMediaDecoder.cpp
content/media/nsMediaDecoder.h
content/media/ogg/nsOggReader.cpp
content/media/ogg/nsOggReader.h
content/media/raw/nsRawReader.cpp
content/media/raw/nsRawReader.h
content/media/webm/nsWebMReader.cpp
content/media/webm/nsWebMReader.h
--- a/content/media/VideoUtils.cpp
+++ b/content/media/VideoUtils.cpp
@@ -31,16 +31,17 @@
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "VideoUtils.h"
+#include "nsMathUtils.h"
 #include "prtypes.h"
 
 // Adds two 32bit unsigned numbers, retuns PR_TRUE if addition succeeded,
 // or PR_FALSE the if addition would result in an overflow.
 PRBool AddOverflow32(PRUint32 a, PRUint32 b, PRUint32& aResult) {
   PRUint64 rl = static_cast<PRUint64>(a) + static_cast<PRUint64>(b);
   if (rl > PR_UINT32_MAX) {
     return PR_FALSE;
@@ -191,8 +192,27 @@ PRBool SamplesToUsecs(PRInt64 aSamples, 
 PRBool UsecsToSamples(PRInt64 aUsecs, PRUint32 aRate, PRInt64& aOutSamples)
 {
   PRInt64 x;
   if (!MulOverflow(aUsecs, aRate, x))
     return PR_FALSE;
   aOutSamples = x / USECS_PER_S;
   return PR_TRUE;
 }
+
+static PRInt32 ConditionDimension(float aValue)
+{
+  // This will exclude NaNs and too-big values.
+  if (aValue > 1.0 && aValue <= PR_INT32_MAX)
+    return PRInt32(NS_round(aValue));
+  return 0;
+}
+
+void ScaleDisplayByAspectRatio(nsIntSize& aDisplay, float aAspectRatio)
+{
+  if (aAspectRatio > 1.0) {
+    // Increase the intrinsic width
+    aDisplay.width = ConditionDimension(aAspectRatio * aDisplay.width);
+  } else {
+    // Increase the intrinsic height
+    aDisplay.height = ConditionDimension(aDisplay.height / aAspectRatio);
+  }
+}
--- a/content/media/VideoUtils.h
+++ b/content/media/VideoUtils.h
@@ -36,16 +36,18 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
 #ifndef VideoUtils_h
 #define VideoUtils_h
 
 #include "mozilla/ReentrantMonitor.h"
 
+#include "nsRect.h"
+
 // This file contains stuff we'd rather put elsewhere, but which is
 // dependent on other changes which we don't want to wait for. We plan to
 // remove this file in the near future.
 
 
 // This belongs in prtypes.h
 /************************************************************************
  * MACROS:      PR_INT64_MAX
@@ -138,9 +140,21 @@ PRBool SamplesToUsecs(PRInt64 aSamples, 
 PRBool UsecsToSamples(PRInt64 aUsecs, PRUint32 aRate, PRInt64& aOutSamples);
 
 // Number of microseconds per second. 1e6.
 #define USECS_PER_S 1000000
 
 // Number of microseconds per millisecond.
 #define USECS_PER_MS 1000
 
+// The maximum height and width of the video. Used for
+// sanitizing the memory allocation of the RGB buffer.
+// The maximum resolution we anticipate encountering in the
+// wild is 2160p - 3840x2160 pixels.
+#define MAX_VIDEO_WIDTH  4000
+#define MAX_VIDEO_HEIGHT 3000
+
+// Scales the display rect aDisplay by aspect ratio aAspectRatio.
+// Note that aDisplay must be validated by nsVideoInfo::ValidateVideoRegion()
+// before being used!
+void ScaleDisplayByAspectRatio(nsIntSize& aDisplay, float aAspectRatio);
+
 #endif
--- a/content/media/nsBuiltinDecoderReader.cpp
+++ b/content/media/nsBuiltinDecoderReader.cpp
@@ -45,23 +45,16 @@
 #include "nsBuiltinDecoderStateMachine.h"
 #include "mozilla/mozalloc.h"
 #include "VideoUtils.h"
 
 using namespace mozilla;
 using mozilla::layers::ImageContainer;
 using mozilla::layers::PlanarYCbCrImage;
 
-// The maximum height and width of the video. Used for
-// sanitizing the memory allocation of the RGB buffer.
-// The maximum resolution we anticipate encountering in the
-// wild is 2160p - 3840x2160 pixels.
-#define MAX_VIDEO_WIDTH  4000
-#define MAX_VIDEO_HEIGHT 3000
-
 using mozilla::layers::PlanarYCbCrImage;
 
 // Verify these values are sane. Once we've checked the frame sizes, we then
 // can do less integer overflow checking.
 PR_STATIC_ASSERT(MAX_VIDEO_WIDTH < PlanarYCbCrImage::MAX_DIMENSION);
 PR_STATIC_ASSERT(MAX_VIDEO_HEIGHT < PlanarYCbCrImage::MAX_DIMENSION);
 PR_STATIC_ASSERT(PlanarYCbCrImage::MAX_DIMENSION < PR_UINT32_MAX / PlanarYCbCrImage::MAX_DIMENSION);
 
@@ -116,73 +109,63 @@ nsVideoInfo::ValidateVideoRegion(const n
 
 VideoData* VideoData::Create(nsVideoInfo& aInfo,
                              ImageContainer* aContainer,
                              PRInt64 aOffset,
                              PRInt64 aTime,
                              PRInt64 aEndTime,
                              const YCbCrBuffer& aBuffer,
                              PRBool aKeyframe,
-                             PRInt64 aTimecode)
+                             PRInt64 aTimecode,
+                             nsIntRect aPicture)
 {
   if (!aContainer) {
     return nsnull;
   }
 
   // The following situation should never happen unless there is a bug
   // in the decoder
   if (aBuffer.mPlanes[1].mWidth != aBuffer.mPlanes[2].mWidth ||
       aBuffer.mPlanes[1].mHeight != aBuffer.mPlanes[2].mHeight) {
     NS_ERROR("C planes with different sizes");
     return nsnull;
   }
 
   // The following situations could be triggered by invalid input
-  if (aInfo.mPicture.width <= 0 || aInfo.mPicture.height <= 0) {
+  if (aPicture.width <= 0 || aPicture.height <= 0) {
     NS_WARNING("Empty picture rect");
     return nsnull;
   }
   if (!ValidatePlane(aBuffer.mPlanes[0]) || !ValidatePlane(aBuffer.mPlanes[1]) ||
       !ValidatePlane(aBuffer.mPlanes[2])) {
     NS_WARNING("Invalid plane size");
     return nsnull;
   }
 
-  PRUint32 picX = aInfo.mPicture.x;
-  PRUint32 picY = aInfo.mPicture.y;
-  gfxIntSize picSize = gfxIntSize(aInfo.mPicture.width, aInfo.mPicture.height);
-
-  if (aInfo.mFrame.width != aBuffer.mPlanes[0].mWidth ||
-      aInfo.mFrame.height != aBuffer.mPlanes[0].mHeight)
-  {
-    // Frame size is different from what the container reports. This is legal
-    // in WebM, and we will preserve the ratio of the crop rectangle as it
-    // was reported relative to the picture size reported by the container.
-    picX = (aInfo.mPicture.x * aBuffer.mPlanes[0].mWidth) / aInfo.mFrame.width;
-    picY = (aInfo.mPicture.y * aBuffer.mPlanes[0].mHeight) / aInfo.mFrame.height;
-    picSize = gfxIntSize((aBuffer.mPlanes[0].mWidth * aInfo.mPicture.width) / aInfo.mFrame.width,
-                         (aBuffer.mPlanes[0].mHeight * aInfo.mPicture.height) / aInfo.mFrame.height);
-  }
-
   // Ensure the picture size specified in the headers can be extracted out of
   // the frame we've been supplied without indexing out of bounds.
-  PRUint32 picXLimit;
-  PRUint32 picYLimit;
-  if (!AddOverflow32(picX, picSize.width, picXLimit) ||
-      picXLimit > aBuffer.mPlanes[0].mStride ||
-      !AddOverflow32(picY, picSize.height, picYLimit) ||
-      picYLimit > aBuffer.mPlanes[0].mHeight)
+  PRUint32 xLimit;
+  PRUint32 yLimit;
+  if (!AddOverflow32(aPicture.x, aPicture.width, xLimit) ||
+      xLimit > aBuffer.mPlanes[0].mStride ||
+      !AddOverflow32(aPicture.y, aPicture.height, yLimit) ||
+      yLimit > aBuffer.mPlanes[0].mHeight)
   {
     // The specified picture dimensions can't be contained inside the video
     // frame, we'll stomp memory if we try to copy it. Fail.
     NS_WARNING("Overflowing picture rect");
     return nsnull;
   }
 
-  nsAutoPtr<VideoData> v(new VideoData(aOffset, aTime, aEndTime, aKeyframe, aTimecode));
+  nsAutoPtr<VideoData> v(new VideoData(aOffset,
+                                       aTime,
+                                       aEndTime,
+                                       aKeyframe,
+                                       aTimecode,
+                                       aInfo.mDisplay));
   // Currently our decoder only knows how to output to PLANAR_YCBCR
   // format.
   Image::Format format = Image::PLANAR_YCBCR;
   v->mImage = aContainer->CreateImage(&format, 1);
   if (!v->mImage) {
     return nsnull;
   }
   NS_ASSERTION(v->mImage->GetFormat() == Image::PLANAR_YCBCR,
@@ -192,19 +175,19 @@ VideoData* VideoData::Create(nsVideoInfo
   PlanarYCbCrImage::Data data;
   data.mYChannel = aBuffer.mPlanes[0].mData;
   data.mYSize = gfxIntSize(aBuffer.mPlanes[0].mWidth, aBuffer.mPlanes[0].mHeight);
   data.mYStride = aBuffer.mPlanes[0].mStride;
   data.mCbChannel = aBuffer.mPlanes[1].mData;
   data.mCrChannel = aBuffer.mPlanes[2].mData;
   data.mCbCrSize = gfxIntSize(aBuffer.mPlanes[1].mWidth, aBuffer.mPlanes[1].mHeight);
   data.mCbCrStride = aBuffer.mPlanes[1].mStride;
-  data.mPicX = picX;
-  data.mPicY = picY;
-  data.mPicSize = picSize;
+  data.mPicX = aPicture.x;
+  data.mPicY = aPicture.y;
+  data.mPicSize = gfxIntSize(aPicture.width, aPicture.height);
   data.mStereoMode = aInfo.mStereoMode;
 
   videoImage->SetData(data); // Copies buffer
   return v.forget();
 }
 
 nsBuiltinDecoderReader::nsBuiltinDecoderReader(nsBuiltinDecoder* aDecoder)
   : mReentrantMonitor("media.decoderreader"),
--- a/content/media/nsBuiltinDecoderReader.h
+++ b/content/media/nsBuiltinDecoderReader.h
@@ -49,52 +49,40 @@
 #include "mozilla/ReentrantMonitor.h"
 
 class nsBuiltinDecoderStateMachine;
 
 // Stores info relevant to presenting media samples.
 class nsVideoInfo {
 public:
   nsVideoInfo()
-    : mPixelAspectRatio(1.0),
-      mAudioRate(0),
+    : mAudioRate(0),
       mAudioChannels(0),
-      mFrame(0,0),
       mDisplay(0,0),
       mStereoMode(mozilla::layers::STEREO_MODE_MONO),
       mHasAudio(PR_FALSE),
       mHasVideo(PR_FALSE)
   {}
 
   // Returns PR_TRUE if it's safe to use aPicture as the picture to be
   // extracted inside a frame of size aFrame, and scaled up to and displayed
   // at a size of aDisplay. You should validate the frame, picture, and
-  // display regions before setting them into the mFrame, mPicture and
-  // mDisplay fields of nsVideoInfo.
+  // display regions before using them to display video frames.
   static PRBool ValidateVideoRegion(const nsIntSize& aFrame,
                                     const nsIntRect& aPicture,
                                     const nsIntSize& aDisplay);
 
-  // Pixel aspect ratio, as stored in the metadata.
-  float mPixelAspectRatio;
-
   // Samples per second.
   PRUint32 mAudioRate;
 
   // Number of audio channels.
   PRUint32 mAudioChannels;
 
-  // Dimensions of the video frame.
-  nsIntSize mFrame;
-
-  // The picture region inside the video frame to be displayed.
-  nsIntRect mPicture;
-
-  // Display size of the video frame. The picture region will be scaled
-  // to and displayed at this size.
+  // Size in pixels at which the video is rendered. This is after it has
+  // been scaled by its aspect ratio.
   nsIntSize mDisplay;
 
   // Indicates the frame layout for single track stereo videos.
   mozilla::layers::StereoMode mStereoMode;
 
   // PR_TRUE if we have an active audio bitstream.
   PRPackedBool mHasAudio;
 
@@ -211,17 +199,18 @@ public:
   // negative stride).
   static VideoData* Create(nsVideoInfo& aInfo,
                            ImageContainer* aContainer,
                            PRInt64 aOffset,
                            PRInt64 aTime,
                            PRInt64 aEndTime,
                            const YCbCrBuffer &aBuffer,
                            PRBool aKeyframe,
-                           PRInt64 aTimecode);
+                           PRInt64 aTimecode,
+                           nsIntRect aPicture);
 
   // Constructs a duplicate VideoData object. This intrinsically tells the
   // player that it does not need to update the displayed frame when this
   // frame is played; this frame is identical to the previous.
   static VideoData* CreateDuplicate(PRInt64 aOffset,
                                     PRInt64 aTime,
                                     PRInt64 aEndTime,
                                     PRInt64 aTimecode)
@@ -229,23 +218,28 @@ public:
     return new VideoData(aOffset, aTime, aEndTime, aTimecode);
   }
 
   ~VideoData()
   {
     MOZ_COUNT_DTOR(VideoData);
   }
 
+  // Dimensions at which to display the video frame. The picture region
+  // will be scaled to this size. This should be the picture region's
+  // dimensions scaled with respect to its aspect ratio.
+  nsIntSize mDisplay;
+
   // Approximate byte offset of the end of the frame in the media.
   PRInt64 mOffset;
 
   // Start time of frame in microseconds.
   PRInt64 mTime;
 
-  // End time of frame in microseconds;
+  // End time of frame in microseconds.
   PRInt64 mEndTime;
 
   // Codec specific internal time code. For Ogg based codecs this is the
   // granulepos.
   PRInt64 mTimecode;
 
   // This frame's image.
   nsRefPtr<Image> mImage;
@@ -267,23 +261,25 @@ public:
     MOZ_COUNT_CTOR(VideoData);
     NS_ASSERTION(aEndTime >= aTime, "Frame must start before it ends.");
   }
 
   VideoData(PRInt64 aOffset,
             PRInt64 aTime,
             PRInt64 aEndTime,
             PRBool aKeyframe,
-            PRInt64 aTimecode)
+            PRInt64 aTimecode,
+            nsIntSize aDisplay)
     : mOffset(aOffset),
       mTime(aTime),
       mEndTime(aEndTime),
       mTimecode(aTimecode),
       mDuplicate(PR_FALSE),
-      mKeyframe(aKeyframe)
+      mKeyframe(aKeyframe),
+      mDisplay(aDisplay)
   {
     MOZ_COUNT_CTOR(VideoData);
     NS_ASSERTION(aEndTime >= aTime, "Frame must start before it ends.");
   }
 
 };
 
 // Thread and type safe wrapper around nsDeque.
--- a/content/media/nsBuiltinDecoderStateMachine.cpp
+++ b/content/media/nsBuiltinDecoderStateMachine.cpp
@@ -1072,22 +1072,18 @@ nsresult nsBuiltinDecoderStateMachine::R
       {
         LoadMetadata();
         if (mState == DECODER_STATE_SHUTDOWN) {
           continue;
         }
 
         VideoData* videoData = FindStartTime();
         if (videoData) {
-          nsIntSize display = mInfo.mDisplay;
-          float aspect = mInfo.mPixelAspectRatio;
-          {
-            ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
-            RenderVideoFrame(videoData, TimeStamp::Now(), display, aspect);
-          }
+          ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
+          RenderVideoFrame(videoData, TimeStamp::Now());
         }
 
         // Start the decode threads, so that we can pre buffer the streams.
         // and calculate the start time in order to determine the duration.
         if (NS_FAILED(StartDecodeThreads())) {
           continue;
         }
 
@@ -1200,21 +1196,19 @@ nsresult nsBuiltinDecoderStateMachine::R
             PRInt64 startTime = (audio && audio->mTime < seekTime) ? audio->mTime : seekTime;
             mAudioStartTime = startTime;
             mPlayDuration = startTime - mStartTime;
             if (HasVideo()) {
               nsAutoPtr<VideoData> video(mReader->mVideoQueue.PeekFront());
               if (video) {
                 NS_ASSERTION(video->mTime <= seekTime && seekTime <= video->mEndTime,
                              "Seek target should lie inside the first frame after seek");
-                nsIntSize display = mInfo.mDisplay;
-                float aspect = mInfo.mPixelAspectRatio;
                 {
                   ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
-                  RenderVideoFrame(video, TimeStamp::Now(), display, aspect);
+                  RenderVideoFrame(video, TimeStamp::Now());
                 }
                 mReader->mVideoQueue.PopFront();
                 nsCOMPtr<nsIRunnable> event =
                   NS_NewRunnableMethod(mDecoder, &nsBuiltinDecoder::Invalidate);
                 NS_DispatchToMainThread(event, NS_DISPATCH_NORMAL);
               }
             }
           }
@@ -1361,31 +1355,28 @@ nsresult nsBuiltinDecoderStateMachine::R
       break;
     }
   }
 
   return NS_OK;
 }
 
 void nsBuiltinDecoderStateMachine::RenderVideoFrame(VideoData* aData,
-                                                    TimeStamp aTarget,
-                                                    nsIntSize aDisplaySize,
-                                                    float aAspectRatio)
+                                                    TimeStamp aTarget)
 {
   NS_ASSERTION(IsCurrentThread(mDecoder->mStateMachineThread), "Should be on state machine thread.");
   mDecoder->GetReentrantMonitor().AssertNotCurrentThreadIn();
 
   if (aData->mDuplicate) {
     return;
   }
 
   nsRefPtr<Image> image = aData->mImage;
   if (image) {
-    mDecoder->SetVideoData(gfxIntSize(aDisplaySize.width, aDisplaySize.height),
-                           aAspectRatio, image, aTarget);
+    mDecoder->SetVideoData(aData->mDisplay, image, aTarget);
   }
 }
 
 PRInt64
 nsBuiltinDecoderStateMachine::GetAudioClock()
 {
   NS_ASSERTION(IsCurrentThread(mDecoder->mStateMachineThread), "Should be on state machine thread.");
   if (!mAudioStream || !HasAudio())
@@ -1485,24 +1476,20 @@ void nsBuiltinDecoderStateMachine::Advan
     }
 
     if (currentFrame) {
       // Decode one frame and display it.
       TimeStamp presTime = mPlayStartTime - UsecsToDuration(mPlayDuration) +
                            UsecsToDuration(currentFrame->mTime - mStartTime);
       NS_ASSERTION(currentFrame->mTime >= mStartTime, "Should have positive frame time");
       {
-        nsIntSize display = mInfo.mDisplay;
-        float aspect = mInfo.mPixelAspectRatio;
-        {
-          ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
-          // If we have video, we want to increment the clock in steps of the frame
-          // duration.
-          RenderVideoFrame(currentFrame, presTime, display, aspect);
-        }
+        ReentrantMonitorAutoExit exitMon(mDecoder->GetReentrantMonitor());
+        // If we have video, we want to increment the clock in steps of the frame
+        // duration.
+        RenderVideoFrame(currentFrame, presTime);
       }
       mDecoder->GetFrameStatistics().NotifyPresentedFrame();
       PRInt64 now = DurationToUsecs(TimeStamp::Now() - mPlayStartTime) + mPlayDuration;
       remainingTime = currentFrame->mEndTime - mStartTime - now;
       currentFrame = nsnull;
     }
 
     // Kick the decode thread in case it filled its buffers and put itself
--- a/content/media/nsBuiltinDecoderStateMachine.h
+++ b/content/media/nsBuiltinDecoderStateMachine.h
@@ -314,18 +314,17 @@ protected:
   // if unknown).  Does not update the playback position on the decoder or
   // media element -- use UpdatePlaybackPosition for that.  Called on the state
   // machine thread, caller must hold the decoder lock.
   void UpdatePlaybackPositionInternal(PRInt64 aTime);
 
   // Performs YCbCr to RGB conversion, and pushes the image down the
   // rendering pipeline. Called on the state machine thread. The decoder
   // monitor must not be held when calling this.
-  void RenderVideoFrame(VideoData* aData, TimeStamp aTarget, 
-                        nsIntSize aDisplaySize, float aAspectRatio);
+  void RenderVideoFrame(VideoData* aData, TimeStamp aTarget);
  
   // If we have video, display a video frame if it's time for display has
   // arrived, otherwise sleep until it's time for the next sample. Update
   // the current frame time as appropriate, and trigger ready state update.
   // The decoder monitor must be held with exactly one lock count. Called
   // on the state machine thread.
   void AdvanceFrame();
 
--- a/content/media/nsMediaDecoder.cpp
+++ b/content/media/nsMediaDecoder.cpp
@@ -69,17 +69,16 @@ using namespace mozilla;
 // fluctuating bitrates.
 #define CAN_PLAY_THROUGH_MARGIN 10
 
 nsMediaDecoder::nsMediaDecoder() :
   mElement(0),
   mRGBWidth(-1),
   mRGBHeight(-1),
   mVideoUpdateLock("nsMediaDecoder.mVideoUpdateLock"),
-  mPixelAspectRatio(1.0),
   mFrameBufferLength(0),
   mPinnedForSeek(PR_FALSE),
   mSizeChanged(PR_FALSE),
   mImageContainerSizeChanged(PR_FALSE),
   mShuttingDown(PR_FALSE)
 {
   MOZ_COUNT_CTOR(nsMediaDecoder);
 }
@@ -111,56 +110,35 @@ nsresult nsMediaDecoder::RequestFrameBuf
   if (aLength < FRAMEBUFFER_LENGTH_MIN || aLength > FRAMEBUFFER_LENGTH_MAX) {
     return NS_ERROR_DOM_INDEX_SIZE_ERR;
   }
 
   mFrameBufferLength = aLength;
   return NS_OK;
 }
 
-
-static PRInt32 ConditionDimension(float aValue, PRInt32 aDefault)
-{
-  // This will exclude NaNs and infinities
-  if (aValue >= 1.0 && aValue <= 10000.0)
-    return PRInt32(NS_round(aValue));
-  return aDefault;
-}
-
 void nsMediaDecoder::Invalidate()
 {
   if (!mElement)
     return;
 
   nsIFrame* frame = mElement->GetPrimaryFrame();
   PRBool invalidateFrame = PR_FALSE;
 
   {
     MutexAutoLock lock(mVideoUpdateLock);
 
     // Get mImageContainerSizeChanged while holding the lock.
     invalidateFrame = mImageContainerSizeChanged;
     mImageContainerSizeChanged = PR_FALSE;
 
     if (mSizeChanged) {
-      nsIntSize scaledSize(mRGBWidth, mRGBHeight);
-      // Apply the aspect ratio to produce the intrinsic size we report
-      // to the element.
-      if (mPixelAspectRatio > 1.0) {
-        // Increase the intrinsic width
-        scaledSize.width =
-          ConditionDimension(mPixelAspectRatio*scaledSize.width, scaledSize.width);
-      } else {
-        // Increase the intrinsic height
-        scaledSize.height =
-          ConditionDimension(scaledSize.height/mPixelAspectRatio, scaledSize.height);
-      }
-      mElement->UpdateMediaSize(scaledSize);
+      mElement->UpdateMediaSize(nsIntSize(mRGBWidth, mRGBHeight));
+      mSizeChanged = PR_FALSE;
 
-      mSizeChanged = PR_FALSE;
       if (frame) {
         nsPresContext* presContext = frame->PresContext();
         nsIPresShell *presShell = presContext->PresShell();
         presShell->FrameNeedsReflow(frame,
                                     nsIPresShell::eStyleChange,
                                     NS_FRAME_IS_DIRTY);
       }
     }
@@ -239,27 +217,24 @@ nsresult nsMediaDecoder::StopProgress()
 void nsMediaDecoder::FireTimeUpdate()
 {
   if (!mElement)
     return;
   mElement->FireTimeUpdate(PR_TRUE);
 }
 
 void nsMediaDecoder::SetVideoData(const gfxIntSize& aSize,
-                                  float aPixelAspectRatio,
                                   Image* aImage,
                                   TimeStamp aTarget)
 {
   MutexAutoLock lock(mVideoUpdateLock);
 
-  if (mRGBWidth != aSize.width || mRGBHeight != aSize.height ||
-      mPixelAspectRatio != aPixelAspectRatio) {
+  if (mRGBWidth != aSize.width || mRGBHeight != aSize.height) {
     mRGBWidth = aSize.width;
     mRGBHeight = aSize.height;
-    mPixelAspectRatio = aPixelAspectRatio;
     mSizeChanged = PR_TRUE;
   }
   if (mImageContainer && aImage) {
     gfxIntSize oldFrameSize = mImageContainer->GetCurrentSize();
 
     TimeStamp paintTime = mImageContainer->GetPaintTime();
     if (!paintTime.IsNull() && !mPaintTarget.IsNull()) {
       mPaintDelay = paintTime - mPaintTarget;
--- a/content/media/nsMediaDecoder.h
+++ b/content/media/nsMediaDecoder.h
@@ -367,17 +367,16 @@ public:
   // the element is not a video element. This can be called from any
   // thread; ImageContainers can be used from any thread.
   ImageContainer* GetImageContainer() { return mImageContainer; }
 
   // Set the video width, height, pixel aspect ratio, current image and
   // target paint time of the next video frame to be displayed.
   // Ownership of the image is transferred to the layers subsystem.
   void SetVideoData(const gfxIntSize& aSize,
-                    float aPixelAspectRatio,
                     Image* aImage,
                     TimeStamp aTarget);
 
   // Constructs the time ranges representing what segments of the media
   // are buffered and playable.
   virtual nsresult GetBuffered(nsTimeRanges* aBuffered) = 0;
 
   // Returns PR_TRUE if we can play the entire media through without stopping
@@ -440,19 +439,16 @@ protected:
   // values while they are out of sync (width changed but
   // not height yet, etc).
   // Backends that are updating the height, width or writing
   // to the RGB buffer must obtain this lock first to ensure that
   // the video element does not use video data or sizes that are
   // in the midst of being changed.
   Mutex mVideoUpdateLock;
 
-  // Pixel aspect ratio (ratio of the pixel width to pixel height)
-  float mPixelAspectRatio;
-
   // The framebuffer size to use for audioavailable events.
   PRUint32 mFrameBufferLength;
 
   // PR_TRUE when our media stream has been pinned. We pin the stream
   // while seeking.
   PRPackedBool mPinnedForSeek;
 
   // Set to PR_TRUE when the video width, height or pixel aspect ratio is
--- a/content/media/ogg/nsOggReader.cpp
+++ b/content/media/ogg/nsOggReader.cpp
@@ -113,16 +113,17 @@ nsOggReader::nsOggReader(nsBuiltinDecode
     mTheoraState(nsnull),
     mVorbisState(nsnull),
     mSkeletonState(nsnull),
     mVorbisSerial(0),
     mTheoraSerial(0),
     mPageOffset(0)
 {
   MOZ_COUNT_CTOR(nsOggReader);
+  memset(&mTheoraInfo, 0, sizeof(mTheoraInfo));
 }
 
 nsOggReader::~nsOggReader()
 {
   ogg_sync_clear(&mOggState);
   MOZ_COUNT_DTOR(nsOggReader);
 }
 
@@ -253,37 +254,44 @@ nsresult nsOggReader::ReadMetadata(nsVid
   for (PRUint32 i = 0; i < bitstreams.Length(); i++) {
     nsOggCodecState* s = bitstreams[i];
     if (s != mVorbisState && s != mTheoraState && s != mSkeletonState) {
       s->Deactivate();
     }
   }
 
   if (mTheoraState && ReadHeaders(mTheoraState)) {
-    mInfo.mHasVideo = PR_TRUE;
-    mInfo.mPixelAspectRatio = mTheoraState->mPixelAspectRatio;
-    mInfo.mPicture = nsIntRect(mTheoraState->mInfo.pic_x,
-                               mTheoraState->mInfo.pic_y,
-                               mTheoraState->mInfo.pic_width,
-                               mTheoraState->mInfo.pic_height);
-    mInfo.mFrame = nsIntSize(mTheoraState->mInfo.frame_width,
-                              mTheoraState->mInfo.frame_height);
-    mInfo.mDisplay = nsIntSize(mInfo.mPicture.width,
-                               mInfo.mPicture.height);
-    gfxIntSize sz(mTheoraState->mInfo.pic_width,
-                  mTheoraState->mInfo.pic_height);
-    mDecoder->SetVideoData(sz,
-                           mTheoraState->mPixelAspectRatio,
-                           nsnull,
-                           TimeStamp::Now());
-    // Copy Theora info data for time computations on other threads.
-    memcpy(&mTheoraInfo, &mTheoraState->mInfo, sizeof(mTheoraInfo));
-    mTheoraSerial = mTheoraState->mSerial;
-  } else {
-    memset(&mTheoraInfo, 0, sizeof(mTheoraInfo));
+    nsIntRect picture = nsIntRect(mTheoraState->mInfo.pic_x,
+                                  mTheoraState->mInfo.pic_y,
+                                  mTheoraState->mInfo.pic_width,
+                                  mTheoraState->mInfo.pic_height);
+
+    nsIntSize displaySize = nsIntSize(mTheoraState->mInfo.pic_width,
+                                      mTheoraState->mInfo.pic_height);
+
+    // Apply the aspect ratio to produce the intrinsic display size we report
+    // to the element.
+    ScaleDisplayByAspectRatio(displaySize, mTheoraState->mPixelAspectRatio);
+
+    nsIntSize frameSize(mTheoraState->mInfo.frame_width,
+                        mTheoraState->mInfo.frame_height);
+    if (nsVideoInfo::ValidateVideoRegion(frameSize, picture, displaySize)) {
+      // Video track's frame sizes will not overflow. Activate the video track.
+      mInfo.mHasVideo = PR_TRUE;
+      mInfo.mDisplay = displaySize;
+      mPicture = picture;
+
+      mDecoder->SetVideoData(gfxIntSize(displaySize.width, displaySize.height),
+                             nsnull,
+                             TimeStamp::Now());
+
+      // Copy Theora info data for time computations on other threads.
+      memcpy(&mTheoraInfo, &mTheoraState->mInfo, sizeof(mTheoraInfo));
+      mTheoraSerial = mTheoraState->mSerial;
+    }
   }
 
   if (mVorbisState && ReadHeaders(mVorbisState)) {
     mInfo.mHasAudio = PR_TRUE;
     mInfo.mAudioRate = mVorbisState->mInfo.rate;
     mInfo.mAudioChannels = mVorbisState->mInfo.channels;
     // Copy Vorbis info data for time computations on other threads.
     memcpy(&mVorbisInfo, &mVorbisState->mInfo, sizeof(mVorbisInfo));
@@ -477,17 +485,18 @@ nsresult nsOggReader::DecodeTheora(ogg_p
     mReentrantMonitor.AssertCurrentThreadIn();
     VideoData *v = VideoData::Create(mInfo,
                                      mDecoder->GetImageContainer(),
                                      mPageOffset,
                                      time,
                                      endTime,
                                      b,
                                      isKeyframe,
-                                     aPacket->granulepos);
+                                     aPacket->granulepos,
+                                     mPicture);
     if (!v) {
       // There may be other reasons for this error, but for
       // simplicity just assume the worst case: out of memory.
       NS_WARNING("Failed to allocate memory for video frame");
       return NS_ERROR_OUT_OF_MEMORY;
     }
     mVideoQueue.Push(v);
   }
--- a/content/media/ogg/nsOggReader.h
+++ b/content/media/ogg/nsOggReader.h
@@ -281,11 +281,15 @@ private:
   PRUint32 mVorbisSerial;
   PRUint32 mTheoraSerial;
   vorbis_info mVorbisInfo;
   th_info mTheoraInfo;
 
   // The offset of the end of the last page we've read, or the start of
   // the page we're about to read.
   PRInt64 mPageOffset;
+
+  // The picture region inside Theora frame to be displayed, if we have
+  // a Theora video track.
+  nsIntRect mPicture;
 };
 
 #endif
--- a/content/media/raw/nsRawReader.cpp
+++ b/content/media/raw/nsRawReader.cpp
@@ -87,37 +87,44 @@ nsresult nsRawReader::ReadMetadata(nsVid
         mMetadata.majorVersion == 0 &&
         mMetadata.minorVersion == 1))
     return NS_ERROR_FAILURE;
 
   PRUint32 dummy;
   if (!MulOverflow32(mMetadata.frameWidth, mMetadata.frameHeight, dummy))
     return NS_ERROR_FAILURE;
 
-  mInfo.mHasVideo = PR_TRUE;
-  mInfo.mPicture.x = 0;
-  mInfo.mPicture.y = 0;
-  mInfo.mPicture.width = mMetadata.frameWidth;
-  mInfo.mPicture.height = mMetadata.frameHeight;
-  mInfo.mFrame.width = mMetadata.frameWidth;
-  mInfo.mFrame.height = mMetadata.frameHeight;
+
   if (mMetadata.aspectDenominator == 0 ||
       mMetadata.framerateDenominator == 0)
     return NS_ERROR_FAILURE; // Invalid data
-  mInfo.mPixelAspectRatio = static_cast<float>(mMetadata.aspectNumerator) / 
+
+  // Determine and verify frame display size.
+  float pixelAspectRatio = static_cast<float>(mMetadata.aspectNumerator) / 
                             mMetadata.aspectDenominator;
+  nsIntSize display(mMetadata.frameWidth, mMetadata.frameHeight);
+  ScaleDisplayByAspectRatio(display, pixelAspectRatio);
+  mPicture = nsIntRect(0, 0, mMetadata.frameWidth, mMetadata.frameHeight);
+  nsIntSize frameSize(mMetadata.frameWidth, mMetadata.frameHeight);
+  if (!nsVideoInfo::ValidateVideoRegion(frameSize, mPicture, display)) {
+    // Video track's frame sizes will overflow. Fail.
+    return NS_ERROR_FAILURE;
+  }
+
+  mInfo.mHasVideo = PR_TRUE;
   mInfo.mHasAudio = PR_FALSE;
+  mInfo.mDisplay = display;
 
   mFrameRate = static_cast<float>(mMetadata.framerateNumerator) /
                mMetadata.framerateDenominator;
 
   // Make some sanity checks
   if (mFrameRate > 45 ||
       mFrameRate == 0 ||
-      mInfo.mPixelAspectRatio == 0 ||
+      pixelAspectRatio == 0 ||
       mMetadata.frameWidth > 2000 ||
       mMetadata.frameHeight > 2000 ||
       mMetadata.chromaChannelBpp != 4 ||
       mMetadata.lumaChannelBpp != 8 ||
       mMetadata.colorspace != 1 /* 4:2:0 */)
     return NS_ERROR_FAILURE;
 
   mFrameSize = mMetadata.frameWidth * mMetadata.frameHeight *
@@ -234,17 +241,18 @@ PRBool nsRawReader::DecodeVideoFrame(PRB
 
   VideoData *v = VideoData::Create(mInfo,
                                    mDecoder->GetImageContainer(),
                                    -1,
                                    currentFrameTime,
                                    currentFrameTime + (USECS_PER_S / mFrameRate),
                                    b,
                                    1, // In raw video every frame is a keyframe
-                                   -1);
+                                   -1,
+                                   mPicture);
   if (!v)
     return PR_FALSE;
 
   mVideoQueue.Push(v);
   mCurrentFrame++;
   decoded++;
   currentFrameTime += USECS_PER_S / mFrameRate;
 
--- a/content/media/raw/nsRawReader.h
+++ b/content/media/raw/nsRawReader.h
@@ -117,11 +117,12 @@ public:
 private:
   PRBool ReadFromStream(nsMediaStream *aStream, PRUint8 *aBuf,
                         PRUint32 aLength);
 
   nsRawVideoHeader mMetadata;
   PRUint32 mCurrentFrame;
   double mFrameRate;
   PRUint32 mFrameSize;
+  nsIntRect mPicture;
 };
 
 #endif
--- a/content/media/webm/nsWebMReader.cpp
+++ b/content/media/webm/nsWebMReader.cpp
@@ -253,19 +253,19 @@ nsresult nsWebMReader::ReadMetadata(nsVi
       if (r == -1) {
         Cleanup();
         return NS_ERROR_FAILURE;
       }
 
       // Picture region, taking into account cropping, before scaling
       // to the display size.
       nsIntRect pictureRect(params.crop_left,
-                        params.crop_top,
-                        params.width - (params.crop_right + params.crop_left),
-                        params.height - (params.crop_bottom + params.crop_top));
+                            params.crop_top,
+                            params.width - (params.crop_right + params.crop_left),
+                            params.height - (params.crop_bottom + params.crop_top));
 
       // If the cropping data appears invalid then use the frame data
       if (pictureRect.width <= 0 ||
           pictureRect.height <= 0 ||
           pictureRect.x < 0 ||
           pictureRect.y < 0)
       {
         pictureRect.x = 0;
@@ -277,25 +277,24 @@ nsresult nsWebMReader::ReadMetadata(nsVi
       // Validate the container-reported frame and pictureRect sizes. This ensures
       // that our video frame creation code doesn't overflow.
       nsIntSize displaySize(params.display_width, params.display_height);
       nsIntSize frameSize(params.width, params.height);
       if (!nsVideoInfo::ValidateVideoRegion(frameSize, pictureRect, displaySize)) {
         // Video track's frame sizes will overflow. Ignore the video track.
         continue;
       }
-          
+
       mVideoTrack = track;
       mHasVideo = PR_TRUE;
       mInfo.mHasVideo = PR_TRUE;
-      mInfo.mPicture = pictureRect;
+
       mInfo.mDisplay = displaySize;
-      mInfo.mFrame = frameSize;
-      mInfo.mPixelAspectRatio = (static_cast<float>(params.display_width) / mInfo.mPicture.width) /
-                                (static_cast<float>(params.display_height) / mInfo.mPicture.height);
+      mPicture = pictureRect;
+      mInitialFrame = frameSize;
 
       switch (params.stereo_mode) {
       case NESTEGG_VIDEO_MONO:
         mInfo.mStereoMode = STEREO_MODE_MONO;
         break;
       case NESTEGG_VIDEO_STEREO_LEFT_RIGHT:
         mInfo.mStereoMode = STEREO_MODE_LEFT_RIGHT;
         break;
@@ -719,24 +718,36 @@ PRBool nsWebMReader::DecodeVideoFrame(PR
       b.mPlanes[1].mHeight = img->d_h >> img->y_chroma_shift;
       b.mPlanes[1].mWidth = img->d_w >> img->x_chroma_shift;
  
       b.mPlanes[2].mData = img->planes[2];
       b.mPlanes[2].mStride = img->stride[2];
       b.mPlanes[2].mHeight = img->d_h >> img->y_chroma_shift;
       b.mPlanes[2].mWidth = img->d_w >> img->x_chroma_shift;
   
+      nsIntRect picture = mPicture;
+      if (img->d_w != mInitialFrame.width || img->d_h != mInitialFrame.height) {
+        // Frame size is different from what the container reports. This is legal
+        // in WebM, and we will preserve the ratio of the crop rectangle as it
+        // was reported relative to the picture size reported by the container.
+        picture.x = (mPicture.x * img->d_w) / mInitialFrame.width;
+        picture.y = (mPicture.y * img->d_h) / mInitialFrame.height;
+        picture.width = (img->d_w * mPicture.width) / mInitialFrame.width;
+        picture.height = (img->d_h * mPicture.height) / mInitialFrame.height;
+      }
+
       VideoData *v = VideoData::Create(mInfo,
                                        mDecoder->GetImageContainer(),
                                        holder->mOffset,
                                        tstamp_usecs,
                                        next_tstamp / NS_PER_USEC,
                                        b,
                                        si.is_kf,
-                                       -1);
+                                       -1,
+                                       picture);
       if (!v) {
         return PR_FALSE;
       }
       parsed++;
       decoded++;
       NS_ASSERTION(decoded <= parsed,
         "Expect only 1 frame per chunk per packet in WebM...");
       mVideoQueue.Push(v);
--- a/content/media/webm/nsWebMReader.h
+++ b/content/media/webm/nsWebMReader.h
@@ -226,14 +226,21 @@ private:
 
   // Number of samples we've decoded since decoding began at mAudioStartMs.
   PRUint64 mAudioSamples;
 
   // Parser state and computed offset-time mappings.  Shared by multiple
   // readers when decoder has been cloned.  Main thread only.
   nsRefPtr<nsWebMBufferedState> mBufferedState;
 
+  // Size of the frame initially present in the stream. If the decoded frame
+  // size differs, the picture region is scaled proportionally to this.
+  nsIntSize mInitialFrame;
+
+  // Picture region, relative to the initial frame size.
+  nsIntRect mPicture;
+
   // Booleans to indicate if we have audio and/or video data
   PRPackedBool mHasVideo;
   PRPackedBool mHasAudio;
 };
 
 #endif