Bug 1253499 - Implement a new scaling algorithm for simulcast. r=bwc,dminor
author Andreas Pehrson <pehrsons@mozilla.com>
Wed, 19 Sep 2018 15:00:45 +0000
changeset 495712 ea21cea8c6814083a0d680012769873bcb18b205
parent 495711 6a902d5ec8ea245a83265c8115576176fca310f0
child 495713 06098ed3fac7710de0869505857ee536d52e5da7
push id 1864
push user ffxbld-merge
push date Mon, 03 Dec 2018 15:51:40 +0000
treeherder mozilla-release@f040763d99ad
reviewers bwc, dminor
bugs 1253499
milestone 64.0a1
Bug 1253499 - Implement a new scaling algorithm for simulcast. r=bwc,dminor

webrtc.org is picky about resolutions for simulcast layers. Currently it asserts that all layers have identical aspect ratios. We handle this by ignoring layers whose aspect ratio differs from the highest layer's. When simulcast is requested and at least one layer is scaled by something other than 1.0, the new algorithm tries to remedy this as follows:
- The highest-resolution layer is cropped to 16-pixel alignment, to ensure that scaling options exist.
- A separate VideoAdapter is used for the simulcast layers, with the highest layer's resolution as an aspect ratio requirement. This forces the simulcast adapter to retain that aspect ratio in any scaling decisions.

This doesn't make scaling decisions spec-compliant (floor the width and height respectively), but it does allow scaling to be controlled via setParameters and keeps scaling decisions in upstream code, ensuring good compatibility with upstream's part of the pipeline (encoders, etc.).

Differential Revision: https://phabricator.services.mozilla.com/D4133
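A minimal standalone sketch (not part of the patch; the resolutions, scale factors and variable names below are illustrative) of the two ideas in the message, using plain arithmetic in place of cricket::VideoAdapter: crop the highest layer to 16-pixel alignment, then derive a lower layer's size from its scaleResolutionDownBy relative to the first encoding while keeping the first layer's aspect ratio.

#include <cstdio>

int main() {
  // 1) Crop the highest-resolution layer to 16-pixel alignment so that
  //    downscaled layers can keep exactly the same aspect ratio.
  int width = 1918, height = 1078;        // hypothetical capture size
  int croppedW = width - (width % 16);    // 1904
  int croppedH = height - (height % 16);  // 1072

  // 2) Scale a lower layer by its scaleResolutionDownBy relative to the
  //    highest (first) encoding.
  float firstScale = 1.0f;                // scaleDownBy of the first encoding
  float layerScale = 2.0f;                // scaleDownBy of this lower layer
  float effective = layerScale / firstScale;
  int outW = static_cast<int>(croppedW / effective);  // 952
  int outH = static_cast<int>(croppedH / effective);  // 536

  // Layers whose result breaks the first layer's aspect ratio are dropped.
  bool sameAspect = (outW * croppedH == croppedW * outH);
  std::printf("%dx%d -> %dx%d (aspect preserved: %d)\n",
              croppedW, croppedH, outW, outH, sameAspect ? 1 : 0);
  return 0;
}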
media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
media/webrtc/signaling/src/media-conduit/VideoConduit.h
--- a/media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
+++ b/media/webrtc/signaling/src/media-conduit/VideoConduit.cpp
@@ -79,16 +79,20 @@ static const char* kRedPayloadName = "re
 // The number of frame buffers WebrtcVideoConduit may create before returning
 // errors.
 // Sometimes these are released synchronously but they can be forwarded all the
 // way to the encoder for asynchronous encoding. With a pool size of 5,
 // we allow 1 buffer for the current conversion, and 4 buffers to be queued at
 // the encoder.
 #define SCALER_BUFFER_POOL_SIZE 5
 
+// The pixel alignment to use for the highest resolution layer when simulcast
+// is active and one or more layers are being scaled.
+#define SIMULCAST_RESOLUTION_ALIGNMENT 16
+
 // Convert (SI) kilobits/sec to (SI) bits/sec
 #define KBPS(kbps) kbps * 1000
 
 // 32 bytes is what WebRTC CodecInst expects
 const unsigned int WebrtcVideoConduit::CODEC_PLNAME_SIZE = 32;
 static const int kViEMinCodecBitrate_bps = KBPS(30);
 
 template<typename T>
@@ -799,79 +803,100 @@ CodecsDifferent(const nsTArray<UniquePtr
 
 std::vector<webrtc::VideoStream>
 WebrtcVideoConduit::VideoStreamFactory::CreateEncoderStreams(
   int width, int height, const webrtc::VideoEncoderConfig& config)
 {
   size_t streamCount = config.number_of_streams;
   webrtc::VideoCodecMode codecMode = mConduit->mCodecMode;
 
-  // Disallow odd width and height, they will cause aspect ratio checks to
-  // fail in the webrtc.org code. We can hit transient states after window
-  // sharing ends where odd resolutions are requested for the camera.
-  streamCount = std::min(streamCount, static_cast<size_t>(
-                         1 + std::min(CountTrailingZeroes32(width),
-                                      CountTrailingZeroes32(height))));
-
   // We only allow one layer when screensharing
   if (codecMode == webrtc::VideoCodecMode::kScreensharing) {
     streamCount = 1;
   }
 
   std::vector<webrtc::VideoStream> streams;
   streams.reserve(streamCount);
   MOZ_ASSERT(mConduit);
   MutexAutoLock lock(mConduit->mMutex);
 
-  // XXX webrtc.org code has a restriction on simulcast layers that each
-  // layer must be 1/2 the dimension of the previous layer - not sure why.
-  // This means we can't use scaleResolutionBy/scaleDownBy (yet), even if
-  // the user specified it.  The one exception is that we can apply it on
-  // the full-resolution stream (which also happens to handle the
-  // non-simulcast usage case). NOTE: we make an assumption here, not in the
-  // spec, that the first stream is the full-resolution stream.
-#if 0
-  // XXX What we'd like to do for each simulcast stream...
-  if (simulcastEncoding.constraints.scaleDownBy > 1.0) {
-    uint32_t new_width = width / simulcastEncoding.constraints.scaleDownBy;
-    uint32_t new_height = height / simulcastEncoding.constraints.scaleDownBy;
-
-    if (new_width != width || new_height != height) {
-      if (streamCount == 1) {
-        CSFLogVerbose(LOGTAG, "%s: ConstrainPreservingAspectRatio", __FUNCTION__);
-        // Use less strict scaling in unicast. That way 320x240 / 3 = 106x79.
-        ConstrainPreservingAspectRatio(new_width, new_height,
-                                       &width, &height);
-      } else {
-        CSFLogVerbose(LOGTAG, "%s: ConstrainPreservingAspectRatioExact", __FUNCTION__);
-        // webrtc.org supposedly won't tolerate simulcast unless every stream
-        // is exactly the same aspect ratio. 320x240 / 3 = 80x60.
-        ConstrainPreservingAspectRatioExact(new_width * new_height,
-                                            &width, &height);
-      }
-    }
-  }
-#endif
+  // We assume that the first stream is the full-resolution stream.
+
+  // This ensures all simulcast layers will be of the same aspect ratio as the input.
+  mConduit->mSimulcastAdapter->OnOutputFormatRequest(
+    cricket::VideoFormat(width, height, 0, 0));
 
   for (size_t idx = streamCount - 1; streamCount > 0; idx--, streamCount--) {
     webrtc::VideoStream video_stream;
-    // Stream dimensions must be divisable by 2^(n-1), where n is the number of layers.
-    // Each lower resolution layer is 1/2^(n-1) of the size of largest layer,
-    // where n is the number of the layer
-
-    // width/height will be overridden on the first frame; they must be 'sane' for
-    // SetSendCodec()
-    video_stream.width = width >> idx;
-    video_stream.height = height >> idx;
+    auto& simulcastEncoding = mConduit->mCurSendCodecConfig->mSimulcastEncodings[idx];
+    MOZ_ASSERT(simulcastEncoding.constraints.scaleDownBy >= 1.0);
+
+    // All streams' dimensions must retain the aspect ratio of the input stream.
+    // Note that the first stream might already have been scaled by us.
+    // Webrtc.org doesn't know this, so we have to adjust lower layers manually.
+    int unusedCropWidth, unusedCropHeight, outWidth, outHeight;
+    if (idx == 0) {
+      // This is the highest-resolution stream. We avoid calling
+      // AdaptFrameResolution on this because precision errors in VideoAdapter
+      // can cause the out-resolution to be an odd pixel smaller than the
+      // source (1920x1419 has caused this). We shortcut this instead.
+      outWidth = width;
+      outHeight = height;
+    } else {
+      float effectiveScaleDownBy =
+        simulcastEncoding.constraints.scaleDownBy /
+        mConduit->mCurSendCodecConfig->mSimulcastEncodings[0].constraints.scaleDownBy;
+      MOZ_ASSERT(effectiveScaleDownBy >= 1.0);
+      mConduit->mSimulcastAdapter->OnScaleResolutionBy(
+        effectiveScaleDownBy > 1.0 ?
+          rtc::Optional<float>(effectiveScaleDownBy) :
+          rtc::Optional<float>());
+      bool rv = mConduit->mSimulcastAdapter->AdaptFrameResolution(
+        width,
+        height,
+        0, // Ok, since we don't request an output format with an interval
+        &unusedCropWidth,
+        &unusedCropHeight,
+        &outWidth,
+        &outHeight);
+
+      if (!rv) {
+        // The only thing that can make AdaptFrameResolution fail in this case
+        // is if this layer is scaled so far down that it has less than one pixel.
+        outWidth = 0;
+        outHeight = 0;
+      }
+    }
+
+    if (outWidth == 0 || outHeight == 0) {
+      CSFLogInfo(LOGTAG,
+                 "%s Stream with RID %s ignored because of no resolution.",
+                 __FUNCTION__, simulcastEncoding.rid.c_str());
+      continue;
+    }
+
+    MOZ_ASSERT(outWidth > 0);
+    MOZ_ASSERT(outHeight > 0);
+    video_stream.width = outWidth;
+    video_stream.height = outHeight;
+
+    CSFLogInfo(LOGTAG, "%s Input frame %ux%u, RID %s scaling to %zux%zu",
+               __FUNCTION__, width, height, simulcastEncoding.rid.c_str(),
+               video_stream.width, video_stream.height);
+
+    if (video_stream.width * height != width * video_stream.height) {
+      CSFLogInfo(LOGTAG,
+                 "%s Stream with RID %s ignored because of bad aspect ratio.",
+                 __FUNCTION__, simulcastEncoding.rid.c_str());
+      continue;
+    }
+
     // We want to ensure this picks up the current framerate, so indirect
     video_stream.max_framerate = mConduit->mSendingFramerate;
 
-    auto& simulcastEncoding = mConduit->mCurSendCodecConfig->mSimulcastEncodings[idx];
-    MOZ_ASSERT(simulcastEncoding.constraints.scaleDownBy >= 1.0);
-
     mConduit->SelectBitrates(
       video_stream.width, video_stream.height,
       simulcastEncoding.constraints.maxBr, video_stream);
 
     video_stream.max_qp = kQpMax;
     video_stream.SetRid(simulcastEncoding.rid);
 
     // leave vector temporal_layer_thresholds_bps empty for non-simulcast
@@ -930,18 +955,18 @@ WebrtcVideoConduit::ConfigureSendMediaCo
 
   // validate basic params
   if ((condError = ValidateCodecConfig(codecConfig)) != kMediaConduitNoError) {
     return condError;
   }
 
   size_t streamCount = std::min(codecConfig->mSimulcastEncodings.size(),
                                 (size_t)webrtc::kMaxSimulcastStreams);
-  CSFLogDebug(LOGTAG, "%s for VideoConduit:%p stream count:%d", __FUNCTION__,
-              this, static_cast<int>(streamCount));
+  CSFLogDebug(LOGTAG, "%s for VideoConduit:%p stream count:%zu", __FUNCTION__,
+              this, streamCount);
 
   mSendingFramerate = 0;
   mEncoderConfig.ClearStreams();
   mSendStreamConfig.rtp.rids.clear();
 
   int max_framerate;
   if (codecConfig->mEncodingConstraints.maxFps > 0) {
     max_framerate = codecConfig->mEncodingConstraints.maxFps;
@@ -952,17 +977,17 @@ WebrtcVideoConduit::ConfigureSendMediaCo
   mSendingFramerate = SelectSendFrameRate(codecConfig,
                                           max_framerate,
                                           mLastWidth,
                                           mLastHeight);
 
   // So we can comply with b=TIAS/b=AS/maxbr=X when input resolution changes
   mNegotiatedMaxBitrate = codecConfig->mTias;
 
-  if (mLastWidth == 0 && mMinBitrateEstimate) {
+  if (mLastWidth == 0 && mMinBitrateEstimate != 0) {
    // Only do this at the start; use "have we sent a frame" as a reasonable stand-in.
     // min <= start <= max (which can be -1, note!)
     webrtc::Call::Config::BitrateConfig config;
     config.min_bitrate_bps = mMinBitrateEstimate;
     if (config.start_bitrate_bps < mMinBitrateEstimate) {
       config.start_bitrate_bps = mMinBitrateEstimate;
     }
     if (config.max_bitrate_bps > 0 &&
@@ -973,33 +998,35 @@ WebrtcVideoConduit::ConfigureSendMediaCo
   }
 
   // NOTE: the lifetime of this object MUST be less than the lifetime of the Conduit
   mEncoderConfig.SetVideoStreamFactory(
     new rtc::RefCountedObject<WebrtcVideoConduit::VideoStreamFactory>(
       codecConfig->mName, this));
 
   // Reset the VideoAdapter. SelectResolution will ensure limits are set.
-  mVideoAdapter = MakeUnique<cricket::VideoAdapter>();
+  mVideoAdapter = MakeUnique<cricket::VideoAdapter>(
+    streamCount > 1 ? SIMULCAST_RESOLUTION_ALIGNMENT : 1);
+  mSimulcastAdapter = MakeUnique<cricket::VideoAdapter>();
   mVideoAdapter->OnScaleResolutionBy(
     (streamCount >= 1 && codecConfig->mSimulcastEncodings[0].constraints.scaleDownBy > 1.0) ?
       rtc::Optional<float>(codecConfig->mSimulcastEncodings[0].constraints.scaleDownBy) :
       rtc::Optional<float>());
 
   // XXX parse the encoded SPS/PPS data and set spsData/spsLen/ppsData/ppsLen
   mEncoderConfig.SetEncoderSpecificSettings(ConfigureVideoEncoderSettings(codecConfig, this));
   mEncoderConfig.SetResolutionDivisor(1);
 
   mEncoderConfig.SetContentType(mCodecMode == webrtc::kRealtimeVideo ?
     webrtc::VideoEncoderConfig::ContentType::kRealtimeVideo :
     webrtc::VideoEncoderConfig::ContentType::kScreen);
   // for the GMP H.264 encoder/decoder!!
   mEncoderConfig.SetMinTransmitBitrateBps(0);
   // Expected max number of encodings
-  mEncoderConfig.SetMaxEncodings(codecConfig->mSimulcastEncodings.size());
+  mEncoderConfig.SetMaxEncodings(streamCount);
 
  // If only encoder stream attributes have been changed, there is no need to stop,
   // create a new webrtc::VideoSendStream, and restart.
   // Recreating on PayloadType change may be overkill, but is safe.
   if (mSendStream) {
     if (!RequiresNewSendStream(*codecConfig)) {
       mCurSendCodecConfig->mEncodingConstraints = codecConfig->mEncodingConstraints;
       mCurSendCodecConfig->mSimulcastEncodings = codecConfig->mSimulcastEncodings;
--- a/media/webrtc/signaling/src/media-conduit/VideoConduit.h
+++ b/media/webrtc/signaling/src/media-conduit/VideoConduit.h
@@ -522,16 +522,22 @@ private:
   const nsCOMPtr<nsIEventTarget> mStsThread;
 
   Mutex mMutex;
 
   // Adapter handling resolution constraints from signaling and sinks.
   // Written only on main thread. Guarded by mMutex, except for reads on main.
   UniquePtr<cricket::VideoAdapter> mVideoAdapter;
 
+  // Adapter for simulcast layers. We use this to handle scaleResolutionDownBy
+  // for layers. It's separate from mVideoAdapter to not affect scaling settings
+  // for incoming frames.
+  // Written only on main thread. Guarded by mMutex, except for reads on main.
+  UniquePtr<cricket::VideoAdapter> mSimulcastAdapter;
+
   // Our own record of the sinks added to mVideoBroadcaster so we can support
   // dispatching updates to sinks from off-main-thread. Main thread only.
   AutoTArray<rtc::VideoSinkInterface<webrtc::VideoFrame>*, 1> mRegisteredSinks;
 
   // Broadcaster that distributes our frames to all registered sinks.
   // Sinks can only be added, updated and removed on main thread.
   // Frames can be passed in on any thread.
   rtc::VideoBroadcaster mVideoBroadcaster;