Bug 1054624 - Fix high-packet-loss problems with H.264 WebRTC calls. r=jesup, a=lmandel
authorMo Zanaty <mzanaty@cisco.com>
Sun, 14 Sep 2014 08:17:24 -0400
changeset 216769 75eddbd6dc80
parent 216768 2dbe6d8a5c30
child 216770 62d020eff891
push id3906
push userryanvm@gmail.com
push date2014-09-17 15:11 +0000
treeherdermozilla-beta@62d020eff891 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjesup, lmandel
bugs1054624
milestone33.0
Bug 1054624 - Fix high-packet-loss problems with H.264 WebRTC calls. r=jesup, a=lmandel
media/webrtc/trunk/webrtc/modules/interface/module_common_types.h
media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_receiver_video.cc
media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
--- a/media/webrtc/trunk/webrtc/modules/interface/module_common_types.h
+++ b/media/webrtc/trunk/webrtc/modules/interface/module_common_types.h
@@ -112,17 +112,17 @@ enum RtpVideoCodecTypes {
   kRtpVideoGeneric,
   kRtpVideoVp8,
   kRtpVideoH264
 };
 struct RTPVideoHeader {
   uint16_t width;  // size
   uint16_t height;
 
-  bool isFirstPacket;    // first packet in frame
+  bool isFirstPacket;    // first packet in frame (or NAL for H.264)
   uint8_t simulcastIdx;  // Index if the simulcast encoder creating
                          // this frame, 0 if not using simulcast.
   RtpVideoCodecTypes codec;
   RTPVideoTypeHeader codecHeader;
 };
 union RTPTypeHeader {
   RTPAudioHeader Audio;
   RTPVideoHeader Video;
--- a/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_receiver_video.cc
+++ b/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_receiver_video.cc
@@ -230,46 +230,82 @@ int32_t RTPReceiverVideo::ReceiveVp8Code
 }
 
 int32_t RTPReceiverVideo::ReceiveH264Codec(WebRtcRTPHeader* rtp_header,
                                           const uint8_t* payload_data,
                                           uint16_t payload_data_length) {
   size_t offset = RtpFormatH264::kNalHeaderOffset;
   uint8_t nal_type = payload_data[offset] & RtpFormatH264::kTypeMask;
   rtp_header->type.Video.codecHeader.H264.nalu_header = nal_type;
+  // For H.264, isFirstPacket means first in NAL unit, not first in the
+  // timestamp, which elsewhere is referred to as a 'frame' or 'session'
+  rtp_header->type.Video.isFirstPacket = true;
   // get original NAL type if FU-A or STAP-A
   switch (nal_type) {
     case RtpFormatH264::kFuA:
       offset = RtpFormatH264::kFuAHeaderOffset;
-      if (offset >= payload_data_length) return -1; // malformed
+      if (offset >= payload_data_length) {
+        return -1; // malformed
+      }
       nal_type = payload_data[offset] & RtpFormatH264::kTypeMask;
+      if (!(payload_data[offset] & RtpFormatH264::kFragStartBit)) {
+        rtp_header->type.Video.isFirstPacket = false;
+      }
       break;
     case RtpFormatH264::kStapA:
       offset = RtpFormatH264::kStapAHeaderOffset +
                RtpFormatH264::kAggUnitLengthSize;
-      if (offset >= payload_data_length) return -1; // malformed
+      if (offset >= payload_data_length) {
+        return -1; // malformed
+      }
       nal_type = payload_data[offset] & RtpFormatH264::kTypeMask;
       break;
     default:
       break;
   }
   // key frames start with SPS, PPS, IDR, or Recovery Point SEI
+  // Recovery Point SEI's are used in AIR and GDR refreshes, which don't
+  // send large iframes, and instead use forms of incremental/continuous refresh.
   rtp_header->frameType = kVideoFrameDelta;
   switch (nal_type) {
     case RtpFormatH264::kSei: // check if it is a Recovery Point SEI (aka GDR)
-      if (offset+1 >= payload_data_length) return -1; // malformed
-      if (payload_data[offset+1] != RtpFormatH264::kSeiRecPt) break;
+      if (offset+1 >= payload_data_length) {
+        return -1; // malformed
+      }
+      if (payload_data[offset+1] != RtpFormatH264::kSeiRecPt) {
+        break; // some other form of SEI - not a keyframe
+      }
       // else fall through since GDR is like IDR
     case RtpFormatH264::kSps:
     case RtpFormatH264::kPps:
     case RtpFormatH264::kIdr:
       rtp_header->frameType = kVideoFrameKey;
       break;
+    default:
+      break;
   }
+#if 0
+  // XXX This check (which is complex to implement) would verify if a NAL
+  // codes the first MB in the frame ("access unit").  See 7.4.1.2.4 in the
+  // 2003 draft H.264 spec for an algorithm for this, which requires
+  // considerable parsing of the stream.  Even simpler variants are
+  // complex. Since mis-identification of complete KeyFrames doesn't have
+  // to be 100% correct for "fast-forward" in error recovery, we can let it
+  // consider *any* NAL that's not a non-start FU-A packet to be a possible
+  // first-in-frame/session/"access unit" with minimal impact.  In some
+  // cases of leading loss, it might try to fast-forward to a "session"
+  // that's missing an SPS/PPS or missing the first NAL of a Mode 0 frame.
+  // This should be ok, even though the decode *might* fail.
 
+  // Check if this NAL codes for pixels *and* is the first NAL of a frame
+  // Would fail for FMO/ASO, which aren't allowed in baseline
+  if (nal_type == RtpFormatH264::kIdr || nal_type == RtpFormatH264::kIpb) {
+    // TODO; see 7.4.1.2.4  Detection of the first VCL NAL unit of a primary coded picture
+  }
+#endif
   // receive payloads as-is, depacketize later when moving to frame buffer
   if (data_callback_->OnReceivedPayloadData(
       payload_data, payload_data_length, rtp_header) != 0) {
     return -1;
   }
   return 0;
 }
 
--- a/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
@@ -471,54 +471,81 @@ int VCMSessionInfo::InsertPacket(const V
   if (packets_.size() == kMaxPacketsInSession) {
     return -1;
   }
 
   // Find the position of this packet in the packet list in sequence number
   // order and insert it. Loop over the list in reverse order.
   ReversePacketIterator rit = packets_.rbegin();
   for (; rit != packets_.rend(); ++rit)
-    if (LatestSequenceNumber(packet.seqNum, (*rit).seqNum) == packet.seqNum)
+    if (LatestSequenceNumber(packet.seqNum, (*rit).seqNum) == packet.seqNum) {
       break;
+    }
 
   // Check for duplicate packets.
   if (rit != packets_.rend() &&
-      (*rit).seqNum == packet.seqNum && (*rit).sizeBytes > 0)
+      (*rit).seqNum == packet.seqNum && (*rit).sizeBytes > 0) {
     return -2;
+  }
 
   PacketIterator packet_list_it;
 
-  // Only insert media packets between first and last packets (when available).
-  // Placing check here, as to properly account for duplicate packets.
-  // Check if this is first packet (only valid for some codecs)
-  // Should only be set for one packet per session.
-  if (packet.isFirstPacket && first_packet_seq_num_ == -1) {
-    // The first packet in a frame signals the frame type.
-    frame_type_ = packet.frameType;
-    // Store the sequence number for the first packet.
-    first_packet_seq_num_ = static_cast<int>(packet.seqNum);
-  } else if (first_packet_seq_num_ != -1 &&
-             !IsNewerSequenceNumber(packet.seqNum, first_packet_seq_num_)) {
-    //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
-    //                 "of frame boundaries";
-    return -3;
-  } else if (frame_type_ == kFrameEmpty && packet.frameType != kFrameEmpty) {
-    // Update the frame type with the type of the first media packet.
-    // TODO(mikhal): Can this trigger?
-    frame_type_ = packet.frameType;
-  }
+  if (packet.codec == kVideoCodecH264) {
+    // H.264 can have leading or trailing non-VCL (Video Coding Layer)
+    // NALUs, such as SPS/PPS/SEI and others.  Also, the RTP marker bit is
+    // not reliable for the last packet of a frame (RFC 6184 5.1 - "Decoders
+    // [] MUST NOT rely on this property"), so allow out-of-order packets to
+    // update the first and last seq# range.  Also mark as a key frame if
+    // any packet is of that type.
+    if (frame_type_ != kVideoFrameKey) {
+      frame_type_ = packet.frameType;
+    }
+    if ((!HaveFirstPacket() ||
+        IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum)) &&
+        packet.isFirstPacket) {
+      first_packet_seq_num_ = packet.seqNum;
+    }
+    // Note: the code does *not* currently handle the Marker bit being totally
+    // absent from a frame.  It does not, however, depend on it being on the last
+    // packet of the 'frame'/'session'.
+    if ((!HaveLastPacket() && packet.markerBit) ||
+        (HaveLastPacket() &&
+        IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
+      last_packet_seq_num_ = packet.seqNum;
+    }
+  } else {
+    // Only insert media packets between first and last packets (when available).
+    // Placing check here, as to properly account for duplicate packets.
+    // Check if this is first packet (only valid for some codecs)
+    // Should only be set for one packet per session.
+    if (packet.isFirstPacket && first_packet_seq_num_ == -1) {
+      // The first packet in a frame signals the frame type.
+      frame_type_ = packet.frameType;
+      // Store the sequence number for the first packet.
+      first_packet_seq_num_ = static_cast<int>(packet.seqNum);
+    } else if (first_packet_seq_num_ != -1 &&
+               !IsNewerSequenceNumber(packet.seqNum, first_packet_seq_num_)) {
+      //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
+      //                 "of frame boundaries";
+      return -3;
+    } else if (frame_type_ == kFrameEmpty && packet.frameType != kFrameEmpty) {
+      // Update the frame type with the type of the first media packet.
+      // TODO(mikhal): Can this trigger?
+      frame_type_ = packet.frameType;
+    }
 
-  // Track the marker bit, should only be set for one packet per session.
-  if (packet.markerBit && last_packet_seq_num_ == -1) {
-    last_packet_seq_num_ = static_cast<int>(packet.seqNum);
-  } else if (last_packet_seq_num_ != -1 &&
-             IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
-    //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
-    //                 "of frame boundaries";
-    return -3;
+    // Track the marker bit, should only be set for one packet per session.
+    if (packet.markerBit && last_packet_seq_num_ == -1) {
+      last_packet_seq_num_ = static_cast<int>(packet.seqNum);
+    } else if (last_packet_seq_num_ != -1 &&
+               IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
+      //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
+      //                 "of frame boundaries";
+      return -3;
+    }
   }
 
   // The insert operation invalidates the iterator |rit|.
   packet_list_it = packets_.insert(rit.base(), packet);
 
   int returnLength = InsertBuffer(frame_buffer, packet_list_it);
   UpdateCompleteSession();
   if (decode_error_mode == kWithErrors)