Bug 1050461 - Clean up webrtc upstream h264 mode 0 support and jitter buffer code, remove kludges. r=jesup, a=sledru
author     Mo Zanaty <mzanaty@cisco.com>
date       Sat, 09 Aug 2014 01:46:04 -0400
changeset  216372 d92bf0799e5c9b1a6e80dfe769c029ce2727afca
parent     216371 aa7f00d2f78d3fe978f64bf4a4e89d215609c3e5
child      216373 240a0232bc04869dbb9d40775dd67b49645f173b
push id    3857
push user  raliiev@mozilla.com
push date  Tue, 02 Sep 2014 16:39:23 +0000
treeherder mozilla-beta@5638b907b505
reviewers  jesup, sledru
bugs       1050461
milestone  33.0a2
Bug 1050461 - Clean up webrtc upstream h264 mode 0 support and jitter buffer code, remove kludges. r=jesup, a=sledru

Changes the H.264 RTP receiver to handle multiple NAL units per frame with the same timestamp. A single jitter buffer frame is created for all packets with the same RTP timestamp. NAL units are depacketized upon insertion into the encoded frame buffer. Depacketization includes insertion of start codes and removal of fragmentation and aggregation unit headers.
media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h
media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_receiver_video.cc
media/webrtc/trunk/webrtc/modules/video_coding/main/source/packet.cc
media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.h
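
The depacketization described in the commit message can be illustrated in isolation. The following is a minimal standalone sketch (not part of the changeset; the helper name and use of std::vector are illustrative) of rebuilding an Annex B NAL unit from the start fragment of an FU-A payload, per RFC 6184:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Rewrite the start fragment of an FU-A payload as Annex B: a 4-byte start
  // code, the reconstructed NAL header (F/NRI bits from the FU indicator,
  // original type from the FU header), then the fragment data.
  std::vector<uint8_t> FuAStartToAnnexB(const uint8_t* payload, size_t len) {
    std::vector<uint8_t> out;
    if (len < 2)
      return out;                       // need FU indicator + FU header
    const uint8_t fu_indicator = payload[0];
    const uint8_t fu_header = payload[1];
    if ((fu_header & 0x80) == 0)
      return out;                       // S bit clear: not the start fragment
    const uint8_t nal_header = (fu_indicator & 0xE0) | (fu_header & 0x1F);
    const uint8_t start_code[] = {0, 0, 0, 1};
    out.insert(out.end(), start_code, start_code + sizeof(start_code));
    out.push_back(nal_header);
    out.insert(out.end(), payload + 2, payload + len);
    return out;
  }
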
--- a/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
+++ b/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
@@ -28,18 +28,18 @@ RtpFormatH264::RtpFormatH264(const uint8
       payload_size_(static_cast<int>(payload_size)),
       max_payload_len_(static_cast<int>(max_payload_len)),
       fragments_(0),
       fragment_size_(0),
       next_fragment_(-1) {
   if (payload_size_ <= max_payload_len_) {
     fragments_ = 0;
   } else {
-    fragment_size_ = max_payload_len_ - kH264FUAHeaderLengthInBytes;
-    fragments_ = ((payload_size_ - kH264NALHeaderLengthInBytes) + (fragment_size_-1)) /
+    fragment_size_ = max_payload_len_ - (kFuAHeaderOffset+kFuAHeaderSize);
+    fragments_ = ((payload_size_ - kNalHeaderSize) + (fragment_size_-1)) /
                  fragment_size_;
     next_fragment_ = 0;
   }
 }
 
 RtpFormatH264::~RtpFormatH264() {
 }
 
@@ -54,89 +54,88 @@ int RtpFormatH264::NextPacket(uint8_t* b
 
   // TODO(jesup) This supports Mode 1 packetization only
 
   // For mode 0, it's all single-NAL, and maybe deal with that by simply
   // setting a large max_payload_len when constructing this (and tell the
   // codec to keep generated NAL sizes less than one packet).  If the codec
   // goes over, a fragmented RTP packet would be sent (and may work or not).
   uint8_t header = payload_data_[0];
-  uint8_t type   = header & kH264NAL_TypeMask;
+  uint8_t type   = header & kTypeMask;
   if (payload_size_ <= max_payload_len_) {
 //#define TEST_STAP_A
 #ifdef TEST_STAP_A
     static uint8_t sps_buffer[256];
     static uint32_t sps_size;
-    if (type == kH264NALU_SPS) {
+    if (type == kSps) {
 
-      sps_buffer[0] = kH264NALU_STAPA;
+      sps_buffer[0] = kStapA;
       *(reinterpret_cast<uint16_t*>(&sps_buffer[1])) = htons(payload_size_); // include NAL byte
       memcpy(&sps_buffer[1 + sizeof(uint16_t)], payload_data_, payload_size_);
       sps_size = 1 + sizeof(uint16_t) + payload_size_;
       *bytes_to_send = 0;
       return -1;
-    } else if (type == kH264NALU_PPS && sps_size != 0) {
+    } else if (type == kPps && sps_size != 0) {
       // Send a STAP-A of SPS/PPS
       *(reinterpret_cast<uint16_t*>(&sps_buffer[sps_size])) = htons(payload_size_);
       memcpy(&sps_buffer[sps_size + sizeof(uint16_t)], payload_data_, payload_size_);
       memcpy(buffer, sps_buffer, sps_size + 2 + payload_size_);
       *bytes_to_send = sps_size + 2 + payload_size_;
       sps_size = 0;
       *last_packet   = false;
       return 0;
     }
 #endif
     // single NAL_UNIT
     *bytes_to_send = payload_size_;
     // TODO(jesup) - this doesn't work correctly for Mode 0.
     // Unfortunately, we don't have a good signal to which NAL generated by
     // the encoder is the last NAL of the frame.  We need that to be passed
     // through to this point, instead of trying to generate it from the packets
-    if (type == kH264NALU_SPS || type == kH264NALU_PPS ||
-        type == kH264NALU_SEI||type == kh264NALU_PREFIX) {
+    if (type == kSps || type == kPps ||
+        type == kSei || type == kPrefix) {
       *last_packet   = false;
     } else {
       *last_packet   = true;
     }
     memcpy(buffer, payload_data_, payload_size_);
     WEBRTC_TRACE(kTraceStream, kTraceRtpRtcp, -1,
                  "RtpFormatH264(single NALU with type:%d, payload_size:%d",
                  type, payload_size_);
     return 0;
   } else {
-    uint8_t fu_indicator = (header & (kH264NAL_FBit | kH264NAL_NRIMask)) |
-                           kH264NALU_FUA;
+    uint8_t fu_indicator = (header & (kFBit | kNriMask)) | kFuA;
     uint8_t fu_header = 0;
     bool first_fragment = (next_fragment_ == 0);
     bool last_fragment = (next_fragment_ == (fragments_ -1));
 
     // S | E | R | 5 bit type.
-    fu_header |= (first_fragment ? kH264FU_SBit : 0);
-    fu_header |= (last_fragment ? kH264FU_EBit :0);
+    fu_header |= (first_fragment ? kFragStartBit : 0);
+    fu_header |= (last_fragment ? kFragEndBit :0);
     fu_header |= type;
     buffer[0] = fu_indicator;
     buffer[1] = fu_header;
 
     if (last_fragment) {
       // last fragment
       *bytes_to_send = payload_size_ -
-                       kH264NALHeaderLengthInBytes -
+                       kNalHeaderSize -
                        next_fragment_ * fragment_size_ +
-                       kH264FUAHeaderLengthInBytes;
+                       kFuAHeaderOffset+kFuAHeaderSize;
       *last_packet   = true;
-      memcpy(buffer + kH264FUAHeaderLengthInBytes,
-             payload_data_ + kH264NALHeaderLengthInBytes +
+      memcpy(buffer + kFuAHeaderOffset+kFuAHeaderSize,
+             payload_data_ + kNalHeaderSize +
                 next_fragment_ * fragment_size_,
-             *bytes_to_send - kH264FUAHeaderLengthInBytes);
+             *bytes_to_send - (kFuAHeaderOffset+kFuAHeaderSize));
       // We do not send original NALU header
     } else {
-      *bytes_to_send = fragment_size_ + kH264FUAHeaderLengthInBytes;
+      *bytes_to_send = fragment_size_ + kFuAHeaderOffset+kFuAHeaderSize;
       *last_packet   = false;
-      memcpy(buffer + kH264FUAHeaderLengthInBytes,
-             payload_data_ + kH264NALHeaderLengthInBytes +
+      memcpy(buffer + kFuAHeaderOffset+kFuAHeaderSize,
+             payload_data_ + kNalHeaderSize +
                  next_fragment_ * fragment_size_,
              fragment_size_);  // We do not send original NALU header
     }
     next_fragment_++;
     return 1;
   }
 }
 
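
For reference, the FU-A fragmentation that NextPacket performs above can be sketched as a standalone function (illustrative only, assuming the RFC 6184 layout; this is not the packetizer's actual interface):

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Split one NAL unit into FU-A payloads of at most max_payload_len bytes.
  // Each payload carries an FU indicator, an FU header with S/E bits, and a
  // slice of the NAL data; the original NAL header itself is not sent.
  std::vector<std::vector<uint8_t>> FragmentFuA(const uint8_t* nal, size_t nal_size,
                                                size_t max_payload_len) {
    std::vector<std::vector<uint8_t>> packets;
    if (nal_size < 2 || max_payload_len <= 2)
      return packets;
    const uint8_t nal_header = nal[0];
    const uint8_t fu_indicator = (nal_header & 0xE0) | 28;   // F/NRI bits, type 28 = FU-A
    const size_t max_fragment = max_payload_len - 2;         // room after the two FU bytes
    size_t offset = 1;                                       // skip the original NAL header
    while (offset < nal_size) {
      const size_t fragment = std::min(max_fragment, nal_size - offset);
      uint8_t fu_header = nal_header & 0x1F;                 // original NAL type
      if (offset == 1) fu_header |= 0x80;                    // S bit on the first fragment
      if (offset + fragment == nal_size) fu_header |= 0x40;  // E bit on the last fragment
      std::vector<uint8_t> pkt;
      pkt.push_back(fu_indicator);
      pkt.push_back(fu_header);
      pkt.insert(pkt.end(), nal + offset, nal + offset + fragment);
      packets.push_back(pkt);
      offset += fragment;
    }
    return packets;
  }
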
--- a/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h
+++ b/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h
@@ -29,42 +29,55 @@
 #include "webrtc/system_wrappers/interface/constructor_magic.h"
 #include "webrtc/typedefs.h"
 
 namespace webrtc {
 
 // Packetizer for H264.
 class RtpFormatH264 {
  public:
-  enum {
-    kH264NALU_SLICE             = 1,
-    kH264NALU_IDR               = 5,
-    kH264NALU_SEI               = 6,
-    kH264NALU_SPS               = 7,
-    kH264NALU_PPS               = 8,
-    kh264NALU_PREFIX            = 14,
-    kH264NALU_STAPA             = 24,
-    kH264NALU_FUA               = 28
+
+  // This supports H.264 RTP packetization modes 0/1 from RFC 6184
+  // Single NALU: NAL Header (1 byte), Data...
+  // FU-A   NALU: NAL Header, FU Header (1 byte), Data...
+  // STAP-A NALU: NAL Header, Length1 (2 bytes), Data1, Length2, Data2...
+
+  enum NalHeader { // Network Abstraction Layer Unit Header
+    kNalHeaderOffset = 0, // start of every RTP payload
+    kNalHeaderSize = 1, // 1 byte:
+    kTypeMask = 0x1f, // bits 0-4: NAL Type
+    kNriMask = 0x60, // bits 5-6: Non-Ref Indicator
+    kFBit = 0x80, // bit 7: Forbidden (always 0)
   };
 
-  static const int kH264NALHeaderLengthInBytes = 1;
-  static const int kH264FUAHeaderLengthInBytes = 2;
-
-// bits for FU (A and B) indicators
-  enum H264NalDefs {
-    kH264NAL_FBit = 0x80,
-    kH264NAL_NRIMask = 0x60,
-    kH264NAL_TypeMask = 0x1F
+  enum NalType { // 0-23 from H.264, 24-31 from RFC 6184
+    kIpb = 1, // I/P/B slice
+    kIdr = 5, // IDR slice
+    kSei = 6, // Supplementary Enhancement Info
+    kSeiRecPt = 6, // Recovery Point SEI Payload
+    kSps = 7, // Sequence Parameter Set
+    kPps = 8, // Picture Parameter Set
+    kPrefix = 14, // Prefix
+    kStapA = 24, // Single-Time Aggregation Packet Type A
+    kFuA = 28, // Fragmentation Unit Type A
   };
 
-  enum H264FUDefs {
-    // bits for FU (A and B) headers
-    kH264FU_SBit = 0x80,
-    kH264FU_EBit = 0x40,
-    kH264FU_RBit = 0x20
+  enum FuAHeader {
+    kFuAHeaderOffset = 1, // follows NAL Header
+    kFuAHeaderSize = 1, // 1 byte: bits 0-4: Original NAL Type
+    kFragStartBit = 0x80, // bit 7: Start of Fragment
+    kFragEndBit = 0x40, // bit 6: End of Fragment
+    kReservedBit = 0x20 // bit 5: Reserved
+  };
+  enum StapAHeader {
+    kStapAHeaderOffset = 1, // follows NAL Header
+    kAggUnitLengthSize = 2 // 2-byte length of next NALU including NAL header
+  };
+  enum StartCodePrefix { // H.264 Annex B format {0,0,0,1}
+    kStartCodeSize = 4 // 4 byte prefix before each NAL header
   };
 
   // Initialize with payload from encoder.
   // The payload_data must be exactly one encoded H264 frame.
   RtpFormatH264(const uint8_t* payload_data,
                 uint32_t payload_size,
                 int max_payload_len);
 
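
The payload layouts documented in these enums determine where the original NAL type sits in a received payload. A self-contained sketch of that lookup (duplicating the constant values rather than including the header; names are illustrative, not the receiver's API):

  #include <cstddef>
  #include <cstdint>

  namespace {
  const uint8_t kTypeMask = 0x1f;
  const uint8_t kFuA = 28, kStapA = 24;
  const size_t kFuAHeaderOffset = 1;                  // FU header follows the NAL header
  const size_t kStapAHeaderOffset = 1;                // first aggregation unit follows it
  const size_t kAggUnitLengthSize = 2;                // 2-byte length before each NALU
  }  // namespace

  // Returns the NAL type of the first (or only) NAL unit carried in the
  // payload, or -1 if the payload is too short for the layout its type implies.
  int FirstNalType(const uint8_t* payload, size_t len) {
    if (len == 0) return -1;
    size_t offset = 0;
    switch (payload[0] & kTypeMask) {
      case kFuA:
        offset = kFuAHeaderOffset;                    // type lives in the FU header
        break;
      case kStapA:
        offset = kStapAHeaderOffset + kAggUnitLengthSize;  // skip the 2-byte length
        break;
      default:
        return payload[0] & kTypeMask;                // single NAL unit packet
    }
    if (offset >= len) return -1;                     // malformed
    return payload[offset] & kTypeMask;
  }
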
--- a/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_receiver_video.cc
+++ b/media/webrtc/trunk/webrtc/modules/rtp_rtcp/source/rtp_receiver_video.cc
@@ -120,19 +120,19 @@ int32_t RTPReceiverVideo::ParseVideoCode
     bool is_first_packet) {
   WEBRTC_TRACE(kTraceStream,
                kTraceRtpRtcp,
                id_,
                "%s(timestamp:%u)",
                __FUNCTION__,
                rtp_header->header.timestamp);
 
+  rtp_header->type.Video.isFirstPacket = is_first_packet;
   switch (rtp_header->type.Video.codec) {
     case kRtpVideoGeneric:
-      rtp_header->type.Video.isFirstPacket = is_first_packet;
       return ReceiveGenericCodec(rtp_header, payload_data, payload_data_length);
     case kRtpVideoVp8:
       return ReceiveVp8Codec(rtp_header, payload_data, payload_data_length);
     case kRtpVideoH264:
       return ReceiveH264Codec(rtp_header, payload_data, payload_data_length);
     case kRtpVideoNone:
       break;
   }
@@ -227,145 +227,52 @@ int32_t RTPReceiverVideo::ReceiveVp8Code
     return -1;
   }
   return 0;
 }
 
 int32_t RTPReceiverVideo::ReceiveH264Codec(WebRtcRTPHeader* rtp_header,
                                           const uint8_t* payload_data,
                                           uint16_t payload_data_length) {
-  // real payload
-  uint8_t* payload;
-  uint16_t payload_length;
-  uint8_t nal_type = payload_data[0] & RtpFormatH264::kH264NAL_TypeMask;
-
-  // Note: This code handles only FU-A and single NALU mode packets.
-  if (nal_type == RtpFormatH264::kH264NALU_FUA) {
-    // Fragmentation
-    uint8_t fnri = payload_data[0] & 
-                   (RtpFormatH264::kH264NAL_FBit | RtpFormatH264::kH264NAL_NRIMask);
-    uint8_t original_nal_type = payload_data[1] & RtpFormatH264::kH264NAL_TypeMask;
-    bool first_fragment = !!(payload_data[1] & RtpFormatH264::kH264FU_SBit);
-    //bool last_fragment = !!(payload_data[1] & RtpFormatH264::kH264FU_EBit);
-
-    uint8_t original_nal_header = fnri | original_nal_type;
-    if (first_fragment) {
-      payload = const_cast<uint8_t*> (payload_data) +
-          RtpFormatH264::kH264NALHeaderLengthInBytes;
-      payload[0] = original_nal_header;
-      payload_length = payload_data_length -
-          RtpFormatH264::kH264NALHeaderLengthInBytes;
-    } else {
-      payload = const_cast<uint8_t*> (payload_data)  +
-          RtpFormatH264::kH264FUAHeaderLengthInBytes;
-      payload_length = payload_data_length -
-          RtpFormatH264::kH264FUAHeaderLengthInBytes;
-    }
-
-    // WebRtcRTPHeader
-    if (original_nal_type == RtpFormatH264::kH264NALU_IDR) {
+  size_t offset = RtpFormatH264::kNalHeaderOffset;
+  uint8_t nal_type = payload_data[offset] & RtpFormatH264::kTypeMask;
+  rtp_header->type.Video.codecHeader.H264.nalu_header = nal_type;
+  // get original NAL type if FU-A or STAP-A
+  switch (nal_type) {
+    case RtpFormatH264::kFuA:
+      offset = RtpFormatH264::kFuAHeaderOffset;
+      if (offset >= payload_data_length) return -1; // malformed
+      nal_type = payload_data[offset] & RtpFormatH264::kTypeMask;
+      break;
+    case RtpFormatH264::kStapA:
+      offset = RtpFormatH264::kStapAHeaderOffset +
+               RtpFormatH264::kAggUnitLengthSize;
+      if (offset >= payload_data_length) return -1; // malformed
+      nal_type = payload_data[offset] & RtpFormatH264::kTypeMask;
+      break;
+    default:
+      break;
+  }
+  // key frames start with SPS, PPS, IDR, or Recovery Point SEI
+  rtp_header->frameType = kVideoFrameDelta;
+  switch (nal_type) {
+    case RtpFormatH264::kSei: // check if it is a Recovery Point SEI (aka GDR)
+      if (offset+1 >= payload_data_length) return -1; // malformed
+      if (payload_data[offset+1] != RtpFormatH264::kSeiRecPt) break;
+      // else fall through since GDR is like IDR
+    case RtpFormatH264::kSps:
+    case RtpFormatH264::kPps:
+    case RtpFormatH264::kIdr:
       rtp_header->frameType = kVideoFrameKey;
-    } else {
-      rtp_header->frameType = kVideoFrameDelta;
-    }
-    rtp_header->type.Video.codec    = kRtpVideoH264;
-    rtp_header->type.Video.isFirstPacket = first_fragment;
-    RTPVideoHeaderH264* h264_header = &rtp_header->type.Video.codecHeader.H264;
-    h264_header->nalu_header        = original_nal_header;
-    h264_header->single_nalu        = false;
-
-  } else if (nal_type == RtpFormatH264::kH264NALU_STAPA) {
-
-    payload = const_cast<uint8_t*> (payload_data) +
-              RtpFormatH264::kH264NALHeaderLengthInBytes;
-    size_t size = payload_data_length -
-                  RtpFormatH264::kH264NALHeaderLengthInBytes;
-    uint32_t timestamp = rtp_header->header.timestamp;
-    rtp_header->type.Video.codec    = kRtpVideoH264;
-    rtp_header->type.Video.isFirstPacket = true;
-    RTPVideoHeaderH264* h264_header = &rtp_header->type.Video.codecHeader.H264;
-    h264_header->single_nalu        = true;
-
-    while (size > 0) {
-      payload_length = ntohs(*(reinterpret_cast<uint16_t*>(payload)));
-      // payload_length includes the NAL type byte
-      payload += sizeof(uint16_t); // points to NAL byte and then N bytes of NAL data
-      h264_header->nalu_header        = payload[0];
-      switch (*payload & RtpFormatH264::kH264NAL_TypeMask) {
-        case RtpFormatH264::kH264NALU_SPS:
-          // TODO(jesup): Evil hack.  see below
-          rtp_header->header.timestamp = timestamp - 20;
-          rtp_header->frameType = kVideoFrameKey;
-          break;
-        case RtpFormatH264::kH264NALU_PPS:
-          // TODO(jesup): Evil hack.  see below
-          rtp_header->header.timestamp = timestamp - 10;
-          rtp_header->frameType = kVideoFrameKey;
-          break;
-        case RtpFormatH264::kh264NALU_PREFIX:
-          rtp_header->header.timestamp = timestamp - 5;
-          rtp_header->frameType = kVideoFrameKey;
-          break;
-        case RtpFormatH264::kH264NALU_IDR:
-          rtp_header->frameType = kVideoFrameKey;
-          break;
-        default:
-          rtp_header->frameType = kVideoFrameDelta;
-          break;
-      }
-      if (data_callback_->OnReceivedPayloadData(payload,
-                                                payload_length,
-                                                rtp_header) != 0) {
-        return -1;
-      }
-      payload += payload_length;
-      assert(size >= sizeof(uint16_t) + payload_length);
-      size -= sizeof(uint16_t) + payload_length;
-    }
-    return 0;
-
-  } else {
-
-    // single NALU
-    payload = const_cast<uint8_t*> (payload_data);
-    payload_length = payload_data_length;
-
-    rtp_header->type.Video.codec    = kRtpVideoH264;
-    rtp_header->type.Video.isFirstPacket = true;
-    RTPVideoHeaderH264* h264_header = &rtp_header->type.Video.codecHeader.H264;
-    h264_header->nalu_header        = payload_data[0];
-    h264_header->single_nalu        = true;
-
-    // WebRtcRTPHeader
-    switch (nal_type) {
-      // TODO(jesup): Evil hack.  The jitter buffer *really* doesn't like
-      // "frames" to have the same timestamps.  NOTE: this only works
-      // for SPS/PPS/IDR, not for PPS/SPS/IDR.  Keep this until all issues
-      // are resolved in the jitter buffer
-      case RtpFormatH264::kH264NALU_SPS:
-        rtp_header->header.timestamp -= 10;
-        // fall through
-      case RtpFormatH264::kH264NALU_PPS:
-        rtp_header->header.timestamp -= 10;
-        // fall through
-      case RtpFormatH264::kh264NALU_PREFIX:
-        rtp_header->header.timestamp -= 5;
-        // fall through
-      case RtpFormatH264::kH264NALU_IDR:
-        rtp_header->frameType = kVideoFrameKey;
-        break;
-      default:
-        rtp_header->frameType = kVideoFrameDelta;
-        break;
-    }
+      break;
   }
 
-  if (data_callback_->OnReceivedPayloadData(payload,
-                                            payload_length,
-                                            rtp_header) != 0) {
+  // receive payloads as-is, depacketize later when moving to frame buffer
+  if (data_callback_->OnReceivedPayloadData(
+      payload_data, payload_data_length, rtp_header) != 0) {
     return -1;
   }
   return 0;
 }
 
 int32_t RTPReceiverVideo::ReceiveGenericCodec(
     WebRtcRTPHeader* rtp_header,
     const uint8_t* payload_data,
--- a/media/webrtc/trunk/webrtc/modules/video_coding/main/source/packet.cc
+++ b/media/webrtc/trunk/webrtc/modules/video_coding/main/source/packet.cc
@@ -105,17 +105,16 @@ void VCMPacket::CopyCodecSpecifics(const
           completeNALU = kNaluEnd;
       else
           completeNALU = kNaluIncomplete;
 
       codec = kVideoCodecVP8;
       break;
     }
     case kRtpVideoH264: {
-      uint8_t nal_type = videoHeader.codecHeader.H264.nalu_header & RtpFormatH264::kH264NAL_TypeMask;
       isFirstPacket = videoHeader.isFirstPacket;
       if (isFirstPacket) {
         insertStartCode = true;
       }
       if (videoHeader.codecHeader.H264.single_nalu) {
          completeNALU = kNaluComplete;
       } else if (isFirstPacket)
          completeNALU = kNaluStart;
--- a/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
+++ b/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.cc
@@ -4,17 +4,16 @@
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
 #include "webrtc/modules/video_coding/main/source/session_info.h"
-
 #include "webrtc/modules/video_coding/main/source/packet.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
 
 namespace webrtc {
 
 // Used in determining whether a frame is decodable.
 enum {kRttThreshold = 100};  // Not decodable if Rtt is lower than this.
 
@@ -111,48 +110,114 @@ int VCMSessionInfo::SessionLength() cons
     length += (*it).sizeBytes;
   return length;
 }
 
 int VCMSessionInfo::NumPackets() const {
   return packets_.size();
 }
 
+void VCMSessionInfo::CopyPacket(uint8_t* dst, const uint8_t* src, size_t len) {
+  memcpy(dst, src, len);
+}
+
+void VCMSessionInfo::CopyWithStartCode(uint8_t* dst, const uint8_t* src, size_t len) {
+  // H.264 Start Code is 2 or more bytes of 0 followed by 1 byte of 1.
+  memset(dst, 0, RtpFormatH264::kStartCodeSize-1);
+  dst[RtpFormatH264::kStartCodeSize-1] = 1;
+  CopyPacket(dst + RtpFormatH264::kStartCodeSize, src, len);
+}
+
 int VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer,
                                  PacketIterator packet_it) {
   VCMPacket& packet = *packet_it;
-  PacketIterator it;
 
-  int packet_size = packet.sizeBytes;
-  packet_size += (packet.insertStartCode ? kH264StartCodeLengthBytes : 0);
-
-  // Calculate the offset into the frame buffer for this packet.
-  int offset = 0;
-  for (it = packets_.begin(); it != packet_it; ++it)
-    offset += (*it).sizeBytes;
+  // Advance to the offset into the frame buffer for this packet.
+  for (PacketIterator it = packets_.begin(); it != packet_it; ++it)
+    frame_buffer += (*it).sizeBytes;
 
-  // Set the data pointer to pointing to the start of this packet in the
-  // frame buffer.
-  const uint8_t* data = packet.dataPtr;
-  packet.dataPtr = frame_buffer + offset;
-  packet.sizeBytes = packet_size;
-
-  ShiftSubsequentPackets(packet_it, packet_size);
-
-  const uint8_t startCode[] = {0, 0, 0, 1};
-  if (packet.insertStartCode) {
-    memcpy(const_cast<uint8_t*>(packet.dataPtr), startCode,
-           kH264StartCodeLengthBytes);
+  if (packet.codec == kVideoCodecH264) {
+    // Calculate extra packet size needed for adding start codes,
+    // and removing fragmentation and aggregation unit headers.
+    size_t nalu_size;
+    size_t all_nalu_size = 0;
+    const uint8_t* nalu_ptr = packet.dataPtr;
+    uint8_t nal_header = *nalu_ptr;
+    uint8_t fu_header;
+    switch (nal_header & RtpFormatH264::kTypeMask) {
+      case RtpFormatH264::kFuA:
+        fu_header = nalu_ptr[RtpFormatH264::kFuAHeaderOffset];
+        if (fu_header & RtpFormatH264::kFragStartBit) {
+          nal_header &= ~RtpFormatH264::kTypeMask; // Keep F/NRI bits
+          nal_header |= fu_header & RtpFormatH264::kTypeMask; // Keep NAL type
+          packet.sizeBytes -= RtpFormatH264::kFuAHeaderOffset;
+          packet.dataPtr += RtpFormatH264::kFuAHeaderOffset;
+          ShiftSubsequentPackets(packet_it, packet.sizeBytes +
+                                 RtpFormatH264::kStartCodeSize);
+          CopyWithStartCode(frame_buffer, packet.dataPtr, packet.sizeBytes);
+          frame_buffer[RtpFormatH264::kStartCodeSize] = nal_header;
+          packet.sizeBytes += RtpFormatH264::kStartCodeSize;
+          packet.dataPtr = frame_buffer;
+          packet.completeNALU = kNaluStart;
+        } else {
+          packet.sizeBytes -= RtpFormatH264::kFuAHeaderOffset +
+                              RtpFormatH264::kFuAHeaderSize;
+          packet.dataPtr += RtpFormatH264::kFuAHeaderOffset +
+                            RtpFormatH264::kFuAHeaderSize;
+          ShiftSubsequentPackets(packet_it, packet.sizeBytes);
+          CopyPacket(frame_buffer, packet.dataPtr, packet.sizeBytes);
+          packet.dataPtr = frame_buffer;
+          if (fu_header & RtpFormatH264::kFragEndBit) {
+            packet.completeNALU = kNaluEnd;
+          } else {
+            packet.completeNALU = kNaluIncomplete;
+          }
+        }
+        break;
+      case RtpFormatH264::kStapA:
+        packet.sizeBytes -= RtpFormatH264::kStapAHeaderOffset;
+        packet.dataPtr += RtpFormatH264::kStapAHeaderOffset;
+        for (nalu_ptr = packet.dataPtr;
+             nalu_ptr < packet.dataPtr + packet.sizeBytes;
+             nalu_ptr += nalu_size + RtpFormatH264::kAggUnitLengthSize) {
+          nalu_size = (nalu_ptr[0] << 8) + nalu_ptr[1];
+          all_nalu_size += nalu_size + RtpFormatH264::kStartCodeSize;
+        }
+        if (nalu_ptr > packet.dataPtr + packet.sizeBytes) {
+          // malformed packet
+          packet.completeNALU = kNaluIncomplete;
+          return -1;
+        }
+        ShiftSubsequentPackets(packet_it, all_nalu_size);
+        for (nalu_ptr = packet.dataPtr;
+             nalu_ptr < packet.dataPtr + packet.sizeBytes;
+             nalu_ptr += nalu_size + RtpFormatH264::kAggUnitLengthSize) {
+          nalu_size = (nalu_ptr[0] << 8) + nalu_ptr[1];
+          CopyWithStartCode(frame_buffer, nalu_ptr+2, nalu_size);
+          frame_buffer += nalu_size + RtpFormatH264::kStartCodeSize;
+        }
+        packet.sizeBytes = all_nalu_size;
+        packet.dataPtr = frame_buffer - all_nalu_size;
+        packet.completeNALU = kNaluComplete;
+        break;
+      default:
+        ShiftSubsequentPackets(packet_it, packet.sizeBytes +
+                               RtpFormatH264::kStartCodeSize);
+        CopyWithStartCode(frame_buffer, packet.dataPtr, packet.sizeBytes);
+        packet.sizeBytes += RtpFormatH264::kStartCodeSize;
+        packet.dataPtr = frame_buffer;
+        packet.completeNALU = kNaluComplete;
+        break;
+    } // switch nal_type
+  } else { // not H.264
+    ShiftSubsequentPackets(packet_it, packet.sizeBytes);
+    CopyPacket(frame_buffer, packet.dataPtr, packet.sizeBytes);
+    packet.dataPtr = frame_buffer;
   }
-  memcpy(const_cast<uint8_t*>(packet.dataPtr
-      + (packet.insertStartCode ? kH264StartCodeLengthBytes : 0)),
-      data,
-      packet.sizeBytes);
-
-  return packet_size;
+  return packet.sizeBytes;
 }
 
 void VCMSessionInfo::ShiftSubsequentPackets(PacketIterator it,
                                             int steps_to_shift) {
   ++it;
   if (it == packets_.end())
     return;
   uint8_t* first_packet_ptr = const_cast<uint8_t*>((*it).dataPtr);
@@ -415,80 +480,49 @@ int VCMSessionInfo::InsertPacket(const V
       break;
 
   // Check for duplicate packets.
   if (rit != packets_.rend() &&
       (*rit).seqNum == packet.seqNum && (*rit).sizeBytes > 0)
     return -2;
 
   PacketIterator packet_list_it;
-  if (packet.codec == kVideoCodecH264) {
-    RTPVideoHeaderH264 h264 = packet.codecSpecificHeader.codecHeader.H264;
-    uint8_t nal_type = h264.nalu_header & RtpFormatH264::kH264NAL_TypeMask;
-
-    if (packet.isFirstPacket) {
-      if (HaveFirstPacket() == false ||
-          IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum)) {
-        first_packet_seq_num_ = packet.seqNum;
-        frame_type_ = packet.frameType;
-      }
-    }
-
-    // TODO(jesup) Handle STAP-A's here, since they must share a timestamp.  Break
-    // into individual packets at this point, then handle like kNaluCompletes
-
-    // Ignore Marker bit for reassembly, since it's not 100% guaranteed to be correct
-    // Look at kNaluComplete (single_nal), or an unbroken sequence of
-    // isFirstPacket/kNaluStart (FU-A with S bit), FU-A's, FU-A with E bit (kNaluEnd)
-    if ((packet.completeNALU == kNaluComplete || packet.completeNALU == kNaluEnd) &&
-        last_packet_seq_num_ == -1) {
-      last_packet_seq_num_ = static_cast<int>(packet.seqNum);
-    } else if (last_packet_seq_num_ != -1 &&
-      IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
-      //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
-      //                 " of frame boundaries";
-      return -3;
-    }
 
-    // The insert operation invalidates the iterator |rit|.
-    packet_list_it = packets_.insert(rit.base(), packet);
-  } else {
-    // Only insert media packets between first and last packets (when available).
-    // Placing check here, as to properly account for duplicate packets.
-    // Check if this is first packet (only valid for some codecs)
-    // Should only be set for one packet per session.
-    if (packet.isFirstPacket && first_packet_seq_num_ == -1) {
-      // The first packet in a frame signals the frame type.
-      frame_type_ = packet.frameType;
-      // Store the sequence number for the first packet.
-      first_packet_seq_num_ = static_cast<int>(packet.seqNum);
-    } else if (first_packet_seq_num_ != -1 &&
-      !IsNewerSequenceNumber(packet.seqNum, first_packet_seq_num_)) {
-      //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
-      //                 "of frame boundaries";
-      return -3;
-    } else if (frame_type_ == kFrameEmpty && packet.frameType != kFrameEmpty) {
-      // Update the frame type with the type of the first media packet.
-      // TODO(mikhal): Can this trigger?
-      frame_type_ = packet.frameType;
-    }
+  // Only insert media packets between first and last packets (when available).
+  // Placing check here, as to properly account for duplicate packets.
+  // Check if this is first packet (only valid for some codecs)
+  // Should only be set for one packet per session.
+  if (packet.isFirstPacket && first_packet_seq_num_ == -1) {
+    // The first packet in a frame signals the frame type.
+    frame_type_ = packet.frameType;
+    // Store the sequence number for the first packet.
+    first_packet_seq_num_ = static_cast<int>(packet.seqNum);
+  } else if (first_packet_seq_num_ != -1 &&
+             !IsNewerSequenceNumber(packet.seqNum, first_packet_seq_num_)) {
+    //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
+    //                 "of frame boundaries";
+    return -3;
+  } else if (frame_type_ == kFrameEmpty && packet.frameType != kFrameEmpty) {
+    // Update the frame type with the type of the first media packet.
+    // TODO(mikhal): Can this trigger?
+    frame_type_ = packet.frameType;
+  }
 
-    // Track the marker bit, should only be set for one packet per session.
-    if (packet.markerBit && last_packet_seq_num_ == -1) {
-      last_packet_seq_num_ = static_cast<int>(packet.seqNum);
-    } else if (last_packet_seq_num_ != -1 &&
-        IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
-      //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
-      //                 "of frame boundaries";
-      return -3;
-    }
+  // Track the marker bit, should only be set for one packet per session.
+  if (packet.markerBit && last_packet_seq_num_ == -1) {
+    last_packet_seq_num_ = static_cast<int>(packet.seqNum);
+  } else if (last_packet_seq_num_ != -1 &&
+             IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
+    //LOG(LS_WARNING) << "Received packet with a sequence number which is out "
+    //                 "of frame boundaries";
+    return -3;
+  }
 
-    // The insert operation invalidates the iterator |rit|.
-    packet_list_it = packets_.insert(rit.base(), packet);
-  }
+  // The insert operation invalidates the iterator |rit|.
+  packet_list_it = packets_.insert(rit.base(), packet);
 
   int returnLength = InsertBuffer(frame_buffer, packet_list_it);
   UpdateCompleteSession();
   if (decode_error_mode == kWithErrors)
     decodable_ = true;
   else if (decode_error_mode == kSelectiveErrors)
     UpdateDecodableSession(frame_data);
   return returnLength;
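
The STAP-A branch of InsertBuffer above can be sketched in isolation: walk the 2-byte length-prefixed aggregation units and emit each NAL unit in Annex B form. The patch rewrites the packet in place in the frame buffer; this illustrative helper (not part of the changeset) returns a fresh buffer instead:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Expand a STAP-A payload into a sequence of Annex B NAL units
  // (4-byte start code followed by each aggregated NALU).
  std::vector<uint8_t> StapAToAnnexB(const uint8_t* payload, size_t len) {
    std::vector<uint8_t> out;
    const uint8_t start_code[] = {0, 0, 0, 1};
    size_t offset = 1;                                // skip the STAP-A NAL header
    while (offset + 2 <= len) {
      const size_t nalu_size = (payload[offset] << 8) | payload[offset + 1];
      offset += 2;
      if (offset + nalu_size > len)
        return std::vector<uint8_t>();                // malformed aggregation unit
      out.insert(out.end(), start_code, start_code + sizeof(start_code));
      out.insert(out.end(), payload + offset, payload + offset + nalu_size);
      offset += nalu_size;
    }
    return out;
  }
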
--- a/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.h
+++ b/media/webrtc/trunk/webrtc/modules/video_coding/main/source/session_info.h
@@ -109,16 +109,18 @@ class VCMSessionInfo {
   // for each packet found which doesn't have the beginning bit set.
   PacketIterator FindNextPartitionBeginning(PacketIterator it) const;
 
   // Returns an iterator pointing to the last packet of the partition pointed to
   // by |it|.
   PacketIterator FindPartitionEnd(PacketIterator it) const;
   static bool InSequence(const PacketIterator& it,
                          const PacketIterator& prev_it);
+  void CopyPacket(uint8_t* dst, const uint8_t* src, size_t len);
+  void CopyWithStartCode(uint8_t* dst, const uint8_t* src, size_t len);
   int InsertBuffer(uint8_t* frame_buffer,
                    PacketIterator packetIterator);
   void ShiftSubsequentPackets(PacketIterator it, int steps_to_shift);
   PacketIterator FindNaluEnd(PacketIterator packet_iter) const;
   // Deletes the data of all packets between |start| and |end|, inclusively.
   // Note that this function doesn't delete the actual packets.
   int DeletePacketData(PacketIterator start,
                        PacketIterator end);