Bug 1139779: Part1. Extract display dimension from SPS NAL. r=rillian a=lsblakk
author: Jean-Yves Avenard <jyavenard@mozilla.com>
Tue, 10 Mar 2015 21:19:41 +1100
changeset: 248169 2802cf435885526ed59b1dc5bcc7222ce5a43285
parent: 248168 7f90fdddb5ef9c67946fd313a0ee34173beba31a
child: 248170 8d8cd865b4d074d7b8ba8b4d97778faa1c88b32e
push id: 7776
push user: cpearce@mozilla.com
push date: Wed, 18 Mar 2015 03:12:44 +0000
treeherder: mozilla-aurora@4acd7286be9e [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: rillian, lsblakk
bugs: 1139779
milestone: 38.0a2
Bug 1139779: Part1. Extract display dimension from SPS NAL. r=rillian a=lsblakk
media/libstagefright/binding/H264.cpp
media/libstagefright/binding/include/mp4_demuxer/H264.h
--- a/media/libstagefright/binding/H264.cpp
+++ b/media/libstagefright/binding/H264.cpp
@@ -4,16 +4,17 @@
 
 #include "mozilla/ArrayUtils.h"
 #include "mozilla/PodOperations.h"
 #include "mp4_demuxer/AnnexB.h"
 #include "mp4_demuxer/ByteReader.h"
 #include "mp4_demuxer/ByteWriter.h"
 #include "mp4_demuxer/H264.h"
 #include <media/stagefright/foundation/ABitReader.h>
+#include <cmath>
 
 using namespace mozilla;
 
 namespace mp4_demuxer
 {
 
 class BitReader
 {
@@ -68,17 +69,21 @@ public:
 
 private:
   stagefright::ABitReader mBitReader;
 };
 
 SPSData::SPSData()
 {
   PodZero(this);
-  chroma_format_idc = 1;
+  // Default values when they aren't defined as per ITU-T H.264 (2014/02).
+  chroma_format_idc = 1; // Inferred 4:2:0; the crop unit maths depends on it.
+  video_format = 5;
+  colour_primaries = transfer_characteristics = 2;
+  sample_ratio = 1.0;
 }
 
 /* static */ already_AddRefed<ByteBuffer>
 H264::DecodeNALUnit(const ByteBuffer* aNAL)
 {
   MOZ_ASSERT(aNAL);
 
   if (aNAL->Length() < 4) {
@@ -118,16 +123,25 @@ H264::DecodeNALUnit(const ByteBuffer* aN
       zeros++;
     } else {
       zeros = 0;
     }
   }
   return rbsp.forget();
 }
 
+static int32_t
+ConditionDimension(float aValue)
+{
+  // Excludes NaN, <= 1 and >= 2^31 (float(INT32_MAX) rounds up to 2^31).
+  if (aValue > 1.0 && aValue < float(INT32_MAX))
+    return int32_t(round(aValue));
+  return 0;
+}
+
 /* static */ bool
 H264::DecodeSPS(const ByteBuffer* aSPS, SPSData& aDest)
 {
   MOZ_ASSERT(aSPS);
   BitReader br(*aSPS);
 
   int32_t lastScale;
   int32_t nextScale;
@@ -197,16 +211,22 @@ H264::DecodeSPS(const ByteBuffer* aSPS, 
   aDest.frame_cropping_flag = br.ReadBit();
   if (aDest.frame_cropping_flag) {
     aDest.frame_crop_left_offset = br.ReadUE();
     aDest.frame_crop_right_offset = br.ReadUE();
     aDest.frame_crop_top_offset = br.ReadUE();
     aDest.frame_crop_bottom_offset = br.ReadUE();
   }
 
+  aDest.sample_ratio = 1.0f;
+  aDest.vui_parameters_present_flag = br.ReadBit();
+  if (aDest.vui_parameters_present_flag) {
+    vui_parameters(br, aDest);
+  }
+
   // Calculate common values.
 
   // FFmpeg and VLC ignore the left and top cropping. Do the same here.
 
   uint8_t ChromaArrayType =
     aDest.separate_colour_plane_flag ? 0 : aDest.chroma_format_idc;
   // Calculate width.
   uint32_t CropUnitX = 1;
@@ -221,102 +241,208 @@ H264::DecodeSPS(const ByteBuffer* aSPS, 
   uint32_t CropUnitY = 2 - aDest.frame_mbs_only_flag;
   uint32_t SubHeightC = aDest.chroma_format_idc <= 1 ? 2 : 1;
   if (ChromaArrayType != 0)
     CropUnitY *= SubHeightC;
   uint32_t cropY = CropUnitY * aDest.frame_crop_bottom_offset;
   aDest.pic_height = aDest.pic_height_in_map_units * 16 - cropY;
 
   aDest.interlaced = !aDest.frame_mbs_only_flag;
+
+  // Determine display size.
+  if (aDest.sample_ratio > 1.0) {
+    // Increase the intrinsic width
+    aDest.display_width =
+      ConditionDimension(aDest.pic_width * aDest.sample_ratio);
+    aDest.display_height = aDest.pic_height;
+  } else {
+    // Increase the intrinsic height
+    aDest.display_width = aDest.pic_width;
+    aDest.display_height =
+      ConditionDimension(aDest.pic_height / aDest.sample_ratio);
+  }
+
   return true;
 }
 
 /* static */ void
 H264::vui_parameters(BitReader& aBr, SPSData& aDest)
 {
   aDest.aspect_ratio_info_present_flag = aBr.ReadBit();
-  if (aDest.aspect_ratio_info_present_flag)
-  {
+  if (aDest.aspect_ratio_info_present_flag) {
     aDest.aspect_ratio_idc = aBr.ReadBits(8);
+    aDest.sar_width = aDest.sar_height = 0;
 
-    if (aDest.aspect_ratio_idc == 255 /* EXTENDED_SAR */) {
-      aDest.sar_width  = aBr.ReadBits(16);
-      aDest.sar_height = aBr.ReadBits(16);
+    // From E.2.1 VUI parameters semantics (ITU-T H.264 02/2014)
+    switch (aDest.aspect_ratio_idc) {
+      case 0:
+        // Unspecified
+        break;
+      case 1:
+        /*
+          1:1
+         7680x4320 16:9 frame without horizontal overscan
+         3840x2160 16:9 frame without horizontal overscan
+         1280x720 16:9 frame without horizontal overscan
+         1920x1080 16:9 frame without horizontal overscan (cropped from 1920x1088)
+         640x480 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 1.0f;
+        break;
+      case 2:
+        /*
+          12:11
+         720x576 4:3 frame with horizontal overscan
+         352x288 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 12.0 / 11.0;
+        break;
+      case 3:
+        /*
+          10:11
+         720x480 4:3 frame with horizontal overscan
+         352x240 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 10.0 / 11.0;
+        break;
+      case 4:
+        /*
+          16:11
+         720x576 16:9 frame with horizontal overscan
+         528x576 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 16.0 / 11.0;
+        break;
+      case 5:
+        /*
+          40:33
+         720x480 16:9 frame with horizontal overscan
+         528x480 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 40.0 / 33.0;
+        break;
+      case 6:
+        /*
+          24:11
+         352x576 4:3 frame without horizontal overscan
+         480x576 16:9 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 24.0 / 11.0;
+        break;
+      case 7:
+        /*
+          20:11
+         352x480 4:3 frame without horizontal overscan
+         480x480 16:9 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 20.0 / 11.0;
+        break;
+      case 8:
+        /*
+          32:11
+         352x576 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 32.0 / 11.0;
+        break;
+      case 9:
+        /*
+          80:33
+         352x480 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 80.0 / 33.0;
+        break;
+      case 10:
+        /*
+          18:11
+         480x576 4:3 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 18.0 / 11.0;
+        break;
+      case 11:
+        /*
+          15:11
+         480x480 4:3 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 15.0 / 11.0;
+        break;
+      case 12:
+        /*
+          64:33
+         528x576 16:9 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 64.0 / 33.0;
+        break;
+      case 13:
+        /*
+          160:99
+         528x480 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 160.0 / 99.0;
+        break;
+      case 14:
+        /*
+          4:3
+         1440x1080 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 4.0 / 3.0;
+        break;
+      case 15:
+        /*
+          3:2
+         1280x1080 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 3.0 / 2.0;
+        break;
+      case 16:
+        /*
+          2:1
+         960x1080 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 2.0 / 1.0;
+        break;
+      case 255:
+        /* Extended_SAR */
+        aDest.sar_width  = aBr.ReadBits(16);
+        aDest.sar_height = aBr.ReadBits(16);
+        if (aDest.sar_width && aDest.sar_height) {
+          aDest.sample_ratio = float(aDest.sar_width) / float(aDest.sar_height);
+        }
+        break;
+      default:
+        break;
     }
   }
-  else {
-    aDest.sar_width = aDest.sar_height = 0;
-  }
 
   if (aBr.ReadBit()) { //overscan_info_present_flag
     aDest.overscan_appropriate_flag = aBr.ReadBit();
   }
-  if (aBr.ReadBit()) { //video_signal_type_present_flag
+  aDest.video_signal_type_present_flag = aBr.ReadBit();
+  if (aDest.video_signal_type_present_flag) {
     aDest.video_format = aBr.ReadBits(3);
     aDest.video_full_range_flag = aBr.ReadBit();
     aDest.colour_description_present_flag = aBr.ReadBit();
     if (aDest.colour_description_present_flag) {
       aDest.colour_primaries = aBr.ReadBits(8);
       aDest.transfer_characteristics = aBr.ReadBits(8);
       aDest.matrix_coefficients = aBr.ReadBits(8);
     }
   }
+
   aDest.chroma_loc_info_present_flag = aBr.ReadBit();
-
   if (aDest.chroma_loc_info_present_flag) {
     aDest.chroma_sample_loc_type_top_field = aBr.ReadUE();
     aDest.chroma_sample_loc_type_bottom_field = aBr.ReadUE();
   }
 
-  if (aBr.ReadBit()) { //timing_info_present_flag
+  aDest.timing_info_present_flag = aBr.ReadBit();
+  if (aDest.timing_info_present_flag) {
     aDest.num_units_in_tick = aBr.ReadBits(32);
     aDest.time_scale = aBr.ReadBits(32);
     aDest.fixed_frame_rate_flag = aBr.ReadBit();
   }
-
-  bool hrd_present = false;
-  if (aBr.ReadBit()) { // nal_hrd_parameters_present_flag
-    hrd_parameters(aBr);
-    hrd_present = true;
-  }
-  if (aBr.ReadBit()) { // vcl_hrd_parameters_present_flag
-    hrd_parameters(aBr);
-    hrd_present = true;
-  }
-  if (hrd_present) {
-    aBr.ReadBit(); // low_delay_hrd_flag
-  }
-  aDest.pic_struct_present_flag = aBr.ReadBit();
-  aDest.bitstream_restriction_flag = aBr.ReadBit();
-  if (aDest.bitstream_restriction_flag) {
-    aDest.motion_vectors_over_pic_boundaries_flag = aBr.ReadBit();
-    aDest.max_bytes_per_pic_denom = aBr.ReadUE();
-    aDest.max_bits_per_mb_denom = aBr.ReadUE();
-    aDest.log2_max_mv_length_horizontal = aBr.ReadUE();
-    aDest.log2_max_mv_length_vertical = aBr.ReadUE();
-    aDest.max_num_reorder_frames = aBr.ReadUE();
-    aDest.max_dec_frame_buffering = aBr.ReadUE();
-  }
-}
-
-/* static */ void
-H264::hrd_parameters(BitReader& aBr)
-{
-  uint32_t cpb_cnt_minus1 = aBr.ReadUE();
-  aBr.ReadBits(4); // bit_rate_scale
-  aBr.ReadBits(4); // cpb_size_scale
-  for (uint32_t SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; SchedSelIdx++) {
-    aBr.ReadUE(); // bit_rate_value_minus1[ SchedSelIdx ]
-    aBr.ReadUE(); // cpb_size_value_minus1[ SchedSelIdx ]
-    aBr.ReadBit(); // cbr_flag[ SchedSelIdx ]
-  }
-  aBr.ReadBits(5); // initial_cpb_removal_delay_length_minus1
-  aBr.ReadBits(5); // cpb_removal_delay_length_minus1
-  aBr.ReadBits(5); // dpb_output_delay_length_minus1
-  aBr.ReadBits(5); // time_offset_length
 }
 
 /* static */ bool
 H264::DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest)
 {
   if (!AnnexB::HasSPS(aExtraData)) {
     return false;
   }
--- a/media/libstagefright/binding/include/mp4_demuxer/H264.h
+++ b/media/libstagefright/binding/include/mp4_demuxer/H264.h
@@ -26,16 +26,26 @@ struct SPSData
     pic_height = (2 - frame_mbs_only_flag) * ((pic_height_in_map_units_minus1 + 1) * 16)
                  - (frame_crop_top_offset + frame_crop_bottom_offset) * 2
    */
   uint32_t pic_height;
 
   bool interlaced;
 
   /*
+   Displayed size.
+   display_width and display_height are adjusted according to the display
+   sample aspect ratio.
+   */
+  uint32_t display_width;
+  uint32_t display_height;
+
+  float sample_ratio;
+
+  /*
     H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en)
    http://www.itu.int/rec/T-REC-H.264-201402-I/en
    */
 
   bool constraint_set0_flag;
   bool constraint_set1_flag;
   bool constraint_set2_flag;
   bool constraint_set3_flag;
@@ -217,131 +227,97 @@ struct SPSData
     sar_height. When the aspect_ratio_idc syntax element is not
     present, aspect_ratio_idc value shall be inferred to be
     equal to 0.
    */
   uint8_t aspect_ratio_idc;
   uint32_t sar_width;
   uint32_t sar_height;
 
+  /*
+    video_signal_type_present_flag equal to 1 specifies that video_format,
+    video_full_range_flag and colour_description_present_flag are present.
+    video_signal_type_present_flag equal to 0 specifies that video_format,
+    video_full_range_flag and colour_description_present_flag are not present.
+   */
+  bool video_signal_type_present_flag;
+
+  /*
+    overscan_info_present_flag equal to 1 specifies that the
+    overscan_appropriate_flag is present. When overscan_info_present_flag is
+    equal to 0 or is not present, the preferred display method for the video
+    signal is unspecified (Unspecified).
+   */
   bool overscan_info_present_flag;
+  /*
+    overscan_appropriate_flag equal to 1 indicates that the cropped decoded
+    pictures output are suitable for display using overscan.
+    overscan_appropriate_flag equal to 0 indicates that the cropped decoded
+    pictures output contain visually important information in the entire region
+    out to the edges of the cropping rectangle of the picture
+   */
   bool overscan_appropriate_flag;
 
+  /*
+    video_format indicates the representation of the pictures as specified in
+    Table E-2, before being coded in accordance with this
+    Recommendation | International Standard. When the video_format syntax element
+    is not present, video_format value shall be inferred to be equal to 5.
+    (Unspecified video format)
+   */
   uint8_t video_format;
+
+  /*
+    video_full_range_flag indicates the black level and range of the luma and
+    chroma signals as derived from E′Y, E′PB, and E′PR or E′R, E′G, and E′B
+    real-valued component signals.
+    When the video_full_range_flag syntax element is not present, the value of
+    video_full_range_flag shall be inferred to be equal to 0.
+   */
   bool video_full_range_flag;
+
+  /*
+    colour_description_present_flag equal to 1 specifies that colour_primaries,
+    transfer_characteristics and matrix_coefficients are present.
+    colour_description_present_flag equal to 0 specifies that colour_primaries,
+    transfer_characteristics and matrix_coefficients are not present.
+   */
   bool colour_description_present_flag;
+
+  /*
+    colour_primaries indicates the chromaticity coordinates of the source
+    primaries as specified in Table E-3 in terms of the CIE 1931 definition of
+    x and y as specified by ISO 11664-1.
+    When the colour_primaries syntax element is not present, the value of
+    colour_primaries shall be inferred to be equal to 2 (the chromaticity is
+    unspecified or is determined by the application).
+   */
   uint8_t colour_primaries;
+
+  /*
+    transfer_characteristics indicates the opto-electronic transfer
+    characteristic of the source picture as specified in Table E-4 as a function
+    of a linear optical intensity input Lc with a nominal real-valued range of 0
+    to 1.
+    When the transfer_characteristics syntax element is not present, the value
+    of transfer_characteristics shall be inferred to be equal to 2
+    (the transfer characteristics are unspecified or are determined by the
+    application).
+   */
   uint8_t transfer_characteristics;
+
   uint8_t matrix_coefficients;
   bool chroma_loc_info_present_flag;
   uint32_t chroma_sample_loc_type_top_field;
   uint32_t chroma_sample_loc_type_bottom_field;
+  bool timing_info_present_flag;
   uint32_t num_units_in_tick;
   uint32_t time_scale;
   bool fixed_frame_rate_flag;
 
-  // Bitstream restriction parameters
-
-  /*
-    pic_struct_present_flag equal to 1 specifies that picture timing SEI
-    messages (clause D.2.2) are present that include the pic_struct syntax
-    element. pic_struct_present_flag equal to 0 specifies that the pic_struct
-    syntax element is not present in picture timing SEI messages.
-    When pic_struct_present_flag is not present, its value shall be inferred to
-    be equal to 0.
-   */
-  bool pic_struct_present_flag;
-
-  /*
-    bitstream_restriction_flag equal to 1, specifies that the following coded
-    video sequence bitstream restriction parameters are present.
-    bitstream_restriction_flag equal to 0, specifies that the following coded
-    video sequence bitstream restriction parameters are not present.
-   */
-  bool bitstream_restriction_flag;
-
-  /*
-    motion_vectors_over_pic_boundaries_flag equal to 0 indicates that no
-    sample outside the picture boundaries and no sample at a fractional
-    sample position for which the sample value is derived using one or more
-    samples outside the picture boundaries is used for inter prediction of any
-    sample. motion_vectors_over_pic_boundaries_flag equal to 1 indicates that
-    one or more samples outside picture boundaries may be used in inter
-    prediction. When the motion_vectors_over_pic_boundaries_flag syntax element
-    is not present, motion_vectors_over_pic_boundaries_flag value shall be
-    inferred to be equal to 1.
-   */
-  bool motion_vectors_over_pic_boundaries_flag;
-
-  /*
-    max_bytes_per_pic_denom indicates a number of bytes not exceeded by the
-    sum of the sizes of the VCL NAL units associated with any coded picture in
-    the coded video sequence.
-   */
-  uint32_t max_bytes_per_pic_denom;
-
-  /*
-    max_bits_per_mb_denom indicates an upper bound for the number of coded bits
-    of macroblock_layer( ) data for any macroblock in any picture of the coded
-    video sequence. The value of max_bits_per_mb_denom shall be in the range
-    of 0 to 16, inclusive.
-   */
-  uint32_t max_bits_per_mb_denom;
-
-  /*
-    log2_max_mv_length_horizontal and log2_max_mv_length_vertical indicate the
-    maximum absolute value of a decoded horizontal and vertical motion vector
-    component, respectively, in 1⁄4 luma sample units, for all pictures in the
-    coded video sequence. A value of n asserts that no value of a motion vector
-    component shall exceed the range from −2n to 2n − 1, inclusive, in units
-    of 1⁄4 luma sample displacement. The value of log2_max_mv_length_horizontal
-    shall be in the range of 0 to 16, inclusive. The value of
-    log2_max_mv_length_vertical shall be in the range of 0 to 16, inclusive.
-    When log2_max_mv_length_horizontal is not present, the values of
-    log2_max_mv_length_horizontal and log2_max_mv_length_vertical shall be
-    inferred to be equal to 16.
-   */
-   uint32_t log2_max_mv_length_horizontal;
-   uint32_t log2_max_mv_length_vertical;
-
-  /*
-    max_num_reorder_frames indicates an upper bound for the number of frames
-    buffers, in the decoded picture buffer (DPB), that are required for storing
-    frames, complementary field pairs, and non-paired fields before output.
-    It is a requirement of bitstream conformance that the maximum number of
-    frames, complementary field pairs, or non-paired fields that precede any
-    frame, complementary field pair, or non-paired field in the coded video
-    sequence in decoding order and follow it in output order shall be less than
-    or equal to max_num_reorder_frames. The value of max_num_reorder_frames
-    shall be in the range of 0 to max_dec_frame_buffering, inclusive.
-    When the max_num_reorder_frames syntax element is not present, the value
-    of max_num_reorder_frames value shall be inferred as follows:
-     – If profile_idc is equal to 44, 86, 100, 110, 122, or 244 and
-       constraint_set3_flag is equal to 1, the value of max_num_reorder_frames
-       shall be inferred to be equal to 0.
-     – Otherwise (profile_idc is not equal to 44, 86, 100, 110, 122, or 244 or
-       constraint_set3_flag is equal to 0), the value of max_num_reorder_frames
-       shall be inferred to be equal to MaxDpbFrames.
-   */
-   uint32_t max_num_reorder_frames;
-
-  /*
-    max_dec_frame_buffering specifies the required size of the HRD decoded
-    picture buffer (DPB) in units of frame buffers. It is a requirement of
-    bitstream conformance that the coded video sequence shall not require a
-    decoded picture buffer with size of more than
-    Max( 1, max_dec_frame_buffering ) frame buffers to enable the output of
-    decoded pictures at the output times specified by dpb_output_delay of the
-    picture timing SEI messages. The value of max_dec_frame_buffering shall be
-    greater than or equal to max_num_ref_frames. An upper bound for the value
-    of max_dec_frame_buffering is specified by the level limits in
-    clauses A.3.1, A.3.2, G.10.2.1, and H.10.2.
-   */
-   uint32_t max_dec_frame_buffering;
-
   SPSData();
 };
 
 class H264
 {
 public:
   static bool DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest);
   /* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content.