Bug 1139779: Part1. Extract display dimension from SPS NAL. r=rillian a=lsblakk
--- a/media/libstagefright/binding/H264.cpp
+++ b/media/libstagefright/binding/H264.cpp
@@ -4,16 +4,17 @@
#include "mozilla/ArrayUtils.h"
#include "mozilla/PodOperations.h"
#include "mp4_demuxer/AnnexB.h"
#include "mp4_demuxer/ByteReader.h"
#include "mp4_demuxer/ByteWriter.h"
#include "mp4_demuxer/H264.h"
#include <media/stagefright/foundation/ABitReader.h>
+#include <cmath>
using namespace mozilla;
namespace mp4_demuxer
{
class BitReader
{
@@ -68,17 +69,21 @@ public:
private:
stagefright::ABitReader mBitReader;
};
+// Zero-fills the whole structure, then applies the values that ITU-T H.264
+// says must be inferred when the corresponding syntax element is absent.
SPSData::SPSData()
{
PodZero(this);
+  // Default values when they aren't defined as per ITU-T H.264 (2014/02).
+  // chroma_format_idc is inferred to be 1 (4:2:0) when not present in the
+  // SPS; the crop unit computation in DecodeSPS reads it, so the default is
+  // kept rather than left at the zero written by PodZero.
chroma_format_idc = 1;
+  // 5: unspecified video format.
+  video_format = 5;
+  // 2: unspecified colour primaries / transfer characteristics.
+  colour_primaries = 2;
+  transfer_characteristics = 2;
+  // Square samples until a VUI aspect ratio says otherwise.
+  sample_ratio = 1.0;
}
/* static */ already_AddRefed<ByteBuffer>
H264::DecodeNALUnit(const ByteBuffer* aNAL)
{
MOZ_ASSERT(aNAL);
if (aNAL->Length() < 4) {
@@ -118,16 +123,25 @@ H264::DecodeNALUnit(const ByteBuffer* aN
zeros++;
} else {
zeros = 0;
}
}
return rbsp.forget();
}
+// Converts a computed display dimension to an integer pixel count.
+// Returns aValue rounded to the nearest integer, or 0 when aValue is NaN,
+// not greater than 1, or too large to be represented as an int32_t.
+static int32_t
+ConditionDimension(float aValue)
+{
+  // float(INT32_MAX) rounds up to 2^31, which does not fit in an int32_t, so
+  // the upper bound must be exclusive to keep the cast below well defined.
+  // Every comparison with NaN is false, so NaN also ends up returning 0.
+  if (aValue > 1.0f && aValue < float(INT32_MAX)) {
+    return int32_t(round(aValue));
+  }
+  return 0;
+}
+
/* static */ bool
H264::DecodeSPS(const ByteBuffer* aSPS, SPSData& aDest)
{
MOZ_ASSERT(aSPS);
BitReader br(*aSPS);
int32_t lastScale;
int32_t nextScale;
@@ -197,16 +211,22 @@ H264::DecodeSPS(const ByteBuffer* aSPS,
aDest.frame_cropping_flag = br.ReadBit();
if (aDest.frame_cropping_flag) {
aDest.frame_crop_left_offset = br.ReadUE();
aDest.frame_crop_right_offset = br.ReadUE();
aDest.frame_crop_top_offset = br.ReadUE();
aDest.frame_crop_bottom_offset = br.ReadUE();
}
+ aDest.sample_ratio = 1.0f;
+ aDest.vui_parameters_present_flag = br.ReadBit();
+ if (aDest.vui_parameters_present_flag) {
+ vui_parameters(br, aDest);
+ }
+
// Calculate common values.
// FFmpeg and VLC ignore the left and top cropping. Do the same here.
uint8_t ChromaArrayType =
aDest.separate_colour_plane_flag ? 0 : aDest.chroma_format_idc;
// Calculate width.
uint32_t CropUnitX = 1;
@@ -221,102 +241,208 @@ H264::DecodeSPS(const ByteBuffer* aSPS,
uint32_t CropUnitY = 2 - aDest.frame_mbs_only_flag;
uint32_t SubHeightC = aDest.chroma_format_idc <= 1 ? 2 : 1;
if (ChromaArrayType != 0)
CropUnitY *= SubHeightC;
uint32_t cropY = CropUnitY * aDest.frame_crop_bottom_offset;
aDest.pic_height = aDest.pic_height_in_map_units * 16 - cropY;
aDest.interlaced = !aDest.frame_mbs_only_flag;
+
+ // Determine display size.
+ if (aDest.sample_ratio > 1.0) {
+ // Increase the intrinsic width
+ aDest.display_width =
+ ConditionDimension(aDest.pic_width * aDest.sample_ratio);
+ aDest.display_height = aDest.pic_height;
+ } else {
+ // Increase the intrinsic height
+ aDest.display_width = aDest.pic_width;
+ aDest.display_height =
+ ConditionDimension(aDest.pic_height / aDest.sample_ratio);
+ }
+
return true;
}
+// Parses the leading part of the VUI parameters of an SPS (Annex E of ITU-T
+// H.264 02/2014) from aBr into aDest: aspect ratio, overscan, video signal
+// type, chroma sample location and timing information.
+// Parsing stops after the timing information; the HRD parameters,
+// pic_struct_present_flag and bitstream restriction fields are not read.
+// aDest.sample_ratio is only written for the aspect_ratio_idc values that
+// define a ratio; otherwise it keeps the value the caller stored in it.
/* static */ void
H264::vui_parameters(BitReader& aBr, SPSData& aDest)
{
aDest.aspect_ratio_info_present_flag = aBr.ReadBit();
-  if (aDest.aspect_ratio_info_present_flag)
-  {
+  if (aDest.aspect_ratio_info_present_flag) {
aDest.aspect_ratio_idc = aBr.ReadBits(8);
+    aDest.sar_width = aDest.sar_height = 0;
-    if (aDest.aspect_ratio_idc == 255 /* EXTENDED_SAR */) {
-      aDest.sar_width  = aBr.ReadBits(16);
-      aDest.sar_height = aBr.ReadBits(16);
+    // From E.2.1 VUI parameters semantics (ITU-T H.264 02/2014)
+    switch (aDest.aspect_ratio_idc) {
+      case 0:
+        // Unspecified
+        break;
+      case 1:
+        /*
+          1:1
+         7680x4320 16:9 frame without horizontal overscan
+         3840x2160 16:9 frame without horizontal overscan
+         1280x720 16:9 frame without horizontal overscan
+         1920x1080 16:9 frame without horizontal overscan (cropped from 1920x1088)
+         640x480 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 1.0f;
+        break;
+      case 2:
+        /*
+          12:11
+         720x576 4:3 frame with horizontal overscan
+         352x288 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 12.0 / 11.0;
+        break;
+      case 3:
+        /*
+          10:11
+         720x480 4:3 frame with horizontal overscan
+         352x240 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 10.0 / 11.0;
+        break;
+      case 4:
+        /*
+          16:11
+         720x576 16:9 frame with horizontal overscan
+         528x576 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 16.0 / 11.0;
+        break;
+      case 5:
+        /*
+          40:33
+         720x480 16:9 frame with horizontal overscan
+         528x480 4:3 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 40.0 / 33.0;
+        break;
+      case 6:
+        /*
+          24:11
+         352x576 4:3 frame without horizontal overscan
+         480x576 16:9 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 24.0 / 11.0;
+        break;
+      case 7:
+        /*
+          20:11
+         352x480 4:3 frame without horizontal overscan
+         480x480 16:9 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 20.0 / 11.0;
+        break;
+      case 8:
+        /*
+          32:11
+         352x576 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 32.0 / 11.0;
+        break;
+      case 9:
+        /*
+          80:33
+         352x480 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 80.0 / 33.0;
+        break;
+      case 10:
+        /*
+          18:11
+         480x576 4:3 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 18.0 / 11.0;
+        break;
+      case 11:
+        /*
+          15:11
+         480x480 4:3 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 15.0 / 11.0;
+        break;
+      case 12:
+        /*
+          64:33
+         528x576 16:9 frame with horizontal overscan
+         */
+        aDest.sample_ratio = 64.0 / 33.0;
+        break;
+      case 13:
+        /*
+          160:99
+         528x480 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 160.0 / 99.0;
+        break;
+      case 14:
+        /*
+          4:3
+         1440x1080 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 4.0 / 3.0;
+        break;
+      case 15:
+        /*
+          3:2
+         1280x1080 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 3.0 / 2.0;
+        break;
+      case 16:
+        /*
+          2:1
+         960x1080 16:9 frame without horizontal overscan
+         */
+        aDest.sample_ratio = 2.0 / 1.0;
+        break;
+      case 255:
+        /* Extended_SAR */
+        aDest.sar_width  = aBr.ReadBits(16);
+        aDest.sar_height = aBr.ReadBits(16);
+        // A zero numerator or denominator carries no usable ratio; keep the
+        // current sample_ratio in that case (this also avoids dividing by 0).
+        if (aDest.sar_width && aDest.sar_height) {
+          aDest.sample_ratio = float(aDest.sar_width) / float(aDest.sar_height);
+        }
+        break;
+      default:
+        break;
}
}
-  else {
-    aDest.sar_width = aDest.sar_height = 0;
-  }
-  if (aBr.ReadBit()) { //overscan_info_present_flag
+  aDest.overscan_info_present_flag = aBr.ReadBit();
+  if (aDest.overscan_info_present_flag) {
aDest.overscan_appropriate_flag = aBr.ReadBit();
}
-  if (aBr.ReadBit()) { //video_signal_type_present_flag
+
+  aDest.video_signal_type_present_flag = aBr.ReadBit();
+  if (aDest.video_signal_type_present_flag) {
aDest.video_format = aBr.ReadBits(3);
aDest.video_full_range_flag = aBr.ReadBit();
aDest.colour_description_present_flag = aBr.ReadBit();
if (aDest.colour_description_present_flag) {
aDest.colour_primaries = aBr.ReadBits(8);
aDest.transfer_characteristics = aBr.ReadBits(8);
aDest.matrix_coefficients = aBr.ReadBits(8);
}
}
+
aDest.chroma_loc_info_present_flag = aBr.ReadBit();
-
if (aDest.chroma_loc_info_present_flag) {
aDest.chroma_sample_loc_type_top_field = aBr.ReadUE();
aDest.chroma_sample_loc_type_bottom_field = aBr.ReadUE();
}
-  if (aBr.ReadBit()) { //timing_info_present_flag
+  aDest.timing_info_present_flag = aBr.ReadBit();
+  if (aDest.timing_info_present_flag) {
aDest.num_units_in_tick = aBr.ReadBits(32);
aDest.time_scale = aBr.ReadBits(32);
aDest.fixed_frame_rate_flag = aBr.ReadBit();
}
-
-  bool hrd_present = false;
-  if (aBr.ReadBit()) { // nal_hrd_parameters_present_flag
-    hrd_parameters(aBr);
-    hrd_present = true;
-  }
-  if (aBr.ReadBit()) { // vcl_hrd_parameters_present_flag
-    hrd_parameters(aBr);
-    hrd_present = true;
-  }
-  if (hrd_present) {
-    aBr.ReadBit(); // low_delay_hrd_flag
-  }
-  aDest.pic_struct_present_flag = aBr.ReadBit();
-  aDest.bitstream_restriction_flag = aBr.ReadBit();
-  if (aDest.bitstream_restriction_flag) {
-    aDest.motion_vectors_over_pic_boundaries_flag = aBr.ReadBit();
-    aDest.max_bytes_per_pic_denom = aBr.ReadUE();
-    aDest.max_bits_per_mb_denom = aBr.ReadUE();
-    aDest.log2_max_mv_length_horizontal = aBr.ReadUE();
-    aDest.log2_max_mv_length_vertical = aBr.ReadUE();
-    aDest.max_num_reorder_frames = aBr.ReadUE();
-    aDest.max_dec_frame_buffering = aBr.ReadUE();
-  }
-}
-
-/* static */ void
-H264::hrd_parameters(BitReader& aBr)
-{
-  uint32_t cpb_cnt_minus1 = aBr.ReadUE();
-  aBr.ReadBits(4); // bit_rate_scale
-  aBr.ReadBits(4); // cpb_size_scale
-  for (uint32_t SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; SchedSelIdx++) {
-    aBr.ReadUE(); // bit_rate_value_minus1[ SchedSelIdx ]
-    aBr.ReadUE(); // cpb_size_value_minus1[ SchedSelIdx ]
-    aBr.ReadBit(); // cbr_flag[ SchedSelIdx ]
-  }
-  aBr.ReadBits(5); // initial_cpb_removal_delay_length_minus1
-  aBr.ReadBits(5); // cpb_removal_delay_length_minus1
-  aBr.ReadBits(5); // dpb_output_delay_length_minus1
-  aBr.ReadBits(5); // time_offset_length
}
/* static */ bool
H264::DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest)
{
if (!AnnexB::HasSPS(aExtraData)) {
return false;
}
--- a/media/libstagefright/binding/include/mp4_demuxer/H264.h
+++ b/media/libstagefright/binding/include/mp4_demuxer/H264.h
@@ -26,16 +26,26 @@ struct SPSData
pic_height = (2 - frame_mbs_only_flag) * ((pic_height_in_map_units_minus1 + 1) * 16)
- (frame_crop_top_offset + frame_crop_bottom_offset) * 2
*/
uint32_t pic_height;
bool interlaced;
/*
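+ Displayed size.
+ display_width and display_height are pic_width and pic_height adjusted
+ according to the sample aspect ratio (sample_ratio, width / height): the
+ width is scaled up when the ratio is greater than 1, otherwise the height
+ is divided by the ratio.
+ */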
+ uint32_t display_width;
+ uint32_t display_height;
+
+ float sample_ratio;
+
+ /*
H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en)
http://www.itu.int/rec/T-REC-H.264-201402-I/en
*/
bool constraint_set0_flag;
bool constraint_set1_flag;
bool constraint_set2_flag;
bool constraint_set3_flag;
@@ -217,131 +227,97 @@ struct SPSData
sar_height. When the aspect_ratio_idc syntax element is not
present, aspect_ratio_idc value shall be inferred to be
equal to 0.
*/
uint8_t aspect_ratio_idc;
uint32_t sar_width;
uint32_t sar_height;
+ /*
+ video_signal_type_present_flag equal to 1 specifies that video_format,
+ video_full_range_flag and colour_description_present_flag are present.
+ video_signal_type_present_flag equal to 0 specifies that video_format,
+ video_full_range_flag and colour_description_present_flag are not present.
+ */
+ bool video_signal_type_present_flag;
+
+ /*
+ overscan_info_present_flag equal to 1 specifies that the
+ overscan_appropriate_flag is present. When overscan_info_present_flag is
+ equal to 0 or is not present, the preferred display method for the video
+ signal is unspecified (Unspecified).
+ */
bool overscan_info_present_flag;
+ /*
+ overscan_appropriate_flag equal to 1 indicates that the cropped decoded
+ pictures output are suitable for display using overscan.
+ overscan_appropriate_flag equal to 0 indicates that the cropped decoded
+ pictures output contain visually important information in the entire region
+ out to the edges of the cropping rectangle of the picture
+ */
bool overscan_appropriate_flag;
+ /*
+ video_format indicates the representation of the pictures as specified in
+ Table E-2, before being coded in accordance with this
+ Recommendation | International Standard. When the video_format syntax element
+ is not present, video_format value shall be inferred to be equal to 5.
+ (Unspecified video format)
+ */
uint8_t video_format;
+
+ /*
+ video_full_range_flag indicates the black level and range of the luma and
+ chroma signals as derived from E′Y, E′PB, and E′PR or E′R, E′G, and E′B
+ real-valued component signals.
+ When the video_full_range_flag syntax element is not present, the value of
+ video_full_range_flag shall be inferred to be equal to 0.
+ */
bool video_full_range_flag;
+
+ /*
+ colour_description_present_flag equal to 1 specifies that colour_primaries,
+ transfer_characteristics and matrix_coefficients are present.
+ colour_description_present_flag equal to 0 specifies that colour_primaries,
+ transfer_characteristics and matrix_coefficients are not present.
+ */
bool colour_description_present_flag;
+
+ /*
+ colour_primaries indicates the chromaticity coordinates of the source
+ primaries as specified in Table E-3 in terms of the CIE 1931 definition of
+ x and y as specified by ISO 11664-1.
+ When the colour_primaries syntax element is not present, the value of
+ colour_primaries shall be inferred to be equal to 2 (the chromaticity is
+ unspecified or is determined by the application).
+ */
uint8_t colour_primaries;
+
+ /*
+ transfer_characteristics indicates the opto-electronic transfer
+ characteristic of the source picture as specified in Table E-4 as a function
+ of a linear optical intensity input Lc with a nominal real-valued range of 0
+ to 1.
+ When the transfer_characteristics syntax element is not present, the value
+ of transfer_characteristics shall be inferred to be equal to 2
+ (the transfer characteristics are unspecified or are determined by the
+ application).
+ */
uint8_t transfer_characteristics;
+
uint8_t matrix_coefficients;
bool chroma_loc_info_present_flag;
uint32_t chroma_sample_loc_type_top_field;
uint32_t chroma_sample_loc_type_bottom_field;
+ bool timing_info_present_flag;
uint32_t num_units_in_tick;
uint32_t time_scale;
bool fixed_frame_rate_flag;
- // Bitstream restriction parameters
-
- /*
- pic_struct_present_flag equal to 1 specifies that picture timing SEI
- messages (clause D.2.2) are present that include the pic_struct syntax
- element. pic_struct_present_flag equal to 0 specifies that the pic_struct
- syntax element is not present in picture timing SEI messages.
- When pic_struct_present_flag is not present, its value shall be inferred to
- be equal to 0.
- */
- bool pic_struct_present_flag;
-
- /*
- bitstream_restriction_flag equal to 1, specifies that the following coded
- video sequence bitstream restriction parameters are present.
- bitstream_restriction_flag equal to 0, specifies that the following coded
- video sequence bitstream restriction parameters are not present.
- */
- bool bitstream_restriction_flag;
-
- /*
- motion_vectors_over_pic_boundaries_flag equal to 0 indicates that no
- sample outside the picture boundaries and no sample at a fractional
- sample position for which the sample value is derived using one or more
- samples outside the picture boundaries is used for inter prediction of any
- sample. motion_vectors_over_pic_boundaries_flag equal to 1 indicates that
- one or more samples outside picture boundaries may be used in inter
- prediction. When the motion_vectors_over_pic_boundaries_flag syntax element
- is not present, motion_vectors_over_pic_boundaries_flag value shall be
- inferred to be equal to 1.
- */
- bool motion_vectors_over_pic_boundaries_flag;
-
- /*
- max_bytes_per_pic_denom indicates a number of bytes not exceeded by the
- sum of the sizes of the VCL NAL units associated with any coded picture in
- the coded video sequence.
- */
- uint32_t max_bytes_per_pic_denom;
-
- /*
- max_bits_per_mb_denom indicates an upper bound for the number of coded bits
- of macroblock_layer( ) data for any macroblock in any picture of the coded
- video sequence. The value of max_bits_per_mb_denom shall be in the range
- of 0 to 16, inclusive.
- */
- uint32_t max_bits_per_mb_denom;
-
- /*
- log2_max_mv_length_horizontal and log2_max_mv_length_vertical indicate the
- maximum absolute value of a decoded horizontal and vertical motion vector
- component, respectively, in 1⁄4 luma sample units, for all pictures in the
- coded video sequence. A value of n asserts that no value of a motion vector
- component shall exceed the range from −2n to 2n − 1, inclusive, in units
- of 1⁄4 luma sample displacement. The value of log2_max_mv_length_horizontal
- shall be in the range of 0 to 16, inclusive. The value of
- log2_max_mv_length_vertical shall be in the range of 0 to 16, inclusive.
- When log2_max_mv_length_horizontal is not present, the values of
- log2_max_mv_length_horizontal and log2_max_mv_length_vertical shall be
- inferred to be equal to 16.
- */
- uint32_t log2_max_mv_length_horizontal;
- uint32_t log2_max_mv_length_vertical;
-
- /*
- max_num_reorder_frames indicates an upper bound for the number of frames
- buffers, in the decoded picture buffer (DPB), that are required for storing
- frames, complementary field pairs, and non-paired fields before output.
- It is a requirement of bitstream conformance that the maximum number of
- frames, complementary field pairs, or non-paired fields that precede any
- frame, complementary field pair, or non-paired field in the coded video
- sequence in decoding order and follow it in output order shall be less than
- or equal to max_num_reorder_frames. The value of max_num_reorder_frames
- shall be in the range of 0 to max_dec_frame_buffering, inclusive.
- When the max_num_reorder_frames syntax element is not present, the value
- of max_num_reorder_frames value shall be inferred as follows:
- – If profile_idc is equal to 44, 86, 100, 110, 122, or 244 and
- constraint_set3_flag is equal to 1, the value of max_num_reorder_frames
- shall be inferred to be equal to 0.
- – Otherwise (profile_idc is not equal to 44, 86, 100, 110, 122, or 244 or
- constraint_set3_flag is equal to 0), the value of max_num_reorder_frames
- shall be inferred to be equal to MaxDpbFrames.
- */
- uint32_t max_num_reorder_frames;
-
- /*
- max_dec_frame_buffering specifies the required size of the HRD decoded
- picture buffer (DPB) in units of frame buffers. It is a requirement of
- bitstream conformance that the coded video sequence shall not require a
- decoded picture buffer with size of more than
- Max( 1, max_dec_frame_buffering ) frame buffers to enable the output of
- decoded pictures at the output times specified by dpb_output_delay of the
- picture timing SEI messages. The value of max_dec_frame_buffering shall be
- greater than or equal to max_num_ref_frames. An upper bound for the value
- of max_dec_frame_buffering is specified by the level limits in
- clauses A.3.1, A.3.2, G.10.2.1, and H.10.2.
- */
- uint32_t max_dec_frame_buffering;
-
SPSData();
};
class H264
{
public:
static bool DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest);
/* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content.