Bug 1163667 - [3.3] Parse complete Xing/Info header info. r=jya
author Eugen Sawin <esawin@me73.com>
Thu, 15 Oct 2015 19:58:02 +0200
changeset 304704 80b86520f0a7e29a45c26d1e865e2c50e2ec76f2
parent 304703 d86a877b276ba8a935a9452736312680655bd9b4
child 304705 764e9faacfc4a10e15213c80b95aed7a5935958a
push id 1001
push user raliiev@mozilla.com
push date Mon, 18 Jan 2016 19:06:03 +0000
treeherder mozilla-release@8b89261f3ac4 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jya
bugs 1163667
milestone 44.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1163667 - [3.3] Parse complete Xing/Info header info. r=jya
dom/media/MP3Demuxer.cpp
dom/media/MP3Demuxer.h
dom/media/gtest/TestMP3Demuxer.cpp
--- a/dom/media/MP3Demuxer.cpp
+++ b/dom/media/MP3Demuxer.cpp
@@ -331,20 +331,23 @@ MP3TrackDemuxer::StreamLength() const {
 }
 
 TimeUnit
 MP3TrackDemuxer::Duration() const {
   if (!mNumParsedFrames) {
     return TimeUnit::FromMicroseconds(-1);
   }
 
-  const int64_t streamLen = StreamLength();
-  // Assume we know the exact number of frames from the VBR header.
-  int64_t numFrames = mParser.VBRInfo().NumFrames();
-  if (numFrames < 0) {
+  int64_t numFrames = 0;
+  const auto numAudioFrames = mParser.VBRInfo().NumAudioFrames();
+  if (numAudioFrames) {
+    // The frame count in a VBR header doesn't include the VBR header frame itself.
+    numFrames = numAudioFrames.value() + 1;
+  } else {
+    const int64_t streamLen = StreamLength();
     if (streamLen < 0) {
       // Unknown length, we can't estimate duration.
       return TimeUnit::FromMicroseconds(-1);
     }
     numFrames = (streamLen - mFirstFrameOffset) / AverageFrameLength();
   }
   return Duration(numFrames);
 }
@@ -527,20 +530,25 @@ MP3TrackDemuxer::Read(uint8_t* aBuffer, 
   const nsresult rv = mSource.ReadAt(aOffset, reinterpret_cast<char*>(aBuffer),
                                      static_cast<uint32_t>(aSize), &read);
   NS_ENSURE_SUCCESS(rv, 0);
   return static_cast<int32_t>(read);
 }
 
 double
 MP3TrackDemuxer::AverageFrameLength() const {
-  if (!mNumParsedFrames) {
-    return 0.0;
+  if (mNumParsedFrames) {
+    return static_cast<double>(mTotalFrameLen) / mNumParsedFrames;
   }
-  return static_cast<double>(mTotalFrameLen) / mNumParsedFrames;
+  const auto& vbr = mParser.VBRInfo();
+  if (vbr.NumBytes() && vbr.NumAudioFrames()) {
+    return static_cast<double>(vbr.NumBytes().value()) /
+           (vbr.NumAudioFrames().value() + 1);
+  }
+  return 0.0;
 }
 
 // FrameParser
 
 namespace frame_header {
 // FrameHeader mRaw byte offsets.
 static const int SYNC1 = 0;
 static const int SYNC2_VERSION_LAYER_PROTECTION = 1;
@@ -828,68 +836,126 @@ FrameParser::FrameHeader::Update(uint8_t
   if (mPos < SIZE) {
     mRaw[mPos] = c;
   }
   return IsValid(mPos++);
 }
 
 // FrameParser::VBRHeader
 
+namespace vbr_header {
+static const char* TYPE_STR[3] = {"NONE", "XING", "VBRI"};
+static const uint32_t TOC_SIZE = 100;
+} // namespace vbr_header
+
 FrameParser::VBRHeader::VBRHeader()
-  : mNumFrames(-1),
-    mType(NONE)
+  : mType(NONE)
 {
 }
 
 FrameParser::VBRHeader::VBRHeaderType
 FrameParser::VBRHeader::Type() const {
   return mType;
 }
 
+const Maybe<uint32_t>&
+FrameParser::VBRHeader::NumAudioFrames() const {
+  return mNumAudioFrames;
+}
+
+const Maybe<uint32_t>&
+FrameParser::VBRHeader::NumBytes() const {
+  return mNumBytes;
+}
+
+const Maybe<uint32_t>&
+FrameParser::VBRHeader::Scale() const {
+  return mScale;
+}
+
+bool
+FrameParser::VBRHeader::IsTOCPresent() const {
+  return mTOC.size() == vbr_header::TOC_SIZE;
+}
+
 int64_t
-FrameParser::VBRHeader::NumFrames() const {
-  return mNumFrames;
+FrameParser::VBRHeader::Offset(float aDurationFac) const {
+  if (!IsTOCPresent()) {
+    return -1;
+  }
+
+  // Constrain the duration percentage to [0, 99].
+  const float durationPer = 100.0f * std::min(0.99f, std::max(0.0f, aDurationFac));
+  const size_t fullPer = durationPer;
+  const float rest = durationPer - fullPer;
+
+  MOZ_ASSERT(fullPer < mTOC.size());
+  int64_t offset = mTOC.at(fullPer);
+
+  if (rest > 0.0 && fullPer + 1 < mTOC.size()) {
+    offset += rest * (mTOC.at(fullPer + 1) - offset);
+  }
+
+  return offset;
 }
 
 bool
 FrameParser::VBRHeader::ParseXing(ByteReader* aReader) {
-  static const uint32_t TAG = BigEndian::readUint32("Xing");
-  static const uint32_t TAG2 = BigEndian::readUint32("Info");
-  static const uint32_t FRAME_COUNT_OFFSET = 8;
-  static const uint32_t FRAME_COUNT_SIZE = 4;
+  static const uint32_t XING_TAG = BigEndian::readUint32("Xing");
+  static const uint32_t INFO_TAG = BigEndian::readUint32("Info");
 
   enum Flags {
     NUM_FRAMES = 0x01,
     NUM_BYTES = 0x02,
     TOC = 0x04,
     VBR_SCALE = 0x08
   };
 
   MOZ_ASSERT(aReader);
   const size_t prevReaderOffset = aReader->Offset();
 
   // We have to search for the Xing header as its position can change.
-  while (aReader->Remaining() >= FRAME_COUNT_OFFSET + FRAME_COUNT_SIZE) {
-    if (aReader->PeekU32() != TAG && aReader->PeekU32() != TAG2) {
-      aReader->Read(1);
-      continue;
-    }
-    // Skip across the VBR header ID tag.
-    aReader->Read(sizeof(TAG));
+  while (aReader->CanRead32() &&
+         aReader->PeekU32() != XING_TAG && aReader->PeekU32() != INFO_TAG) {
+    aReader->Read(1);
+  }
 
-    const uint32_t flags = aReader->ReadU32();
-    if (flags & NUM_FRAMES) {
-      mNumFrames = aReader->ReadU32();
-    }
+  if (aReader->CanRead32()) {
+    // Skip across the VBR header ID tag.
+    aReader->ReadU32();
     mType = XING;
-    aReader->Seek(prevReaderOffset);
-    return true;
+  }
+  uint32_t flags = 0;
+  if (aReader->CanRead32()) {
+    flags = aReader->ReadU32();
+  }
+  if (flags & NUM_FRAMES && aReader->CanRead32()) {
+    mNumAudioFrames = Some(aReader->ReadU32());
+  }
+  if (flags & NUM_BYTES && aReader->CanRead32()) {
+    mNumBytes = Some(aReader->ReadU32());
   }
+  if (flags & TOC && aReader->Remaining() >= vbr_header::TOC_SIZE) {
+    if (!mNumBytes) {
+      // We don't have the stream size to calculate offsets, skip the TOC.
+      aReader->Read(vbr_header::TOC_SIZE);
+    } else {
+      mTOC.clear();
+      mTOC.reserve(vbr_header::TOC_SIZE);
+      for (size_t i = 0; i < vbr_header::TOC_SIZE; ++i) {
+        mTOC.push_back(1.0f / 256.0f * aReader->ReadU8() * mNumBytes.value());
+      }
+    }
+  }
+  if (flags & VBR_SCALE && aReader->CanRead32()) {
+    mScale = Some(aReader->ReadU32());
+  }
+
   aReader->Seek(prevReaderOffset);
-  return false;
+  return mType == XING;
 }
 
 bool
 FrameParser::VBRHeader::ParseVBRI(ByteReader* aReader) {
   static const uint32_t TAG = BigEndian::readUint32("VBRI");
   static const uint32_t OFFSET = 32 + FrameParser::FrameHeader::SIZE;
   static const uint32_t FRAME_COUNT_OFFSET = OFFSET + 14;
   static const uint32_t MIN_FRAME_SIZE = OFFSET + 26;
@@ -900,29 +966,36 @@ FrameParser::VBRHeader::ParseVBRI(ByteRe
   MOZ_ASSERT(aReader->PeekU16() & 0xFFE0);
   const size_t prevReaderOffset = aReader->Offset();
 
   // VBRI have a fixed relative position, so let's check for it there.
   if (aReader->Remaining() > MIN_FRAME_SIZE) {
     aReader->Seek(prevReaderOffset + OFFSET);
     if (aReader->ReadU32() == TAG) {
       aReader->Seek(prevReaderOffset + FRAME_COUNT_OFFSET);
-      mNumFrames = aReader->ReadU32();
+      mNumAudioFrames = Some(aReader->ReadU32());
       mType = VBRI;
       aReader->Seek(prevReaderOffset);
       return true;
     }
   }
   aReader->Seek(prevReaderOffset);
   return false;
 }
 
 bool
 FrameParser::VBRHeader::Parse(ByteReader* aReader) {
-  return ParseVBRI(aReader) || ParseXing(aReader);
+  const bool rv = ParseVBRI(aReader) || ParseXing(aReader);
+  if (rv) {
+    MP3LOG("VBRHeader::Parse found valid VBR/CBR header: type=%s"
+           " NumAudioFrames=%u NumBytes=%u Scale=%u TOC-size=%u",
+           vbr_header::TYPE_STR[Type()], NumAudioFrames().valueOr(0),
+           NumBytes().valueOr(0), Scale().valueOr(0), mTOC.size());
+  }
+  return rv;
 }
 
 // FrameParser::Frame
 
 void
 FrameParser::Frame::Reset() {
   mHeader.Reset();
 }
--- a/dom/media/MP3Demuxer.h
+++ b/dom/media/MP3Demuxer.h
@@ -1,16 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef MP3_DEMUXER_H_
 #define MP3_DEMUXER_H_
 
 #include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
 #include "MediaDataDemuxer.h"
 #include "MediaResource.h"
 #include "mp4_demuxer/ByteReader.h"
 
 namespace mozilla {
 namespace mp3 {
 
 class MP3TrackDemuxer;
@@ -200,30 +201,45 @@ public:
     // incremented via Update.
     int mPos;
   };
 
   // VBR frames may contain Xing or VBRI headers for additional info, we use
   // this class to parse them and access this info.
   class VBRHeader {
   public:
+    // Synchronize with vbr_header TYPE_STR on change.
     enum VBRHeaderType {
-      NONE,
+      NONE = 0,
       XING,
       VBRI
     };
 
     // Constructor.
     VBRHeader();
 
     // Returns the parsed VBR header type, or NONE if no valid header found.
     VBRHeaderType Type() const;
 
-    // Returns the total number of frames expected in the stream/file.
-    int64_t NumFrames() const;
+    // Returns the total number of audio frames (excluding the VBR header frame)
+    // expected in the stream/file.
+    const Maybe<uint32_t>& NumAudioFrames() const;
+
+    // Returns the expected size of the stream in bytes.
+    const Maybe<uint32_t>& NumBytes() const;
+
+    // Returns the VBR scale factor (0: best quality, 100: lowest quality).
+    const Maybe<uint32_t>& Scale() const;
+
+    // Returns true iff Xing/Info TOC (table of contents) is present.
+    bool IsTOCPresent() const;
+
+    // Returns the byte offset for the given duration fraction (0: begin,
+    // 1.0: end), or -1 if the TOC is not present.
+    int64_t Offset(float aDurationFac) const;
 
     // Parses contents of given ByteReader for a valid VBR header.
     // The offset of the passed ByteReader needs to point to an MPEG frame begin,
     // as a VBRI-style header is searched at a fixed offset relative to frame begin.
     // Returns whether a valid VBR header was found in the range.
     bool Parse(mp4_demuxer::ByteReader* aReader);
 
   private:
@@ -235,17 +251,26 @@ public:
     // Parses contents of given ByteReader for a valid VBRI header.
     // The initial ByteReader offset will be preserved. It also needs to point
     // to the beginning of a valid MPEG frame, as VBRI headers are searched
     // at a fixed offset relative to frame begin.
     // Returns whether a valid VBRI header was found in the range.
     bool ParseVBRI(mp4_demuxer::ByteReader* aReader);
 
     // The total number of frames expected as parsed from a VBR header.
-    int64_t mNumFrames;
+    Maybe<uint32_t> mNumAudioFrames;
+
+    // The total number of bytes expected in the stream.
+    Maybe<uint32_t> mNumBytes;
+
+    // The VBR scale factor.
+    Maybe<uint32_t> mScale;
+
+    // The TOC table mapping duration percentage to byte offset.
+    std::vector<int64_t> mTOC;
 
     // The detected VBR header type.
     VBRHeaderType mType;
   };
 
   // Frame meta container used to parse and hold a frame header and side info.
   class Frame {
   public:
--- a/dom/media/gtest/TestMP3Demuxer.cpp
+++ b/dom/media/gtest/TestMP3Demuxer.cpp
@@ -219,20 +219,20 @@ TEST_F(MP3DemuxerTest, VBRHeader) {
     RefPtr<MediaRawData> frame(target.mDemuxer->DemuxSample());
     ASSERT_TRUE(frame);
 
     const auto& vbr = target.mDemuxer->VBRInfo();
 
     if (target.mIsVBR) {
       EXPECT_EQ(FrameParser::VBRHeader::XING, vbr.Type());
       // TODO: find reference number which accounts for trailing headers.
-      // EXPECT_EQ(target.mNumSamples / target.mSamplesPerFrame, vbr.NumFrames());
+      // EXPECT_EQ(target.mNumSamples / target.mSamplesPerFrame, vbr.NumAudioFrames().value());
     } else {
       EXPECT_EQ(FrameParser::VBRHeader::NONE, vbr.Type());
-      EXPECT_EQ(-1, vbr.NumFrames());
+      EXPECT_FALSE(vbr.NumAudioFrames());
     }
   }
 }
 
 TEST_F(MP3DemuxerTest, FrameParsing) {
   for (const auto& target: mTargets) {
     RefPtr<MediaRawData> frameData(target.mDemuxer->DemuxSample());
     ASSERT_TRUE(frameData);