Bug 1093815 - Add MP3 track demuxer. r=kinetik
author Eugen Sawin <esawin@mozilla.com>
Wed, 13 May 2015 15:15:36 +0200
changeset 243719 aaa61c18c5d28046739d599bb37110dea241cb7b
parent 243718 7923a0c837b56efcd1376b95318fe4cdce03d5e8
child 243720 85a6b53f377dd40b189fe12fa8360ff0ce6aa32d
push id 28753
push user kwierso@gmail.com
push date Thu, 14 May 2015 22:33:43 +0000
treeherdermozilla-central@07e2e15703cb [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers kinetik
bugs 1093815, 100644
milestone 41.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1093815 - Add MP3 track demuxer. r=kinetik --- media/libstagefright/binding/MP3TrackDemuxer.cpp | 755 +++++++++++++++++++++ .../binding/include/mp4_demuxer/MP3TrackDemuxer.h | 380 +++++++++++ media/libstagefright/moz.build | 2 + 3 files changed, 1137 insertions(+) create mode 100644 media/libstagefright/binding/MP3TrackDemuxer.cpp create mode 100644 media/libstagefright/binding/include/mp4_demuxer/MP3TrackDemuxer.h
media/libstagefright/binding/MP3TrackDemuxer.cpp
media/libstagefright/binding/include/mp4_demuxer/MP3TrackDemuxer.h
media/libstagefright/moz.build
new file mode 100644
--- /dev/null
+++ b/media/libstagefright/binding/MP3TrackDemuxer.cpp
@@ -0,0 +1,755 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mp4_demuxer/MP3TrackDemuxer.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Endian.h"
+#include "VideoUtils.h"
+
+namespace mp4_demuxer {
+
+// MP3Demuxer
+
+// Builds a demuxer reading from aSource. All offsets, counters and stream
+// metrics start at zero; the stream length starts as unknown (-1).
+MP3Demuxer::MP3Demuxer(Stream* aSource)
+  : mSource(aSource)
+  , mOffset(0)
+  , mFirstFrameOffset(0)
+  , mStreamLength(-1)
+  , mNumParsedFrames(0)
+  , mFrameIndex(0)
+  , mTotalFrameLen(0)
+  , mSamplesPerFrame(0)
+  , mSamplesPerSecond(0)
+  , mChannels(0)
+{
+}
+
+// Asks the source for its total length and stores it in mStreamLength. If the
+// source cannot report a length, mStreamLength is set to -1 (unknown).
+// Always returns true.
+bool
+MP3Demuxer::Init() {
+  const bool lengthKnown = mSource->Length(&mStreamLength);
+  if (!lengthKnown) {
+    mStreamLength = -1;
+  }
+  return true;
+}
+
+#ifdef ENABLE_TESTS
+// Test-only: returns the frame the parser recorded as its previous frame.
+const FrameParser::Frame&
+MP3Demuxer::LastFrame() const {
+  const FrameParser::Frame& previous = mParser.PrevFrame();
+  return previous;
+}
+#endif
+
+// Returns the ID3 header held by the frame parser.
+const ID3Parser::ID3Header&
+MP3Demuxer::ID3Header() const {
+  const ID3Parser::ID3Header& header = mParser.ID3Header();
+  return header;
+}
+
+// Returns the VBR header information held by the frame parser.
+const FrameParser::VBRHeader&
+MP3Demuxer::VBRInfo() const {
+  const FrameParser::VBRHeader& info = mParser.VBRInfo();
+  return info;
+}
+
+// TrackDemuxer seek entry point; forwards to SlowSeek().
+void
+MP3Demuxer::Seek(Microseconds aTime)
+{
+  SlowSeek(aTime);
+}
+
+// Approximate seek. A target of 0 jumps straight to the first frame. Any other
+// target is converted to a frame index from the known sample metrics, and the
+// byte offset is estimated from the average frame length. Does nothing when
+// no frame metrics are available yet.
+void
+MP3Demuxer::FastSeek(Microseconds aTime) {
+  if (aTime) {
+    if (!mSamplesPerFrame || !mNumParsedFrames) {
+      // No frame parsed so far, nothing to base an estimate on.
+      return;
+    }
+
+    const int64_t targetFrame = static_cast<double>(aTime) / USECS_PER_S *
+                                mSamplesPerSecond / mSamplesPerFrame;
+    mOffset = mFirstFrameOffset + targetFrame * AverageFrameLength();
+    mFrameIndex = targetFrame;
+  } else {
+    // Rewind to the beginning of the audio data.
+    mOffset = mFirstFrameOffset;
+    mFrameIndex = 0;
+  }
+
+  // Drop any partially parsed frame state before continuing from mOffset.
+  mParser.FinishParsing();
+}
+
+// More accurate seek. Jumps via FastSeek() when the target is the stream start
+// or lies before the current position, then steps forward frame by frame
+// until the end of the next frame would reach aTime or no frame is left.
+void
+MP3Demuxer::SlowSeek(Microseconds aTime) {
+  const bool toStart = !aTime;
+  const bool backwards = !toStart && Duration(mFrameIndex) > aTime;
+
+  if (toStart || backwards) {
+    FastSeek(aTime);
+    if (toStart) {
+      return;
+    }
+  }
+
+  for (nsRefPtr<MediaRawData> skipped(GetNext());
+       skipped && Duration(mFrameIndex + 1) < aTime;
+       skipped = GetNext()) {
+    // Frames before the target are read and discarded.
+  }
+}
+
+
+// TrackDemuxer interface: hands out the next frame, or nullptr when GetNext()
+// has nothing to return.
+already_AddRefed<MediaRawData>
+MP3Demuxer::DemuxSample() {
+  nsRefPtr<MediaRawData> next(GetNext());
+  // forget() on an empty pointer yields a null result, matching the no-frame
+  // case.
+  return next.forget();
+}
+
+// TrackDemuxer interface: this demuxer does not report keyframe times and
+// always returns -1.
+Microseconds
+MP3Demuxer::GetNextKeyframeTime() {
+  const Microseconds noKeyframeTime = -1;
+  return noKeyframeTime;
+}
+
+// Returns the cached stream length in bytes, or -1 if unknown.
+int64_t
+MP3Demuxer::StreamLength() const
+{
+  const int64_t length = mStreamLength;
+  return length;
+}
+
+// Estimates the total stream duration in microseconds. The frame count comes
+// from the VBR header when it provides one; otherwise it is derived from the
+// stream length and the average frame length. Returns -1 when no frame has
+// been parsed yet or the stream length is unknown.
+int64_t
+MP3Demuxer::Duration() const {
+  if (!mNumParsedFrames) {
+    return -1;
+  }
+
+  int64_t frameCount = mParser.VBRInfo().NumFrames();
+  if (frameCount >= 0) {
+    // The VBR header supplied a frame count.
+    return Duration(frameCount);
+  }
+
+  if (mStreamLength < 0) {
+    // No frame count and no stream length: no basis for an estimate.
+    return -1;
+  }
+
+  frameCount = (mStreamLength - mFirstFrameOffset) / AverageFrameLength();
+  return Duration(frameCount);
+}
+
+// Returns the play time in microseconds covered by aNumFrames frames, based on
+// the samples-per-frame and sample-rate values of the last parsed frame.
+// Returns -1 when no sample rate is known yet.
+int64_t
+MP3Demuxer::Duration(int64_t aNumFrames) const {
+  if (!mSamplesPerSecond) {
+    return -1;
+  }
+
+  // Force floating-point division. With an integral USECS_PER_S the quotient
+  // would be truncated before the multiplication, and the lost fraction of a
+  // microsecond per frame would accumulate over long streams.
+  const double usPerFrame = static_cast<double>(USECS_PER_S) *
+                            mSamplesPerFrame / mSamplesPerSecond;
+  return aNumFrames * usPerFrame;
+}
+
+// Finds, reads and returns the next MPEG frame starting at mOffset.
+// Returns nullptr when no frame header is found before reading stops, when
+// the frame buffer cannot be allocated, or when the frame cannot be read in
+// full. On success the stream metrics (samples per frame, sample rate,
+// channels), the frame counters and mOffset are updated, and the parser is
+// prepared for the next frame.
+already_AddRefed<mozilla::MediaRawData>
+MP3Demuxer::GetNext() {
+  static const int BUFFER_SIZE = 4096;
+
+  uint8_t buffer[BUFFER_SIZE];
+  uint32_t read = 0;
+  const uint8_t* frameBeg = nullptr;
+  const uint8_t* bufferEnd = nullptr;
+
+  // Scan the source chunk by chunk until the parser reports a frame header
+  // (frameBeg != bufferEnd) or nothing more can be read. mOffset is advanced
+  // past every chunk that was read.
+  while (frameBeg == bufferEnd &&
+         (read = Read(buffer, mOffset, BUFFER_SIZE)) > 0) {
+    MOZ_ASSERT(mOffset + read > mOffset);
+    mOffset += read;
+    bufferEnd = buffer + read;
+    frameBeg = mParser.Parse(buffer, bufferEnd);
+  }
+
+  if (frameBeg == bufferEnd || !mParser.CurrentFrame().Length()) {
+    return nullptr;
+  }
+
+  // Valid frame header was fully parsed, let's read the whole frame.
+  const int32_t frameLen = mParser.CurrentFrame().Length();
+  nsRefPtr<MediaRawData> frame = new MediaRawData();
+  // mOffset points past the last chunk read; step back by the distance from
+  // the reported position to the chunk end. The + 1 compensates for the
+  // position returned by FrameParser::Parse.
+  frame->mOffset = mOffset - (bufferEnd - frameBeg) + 1;
+
+  nsAutoPtr<MediaRawDataWriter> frameWriter(frame->CreateWriter());
+  if (!frameWriter->SetSize(frameLen)) {
+    return nullptr;
+  }
+
+  // Re-read the complete frame, header included, from its start offset.
+  read = Read(frameWriter->mData, frame->mOffset, frame->mSize);
+
+  if (read != frame->mSize) {
+    return nullptr;
+  }
+
+  // Prevent overflow.
+  if (mTotalFrameLen + frameLen < 0) {
+    // These variables have a linear dependency and are only used to derive the
+    // average frame length.
+    mTotalFrameLen /= 2;
+    mNumParsedFrames /= 2;
+  }
+
+  // Full frame parsed, move offset to its end.
+  mOffset = frame->mOffset + frame->mSize;
+  MOZ_ASSERT(mOffset > frame->mOffset);
+
+  // Refresh the stream metrics from this frame's header.
+  mTotalFrameLen += frameLen;
+  mSamplesPerFrame = mParser.CurrentFrame().Header().SamplesPerFrame();
+  mSamplesPerSecond = mParser.CurrentFrame().Header().SampleRate();
+  mChannels = mParser.CurrentFrame().Header().Channels();
+  ++mNumParsedFrames;
+  ++mFrameIndex;
+  MOZ_ASSERT(mFrameIndex > 0);
+
+  // mFrameIndex was already incremented, so this frame starts at index - 1.
+  frame->mTime = Duration(mFrameIndex - 1);
+  frame->mDuration = Duration(1);
+
+  if (mNumParsedFrames == 1) {
+    // First frame parsed, let's read VBR info if available.
+    // TODO: read info that helps with seeking (bug 1163667).
+    mParser.ParseVBRHeader(frame->mData, frame->mData + frame->mSize);
+    mFirstFrameOffset = frame->mOffset;
+  }
+
+  // Prepare the parser for the next frame parsing session.
+  mParser.FinishParsing();
+  return frame.forget();
+}
+
+// Reads up to aSize bytes from the source at aOffset into aBuffer.
+// Returns the number of bytes read, or 0 if the source reports a failure.
+uint32_t
+MP3Demuxer::Read(uint8_t* aBuffer, uint32_t aOffset, uint32_t aSize) {
+  size_t bytesRead = 0;
+  const bool succeeded = mSource->ReadAt(aOffset, aBuffer, aSize, &bytesRead);
+  if (!succeeded) {
+    bytesRead = 0;
+  }
+  return bytesRead;
+}
+
+// Returns the mean length in bytes of the frames parsed so far, or 0.0 when no
+// frame has been parsed yet.
+double
+MP3Demuxer::AverageFrameLength() const {
+  return mNumParsedFrames
+         ? static_cast<double>(mTotalFrameLen) / mNumParsedFrames
+         : 0.0;
+}
+
+// FrameParser
+
+namespace frame_header {
+// FrameHeader mRaw byte offsets.
+// Each constant is the index into the raw header bytes; the name lists the
+// fields packed into that byte.
+static const int SYNC1 = 0;
+static const int SYNC2_VERSION_LAYER_PROTECTION = 1;
+static const int BITRATE_SAMPLERATE_PADDING_PRIVATE = 2;
+static const int CHANNELMODE_MODEEXT_COPY_ORIG_EMPH = 3;
+}
+
+// Nothing to set up here; the members initialize themselves.
+FrameParser::FrameParser()
+{
+}
+
+// Resets the parser: clears the ID3 parser state, the remembered first frame
+// and the frame currently being parsed.
+void
+FrameParser::Reset()
+{
+  mID3Parser.Reset();
+  mFirstFrame.Reset();
+  mFrame.Reset();
+}
+
+// Ends the current frame parsing session. An incomplete ID3 header is
+// discarded, the current frame is remembered as the previous one (test builds
+// only), and the current frame is cleared.
+void
+FrameParser::FinishParsing() {
+  const bool haveValidID3Header = mID3Parser.Header().IsValid();
+  if (!haveValidID3Header) {
+    // Keep a completed ID3 header, but drop a partial match.
+    mID3Parser.Reset();
+  }
+#ifdef ENABLE_TESTS
+  mPrevFrame = mFrame;
+#endif
+  mFrame.Reset();
+}
+
+// Returns the frame that is currently being parsed.
+const FrameParser::Frame&
+FrameParser::CurrentFrame() const
+{
+  return mFrame;
+}
+
+#ifdef ENABLE_TESTS
+// Test-only: returns the frame saved by the last FinishParsing() call.
+const FrameParser::Frame&
+FrameParser::PrevFrame() const
+{
+  return mPrevFrame;
+}
+#endif
+
+// Returns the first frame found since the last Reset().
+const FrameParser::Frame&
+FrameParser::FirstFrame() const
+{
+  return mFirstFrame;
+}
+
+// Returns the header held by the embedded ID3 parser.
+const ID3Parser::ID3Header&
+FrameParser::ID3Header() const {
+  const ID3Parser::ID3Header& header = mID3Parser.Header();
+  return header;
+}
+
+// Returns the VBR header state filled in by ParseVBRHeader().
+const FrameParser::VBRHeader&
+FrameParser::VBRInfo() const
+{
+  return mVBRHeader;
+}
+
+// Scans [aBeg, aEnd) for an MPEG frame header, skipping a leading ID3v2 tag
+// first if neither an ID3 header size nor a first frame is known yet.
+// Returns aEnd when the range is empty/invalid or no frame with non-zero
+// length was found. Otherwise returns a position derived from the header
+// location (see the note at the end of the function).
+const uint8_t*
+FrameParser::Parse(const uint8_t* aBeg, const uint8_t* aEnd) {
+  if (!aBeg || !aEnd || aBeg >= aEnd) {
+    return aEnd;
+  }
+
+  if (!mID3Parser.Header().Size() && !mFirstFrame.Length()) {
+    // No MP3 frames have been parsed yet, look for ID3v2 headers at file begin.
+    // ID3v1 tags may only be at file end.
+    // TODO: should we try to read ID3 tags at end of file/mid-stream, too?
+    const uint8_t* id3Beg = mID3Parser.Parse(aBeg, aEnd);
+    if (id3Beg != aEnd) {
+      // ID3 headers found, skip past them.
+      // NOTE(review): the result may lie beyond aEnd when the tag is larger
+      // than the buffer; the loop below then simply does not run.
+      aBeg = id3Beg + ID3Parser::ID3Header::SIZE + mID3Parser.Header().Size();
+    }
+  }
+
+  // Feed bytes to the header state machine. When ParseNext() returns true the
+  // loop exits without advancing, so aBeg still addresses the byte that
+  // completed the header.
+  while (aBeg < aEnd && !mFrame.ParseNext(*aBeg)) {
+    ++aBeg;
+  }
+
+  if (mFrame.Length()) {
+    // MP3 frame found.
+    if (!mFirstFrame.Length()) {
+      mFirstFrame = mFrame;
+    }
+    // Move to the frame header begin to allow for whole-frame parsing.
+    // Because aBeg addresses the last header byte here, subtracting SIZE
+    // yields the position one byte before the header's first byte.
+    aBeg -= FrameHeader::SIZE;
+    return aBeg;
+  }
+  return aEnd;
+}
+
+// FrameParser::Header
+
+// Starts with an empty header; the parse position is at the first byte.
+FrameParser::FrameHeader::FrameHeader()
+  : mPos(0)
+{
+}
+
+// Returns the first raw header byte, which holds the first 8 sync bits.
+uint8_t
+FrameParser::FrameHeader::Sync1() const {
+  const uint8_t firstByte = mRaw[frame_header::SYNC1];
+  return firstByte;
+}
+
+// Returns the top three bits of the second header byte (remaining sync bits).
+uint8_t
+FrameParser::FrameHeader::Sync2() const {
+  return (mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION] >> 5) & 0x7;
+}
+
+// Returns the 2-bit MPEG version field (bits 4..3 of the second header byte).
+uint8_t
+FrameParser::FrameHeader::RawVersion() const {
+  return (mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION] >> 3) & 0x3;
+}
+
+// Returns the 2-bit layer field (bits 2..1 of the second header byte).
+uint8_t
+FrameParser::FrameHeader::RawLayer() const {
+  return (mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION] >> 1) & 0x3;
+}
+
+// Returns the CRC protection bit. The second header byte is laid out as
+// 111VVLLC, so the protection bit is its least significant bit and needs no
+// shift.
+uint8_t
+FrameParser::FrameHeader::RawProtection() const {
+  return 0x1 & mRaw[frame_header::SYNC2_VERSION_LAYER_PROTECTION];
+}
+
+// Returns the 4-bit bitrate index (upper nibble of the third header byte).
+uint8_t
+FrameParser::FrameHeader::RawBitrate() const {
+  return (mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] >> 4) & 0xF;
+}
+
+// Returns the 2-bit sample rate index (bits 3..2 of the third header byte).
+uint8_t
+FrameParser::FrameHeader::RawSampleRate() const {
+  return (mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] >> 2) & 0x3;
+}
+
+// Returns the padding bit (bit 1 of the third header byte).
+uint8_t
+FrameParser::FrameHeader::Padding() const {
+  return (mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] >> 1) & 0x1;
+}
+
+// Returns the private bit (bit 0 of the third header byte).
+uint8_t
+FrameParser::FrameHeader::Private() const {
+  return mRaw[frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE] & 0x1;
+}
+
+// Returns the 2-bit channel mode (0->stereo, 1->joint stereo, 2->dual channel,
+// 3->single channel). The fourth header byte is laid out as MMEETOHH, so the
+// mode is the top two bits. The mode extension bits (EE) must not be included,
+// otherwise comparing the result against 3 to detect mono does not work.
+uint8_t
+FrameParser::FrameHeader::RawChannelMode() const {
+  return 0x3 & mRaw[frame_header::CHANNELMODE_MODEEXT_COPY_ORIG_EMPH] >> 6;
+}
+
+// Maps the raw layer field to the layer number (0 for the reserved value).
+int32_t
+FrameParser::FrameHeader::Layer() const {
+  // Indexed by the raw 2-bit layer field.
+  static const uint8_t LAYER_BY_RAW_VALUE[4] = { 0, 3, 2, 1 };
+
+  const uint8_t rawLayer = RawLayer();
+  return LAYER_BY_RAW_VALUE[rawLayer];
+}
+
+// Returns the sampling rate in Hz for the parsed version and sample rate
+// index, or 0 for reserved combinations.
+int32_t
+FrameParser::FrameHeader::SampleRate() const {
+  // Rows: raw version field. Columns: raw sample rate index.
+  static const uint16_t RATES_HZ[4][4] = {
+    { 11025, 12000,  8000, 0 }, // MPEG 2.5
+    {     0,     0,     0, 0 }, // Reserved
+    { 22050, 24000, 16000, 0 }, // MPEG 2
+    { 44100, 48000, 32000, 0 }  // MPEG 1
+  };
+
+  const uint8_t version = RawVersion();
+  const uint8_t rateIndex = RawSampleRate();
+  return RATES_HZ[version][rateIndex];
+}
+
+// Returns the channel count: 1 when the raw channel mode equals 3 (single
+// channel), 2 for every other mode.
+int32_t
+FrameParser::FrameHeader::Channels() const {
+  if (RawChannelMode() == 3) {
+    return 1;
+  }
+  return 2;
+}
+
+// Returns the number of samples per frame for the parsed version and layer,
+// or 0 for reserved combinations.
+int32_t
+FrameParser::FrameHeader::SamplesPerFrame() const {
+  // Rows: raw version field. Columns: raw layer field.
+  static const uint16_t SAMPLES[4][4] = {
+    // Layer     3     2     1       Version
+    {      0,  576, 1152,  384 }, // 2.5
+    {      0,    0,    0,    0 }, // Reserved
+    {      0,  576, 1152,  384 }, // 2
+    {      0, 1152, 1152,  384 }  // 1
+  };
+
+  const uint8_t version = RawVersion();
+  const uint8_t layer = RawLayer();
+  return SAMPLES[version][layer];
+}
+
+// Returns the frame bitrate in bits per second (table value in kbps times
+// 1000), or 0 for reserved/invalid index combinations.
+int32_t
+FrameParser::FrameHeader::Bitrate() const {
+  // Table in kbps, indexed by [raw version][raw layer][raw bitrate index].
+  static const uint16_t KBPS[4][4][16] = {
+    { // Version 2.5
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Reserved
+      { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 3
+      { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 2
+      { 0,  32,  48,  56,  64,  80,  96, 112, 128, 144, 160, 176, 192, 224, 256, 0 }  // Layer 1
+    },
+    { // Reserved
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Invalid
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Invalid
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Invalid
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }  // Invalid
+    },
+    { // Version 2
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Reserved
+      { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 3
+      { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 2
+      { 0,  32,  48,  56,  64,  80,  96, 112, 128, 144, 160, 176, 192, 224, 256, 0 }  // Layer 1
+    },
+    { // Version 1
+      { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Reserved
+      { 0,  32,  40,  48,  56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320, 0 }, // Layer 3
+      { 0,  32,  48,  56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, // Layer 2
+      { 0,  32,  64,  96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 }, // Layer 1
+    }
+  };
+
+  const uint8_t version = RawVersion();
+  const uint8_t layer = RawLayer();
+  const uint8_t bitrateIndex = RawBitrate();
+  return 1000 * KBPS[version][layer][bitrateIndex];
+}
+
+// Returns the slot size in bytes for the parsed layer (0 for the reserved
+// layer value).
+int32_t
+FrameParser::FrameHeader::SlotSize() const {
+  // Indexed by the raw layer field: reserved, layer 3, layer 2, layer 1.
+  static const uint8_t BYTES_PER_SLOT[4] = { 0, 1, 1, 4 };
+
+  const uint8_t layer = RawLayer();
+  return BYTES_PER_SLOT[layer];
+}
+
+// Feeds one byte to the header state machine. If the byte does not continue
+// the current sequence, the state is reset and the byte is retried as the
+// possible first byte of a new header. Returns whether a complete header has
+// been parsed.
+bool
+FrameParser::FrameHeader::ParseNext(uint8_t c) {
+  if (Update(c)) {
+    return IsValid();
+  }
+
+  // The byte broke the current sequence; it may still start a new one.
+  Reset();
+  const bool startsNewHeader = Update(c);
+  if (!startsNewHeader) {
+    Reset();
+  }
+  return IsValid();
+}
+
+// Returns whether the raw header byte at position aPos is acceptable. A header
+// that is already complete is always reported as valid.
+//
+// The third byte is rejected for the invalid bitrate index (0xF), the
+// free-format bitrate index (0) and the reserved sample rate index (3).
+// The latter two give a frame length of zero (or of the padding only). If
+// such a header were accepted, it would be complete but unusable, and it
+// would keep the state machine from ever looking for the next header.
+bool
+FrameParser::FrameHeader::IsValid(int aPos) const {
+  if (IsValid()) {
+    return true;
+  }
+  if (aPos == frame_header::SYNC1) {
+    return Sync1() == 0xFF;
+  }
+  if (aPos == frame_header::SYNC2_VERSION_LAYER_PROTECTION) {
+    return Sync2() == 7 &&
+           RawVersion() != 1 &&
+           RawLayer() != 0;
+  }
+  if (aPos == frame_header::BITRATE_SAMPLERATE_PADDING_PRIVATE) {
+    return RawBitrate() != 0xF &&
+           RawBitrate() != 0 &&
+           RawSampleRate() != 3;
+  }
+  return true;
+}
+
+// Returns whether all SIZE header bytes have been accepted.
+bool
+FrameParser::FrameHeader::IsValid() const {
+  return !(mPos < SIZE);
+}
+
+// Restarts header parsing at the first byte position.
+void
+FrameParser::FrameHeader::Reset()
+{
+  mPos = 0;
+}
+
+// Stores c at the current position (if still inside the header), advances the
+// position and returns whether the byte is acceptable at the position it was
+// stored at.
+bool
+FrameParser::FrameHeader::Update(uint8_t c) {
+  // The position is advanced before the validity check runs, so the check
+  // already sees the incremented mPos.
+  const int pos = mPos++;
+  if (pos < SIZE) {
+    mRaw[pos] = c;
+  }
+  return IsValid(pos);
+}
+
+// FrameParser::VBRHeader
+
+// Starts without a detected VBR header: frame count unknown (-1), type NONE.
+FrameParser::VBRHeader::VBRHeader()
+  : mNumFrames(-1)
+  , mType(NONE)
+{
+}
+
+// Returns the type of VBR header detected so far (NONE if none was found).
+FrameParser::VBRHeader::VBRHeaderType
+FrameParser::VBRHeader::Type() const
+{
+  return mType;
+}
+
+// Returns the frame count read from the VBR header, or -1 if none was read.
+int64_t
+FrameParser::VBRHeader::NumFrames() const
+{
+  return mNumFrames;
+}
+
+// Searches [aBeg, aEnd) for a Xing header. On success the type is set to XING
+// and, if the header flags announce a frame count and the count lies fully
+// inside the buffer, mNumFrames is updated.
+// Returns whether a Xing tag followed by its flags field was found.
+//
+// Every 32-bit read is bounds-checked against aEnd, so a tag close to the end
+// of the buffer cannot trigger a read past the buffer.
+bool
+FrameParser::VBRHeader::ParseXing(const uint8_t* aBeg, const uint8_t* aEnd) {
+  static const uint32_t TAG = BigEndian::readUint32("Xing");
+  static const uint32_t FRAME_COUNT_OFFSET = 8;
+  // Bytes needed to read the tag plus the flags field that follows it.
+  static const size_t TAG_AND_FLAGS_SIZE = sizeof(TAG) + sizeof(uint32_t);
+  // Bytes needed to additionally read the frame count field.
+  static const size_t FRAME_COUNT_END = FRAME_COUNT_OFFSET + sizeof(uint32_t);
+
+  enum Flags {
+    NUM_FRAMES = 0x01,
+    NUM_BYTES = 0x02,
+    TOC = 0x04,
+    VBR_SCALE = 0x08
+  };
+
+  if (!aBeg || !aEnd || aBeg >= aEnd) {
+    return false;
+  }
+
+  // We have to search for the Xing header as its position can change.
+  for (; static_cast<size_t>(aEnd - aBeg) >= TAG_AND_FLAGS_SIZE; ++aBeg) {
+    if (BigEndian::readUint32(aBeg) != TAG) {
+      continue;
+    }
+
+    const uint32_t flags = BigEndian::readUint32(aBeg + sizeof(TAG));
+    if ((flags & NUM_FRAMES) &&
+        static_cast<size_t>(aEnd - aBeg) >= FRAME_COUNT_END) {
+      mNumFrames = BigEndian::readUint32(aBeg + FRAME_COUNT_OFFSET);
+    }
+    mType = XING;
+    return true;
+  }
+  return false;
+}
+
+// Checks [aBeg, aEnd) for a VBRI header at its fixed position. aBeg is
+// expected to point at the first byte of the frame, i.e. at the frame header.
+// On success the frame count is stored and the type is set to VBRI.
+// Returns whether a VBRI header was found.
+bool
+FrameParser::VBRHeader::ParseVBRI(const uint8_t* aBeg, const uint8_t* aEnd) {
+  static const uint32_t TAG = BigEndian::readUint32("VBRI");
+  // The VBRI tag is located 32 bytes after the END of the frame header. Since
+  // the buffer starts at the frame header, the header size has to be added.
+  static const uint32_t OFFSET = 32 + FrameParser::FrameHeader::SIZE;
+  static const uint32_t FRAME_COUNT_OFFSET = OFFSET + 14;
+  static const uint32_t MIN_FRAME_SIZE = OFFSET + 26;
+
+  if (!aBeg || !aEnd || aBeg >= aEnd) {
+    return false;
+  }
+
+  const int64_t frameLen = aEnd - aBeg;
+  // VBRI have a fixed relative position, so let's check for it there.
+  // The length check guarantees both reads below stay inside the buffer.
+  if (frameLen > MIN_FRAME_SIZE &&
+      BigEndian::readUint32(aBeg + OFFSET) == TAG) {
+    mNumFrames = BigEndian::readUint32(aBeg + FRAME_COUNT_OFFSET);
+    mType = VBRI;
+    return true;
+  }
+  return false;
+}
+
+// Looks for a VBR header in [aBeg, aEnd): VBRI is tried first, Xing only if no
+// VBRI header was found. Returns whether either one was found.
+bool
+FrameParser::VBRHeader::Parse(const uint8_t* aBeg, const uint8_t* aEnd) {
+  if (ParseVBRI(aBeg, aEnd)) {
+    return true;
+  }
+  return ParseXing(aBeg, aEnd);
+}
+
+// FrameParser::Frame
+
+// Clears the frame by resetting its header parsing state.
+void
+FrameParser::Frame::Reset()
+{
+  mHeader.Reset();
+}
+
+// Returns the frame length in bytes computed from the header fields, or 0 when
+// the header is incomplete or has no usable sample rate.
+int32_t
+FrameParser::Frame::Length() const {
+  const bool usable = mHeader.IsValid() && mHeader.SampleRate();
+  if (!usable) {
+    return 0;
+  }
+
+  const float samplesDiv8 = mHeader.SamplesPerFrame() / 8.0f;
+  const int32_t paddingBytes = mHeader.Padding() * mHeader.SlotSize();
+  // The float result is truncated when stored into the integer.
+  const int32_t length = samplesDiv8 * mHeader.Bitrate() /
+                         mHeader.SampleRate() +
+                         paddingBytes;
+  return length;
+}
+
+// Forwards the byte to the header state machine and returns its result.
+bool
+FrameParser::Frame::ParseNext(uint8_t c) {
+  const bool headerComplete = mHeader.ParseNext(c);
+  return headerComplete;
+}
+
+// Returns the header of this frame.
+const FrameParser::FrameHeader&
+FrameParser::Frame::Header() const
+{
+  return mHeader;
+}
+
+// Lets the VBR header parser inspect [aBeg, aEnd) and returns whether it found
+// a VBR header.
+bool
+FrameParser::ParseVBRHeader(const uint8_t* aBeg, const uint8_t* aEnd) {
+  const bool found = mVBRHeader.Parse(aBeg, aEnd);
+  return found;
+}
+
+// ID3Parser
+
+// Constants
+namespace id3_header {
+// Lengths in bytes of the individual ID3 header fields.
+static const int ID_LEN = 3;
+static const int VERSION_LEN = 2;
+static const int FLAGS_LEN = 1;
+static const int SIZE_LEN = 4;
+
+// End positions (exclusive) of the fields within the raw header bytes; each
+// field starts where the previous one ends.
+static const int ID_END = ID_LEN;
+static const int VERSION_END = ID_END + VERSION_LEN;
+static const int FLAGS_END = VERSION_END + FLAGS_LEN;
+static const int SIZE_END = FLAGS_END + SIZE_LEN;
+
+// The identifier bytes every ID3 header has to start with.
+static const uint8_t ID[ID_LEN] = {'I', 'D', '3'};
+}
+
+// Scans [aBeg, aEnd) for an ID3 header. Returns the position of the header's
+// first byte once a complete header has been parsed, or aEnd if the range is
+// empty/invalid or no complete header was found.
+const uint8_t*
+ID3Parser::Parse(const uint8_t* aBeg, const uint8_t* aEnd) {
+  if (!aBeg || !aEnd || aBeg >= aEnd) {
+    return aEnd;
+  }
+
+  for (const uint8_t* cursor = aBeg; cursor < aEnd; ++cursor) {
+    if (mHeader.ParseNext(*cursor)) {
+      // cursor addresses the last header byte; step back to the first one.
+      return cursor - (ID3Header::SIZE - 1);
+    }
+  }
+  return aEnd;
+}
+
+// Discards the header parsed so far.
+void
+ID3Parser::Reset()
+{
+  mHeader.Reset();
+}
+
+// Returns the header in its current parsing state; callers should check its
+// validity.
+const ID3Parser::ID3Header&
+ID3Parser::Header() const
+{
+  return mHeader;
+}
+
+// ID3Parser::Header
+
+// Starts with an empty header: no size accumulated, position at first byte.
+ID3Parser::ID3Header::ID3Header()
+  : mSize(0)
+  , mPos(0)
+{
+}
+
+// Restarts header parsing: rewinds the byte position and clears the
+// accumulated size.
+void
+ID3Parser::ID3Header::Reset()
+{
+  mPos = 0;
+  mSize = 0;
+}
+
+// Returns the major version byte, the first byte after the identifier.
+uint8_t
+ID3Parser::ID3Header::MajorVersion() const {
+  const uint8_t major = mRaw[id3_header::ID_END];
+  return major;
+}
+
+// Returns the minor version byte, the second byte after the identifier.
+uint8_t
+ID3Parser::ID3Header::MinorVersion() const {
+  const uint8_t minor = mRaw[id3_header::ID_END + 1];
+  return minor;
+}
+
+// Returns the flags byte, located directly after the version bytes.
+uint8_t
+ID3Parser::ID3Header::Flags() const {
+  const int flagsPos = id3_header::FLAGS_END - id3_header::FLAGS_LEN;
+  return mRaw[flagsPos];
+}
+
+// Returns the size accumulated from the header's size bytes.
+uint32_t
+ID3Parser::ID3Header::Size() const
+{
+  return mSize;
+}
+
+// Feeds one byte to the header state machine. If the byte does not continue
+// the current sequence, the state is reset and the byte is retried as the
+// possible first byte of a new header. Returns whether a complete header has
+// been parsed.
+bool
+ID3Parser::ID3Header::ParseNext(uint8_t c) {
+  if (Update(c)) {
+    return IsValid();
+  }
+
+  // The byte broke the current sequence; it may still start a new one.
+  Reset();
+  const bool startsNewHeader = Update(c);
+  if (!startsNewHeader) {
+    Reset();
+  }
+  return IsValid();
+}
+
+// Returns whether the raw header byte at position aPos is acceptable for the
+// field it belongs to. A header that is already complete is always reported
+// as valid.
+bool
+ID3Parser::ID3Header::IsValid(int aPos) const {
+  if (IsValid()) {
+    return true;
+  }
+
+  const uint8_t byte = mRaw[aPos];
+
+  if (aPos < id3_header::ID_END) {
+    // Identifier bytes have to match 'I' 'D' '3'.
+    return byte == id3_header::ID[aPos];
+  }
+  if (aPos < id3_header::VERSION_END) {
+    // Version bytes may be anything but 0xFF.
+    return byte != 0xFF;
+  }
+  if (aPos < id3_header::FLAGS_END) {
+    // Any flags value is accepted.
+    return true;
+  }
+  if (aPos < id3_header::SIZE_END) {
+    // Size bytes must have their most significant bit cleared.
+    return byte < 0x80;
+  }
+  return true;
+}
+
+// Returns whether all SIZE header bytes have been accepted.
+bool
+ID3Parser::ID3Header::IsValid() const {
+  return !(mPos < SIZE);
+}
+
+// Consumes one byte: size-field bytes are folded into mSize (7 bits each), the
+// byte is stored if still inside the header, the position is advanced, and
+// the byte's validity at its position is returned.
+bool
+ID3Parser::ID3Header::Update(uint8_t c) {
+  const int sizeBegin = id3_header::SIZE_END - id3_header::SIZE_LEN;
+  const bool inSizeField = mPos >= sizeBegin && mPos < id3_header::SIZE_END;
+  if (inSizeField) {
+    mSize = (mSize << 7) | c;
+  }
+
+  // The position is advanced before the validity check runs, so the check
+  // already sees the incremented mPos.
+  const int pos = mPos++;
+  if (pos < SIZE) {
+    mRaw[pos] = c;
+  }
+  return IsValid(pos);
+}
+
+}  // namespace mp4_demuxer
new file mode 100644
--- /dev/null
+++ b/media/libstagefright/binding/include/mp4_demuxer/MP3TrackDemuxer.h
@@ -0,0 +1,380 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MP3_TRACK_DEMUXER_H_
+#define MP3_TRACK_DEMUXER_H_
+
+#include "mozilla/Attributes.h"
+#include "demuxer/TrackDemuxer.h"
+
+namespace mp4_demuxer {
+
+// ID3 header parser state machine used by FrameParser.
+// The header contains the following format (one byte per term):
+// 'I' 'D' '3' MajorVersion MinorVersion Flags Size1 Size2 Size3 Size4
+// For more details see http://id3.org/id3v2.3.0.
+class ID3Parser {
+public:
+  // Holds the ID3 header and its parsing state.
+  class ID3Header {
+  public:
+    // The header size is static, see class comment.
+    static const int SIZE = 10;
+
+    // Constructor.
+    ID3Header();
+
+    // Resets the state to allow for a new parsing session.
+    void Reset();
+
+    // The ID3 tags are versioned like this: ID3vMajorVersion.MinorVersion.
+    uint8_t MajorVersion() const;
+    uint8_t MinorVersion() const;
+
+    // The ID3 flags field.
+    uint8_t Flags() const;
+
+    // The size derived from the provided size fields.
+    uint32_t Size() const;
+
+    // Returns whether the parsed data is a valid ID3 header up to the given
+    // byte position.
+    bool IsValid(int aPos) const;
+
+    // Returns whether the parsed data is a complete and valid ID3 header.
+    bool IsValid() const;
+
+    // Parses the next provided byte.
+    // Returns whether a complete and valid header has been parsed, i.e. the
+    // same value as IsValid() after the byte was consumed.
+    bool ParseNext(uint8_t c);
+
+  private:
+    // Updates the parser state machine with the provided next byte.
+    // Returns whether the provided byte is a valid next byte in the sequence.
+    bool Update(uint8_t c);
+
+    // The currently parsed byte sequence.
+    uint8_t mRaw[SIZE];
+
+    // The derived size as provided by the size fields.
+    // The header size fields hold a 4 byte sequence with each MSB set to 0,
+    // these bits need to be ignored when deriving the actual size.
+    uint32_t mSize;
+
+    // The current byte position in the parsed sequence. Reset via Reset and
+    // incremented via Update.
+    int mPos;
+  };
+
+  // Returns the parsed ID3 header. Note: check for validity.
+  const ID3Header& Header() const;
+
+  // Parses the given buffer range [aBeg, aEnd) for a valid ID3 header.
+  // Returns the header begin position or aEnd if no valid header was found.
+  const uint8_t* Parse(const uint8_t* aBeg, const uint8_t* aEnd);
+
+  // Resets the state to allow for a new parsing session.
+  void Reset();
+
+private:
+  // The currently parsed ID3 header. Reset via Reset, updated via Parse.
+  ID3Header mHeader;
+};
+
+// MPEG audio frame parser.
+// The MPEG frame header has the following format (one bit per character):
+// 11111111 111VVLLC BBBBSSPR MMEETOHH
+// {   sync   } - 11 sync bits
+//   VV         - MPEG audio version ID (0->2.5, 1->reserved, 2->2, 3->1)
+//   LL         - Layer description (0->reserved, 1->III, 2->II, 3->I)
+//   C          - CRC protection bit (0->protected, 1->not protected)
+//   BBBB       - Bitrate index (see table in implementation)
+//   SS         - Sampling rate index (see table in implementation)
+//   P          - Padding bit (0->not padded, 1->padded by 1 slot size)
+//   R          - Private bit (ignored)
+//   MM         - Channel mode (0->stereo, 1->joint stereo, 2->dual channel,
+//                3->single channel)
+//   EE         - Mode extension for joint stereo (ignored)
+//   T          - Copyright (0->disabled, 1->enabled)
+//   O          - Original (0->copy, 1->original)
+//   HH         - Emphasis (0->none, 1->50/15 ms, 2->reserved, 3->CCIT J.17)
+class FrameParser {
+public:
+  // Holds the frame header and its parsing state.
+  class FrameHeader {
+  public:
+    // The header size is static, see class comments.
+    static const int SIZE = 4;
+
+    // Constructor.
+    FrameHeader();
+
+    // Raw field access, see class comments for details.
+    uint8_t Sync1() const;
+    uint8_t Sync2() const;
+    uint8_t RawVersion() const;
+    uint8_t RawLayer() const;
+    uint8_t RawProtection() const;
+    uint8_t RawBitrate() const;
+    uint8_t RawSampleRate() const;
+    uint8_t Padding() const;
+    uint8_t Private() const;
+    uint8_t RawChannelMode() const;
+
+    // Sampling rate frequency in Hz.
+    int32_t SampleRate() const;
+
+    // Number of audio channels.
+    int32_t Channels() const;
+
+    // Samples per frame, static depending on MPEG version and layer.
+    int32_t SamplesPerFrame() const;
+
+    // Slot size used for padding, static depending on MPEG layer.
+    int32_t SlotSize() const;
+
+    // Bitrate in bits per second (the implementation scales the kbps table
+    // value by 1000), can vary between frames.
+    int32_t Bitrate() const;
+
+    // MPEG layer (0->invalid, 1->I, 2->II, 3->III).
+    int32_t Layer() const;
+
+    // Returns whether the parsed data is a valid frame header up to the given
+    // byte position.
+    bool IsValid(const int aPos) const;
+
+    // Returns whether the parsed data is a complete and valid frame header.
+    bool IsValid() const;
+
+    // Resets the state to allow for a new parsing session.
+    void Reset();
+
+    // Parses the next provided byte.
+    // Returns whether a complete and valid header has been parsed, i.e. the
+    // same value as IsValid() after the byte was consumed.
+    bool ParseNext(const uint8_t c);
+
+  private:
+    // Updates the parser state machine with the provided next byte.
+    // Returns whether the provided byte is a valid next byte in the sequence.
+    bool Update(const uint8_t c);
+
+    // The currently parsed byte sequence.
+    uint8_t mRaw[SIZE];
+
+    // The current byte position in the parsed sequence. Reset via Reset and
+    // incremented via Update.
+    int mPos;
+  };
+
+  // VBR frames may contain Xing or VBRI headers for additional info, we use
+  // this class to parse them and access this info.
+  class VBRHeader {
+  public:
+    enum VBRHeaderType {
+      NONE,
+      XING,
+      VBRI
+    };
+
+    // Constructor.
+    VBRHeader();
+
+    // Returns the parsed VBR header type, or NONE if no valid header found.
+    VBRHeaderType Type() const;
+
+    // Returns the total number of frames expected in the stream/file, or -1
+    // if no frame count has been parsed.
+    int64_t NumFrames() const;
+
+    // Parses given buffer [aBeg, aEnd) for a valid VBR header.
+    // Returns whether a valid VBR header was found in the range.
+    bool Parse(const uint8_t* aBeg, const uint8_t* aEnd);
+
+  private:
+    // Parses given buffer [aBeg, aEnd) for a valid Xing header.
+    // Returns whether a valid Xing header was found in the range.
+    bool ParseXing(const uint8_t* aBeg, const uint8_t* aEnd);
+
+    // Parses given buffer [aBeg, aEnd) for a valid VBRI header.
+    // Returns whether a valid VBRI header was found in the range.
+    bool ParseVBRI(const uint8_t* aBeg, const uint8_t* aEnd);
+
+    // The total number of frames expected as parsed from a VBR header.
+    int64_t mNumFrames;
+
+    // The detected VBR header type.
+    VBRHeaderType mType;
+  };
+
+  // Frame meta container used to parse and hold a frame header and side info.
+  class Frame {
+  public:
+    // Returns the length of the whole frame in bytes as derived from the
+    // header fields, or 0 if no valid header is available. The demuxer reads
+    // this many bytes starting at the frame header.
+    int32_t Length() const;
+
+    // Returns the parsed frame header.
+    const FrameHeader& Header() const;
+
+    // Resets the frame header and data.
+    void Reset();
+
+    // Parses the next provided byte.
+    // Returns whether a complete and valid frame header has been parsed.
+    bool ParseNext(uint8_t c);
+
+  private:
+    // The currently parsed frame header.
+    FrameHeader mHeader;
+  };
+
+  // Constructor.
+  FrameParser();
+
+  // Returns the currently parsed frame. Reset via Reset or FinishParsing.
+  const Frame& CurrentFrame() const;
+
+#ifdef ENABLE_TESTS
+  // Returns the previously parsed frame. Reset via Reset.
+  const Frame& PrevFrame() const;
+#endif
+
+  // Returns the first parsed frame. Reset via Reset.
+  const Frame& FirstFrame() const;
+
+  // Returns the parsed ID3 header. Note: check for validity.
+  const ID3Parser::ID3Header& ID3Header() const;
+
+  // Returns the parsed VBR header info. Note: check for validity by type.
+  const VBRHeader& VBRInfo() const;
+
+  // Resets the parser. Don't use between frames as first frame data is reset.
+  void Reset();
+
+  // Clear the last parsed frame to allow for next frame parsing, i.e.:
+  // - sets PrevFrame to CurrentFrame
+  // - resets the CurrentFrame
+  // - resets ID3Header if no valid header was parsed yet
+  void FinishParsing();
+
+  // Parses given buffer [aBeg, aEnd) for a valid frame header.
+  // Returns begin of frame header if a frame header was found or aEnd otherwise.
+  const uint8_t* Parse(const uint8_t* aBeg, const uint8_t* aEnd);
+
+  // Parses given buffer [aBeg, aEnd) for a valid VBR header.
+  // Returns whether a valid VBR header was found.
+  bool ParseVBRHeader(const uint8_t* aBeg, const uint8_t* aEnd);
+
+private:
+  // ID3 header parser.
+  ID3Parser mID3Parser;
+
+  // VBR header parser.
+  VBRHeader mVBRHeader;
+
+  // We keep the first parsed frame around for static info access, the
+  // previously parsed frame for debugging and the currently parsed frame.
+  Frame mFirstFrame;
+  Frame mFrame;
+#ifdef ENABLE_TESTS
+  Frame mPrevFrame;
+#endif
+};
+
+// The MP3 demuxer used to extract MPEG frames and side information out of
+// MPEG streams.
+class MP3Demuxer : public mozilla::TrackDemuxer {
+public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MP3Demuxer);
+
+  // Constructor, expecting a valid stream source.
+  explicit MP3Demuxer(Stream* aSource);
+
+  // Initializes the demuxer by reading the expected stream length, if
+  // available. Optional, but recommended.
+  // Currently always returns true.
+  bool Init();
+
+  // Returns the total stream length if known, -1 otherwise.
+  int64_t StreamLength() const;
+
+  // Returns the estimated stream duration in microseconds, or -1 if no
+  // estimation available.
+  int64_t Duration() const;
+
+  // Returns the estimated duration up to the given frames number in microseconds,
+  // or -1 if no estimation available.
+  int64_t Duration(int64_t aNumFrames) const;
+
+#ifdef ENABLE_TESTS
+  // Returns the previously parsed frame as recorded by the frame parser.
+  const FrameParser::Frame& LastFrame() const;
+#endif
+  // Accessors for the ID3 and VBR header info collected by the frame parser.
+  const ID3Parser::ID3Header& ID3Header() const;
+  const FrameParser::VBRHeader& VBRInfo() const;
+
+  // TrackDemuxer interface.
+  virtual void Seek(Microseconds aTime) override;
+  virtual already_AddRefed<mozilla::MediaRawData> DemuxSample() override;
+  virtual Microseconds GetNextKeyframeTime() override;
+
+  // Fills aConfig with the sample rate and channel count taken from the last
+  // parsed frame, a fixed bit depth of 16 and the "audio/mpeg" MIME type.
+  void UpdateConfig(mozilla::AudioInfo& aConfig) {
+    aConfig.mRate = mSamplesPerSecond;
+    aConfig.mChannels = mChannels;
+    aConfig.mBitDepth = 16;
+    aConfig.mMimeType = "audio/mpeg";
+  }
+
+private:
+  // Destructor.
+  ~MP3Demuxer() {}
+
+  // Fast approximate seeking to given time.
+  void FastSeek(Microseconds aTime);
+
+  // Slow, more accurate approximate seeking to given time.
+  void SlowSeek(Microseconds aTime);
+
+  // Returns the next MPEG frame, if available.
+  already_AddRefed<mozilla::MediaRawData> GetNext();
+
+  // Reads aSize bytes into aBuffer from the source starting at aOffset.
+  // Returns the actual size read.
+  // NOTE(review): aOffset is 32 bits wide while mOffset is 64 bits, so
+  // offsets beyond 4 GiB would be truncated when passed in - confirm whether
+  // such streams need to be supported.
+  uint32_t Read(uint8_t* aBuffer, uint32_t aOffset, uint32_t aSize);
+
+  // Returns the average frame length derived from the previously parsed frames.
+  double AverageFrameLength() const;
+
+  // The (hopefully) MPEG source stream.
+  nsRefPtr<Stream> mSource;
+
+  // MPEG frame parser used to detect frames and extract side info.
+  FrameParser mParser;
+
+  // Current byte offset in the source stream.
+  uint64_t mOffset;
+
+  // Byte offset of the begin of the first frame, or 0 if none parsed yet.
+  uint64_t mFirstFrameOffset;
+
+  // Total expected stream length, if available, or -1 otherwise.
+  int64_t mStreamLength;
+
+  // Total parsed frames.
+  int64_t mNumParsedFrames;
+
+  // Index of the next frame to be returned; incremented per parsed frame and
+  // reassigned when seeking.
+  int64_t mFrameIndex;
+
+  // Sum of parsed frames' lengths in bytes.
+  int64_t mTotalFrameLen;
+
+  // Samples per frame metric derived from frame headers or 0 if none available.
+  int32_t mSamplesPerFrame;
+
+  // Samples per second metric derived from frame headers or 0 if none available.
+  int32_t mSamplesPerSecond;
+
+  // Channel count derived from frame headers or 0 if none available.
+  int32_t mChannels;
+};
+
+}
+
+#endif
--- a/media/libstagefright/moz.build
+++ b/media/libstagefright/moz.build
@@ -55,16 +55,17 @@ EXPORTS.mp4_demuxer += [
     'binding/include/mp4_demuxer/BufferStream.h',
     'binding/include/mp4_demuxer/ByteReader.h',
     'binding/include/mp4_demuxer/ByteWriter.h',
     'binding/include/mp4_demuxer/DecoderData.h',
     'binding/include/mp4_demuxer/H264.h',
     'binding/include/mp4_demuxer/Index.h',
     'binding/include/mp4_demuxer/Interval.h',
     'binding/include/mp4_demuxer/MoofParser.h',
+    'binding/include/mp4_demuxer/MP3TrackDemuxer.h',
     'binding/include/mp4_demuxer/mp4_demuxer.h',
     'binding/include/mp4_demuxer/MP4Metadata.h',
     'binding/include/mp4_demuxer/MP4TrackDemuxer.h',
     'binding/include/mp4_demuxer/ResourceStream.h',
     'binding/include/mp4_demuxer/SinfParser.h',
     'binding/include/mp4_demuxer/Stream.h',
 ]
 
@@ -85,16 +86,17 @@ UNIFIED_SOURCES += [
     'binding/Adts.cpp',
     'binding/AnnexB.cpp',
     'binding/Box.cpp',
     'binding/BufferStream.cpp',
     'binding/DecoderData.cpp',
     'binding/H264.cpp',
     'binding/Index.cpp',
     'binding/MoofParser.cpp',
+    'binding/MP3TrackDemuxer.cpp',
     'binding/mp4_demuxer.cpp',
     'binding/MP4Metadata.cpp',
     'binding/MP4TrackDemuxer.cpp',
     'binding/ResourceStream.cpp',
     'binding/SinfParser.cpp',
     'frameworks/av/media/libstagefright/DataSource.cpp',
     'frameworks/av/media/libstagefright/ESDS.cpp',
     'frameworks/av/media/libstagefright/foundation/AAtomizer.cpp',