Bug 1264622: [MP4] Resync stagefright's updateAudioTrackInfoFromESDS_MPEG4Audio with upstream. r=kentuckyfriedtakahe
authorJean-Yves Avenard <jyavenard@mozilla.com>
Thu, 14 Apr 2016 23:32:40 +1000
changeset 331240 66e2cb10760096a9b9f1b1de813a7e3865cb5630
parent 331239 f84123a66e198c7c6f17aab04e65e28bdc92896e
child 331241 916d2a778a48a2da47051c4ccd1c7139a14ed8fd
push id6048
push userkmoir@mozilla.com
push dateMon, 06 Jun 2016 19:02:08 +0000
treeherdermozilla-beta@46d72a56c57d [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskentuckyfriedtakahe
bugs1264622
milestone48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1264622: [MP4] Resync stagefright's updateAudioTrackInfoFromESDS_MPEG4Audio with upstream. r=kentuckyfriedtakahe This adds support for parsing AOT audio specific config (which contains extended channels layout). MozReview-Commit-ID: 7yNGBEAI7sv
media/libstagefright/frameworks/av/include/media/stagefright/MetaData.h
media/libstagefright/frameworks/av/media/libstagefright/MPEG4Extractor.cpp
--- a/media/libstagefright/frameworks/av/include/media/stagefright/MetaData.h
+++ b/media/libstagefright/frameworks/av/include/media/stagefright/MetaData.h
@@ -132,16 +132,17 @@ enum {
     kKeyMediaLanguage     = 'lang',  // cstring
 
     // To store the timed text format data
     kKeyTextFormatData    = 'text',  // raw data
 
     kKeyRequiresSecureBuffers = 'secu',  // bool (int32_t)
 
     kKeyIsADTS            = 'adts',  // bool (int32_t)
+    kKeyAACAOT            = 'aaot',  // int32_t
 
     // If a MediaBuffer's data represents (at least partially) encrypted
     // data, the following fields aid in decryption.
     // The data can be thought of as pairs of plain and encrypted data
     // fragments, i.e. plain and encrypted data alternate.
     // The first fragment is by convention plain data (if that's not the
     // case, simply specify plain fragment size of 0).
     // kKeyEncryptedSizes and kKeyPlainSizes each map to an array of
--- a/media/libstagefright/frameworks/av/media/libstagefright/MPEG4Extractor.cpp
+++ b/media/libstagefright/frameworks/av/media/libstagefright/MPEG4Extractor.cpp
@@ -2457,30 +2457,82 @@ status_t MPEG4Extractor::verifyTrack(Tra
         if (keysize > 16) {
             return ERROR_MALFORMED;
         }
     }
 
     return OK;
 }
 
+typedef enum {
+    //AOT_NONE             = -1,
+    //AOT_NULL_OBJECT      = 0,
+    //AOT_AAC_MAIN         = 1, /**< Main profile                              */
+    AOT_AAC_LC           = 2,   /**< Low Complexity object                     */
+    //AOT_AAC_SSR          = 3,
+    //AOT_AAC_LTP          = 4,
+    AOT_SBR              = 5,
+    //AOT_AAC_SCAL         = 6,
+    //AOT_TWIN_VQ          = 7,
+    //AOT_CELP             = 8,
+    //AOT_HVXC             = 9,
+    //AOT_RSVD_10          = 10, /**< (reserved)                                */
+    //AOT_RSVD_11          = 11, /**< (reserved)                                */
+    //AOT_TTSI             = 12, /**< TTSI Object                               */
+    //AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object                     */
+    //AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object                */
+    //AOT_GEN_MIDI         = 15, /**< General MIDI object                       */
+    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
+    AOT_ER_AAC_LC        = 17,   /**< Error Resilient(ER) AAC Low Complexity    */
+    //AOT_RSVD_18          = 18, /**< (reserved)                                */
+    //AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object        */
+    AOT_ER_AAC_SCAL      = 20,   /**< Error Resilient(ER) AAC Scalable object   */
+    //AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object         */
+    AOT_ER_BSAC          = 22,   /**< Error Resilient(ER) BSAC object           */
+    AOT_ER_AAC_LD        = 23,   /**< Error Resilient(ER) AAC LowDelay object   */
+    //AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object           */
+    //AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object           */
+    //AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object           */
+    //AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object     */
+    //AOT_RSVD_28          = 28, /**< might become SSC                          */
+    AOT_PS               = 29,   /**< PS, Parametric Stereo (includes SBR)      */
+    //AOT_MPEGS            = 30, /**< MPEG Surround                             */
+
+    AOT_ESCAPE           = 31,   /**< Signal AOT uses more than 5 bits          */
+
+    //AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4                        */
+    //AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4                        */
+    //AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4                        */
+    //AOT_RSVD_35          = 35, /**< might become DST                          */
+    //AOT_RSVD_36          = 36, /**< might become ALS                          */
+    //AOT_AAC_SLS          = 37, /**< AAC + SLS                                 */
+    //AOT_SLS              = 38, /**< SLS                                       */
+    //AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay                    */
+
+    //AOT_USAC             = 42, /**< USAC                                      */
+    //AOT_SAOC             = 43, /**< SAOC                                      */
+    //AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround                   */
+
+    //AOT_RSVD50           = 50,  /**< Interim AOT for Rsvd50                   */
+} AUDIO_OBJECT_TYPE;
+
 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
         const void *esds_data, size_t esds_size) {
     ESDS esds(esds_data, esds_size);
 
     uint8_t objectTypeIndication;
     if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
         return ERROR_MALFORMED;
     }
 
     if (objectTypeIndication == 0xe1) {
         // This isn't MPEG4 audio at all, it's QCELP 14k...
-        if (!mLastTrack) {
-          return ERROR_MALFORMED;
-        }
+        if (mLastTrack == NULL)
+            return ERROR_MALFORMED;
+
         mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
         return OK;
     }
 
     if (objectTypeIndication  == 0x6b || objectTypeIndication  == 0x69) {
         // The media subtype is MP3 audio
         if (!mLastTrack) {
           return ERROR_MALFORMED;
@@ -2491,111 +2543,253 @@ status_t MPEG4Extractor::updateAudioTrac
     const uint8_t *csd;
     size_t csd_size;
     if (esds.getCodecSpecificInfo(
                 (const void **)&csd, &csd_size) != OK) {
         return ERROR_MALFORMED;
     }
 
 #if 0
-    printf("ESD of size %d\n", csd_size);
-    hexdump(csd, csd_size);
+    if (kUseHexDump) {
+        printf("ESD of size %zu\n", csd_size);
+        hexdump(csd, csd_size);
+    }
 #endif
 
     if (csd_size == 0) {
         // There's no further information, i.e. no codec specific data
         // Let's assume that the information provided in the mpeg4 headers
         // is accurate and hope for the best.
 
         return OK;
     }
 
     if (csd_size < 2) {
         return ERROR_MALFORMED;
     }
 
+    static uint32_t kSamplingRate[] = {
+        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
+        16000, 12000, 11025, 8000, 7350
+    };
+
     ABitReader br(csd, csd_size);
     if (br.numBitsLeft() < 5) {
         return ERROR_MALFORMED;
     }
     uint32_t objectType = br.getBits(5);
 
     if (objectType == 31) {  // AAC-ELD => additional 6 bits
         if (br.numBitsLeft() < 6) {
             return ERROR_MALFORMED;
         }
         objectType = 32 + br.getBits(6);
     }
 
+    if (mLastTrack == NULL)
+        return ERROR_MALFORMED;
+
     if (objectType >= 1 && objectType <= 4) {
-        if (!mLastTrack) {
-          return ERROR_MALFORMED;
-        }
         mLastTrack->meta->setInt32(kKeyAACProfile, objectType);
     }
 
+    //keep AOT type
+    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
+
     if (br.numBitsLeft() < 4) {
         return ERROR_MALFORMED;
     }
     uint32_t freqIndex = br.getBits(4);
 
     int32_t sampleRate = 0;
     int32_t numChannels = 0;
     if (freqIndex == 15) {
-        if (csd_size < 5) {
-            return ERROR_MALFORMED;
-        }
-        if (br.numBitsLeft() < 24 + 4) {
-            return ERROR_MALFORMED;
-        }
+        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
         sampleRate = br.getBits(24);
         numChannels = br.getBits(4);
     } else {
-        if (br.numBitsLeft() < 4) {
+        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
+        numChannels = br.getBits(4);
+
+        if (freqIndex == 13 || freqIndex == 14) {
             return ERROR_MALFORMED;
         }
-        numChannels = br.getBits(4);
-        if (objectType == 5) {
-            // SBR specific config per 14496-3 table 1.13
-            if (br.numBitsLeft() < 4) {
+
+        sampleRate = kSamplingRate[freqIndex];
+    }
+
+    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
+        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
+        uint32_t extFreqIndex = br.getBits(4);
+        int32_t extSampleRate;
+        if (extFreqIndex == 15) {
+            if (csd_size < 8) {
+                return ERROR_MALFORMED;
+            }
+            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
+            extSampleRate = br.getBits(24);
+        } else {
+            if (extFreqIndex == 13 || extFreqIndex == 14) {
                 return ERROR_MALFORMED;
             }
-            freqIndex = br.getBits(4);
-            if (freqIndex == 15) {
-                if (csd_size < 8) {
+            extSampleRate = kSamplingRate[extFreqIndex];
+        }
+        //TODO: save the extension sampling rate value in meta data =>
+        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
+    }
+
+    switch (numChannels) {
+        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
+        case 0:
+        case 1:// FC
+        case 2:// FL FR
+        case 3:// FC, FL FR
+        case 4:// FC, FL FR, RC
+        case 5:// FC, FL FR, SL SR
+        case 6:// FC, FL FR, SL SR, LFE
+            //numChannels already contains the right value
+            break;
+        case 11:// FC, FL FR, SL SR, RC, LFE
+            numChannels = 7;
+            break;
+        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
+        case 12:// FC, FL  FR,  SL SR, RL RR, LFE
+        case 14:// FC, FL  FR,  SL SR, LFE, FHL FHR
+            numChannels = 8;
+            break;
+        default:
+            return ERROR_UNSUPPORTED;
+    }
+
+    {
+        if (objectType == AOT_SBR || objectType == AOT_PS) {
+            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
+            objectType = br.getBits(5);
+
+            if (objectType == AOT_ESCAPE) {
+                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
+                objectType = 32 + br.getBits(6);
+            }
+        }
+        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
+                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
+                objectType == AOT_ER_BSAC) {
+            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
+            const int32_t frameLengthFlag = br.getBits(1);
+
+            const int32_t dependsOnCoreCoder = br.getBits(1);
+
+            if (dependsOnCoreCoder ) {
+                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
+                const int32_t coreCoderDelay = br.getBits(14);
+            }
+
+            int32_t extensionFlag = -1;
+            if (br.numBitsLeft() > 0) {
+                extensionFlag = br.getBits(1);
+            } else {
+                switch (objectType) {
+                // 14496-3 4.5.1.1 extensionFlag
+                case AOT_AAC_LC:
+                    extensionFlag = 0;
+                    break;
+                case AOT_ER_AAC_LC:
+                case AOT_ER_AAC_SCAL:
+                case AOT_ER_BSAC:
+                case AOT_ER_AAC_LD:
+                    extensionFlag = 1;
+                    break;
+                default:
+                    return ERROR_MALFORMED;
+                    break;
+                }
+                ALOGW("csd missing extension flag; assuming %d for object type %u.",
+                        extensionFlag, objectType);
+            }
+
+            if (numChannels == 0) {
+                int32_t channelsEffectiveNum = 0;
+                int32_t channelsNum = 0;
+                if (br.numBitsLeft() < 32) {
                     return ERROR_MALFORMED;
                 }
-                if (br.numBitsLeft() < 24) {
-                    return ERROR_MALFORMED;
+                const int32_t ElementInstanceTag = br.getBits(4);
+                const int32_t Profile = br.getBits(2);
+                const int32_t SamplingFrequencyIndex = br.getBits(4);
+                const int32_t NumFrontChannelElements = br.getBits(4);
+                const int32_t NumSideChannelElements = br.getBits(4);
+                const int32_t NumBackChannelElements = br.getBits(4);
+                const int32_t NumLfeChannelElements = br.getBits(2);
+                const int32_t NumAssocDataElements = br.getBits(3);
+                const int32_t NumValidCcElements = br.getBits(4);
+
+                const int32_t MonoMixdownPresent = br.getBits(1);
+
+                if (MonoMixdownPresent != 0) {
+                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
+                    const int32_t MonoMixdownElementNumber = br.getBits(4);
+                }
+
+                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
+                const int32_t StereoMixdownPresent = br.getBits(1);
+                if (StereoMixdownPresent != 0) {
+                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
+                    const int32_t StereoMixdownElementNumber = br.getBits(4);
+                }
+
+                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
+                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
+                if (MatrixMixdownIndexPresent != 0) {
+                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
+                    const int32_t MatrixMixdownIndex = br.getBits(2);
+                    const int32_t PseudoSurroundEnable = br.getBits(1);
                 }
-                sampleRate = br.getBits(24);
+
+                int i;
+                for (i=0; i < NumFrontChannelElements; i++) {
+                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
+                    const int32_t FrontElementIsCpe = br.getBits(1);
+                    const int32_t FrontElementTagSelect = br.getBits(4);
+                    channelsNum += FrontElementIsCpe ? 2 : 1;
+                }
+
+                for (i=0; i < NumSideChannelElements; i++) {
+                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
+                    const int32_t SideElementIsCpe = br.getBits(1);
+                    const int32_t SideElementTagSelect = br.getBits(4);
+                    channelsNum += SideElementIsCpe ? 2 : 1;
+                }
+
+                for (i=0; i < NumBackChannelElements; i++) {
+                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
+                    const int32_t BackElementIsCpe = br.getBits(1);
+                    const int32_t BackElementTagSelect = br.getBits(4);
+                    channelsNum += BackElementIsCpe ? 2 : 1;
+                }
+                channelsEffectiveNum = channelsNum;
+
+                for (i=0; i < NumLfeChannelElements; i++) {
+                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
+                    const int32_t LfeElementTagSelect = br.getBits(4);
+                    channelsNum += 1;
+                }
+                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
+                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
+                numChannels = channelsNum;
             }
         }
-
-        if (sampleRate == 0) {
-            static uint32_t kSamplingRate[] = {
-                96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
-                16000, 12000, 11025, 8000, 7350
-            };
-
-            if (freqIndex == 13 || freqIndex == 14) {
-                return ERROR_MALFORMED;
-            }
-
-            sampleRate = kSamplingRate[freqIndex];
-        }
     }
 
     if (numChannels == 0) {
         return ERROR_UNSUPPORTED;
     }
 
-    if (!mLastTrack) {
-      return ERROR_MALFORMED;
-    }
+    if (mLastTrack == NULL)
+        return ERROR_MALFORMED;
+
     int32_t prevSampleRate;
     CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
 
     if (prevSampleRate != sampleRate) {
         ALOGV("mpeg4 audio sample rate different from previous setting. "
              "was: %d, now: %d", prevSampleRate, sampleRate);
     }