From 56997121c5031598fbbba7b7c53980b7fd529c2d Mon Sep 17 00:00:00 2001 From: Marco Nelissen Date: Tue, 28 Aug 2012 15:09:49 -0700 Subject: Fragmented mp4 extractor Still experimental. Set property "media.stagefright.use-fragmp4" to true to enable. Change-Id: I210b9c5b5164b5c5eefc31309845ee881ac7db8e --- include/media/stagefright/Utils.h | 2 + .../nuplayer/mp4/MP4Source.cpp | 4 + media/libstagefright/Android.mk | 1 + media/libstagefright/DRMExtractor.cpp | 5 - media/libstagefright/DataSource.cpp | 16 +- media/libstagefright/FragmentedMP4Extractor.cpp | 460 +++++++++++++++++++++ media/libstagefright/MPEG4Extractor.cpp | 3 +- media/libstagefright/MediaExtractor.cpp | 8 +- media/libstagefright/MetaData.cpp | 8 + media/libstagefright/Utils.cpp | 191 +++++++++ .../include/FragmentedMP4Extractor.h | 70 ++++ media/libstagefright/include/FragmentedMP4Parser.h | 23 +- media/libstagefright/mp4/FragmentedMP4Parser.cpp | 364 ++++++++++++++-- 13 files changed, 1108 insertions(+), 47 deletions(-) create mode 100644 media/libstagefright/FragmentedMP4Extractor.cpp create mode 100644 media/libstagefright/include/FragmentedMP4Extractor.h diff --git a/include/media/stagefright/Utils.h b/include/media/stagefright/Utils.h index d87902e7..8213af96 100644 --- a/include/media/stagefright/Utils.h +++ b/include/media/stagefright/Utils.h @@ -42,6 +42,8 @@ struct MetaData; struct AMessage; status_t convertMetaDataToMessage( const sp &meta, sp *format); +void convertMessageToMetaData( + const sp &format, sp &meta); } // namespace android diff --git a/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp b/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp index c80d13fd..ffb3a656 100644 --- a/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp +++ b/media/libmediaplayerservice/nuplayer/mp4/MP4Source.cpp @@ -93,6 +93,10 @@ struct StreamSource : public FragmentedMP4Parser::Source { return total; } + bool isSeekable() { + return false; + } + private: sp mListener; off64_t mPosition; diff 
--git a/media/libstagefright/Android.mk b/media/libstagefright/Android.mk index 1522e75c..f40982e3 100644 --- a/media/libstagefright/Android.mk +++ b/media/libstagefright/Android.mk @@ -19,6 +19,7 @@ LOCAL_SRC_FILES:= \ ESDS.cpp \ FileSource.cpp \ FLACExtractor.cpp \ + FragmentedMP4Extractor.cpp \ HTTPBase.cpp \ JPEGSource.cpp \ MP3Extractor.cpp \ diff --git a/media/libstagefright/DRMExtractor.cpp b/media/libstagefright/DRMExtractor.cpp index 524c3aad..63cb4301 100644 --- a/media/libstagefright/DRMExtractor.cpp +++ b/media/libstagefright/DRMExtractor.cpp @@ -15,11 +15,6 @@ */ #include "include/DRMExtractor.h" -#include "include/AMRExtractor.h" -#include "include/MP3Extractor.h" -#include "include/MPEG4Extractor.h" -#include "include/WAVExtractor.h" -#include "include/OggExtractor.h" #include #include diff --git a/media/libstagefright/DataSource.cpp b/media/libstagefright/DataSource.cpp index 1de808e4..9d0eea26 100644 --- a/media/libstagefright/DataSource.cpp +++ b/media/libstagefright/DataSource.cpp @@ -20,17 +20,18 @@ #include "include/chromium_http_stub.h" #endif +#include "include/AACExtractor.h" +#include "include/DRMExtractor.h" +#include "include/FLACExtractor.h" +#include "include/FragmentedMP4Extractor.h" +#include "include/HTTPBase.h" #include "include/MP3Extractor.h" -#include "include/MPEG4Extractor.h" -#include "include/WAVExtractor.h" -#include "include/OggExtractor.h" #include "include/MPEG2PSExtractor.h" #include "include/MPEG2TSExtractor.h" +#include "include/MPEG4Extractor.h" #include "include/NuCachedSource2.h" -#include "include/HTTPBase.h" -#include "include/DRMExtractor.h" -#include "include/FLACExtractor.h" -#include "include/AACExtractor.h" +#include "include/OggExtractor.h" +#include "include/WAVExtractor.h" #include "include/WVMExtractor.h" #include "matroska/MatroskaExtractor.h" @@ -110,6 +111,7 @@ void DataSource::RegisterSniffer(SnifferFunc func) { // static void DataSource::RegisterDefaultSniffers() { RegisterSniffer(SniffMPEG4); + 
RegisterSniffer(SniffFragmentedMP4); RegisterSniffer(SniffMatroska); RegisterSniffer(SniffOgg); RegisterSniffer(SniffWAV); diff --git a/media/libstagefright/FragmentedMP4Extractor.cpp b/media/libstagefright/FragmentedMP4Extractor.cpp new file mode 100644 index 00000000..82712efd --- /dev/null +++ b/media/libstagefright/FragmentedMP4Extractor.cpp @@ -0,0 +1,460 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//#define LOG_NDEBUG 0 +#define LOG_TAG "FragmentedMP4Extractor" +#include + +#include "include/FragmentedMP4Extractor.h" +#include "include/SampleTable.h" +#include "include/ESDS.h" + +#include + +#include +#include +#include +#include + +#include // for property_get + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace android { + +class FragmentedMPEG4Source : public MediaSource { +public: + // Caller retains ownership of the Parser + FragmentedMPEG4Source(bool audio, + const sp &format, + const sp &parser, + const sp &extractor); + + virtual status_t start(MetaData *params = NULL); + virtual status_t stop(); + + virtual sp getFormat(); + + virtual status_t read( + MediaBuffer **buffer, const ReadOptions *options = NULL); + +protected: + virtual ~FragmentedMPEG4Source(); + +private: + Mutex mLock; + + sp mFormat; + sp mParser; + sp mExtractor; + bool mIsAudioTrack; + uint32_t mCurrentSampleIndex; + + 
bool mIsAVC; + size_t mNALLengthSize; + + bool mStarted; + + MediaBufferGroup *mGroup; + + bool mWantsNALFragments; + + uint8_t *mSrcBuffer; + + FragmentedMPEG4Source(const FragmentedMPEG4Source &); + FragmentedMPEG4Source &operator=(const FragmentedMPEG4Source &); +}; + +// Starts the parser on its own looper thread, then blocks on synchronous getFormat() calls to learn which tracks exist. +FragmentedMP4Extractor::FragmentedMP4Extractor(const sp &source) + : mLooper(new ALooper), + mParser(new FragmentedMP4Parser()), + mDataSource(source), + mInitCheck(NO_INIT), + mFileMetaData(new MetaData) { + ALOGV("FragmentedMP4Extractor"); + mLooper->registerHandler(mParser); + mLooper->start(false /* runOnCallingThread */); + mParser->start(mDataSource); + + bool hasVideo = mParser->getFormat(false /* audio */, true /* synchronous */) != NULL; + bool hasAudio = mParser->getFormat(true /* audio */, true /* synchronous */) != NULL; + + if (hasVideo) { + mFileMetaData->setCString( + kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4); + } else if (hasAudio) { + mFileMetaData->setCString(kKeyMIMEType, "audio/mp4"); + } else { + ALOGE("no audio and no video, no idea what file type this is"); + } + // tracks are numbered such that video track is first, audio track is second + if (hasAudio && hasVideo) { + mTrackCount = 2; + mAudioTrackIndex = 1; + } else if (hasAudio) { + mTrackCount = 1; + mAudioTrackIndex = 0; + } else if (hasVideo) { + mTrackCount = 1; + mAudioTrackIndex = -1; // no audio track; wraps in the size_t member and never equals a valid index + } else { + mTrackCount = 0; + mAudioTrackIndex = -1; + } + // log only here: mTrackCount is not assigned (and so not readable) before this point + ALOGV("number of tracks: %zu", countTracks()); +} + +FragmentedMP4Extractor::~FragmentedMP4Extractor() { + ALOGV("~FragmentedMP4Extractor"); + mLooper->stop(); +} + +uint32_t FragmentedMP4Extractor::flags() const { + return CAN_PAUSE | + (mParser->isSeekable() ?
(CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0); +} + +sp FragmentedMP4Extractor::getMetaData() { + return mFileMetaData; +} + +size_t FragmentedMP4Extractor::countTracks() { + return mTrackCount; +} + + +sp FragmentedMP4Extractor::getTrackMetaData( + size_t index, uint32_t flags) { + if (index >= countTracks()) { + return NULL; + } + + sp msg = mParser->getFormat(index == mAudioTrackIndex, true /* synchronous */); + + if (msg == NULL) { + ALOGV("got null format for track %zu", index); + return NULL; + } + + sp meta = new MetaData(); + convertMessageToMetaData(msg, meta); + return meta; +} + +static void MakeFourCCString(uint32_t x, char *s) { + s[0] = x >> 24; + s[1] = (x >> 16) & 0xff; + s[2] = (x >> 8) & 0xff; + s[3] = x & 0xff; + s[4] = '\0'; +} + +sp FragmentedMP4Extractor::getTrack(size_t index) { + if (index >= countTracks()) { + return NULL; + } + return new FragmentedMPEG4Source(index == mAudioTrackIndex, getTrackMetaData(index, 0), mParser, this); +} + + +//////////////////////////////////////////////////////////////////////////////// + +FragmentedMPEG4Source::FragmentedMPEG4Source( + bool audio, + const sp &format, + const sp &parser, + const sp &extractor) + : mFormat(format), + mParser(parser), + mExtractor(extractor), + mIsAudioTrack(audio), mCurrentSampleIndex(0), mIsAVC(false), mNALLengthSize(0), // these three were previously left uninitialized + mStarted(false), + mGroup(NULL), + mWantsNALFragments(false), + mSrcBuffer(NULL) { +} + +FragmentedMPEG4Source::~FragmentedMPEG4Source() { + if (mStarted) { + stop(); + } +} + +status_t FragmentedMPEG4Source::start(MetaData *params) { + Mutex::Autolock autoLock(mLock); + + CHECK(!mStarted); + + int32_t val; + if (params && params->findInt32(kKeyWantsNALFragments, &val) + && val != 0) { + mWantsNALFragments = true; + } else { + mWantsNALFragments = false; + } + ALOGV("caller wants NAL fragments: %s", mWantsNALFragments ?
"yes" : "no"); + + mGroup = new MediaBufferGroup; + + int32_t max_size = 65536; + // XXX CHECK(mFormat->findInt32(kKeyMaxInputSize, &max_size)); + + mGroup->add_buffer(new MediaBuffer(max_size)); + + mSrcBuffer = new uint8_t[max_size]; + + mStarted = true; + + return OK; +} + +status_t FragmentedMPEG4Source::stop() { + Mutex::Autolock autoLock(mLock); + + CHECK(mStarted); + + delete[] mSrcBuffer; + mSrcBuffer = NULL; + + delete mGroup; + mGroup = NULL; + + mStarted = false; + mCurrentSampleIndex = 0; + + return OK; +} + +sp FragmentedMPEG4Source::getFormat() { + Mutex::Autolock autoLock(mLock); + + return mFormat; +} + + +status_t FragmentedMPEG4Source::read( + MediaBuffer **out, const ReadOptions *options) { + int64_t seekTimeUs; + ReadOptions::SeekMode mode; + if (options && options->getSeekTo(&seekTimeUs, &mode)) { + mParser->seekTo(mIsAudioTrack, seekTimeUs); + } + MediaBuffer *buffer = NULL; + mGroup->acquire_buffer(&buffer); + sp parseBuffer; + + status_t ret = mParser->dequeueAccessUnit(mIsAudioTrack, &parseBuffer, true /* synchronous */); + if (ret != OK) { + buffer->release(); + ALOGV("returning %d", ret); + return ret; + } + sp meta = parseBuffer->meta(); + int64_t timeUs; + CHECK(meta->findInt64("timeUs", &timeUs)); + buffer->meta_data()->setInt64(kKeyTime, timeUs); + buffer->set_range(0, parseBuffer->size()); + memcpy(buffer->data(), parseBuffer->data(), parseBuffer->size()); + *out = buffer; + return OK; +} + + +static bool isCompatibleBrand(uint32_t fourcc) { + static const uint32_t kCompatibleBrands[] = { + FOURCC('i', 's', 'o', 'm'), + FOURCC('i', 's', 'o', '2'), + FOURCC('a', 'v', 'c', '1'), + FOURCC('3', 'g', 'p', '4'), + FOURCC('m', 'p', '4', '1'), + FOURCC('m', 'p', '4', '2'), + + // Won't promise that the following file types can be played. + // Just give these file types a chance. 
+ FOURCC('q', 't', ' ', ' '), // Apple's QuickTime + FOURCC('M', 'S', 'N', 'V'), // Sony's PSP + + FOURCC('3', 'g', '2', 'a'), // 3GPP2 + FOURCC('3', 'g', '2', 'b'), + }; + + for (size_t i = 0; + i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]); + ++i) { + if (kCompatibleBrands[i] == fourcc) { + return true; + } + } + + return false; +} + +// Attempt to actually parse the 'ftyp' atom and determine if a suitable +// compatible brand is present. +// Also try to identify where this file's metadata ends +// (end of the 'moov' atom) and report it to the caller as part of +// the metadata. +static bool Sniff( + const sp &source, String8 *mimeType, float *confidence, + sp *meta) { + // We scan up to 128k bytes to identify this file as an MP4. + static const off64_t kMaxScanOffset = 128ll * 1024ll; + + off64_t offset = 0ll; + bool foundGoodFileType = false; + bool isFragmented = false; + off64_t moovAtomEndOffset = -1ll; + bool done = false; + + while (!done && offset < kMaxScanOffset) { + uint32_t hdr[2]; + if (source->readAt(offset, hdr, 8) < 8) { + return false; + } + + uint64_t chunkSize = ntohl(hdr[0]); + uint32_t chunkType = ntohl(hdr[1]); + off64_t chunkDataOffset = offset + 8; + + if (chunkSize == 1) { + if (source->readAt(offset + 8, &chunkSize, 8) < 8) { + return false; + } + + chunkSize = ntoh64(chunkSize); + chunkDataOffset += 8; + + if (chunkSize < 16) { + // The smallest valid chunk is 16 bytes long in this case. + return false; + } + } else if (chunkSize < 8) { + // The smallest valid chunk is 8 bytes long. 
+ return false; + } + + off64_t chunkDataSize = offset + chunkSize - chunkDataOffset; + + char chunkstring[5]; + MakeFourCCString(chunkType, chunkstring); + ALOGV("saw chunk type %s, size %lld @ %lld", chunkstring, chunkSize, offset); + switch (chunkType) { + case FOURCC('f', 't', 'y', 'p'): + { + if (chunkDataSize < 8) { + return false; + } + + uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4; + for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { + if (i == 1) { + // Skip this index, it refers to the minorVersion, + // not a brand. + continue; + } + + uint32_t brand; + if (source->readAt( + chunkDataOffset + 4 * i, &brand, 4) < 4) { + return false; + } + + brand = ntohl(brand); + char brandstring[5]; + MakeFourCCString(brand, brandstring); + ALOGV("Brand: %s", brandstring); + + if (isCompatibleBrand(brand)) { + foundGoodFileType = true; + break; + } + } + + if (!foundGoodFileType) { + return false; + } + + break; + } + + case FOURCC('m', 'o', 'o', 'v'): + { + moovAtomEndOffset = offset + chunkSize; + break; + } + + case FOURCC('m', 'o', 'o', 'f'): + { + // this is kind of broken, since we might not actually find a + // moof box in the first 128k. 
+ isFragmented = true; + done = true; + break; + } + + default: + break; + } + + offset += chunkSize; + } + + if (!foundGoodFileType || !isFragmented) { + return false; + } + + *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; + *confidence = 0.5f; // slightly more than MPEG4Extractor + + if (moovAtomEndOffset >= 0) { + *meta = new AMessage; + (*meta)->setInt64("meta-data-size", moovAtomEndOffset); + (*meta)->setInt32("fragmented", 1); // tell MediaExtractor what to instantiate + + ALOGV("found metadata size: %lld", moovAtomEndOffset); + } + + return true; +} + +// used by DataSource::RegisterDefaultSniffers +bool SniffFragmentedMP4( + const sp &source, String8 *mimeType, float *confidence, + sp *meta) { + ALOGV("SniffFragmentedMP4"); + char prop[PROPERTY_VALUE_MAX]; + if (property_get("media.stagefright.use-fragmp4", prop, NULL) + && (!strcmp(prop, "1") || !strcasecmp(prop, "true"))) { + return Sniff(source, mimeType, confidence, meta); + } + + return false; +} + +} // namespace android diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp index a5725417..7d49ef0a 100644 --- a/media/libstagefright/MPEG4Extractor.cpp +++ b/media/libstagefright/MPEG4Extractor.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +//#define LOG_NDEBUG 0 #define LOG_TAG "MPEG4Extractor" #include @@ -408,7 +409,7 @@ char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) { } // Reads an encoded integer 7 bits at a time until it encounters the high bit clear. 
-int32_t readSize(off64_t offset, +static int32_t readSize(off64_t offset, const sp DataSource, uint8_t *numOfBytes) { uint32_t size = 0; uint8_t data; diff --git a/media/libstagefright/MediaExtractor.cpp b/media/libstagefright/MediaExtractor.cpp index 9ab66119..b18c916f 100644 --- a/media/libstagefright/MediaExtractor.cpp +++ b/media/libstagefright/MediaExtractor.cpp @@ -21,6 +21,7 @@ #include "include/AMRExtractor.h" #include "include/MP3Extractor.h" #include "include/MPEG4Extractor.h" +#include "include/FragmentedMP4Extractor.h" #include "include/WAVExtractor.h" #include "include/OggExtractor.h" #include "include/MPEG2PSExtractor.h" @@ -93,7 +94,12 @@ sp MediaExtractor::Create( MediaExtractor *ret = NULL; if (!strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_MPEG4) || !strcasecmp(mime, "audio/mp4")) { - ret = new MPEG4Extractor(source); + int fragmented = 0; + if (meta != NULL && meta->findInt32("fragmented", &fragmented) && fragmented) { + ret = new FragmentedMP4Extractor(source); + } else { + ret = new MPEG4Extractor(source); + } } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEG)) { ret = new MP3Extractor(source, meta); } else if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AMR_NB) diff --git a/media/libstagefright/MetaData.cpp b/media/libstagefright/MetaData.cpp index 755594a7..a01ec978 100644 --- a/media/libstagefright/MetaData.cpp +++ b/media/libstagefright/MetaData.cpp @@ -22,6 +22,8 @@ #include #include +#include +#include #include namespace android { @@ -318,6 +320,12 @@ String8 MetaData::typed_data::asString() const { default: out = String8::format("(unknown type %d, size %d)", mType, mSize); + if (mSize <= 48) { // if it's less than three lines of hex data, dump it + AString foo; + hexdump(data, mSize, 0, &foo); + out.append("\n"); + out.append(foo.c_str()); + } break; } return out; diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp index 2a16f660..74e9222b 100644 --- a/media/libstagefright/Utils.cpp +++ 
b/media/libstagefright/Utils.cpp @@ -241,5 +241,196 @@ status_t convertMetaDataToMessage( return OK; } +static size_t reassembleAVCC(const sp &csd0, const sp csd1, char *avcc) { + // Splits csd0, then csd1, on 00 00 00 01 start codes and writes each piece to avcc as a 16-bit length plus payload; returns bytes written. NOTE(review): avcc is not bounds-checked here. + avcc[0] = 1; // version + avcc[1] = 0x64; // profile + avcc[2] = 0; // unused (?) + avcc[3] = 0xd; // level + avcc[4] = 0xff; // reserved+size + + size_t i = 0; + int numparams = 0; + int lastparamoffset = 0; + int avccidx = 6; + do { + if (i + 4 >= csd0->size() || // "i + 4" form: size() - 4 would wrap for buffers shorter than 4 bytes + memcmp(csd0->data() + i, "\x00\x00\x00\x01", 4) == 0) { + if (i + 4 >= csd0->size()) { + // there can't be another param here, so use all the rest + i = csd0->size(); + } + ALOGV("block at %d, last was %d", i, lastparamoffset); + if (lastparamoffset > 0) { + int size = i - lastparamoffset; + avcc[avccidx++] = size >> 8; + avcc[avccidx++] = size & 0xff; + memcpy(avcc+avccidx, csd0->data() + lastparamoffset, size); + avccidx += size; + numparams++; + } + i += 4; + lastparamoffset = i; + } else { + i++; + } + } while(i < csd0->size()); + ALOGV("csd0 contains %d params", numparams); + + avcc[5] = 0xe0 | numparams; + //and now csd-1 + i = 0; + numparams = 0; + lastparamoffset = 0; + int numpicparamsoffset = avccidx; + avccidx++; + do { + if (i + 4 >= csd1->size() || // same overflow-safe end-of-buffer test as for csd0 + memcmp(csd1->data() + i, "\x00\x00\x00\x01", 4) == 0) { + if (i + 4 >= csd1->size()) { + // there can't be another param here, so use all the rest + i = csd1->size(); + } + ALOGV("block at %d, last was %d", i, lastparamoffset); + if (lastparamoffset > 0) { + int size = i - lastparamoffset; + avcc[avccidx++] = size >> 8; + avcc[avccidx++] = size & 0xff; + memcpy(avcc+avccidx, csd1->data() + lastparamoffset, size); + avccidx += size; + numparams++; + } + i += 4; + lastparamoffset = i; + } else { + i++; + } + } while(i < csd1->size()); + avcc[numpicparamsoffset] = numparams; + return avccidx; +} + +static void reassembleESDS(const sp &csd0, char *esds) { + int csd0size = csd0->size(); + esds[0] = 3; // kTag_ESDescriptor; + int esdescriptorsize = 26 + csd0size; +
CHECK(esdescriptorsize < 268435456); // 7 bits per byte, so max is 2^28-1 + esds[1] = 0x80 | (esdescriptorsize >> 21); + esds[2] = 0x80 | ((esdescriptorsize >> 14) & 0x7f); + esds[3] = 0x80 | ((esdescriptorsize >> 7) & 0x7f); + esds[4] = (esdescriptorsize & 0x7f); + esds[5] = esds[6] = 0; // es id + esds[7] = 0; // flags + esds[8] = 4; // kTag_DecoderConfigDescriptor + int configdescriptorsize = 18 + csd0size; + esds[9] = 0x80 | (configdescriptorsize >> 21); + esds[10] = 0x80 | ((configdescriptorsize >> 14) & 0x7f); + esds[11] = 0x80 | ((configdescriptorsize >> 7) & 0x7f); + esds[12] = (configdescriptorsize & 0x7f); + esds[13] = 0x40; // objectTypeIndication + esds[14] = 0x15; // not sure what 14-25 mean, they are ignored by ESDS.cpp, + esds[15] = 0x00; // but the actual values here were taken from a real file. + esds[16] = 0x18; + esds[17] = 0x00; + esds[18] = 0x00; + esds[19] = 0x00; + esds[20] = 0xfa; + esds[21] = 0x00; + esds[22] = 0x00; + esds[23] = 0x00; + esds[24] = 0xfa; + esds[25] = 0x00; + esds[26] = 5; // kTag_DecoderSpecificInfo; + esds[27] = 0x80 | (csd0size >> 21); + esds[28] = 0x80 | ((csd0size >> 14) & 0x7f); + esds[29] = 0x80 | ((csd0size >> 7) & 0x7f); + esds[30] = (csd0size & 0x7f); + memcpy((void*)&esds[31], csd0->data(), csd0size); + // data following this is ignored, so don't bother appending it + +} + +void convertMessageToMetaData(const sp &msg, sp &meta) { + AString mime; + if (msg->findString("mime", &mime)) { + meta->setCString(kKeyMIMEType, mime.c_str()); + } else { + ALOGW("did not find mime type"); + } + + int64_t durationUs; + if (msg->findInt64("durationUs", &durationUs)) { + meta->setInt64(kKeyDuration, durationUs); + } + + if (mime.startsWith("video/")) { + int32_t width; + int32_t height; + if (msg->findInt32("width", &width) && msg->findInt32("height", &height)) { + meta->setInt32(kKeyWidth, width); + meta->setInt32(kKeyHeight, height); + } else { + ALOGW("did not find width and/or height"); + } + } else if 
(mime.startsWith("audio/")) { + int32_t numChannels; + if (msg->findInt32("channel-count", &numChannels)) { + meta->setInt32(kKeyChannelCount, numChannels); + } + int32_t sampleRate; + if (msg->findInt32("sample-rate", &sampleRate)) { + meta->setInt32(kKeySampleRate, sampleRate); + } + int32_t channelMask; + if (msg->findInt32("channel-mask", &channelMask)) { + meta->setInt32(kKeyChannelMask, channelMask); + } + int32_t delay = 0; + if (msg->findInt32("encoder-delay", &delay)) { + meta->setInt32(kKeyEncoderDelay, delay); + } + int32_t padding = 0; + if (msg->findInt32("encoder-padding", &padding)) { + meta->setInt32(kKeyEncoderPadding, padding); + } + + int32_t isADTS; + if (msg->findInt32("is-adts", &isADTS)) { + meta->setInt32(kKeyIsADTS, isADTS); + } + } + + int32_t maxInputSize; + if (msg->findInt32("max-input-size", &maxInputSize)) { + meta->setInt32(kKeyMaxInputSize, maxInputSize); + } + + // reassemble the csd data into its original form + sp csd0; + if (msg->findBuffer("csd-0", &csd0)) { + if (mime.startsWith("video/")) { // do we need to be stricter than this? + sp csd1; + if (msg->findBuffer("csd-1", &csd1)) { + char avcc[1024]; // that oughta be enough, right? 
+ size_t outsize = reassembleAVCC(csd0, csd1, avcc); + meta->setData(kKeyAVCC, kKeyAVCC, avcc, outsize); + } + } else if (mime.startsWith("audio/")) { + int csd0size = csd0->size(); + char esds[csd0size + 31]; + reassembleESDS(csd0, esds); + meta->setData(kKeyESDS, kKeyESDS, esds, sizeof(esds)); + } + } + + // XXX TODO add whatever other keys there are + +#if 0 + ALOGI("converted %s to:", msg->debugString(0).c_str()); + meta->dumpToLog(); +#endif +} + + } // namespace android diff --git a/media/libstagefright/include/FragmentedMP4Extractor.h b/media/libstagefright/include/FragmentedMP4Extractor.h new file mode 100644 index 00000000..763cd3af --- /dev/null +++ b/media/libstagefright/include/FragmentedMP4Extractor.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FRAGMENTED_MP4_EXTRACTOR_H_ + +#define FRAGMENTED_MP4_EXTRACTOR_H_ + +#include "include/FragmentedMP4Parser.h" + +#include +#include +#include + +namespace android { + +struct AMessage; +class DataSource; +class SampleTable; +class String8; + +class FragmentedMP4Extractor : public MediaExtractor { +public: + // Extractor assumes ownership of "source". 
FragmentedMP4Extractor(const sp &source); + + virtual size_t countTracks(); + virtual sp getTrack(size_t index); + virtual sp getTrackMetaData(size_t index, uint32_t flags); + virtual sp getMetaData(); + virtual uint32_t flags() const; + +protected: + virtual ~FragmentedMP4Extractor(); + +private: + sp mLooper; + sp mParser; + sp mDataSource; + status_t mInitCheck; + size_t mAudioTrackIndex; // index of the audio track; the constructor assigns -1 when there is none + size_t mTrackCount; + + sp mFileMetaData; + + Vector mPath; + + FragmentedMP4Extractor(const FragmentedMP4Extractor &); + FragmentedMP4Extractor &operator=(const FragmentedMP4Extractor &); +}; + +bool SniffFragmentedMP4( + const sp &source, String8 *mimeType, float *confidence, + sp *); + +} // namespace android + +#endif // FRAGMENTED_MP4_EXTRACTOR_H_ diff --git a/media/libstagefright/include/FragmentedMP4Parser.h b/media/libstagefright/include/FragmentedMP4Parser.h index bd8fe325..0edafb9b 100644 --- a/media/libstagefright/include/FragmentedMP4Parser.h +++ b/media/libstagefright/include/FragmentedMP4Parser.h @@ -19,6 +19,7 @@ #define PARSER_H_ #include +#include #include namespace android { @@ -30,6 +31,7 @@ struct FragmentedMP4Parser : public AHandler { Source() {} virtual ssize_t readAt(off64_t offset, void *data, size_t size) = 0; + virtual bool isSeekable() = 0; protected: virtual ~Source() {} @@ -42,9 +44,12 @@ struct FragmentedMP4Parser : public AHandler { void start(const char *filename); void start(const sp &source); + void start(sp &source); - sp getFormat(bool audio); - status_t dequeueAccessUnit(bool audio, sp *accessUnit); + sp getFormat(bool audio, bool synchronous = false); + status_t dequeueAccessUnit(bool audio, sp *accessUnit, bool synchronous = false); + status_t seekTo(bool audio, int64_t timeUs); + bool isSeekable() const; virtual void onMessageReceived(const sp &msg); @@ -58,6 +63,7 @@ private: kWhatReadMore, kWhatGetFormat, kWhatDequeueAccessUnit, + kWhatSeekTo, }; struct TrackFragment; @@ -97,6 +103,11 @@ private: off64_t mOffset; }; + struct SidxEntry { +
size_t mSize; + uint32_t mDurationUs; + }; + struct TrackInfo { enum Flags { kTrackEnabled = 0x01, @@ -107,6 +118,7 @@ private: uint32_t mTrackID; uint32_t mFlags; uint32_t mDuration; // This is the duration in terms of movie timescale! + uint64_t mSidxDuration; // usec, from sidx box, which can use a different timescale uint32_t mMediaTimeScale; @@ -121,6 +133,7 @@ private: uint32_t mDecodingTime; + Vector mSidx; sp mStaticFragment; List > mFragments; }; @@ -151,6 +164,8 @@ private: sp mSource; off_t mBufferPos; bool mSuspended; + bool mDoneWithMoov; + off_t mFirstMoofOffset; // used as the starting point for offsets calculated from the sidx box sp mBuffer; Vector mStack; KeyedVector mTracks; // TrackInfo by trackID @@ -164,6 +179,7 @@ private: status_t onProceed(); status_t onDequeueAccessUnit(size_t trackIndex, sp *accessUnit); + status_t onSeekTo(bool wantAudio, int64_t position); void enter(off64_t offset, uint32_t type, uint64_t size); @@ -222,6 +238,9 @@ private: status_t parseMediaData( uint32_t type, size_t offset, uint64_t size); + status_t parseSegmentIndex( + uint32_t type, size_t offset, uint64_t size); + TrackInfo *editTrack(uint32_t trackID, bool createIfNecessary = false); ssize_t findTrack(bool wantAudio) const; diff --git a/media/libstagefright/mp4/FragmentedMP4Parser.cpp b/media/libstagefright/mp4/FragmentedMP4Parser.cpp index e130a807..7fe4e636 100644 --- a/media/libstagefright/mp4/FragmentedMP4Parser.cpp +++ b/media/libstagefright/mp4/FragmentedMP4Parser.cpp @@ -18,8 +18,8 @@ #define LOG_TAG "FragmentedMP4Parser" #include -#include "include/FragmentedMP4Parser.h" #include "include/ESDS.h" +#include "include/FragmentedMP4Parser.h" #include "TrackFragment.h" @@ -31,6 +31,7 @@ #include #include + namespace android { static const char *Fourcc2String(uint32_t fourcc) { @@ -121,6 +122,8 @@ const FragmentedMP4Parser::DispatchEntry FragmentedMP4Parser::kDispatchTable[] = }, { FOURCC('m', 'f', 'r', 'a'), 0, NULL }, + + { FOURCC('s', 'i', 'd', 'x'), 0, 
&FragmentedMP4Parser::parseSegmentIndex }, }; struct FileSource : public FragmentedMP4Parser::Source { @@ -134,15 +137,92 @@ struct FileSource : public FragmentedMP4Parser::Source { return fread(data, 1, size, mFile); } + virtual bool isSeekable() { + return true; + } + private: FILE *mFile; DISALLOW_EVIL_CONSTRUCTORS(FileSource); }; +struct ReadTracker : public RefBase { + ReadTracker(off64_t size) { + allocSize = 1 + size / 8192; // 1 bit per kilobyte + bitmap = (char*) calloc(1, allocSize); + } + virtual ~ReadTracker() { + dumpToLog(); + free(bitmap); + } + void mark(off64_t offset, size_t size) { + int firstbit = offset / 1024; + int lastbit = (offset + size - 1) / 1024; + for (int i = firstbit; i <= lastbit; i++) { + bitmap[i/8] |= (0x80 >> (i & 7)); + } + } + + private: + void dumpToLog() { + // 96 chars per line, each char represents one kilobyte, 1 kb per bit + int numlines = allocSize / 12; + char buf[97]; + char *cur = bitmap; + for (int i = 0; i < numlines; i++ && cur) { + for (int j = 0; j < 12; j++) { + for (int k = 0; k < 8; k++) { + buf[(j * 8) + k] = (*cur & (0x80 >> k)) ? 
'X' : '.'; + } + cur++; + } + buf[96] = '\0'; + ALOGI("%5dk: %s", i * 96, buf); + } + } + + size_t allocSize; + char *bitmap; +}; + +struct DataSourceSource : public FragmentedMP4Parser::Source { + DataSourceSource(sp &source) + : mDataSource(source) { + CHECK(mDataSource != NULL); +#if 0 + off64_t size; + if (source->getSize(&size) == OK) { + mReadTracker = new ReadTracker(size); + } else { + ALOGE("couldn't get data source size"); + } +#endif + } + + virtual ssize_t readAt(off64_t offset, void *data, size_t size) { + if (mReadTracker != NULL) { + mReadTracker->mark(offset, size); + } + return mDataSource->readAt(offset, data, size); + } + + virtual bool isSeekable() { + return true; + } + + private: + sp mDataSource; + sp mReadTracker; + + DISALLOW_EVIL_CONSTRUCTORS(DataSourceSource); +}; + FragmentedMP4Parser::FragmentedMP4Parser() : mBufferPos(0), mSuspended(false), + mDoneWithMoov(false), + mFirstMoofOffset(0), mFinalResult(OK) { } @@ -153,54 +233,142 @@ void FragmentedMP4Parser::start(const char *filename) { sp msg = new AMessage(kWhatStart, id()); msg->setObject("source", new FileSource(filename)); msg->post(); + ALOGV("Parser::start(%s)", filename); } void FragmentedMP4Parser::start(const sp &source) { sp msg = new AMessage(kWhatStart, id()); msg->setObject("source", source); msg->post(); + ALOGV("Parser::start(Source)"); +} + +void FragmentedMP4Parser::start(sp &source) { + sp msg = new AMessage(kWhatStart, id()); + msg->setObject("source", new DataSourceSource(source)); + msg->post(); + ALOGV("Parser::start(DataSource)"); } -sp FragmentedMP4Parser::getFormat(bool audio) { - sp msg = new AMessage(kWhatGetFormat, id()); - msg->setInt32("audio", audio); +sp FragmentedMP4Parser::getFormat(bool audio, bool synchronous) { - sp response; - status_t err = msg->postAndAwaitResponse(&response); + while (true) { + bool moovDone = mDoneWithMoov; + sp msg = new AMessage(kWhatGetFormat, id()); + msg->setInt32("audio", audio); - if (err != OK) { - return NULL; - } + sp 
response; + status_t err = msg->postAndAwaitResponse(&response); - if (err != OK) { - return NULL; - } + if (err != OK) { + ALOGV("getFormat post failed: %d", err); + return NULL; + } + + if (response->findInt32("err", &err) && err != OK) { + if (synchronous && err == -EWOULDBLOCK && !moovDone) { + resumeIfNecessary(); + ALOGV("@getFormat parser not ready yet, retrying"); + usleep(10000); + continue; + } + ALOGV("getFormat failed: %d", err); + return NULL; + } - sp format; - CHECK(response->findMessage("format", &format)); + sp format; + CHECK(response->findMessage("format", &format)); - ALOGV("returning format %s", format->debugString().c_str()); - return format; + ALOGV("returning format %s", format->debugString().c_str()); + return format; + } } -status_t FragmentedMP4Parser::dequeueAccessUnit(bool audio, sp *accessUnit) { - sp msg = new AMessage(kWhatDequeueAccessUnit, id()); - msg->setInt32("audio", audio); +status_t FragmentedMP4Parser::seekTo(bool wantAudio, int64_t timeUs) { + sp msg = new AMessage(kWhatSeekTo, id()); + msg->setInt32("audio", wantAudio); + msg->setInt64("position", timeUs); sp response; status_t err = msg->postAndAwaitResponse(&response); + return err != OK ? err : (response->findInt32("err", &err) ? err : OK); // also report the "err" the kWhatSeekTo handler puts in the reply +} - if (err != OK) { - return err; +bool FragmentedMP4Parser::isSeekable() const { + while (mFirstMoofOffset == 0 && mFinalResult == OK) { + usleep(10000); + } + bool seekable = mSource->isSeekable(); + for (size_t i = 0; seekable && i < mTracks.size(); i++) { + const TrackInfo *info = &mTracks.valueAt(i); + seekable &= !info->mSidx.empty(); } + return seekable; +} - if (response->findInt32("err", &err) && err != OK) { - return err; +status_t FragmentedMP4Parser::onSeekTo(bool wantAudio, int64_t position) { + status_t err = -EINVAL; + ssize_t trackIndex = findTrack(wantAudio); + if (trackIndex < 0) { + err = trackIndex; + } else { + TrackInfo *info = &mTracks.editValueAt(trackIndex); + + int numSidxEntries = info->mSidx.size(); + int64_t totalTime = 0; +
off_t totalOffset = mFirstMoofOffset; + for (int i = 0; i < numSidxEntries; i++) { + const SidxEntry *se = &info->mSidx[i]; + totalTime += se->mDurationUs; + if (totalTime > position) { + mBuffer->setRange(0,0); + mBufferPos = totalOffset; + if (mFinalResult == ERROR_END_OF_STREAM) { + mFinalResult = OK; + mSuspended = true; // force resume + resumeIfNecessary(); + } + info->mFragments.clear(); + info->mDecodingTime = position * info->mMediaTimeScale / 1000000ll; + return OK; + } + totalOffset += se->mSize; + } } + ALOGV("seekTo out of range"); + return err; +} - CHECK(response->findBuffer("accessUnit", accessUnit)); +status_t FragmentedMP4Parser::dequeueAccessUnit(bool audio, sp *accessUnit, + bool synchronous) { - return OK; + while (true) { + sp msg = new AMessage(kWhatDequeueAccessUnit, id()); + msg->setInt32("audio", audio); + + sp response; + status_t err = msg->postAndAwaitResponse(&response); + + if (err != OK) { + ALOGV("dequeue fail 1: %d", err); + return err; + } + + if (response->findInt32("err", &err) && err != OK) { + if (synchronous && err == -EWOULDBLOCK) { + resumeIfNecessary(); + ALOGV("Parser not ready yet, retrying"); + usleep(10000); + continue; + } + ALOGV("dequeue fail 2: %d, %d", err, synchronous); + return err; + } + + CHECK(response->findBuffer("accessUnit", accessUnit)); + + return OK; + } } ssize_t FragmentedMP4Parser::findTrack(bool wantAudio) const { @@ -272,7 +440,7 @@ void FragmentedMP4Parser::onMessageReceived(const sp &msg) { size_t maxBytesToRead = mBuffer->capacity() - mBuffer->size(); if (maxBytesToRead < needed) { - ALOGI("resizing buffer."); + ALOGV("resizing buffer."); sp newBuffer = new ABuffer((mBuffer->size() + needed + 1023) & ~1023); @@ -290,7 +458,7 @@ void FragmentedMP4Parser::onMessageReceived(const sp &msg) { mBuffer->data() + mBuffer->size(), needed); if (n < (ssize_t)needed) { - ALOGI("%s", "Reached EOF"); + ALOGV("Reached EOF when reading %d @ %d + %d", needed, mBufferPos, mBuffer->size()); if (n < 0) { 
mFinalResult = n; } else if (n == 0) { @@ -321,8 +489,16 @@ void FragmentedMP4Parser::onMessageReceived(const sp &msg) { } else { TrackInfo *info = &mTracks.editValueAt(trackIndex); + sp format = info->mSampleDescs.itemAt(0).mFormat; + if (info->mSidxDuration) { + format->setInt64("durationUs", info->mSidxDuration); + } else { + // this is probably going to be zero. Oh well... + format->setInt64("durationUs", + 1000000ll * info->mDuration / info->mMediaTimeScale); + } response->setMessage( - "format", info->mSampleDescs.itemAt(0).mFormat); + "format", format); err = OK; } @@ -366,6 +542,30 @@ void FragmentedMP4Parser::onMessageReceived(const sp &msg) { break; } + case kWhatSeekTo: + { + ALOGV("kWhatSeekTo"); + int32_t wantAudio; + CHECK(msg->findInt32("audio", &wantAudio)); + int64_t position; + CHECK(msg->findInt64("position", &position)); + + status_t err = -EWOULDBLOCK; + sp response = new AMessage; + + ssize_t trackIndex = findTrack(wantAudio); + + if (trackIndex < 0) { + err = trackIndex; + } else { + err = onSeekTo(wantAudio, position); + } + response->setInt32("err", err); + uint32_t replyID; + CHECK(msg->senderAwaitsResponse(&replyID)); + response->postReply(replyID); + break; + } default: TRESPASS(); } @@ -429,6 +629,12 @@ status_t FragmentedMP4Parser::onProceed() { if ((i < kNumDispatchers && kDispatchTable[i].mHandler == 0) || isSampleEntryBox || ptype == FOURCC('i', 'l', 's', 't')) { // This is a container box. 
+ if (type == FOURCC('m', 'o', 'o', 'f')) { + if (mFirstMoofOffset == 0) { + ALOGV("first moof @ %08x", mBufferPos + offset); + mFirstMoofOffset = mBufferPos + offset - 8; // point at the size + } + } if (type == FOURCC('m', 'e', 't', 'a')) { if ((err = need(offset + 4)) < OK) { return err; @@ -589,7 +795,7 @@ void FragmentedMP4Parser::resumeIfNecessary() { return; } - ALOGI("resuming."); + ALOGV("resuming."); mSuspended = false; (new AMessage(kWhatProceed, id()))->post(); @@ -647,7 +853,7 @@ status_t FragmentedMP4Parser::onDequeueAccessUnit( int cmp = CompareSampleLocation(sampleInfo, mdatInfo); - if (cmp < 0) { + if (cmp < 0 && !mSource->isSeekable()) { return -EPIPE; } else if (cmp == 0) { if (i > 0) { @@ -669,6 +875,8 @@ status_t FragmentedMP4Parser::onDequeueAccessUnit( size_t numDroppable = 0; bool done = false; + // XXX FIXME: if one of the tracks is not advanced (e.g. if you play an audio+video + // file with sf2), then mMediaData will not be pruned and keeps growing for (size_t i = 0; !done && i < mMediaData.size(); ++i) { const MediaDataInfo &mdatInfo = mMediaData.itemAt(i); @@ -896,6 +1104,8 @@ void FragmentedMP4Parser::skip(off_t distance) { static_cast( fragment.get())->signalCompletion(); + } else if (container->mType == FOURCC('m', 'o', 'o', 'v')) { + mDoneWithMoov = true; } container = NULL; @@ -953,6 +1163,10 @@ status_t FragmentedMP4Parser::parseTrackHeader( TrackInfo *info = editTrack(trackID, true /* createIfNecessary */); info->mFlags = flags; info->mDuration = duration; + if (info->mDuration == 0xffffffff) { + // ffmpeg sets this to -1, which is incorrect. 
+ info->mDuration = 0; + } info->mStaticFragment = new StaticTrackFragment; @@ -1363,13 +1577,100 @@ status_t FragmentedMP4Parser::parseMediaData( info->mOffset = mBufferPos + offset; if (mMediaData.size() > 10) { - ALOGI("suspending for now."); + ALOGV("suspending for now."); mSuspended = true; } return OK; } +status_t FragmentedMP4Parser::parseSegmentIndex( + uint32_t type, size_t offset, uint64_t size) { + ALOGV("sidx box type %d, offset %d, size %d", type, int(offset), int(size)); +// AString sidxstr; +// hexdump(mBuffer->data() + offset, size, 0 /* indent */, &sidxstr); +// ALOGV("raw sidx:"); +// ALOGV("%s", sidxstr.c_str()); + if (offset + 12 > size) { + return -EINVAL; + } + + uint32_t flags = readU32(offset); + + uint32_t version = flags >> 24; + flags &= 0xffffff; + + ALOGV("sidx version %d", version); + + uint32_t referenceId = readU32(offset + 4); + uint32_t timeScale = readU32(offset + 8); + ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale); + + uint64_t earliestPresentationTime; + uint64_t firstOffset; + + offset += 12; + + if (version == 0) { + if (offset + 8 > size) { + return -EINVAL; + } + earliestPresentationTime = readU32(offset); + firstOffset = readU32(offset + 4); + offset += 8; + } else { + if (offset + 16 > size) { + return -EINVAL; + } + earliestPresentationTime = readU64(offset); + firstOffset = readU64(offset + 8); + offset += 16; + } + ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset); + + if (offset + 4 > size) { + return -EINVAL; + } + if (readU16(offset) != 0) { // reserved + return -EINVAL; + } + int32_t referenceCount = readU16(offset + 2); + offset += 4; + ALOGV("refcount: %d", referenceCount); + + if (offset + referenceCount * 12 > size) { + return -EINVAL; + } + + TrackInfo *info = editTrack(mCurrentTrackID); + uint64_t total_duration = 0; + for (int i = 0; i < referenceCount; i++) { + uint32_t d1 = readU32(offset); + uint32_t d2 = readU32(offset + 4); + uint32_t d3 = readU32(offset + 8); + + if 
(d1 & 0x80000000) { + ALOGW("sub-sidx boxes not supported yet"); + } + bool sap = d3 & 0x80000000; + bool saptype = d3 >> 28; + if (!sap || saptype > 2) { + ALOGW("not a stream access point, or unsupported type"); + } + total_duration += d2; + offset += 12; + ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3); + SidxEntry se; + se.mSize = d1 & 0x7fffffff; + se.mDurationUs = 1000000LL * d2 / timeScale; + info->mSidx.add(se); + } + + info->mSidxDuration = total_duration * 1000000 / timeScale; + ALOGV("duration: %lld", info->mSidxDuration); + return OK; +} + status_t FragmentedMP4Parser::parseTrackExtends( uint32_t type, size_t offset, uint64_t size) { if (offset + 24 > size) { @@ -1407,6 +1708,7 @@ FragmentedMP4Parser::TrackInfo *FragmentedMP4Parser::editTrack( info.mTrackID = trackID; info.mFlags = 0; info.mDuration = 0xffffffff; + info.mSidxDuration = 0; info.mMediaTimeScale = 0; info.mMediaHandlerType = 0; info.mDefaultSampleDescriptionIndex = 0; -- cgit v1.2.3