From 3f3d9a6b76238f0baa3829c6bdd74c9a8c2a4597 Mon Sep 17 00:00:00 2001 From: Kongqun Yang Date: Mon, 14 Oct 2013 13:55:48 -0700 Subject: [PATCH] Add duration, language, codec data to stream info. Change-Id: I83d221fd36adb53ccf5629c80b137ba0ec730d55 --- media/base/audio_stream_info.cc | 48 ++++++++++----- media/base/audio_stream_info.h | 36 ++++++----- media/base/stream_info.cc | 13 +++- media/base/stream_info.h | 21 +++++-- media/base/video_stream_info.cc | 36 +++++++++-- media/base/video_stream_info.h | 26 +++++--- media/mp4/aac.cc | 73 +++++++++++----------- media/mp4/aac.h | 34 +++++++---- media/mp4/box_definitions.cc | 25 +++++++- media/mp4/box_definitions.h | 9 +++ media/mp4/mp4_media_parser.cc | 104 +++++++++++++++++--------------- media/mp4/mp4_media_parser.h | 5 -- 12 files changed, 283 insertions(+), 147 deletions(-) diff --git a/media/base/audio_stream_info.cc b/media/base/audio_stream_info.cc index 49d2f49166..1c7fdaf99f 100644 --- a/media/base/audio_stream_info.cc +++ b/media/base/audio_stream_info.cc @@ -6,48 +6,68 @@ #include +#include "base/strings/string_number_conversions.h" #include "media/base/limits.h" namespace media { AudioStreamInfo::AudioStreamInfo(int track_id, - int time_scale, + uint32 time_scale, + uint64 duration, AudioCodec codec, - int bytes_per_channel, - int num_channels, - int samples_per_second, + const std::string& codec_string, + const std::string& language, + uint8 sample_bits, + uint8 num_channels, + uint32 sampling_frequency, const uint8* extra_data, size_t extra_data_size, bool is_encrypted) : StreamInfo(kStreamAudio, track_id, time_scale, + duration, + codec_string, + language, extra_data, extra_data_size, is_encrypted), codec_(codec), - bytes_per_channel_(bytes_per_channel), + sample_bits_(sample_bits), num_channels_(num_channels), - samples_per_second_(samples_per_second) {} + sampling_frequency_(sampling_frequency) {} AudioStreamInfo::~AudioStreamInfo() {} bool AudioStreamInfo::IsValidConfig() const { return codec_ != 
kUnknownAudioCodec && num_channels_ != 0 && - num_channels_ <= limits::kMaxChannels && bytes_per_channel_ > 0 && - bytes_per_channel_ <= limits::kMaxBytesPerSample && - samples_per_second_ > 0 && - samples_per_second_ <= limits::kMaxSampleRate; + num_channels_ <= limits::kMaxChannels && sample_bits_ > 0 && + sample_bits_ <= limits::kMaxBitsPerSample && + sampling_frequency_ > 0 && + sampling_frequency_ <= limits::kMaxSampleRate; } -std::string AudioStreamInfo::ToString() { +std::string AudioStreamInfo::ToString() const { std::ostringstream s; s << "codec: " << codec_ - << " bytes_per_channel: " << bytes_per_channel_ - << " num_channels: " << num_channels_ - << " samples_per_second: " << samples_per_second_ + << " sample_bits: " << static_cast(sample_bits_) + << " num_channels: " << static_cast(num_channels_) + << " sampling_frequency: " << sampling_frequency_ << " " << StreamInfo::ToString(); return s.str(); } +std::string AudioStreamInfo::GetCodecString(AudioCodec codec, + uint8 audio_object_type) { + switch (codec) { + case kCodecVorbis: + return "vorbis"; + case kCodecOpus: + return "opus"; + case kCodecAAC: + return "mp4a.40." + base::UintToString(audio_object_type); + } + return "unknown"; +} + } // namespace media diff --git a/media/base/audio_stream_info.h b/media/base/audio_stream_info.h index 14dd00bf32..9fb6a0ff6a 100644 --- a/media/base/audio_stream_info.h +++ b/media/base/audio_stream_info.h @@ -35,11 +35,14 @@ class AudioStreamInfo : public StreamInfo { // Constructs an initialized object. It is acceptable to pass in NULL for // |extra_data|, otherwise the memory is copied. 
AudioStreamInfo(int track_id, - int time_scale, + uint32 time_scale, + uint64 duration, AudioCodec codec, - int bytes_per_channel, - int num_channels, - int samples_per_second, + const std::string& codec_string, + const std::string& language, + uint8 sample_bits, + uint8 num_channels, + uint32 sampling_frequency, const uint8* extra_data, size_t extra_data_size, bool is_encrypted); @@ -48,23 +51,28 @@ class AudioStreamInfo : public StreamInfo { // Returns true if this object has appropriate configuration values, false // otherwise. - virtual bool IsValidConfig() const; + virtual bool IsValidConfig() const OVERRIDE; // Returns a human-readable string describing |*this|. - virtual std::string ToString(); + virtual std::string ToString() const OVERRIDE; AudioCodec codec() const { return codec_; } - int bits_per_channel() const { return bytes_per_channel_ * 8; } - int bytes_per_channel() const { return bytes_per_channel_; } - int num_channels() const { return num_channels_; } - int samples_per_second() const { return samples_per_second_; } - int bytes_per_frame() const { return num_channels_ * bytes_per_channel_; } + uint8 sample_bits() const { return sample_bits_; } + uint8 sample_bytes() const { return sample_bits_ / 8; } + uint8 num_channels() const { return num_channels_; } + uint32 sampling_frequency() const { return sampling_frequency_; } + uint32 bytes_per_frame() const { + return static_cast(num_channels_) * sample_bits_ / 8; + } + + // Returns the codec string. The second parameter is only used by AAC Codec. + static std::string GetCodecString(AudioCodec codec, uint8 audio_object_type); private: AudioCodec codec_; - int bytes_per_channel_; - int num_channels_; - int samples_per_second_; + uint8 sample_bits_; + uint8 num_channels_; + uint32 sampling_frequency_; // Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler // generated copy constructor and assignment operator. 
Since the extra data is diff --git a/media/base/stream_info.cc b/media/base/stream_info.cc index 480fec98f4..e8c6cbf327 100644 --- a/media/base/stream_info.cc +++ b/media/base/stream_info.cc @@ -10,13 +10,19 @@ namespace media { StreamInfo::StreamInfo(StreamType stream_type, int track_id, - int time_scale, + uint32 time_scale, + uint64 duration, + const std::string& codec_string, + const std::string& language, const uint8* extra_data, size_t extra_data_size, bool is_encrypted) : stream_type_(stream_type), track_id_(track_id), time_scale_(time_scale), + duration_(duration), + codec_string_(codec_string), + language_(language), is_encrypted_(is_encrypted) { CHECK((extra_data_size != 0) == (extra_data != NULL)); @@ -25,11 +31,14 @@ StreamInfo::StreamInfo(StreamType stream_type, StreamInfo::~StreamInfo() {} -std::string StreamInfo::ToString() { +std::string StreamInfo::ToString() const { std::ostringstream s; s << "type: " << (stream_type_ == kStreamAudio ? "Audio" : "Video") << " track_id: " << track_id_ << " time_scale: " << time_scale_ + << " duration: " << duration_ + << " codec_string: " << codec_string_ + << " language: " << language_ << " is_encrypted: " << is_encrypted_; return s.str(); } diff --git a/media/base/stream_info.h b/media/base/stream_info.h index 21a2b4316a..055941e23d 100644 --- a/media/base/stream_info.h +++ b/media/base/stream_info.h @@ -21,7 +21,10 @@ class StreamInfo : public base::RefCountedThreadSafe { public: StreamInfo(StreamType stream_type, int track_id, - int time_scale, + uint32 time_scale, + uint64 duration, + const std::string& codec_string, + const std::string& language, const uint8* extra_data, size_t extra_data_size, bool is_encrypted); @@ -32,11 +35,14 @@ class StreamInfo : public base::RefCountedThreadSafe { virtual bool IsValidConfig() const = 0; // Returns a human-readable string describing |*this|. 
- virtual std::string ToString(); + virtual std::string ToString() const; StreamType stream_type() const { return stream_type_; } int track_id() const { return track_id_; } - int time_scale() const { return time_scale_; } + uint32 time_scale() const { return time_scale_; } + uint64 duration() const { return duration_; } + const std::string& codec_string() const { return codec_string_; } + const std::string& language() const { return language_; } bool is_encrypted() const { return is_encrypted_; } @@ -47,11 +53,18 @@ class StreamInfo : public base::RefCountedThreadSafe { return extra_data_.size(); } + void set_duration(int duration) { duration_ = duration; } + private: // Whether the stream is Audio or Video. StreamType stream_type_; int track_id_; - int time_scale_; + // The actual time is calculated as time / time_scale_ in seconds. + uint32 time_scale_; + // Duration based on time_scale. + uint64 duration_; + std::string codec_string_; + std::string language_; // Whether the stream is potentially encrypted. // Note that in a potentially encrypted stream, individual buffers // can be encrypted or not encrypted. 
diff --git a/media/base/video_stream_info.cc b/media/base/video_stream_info.cc index 589cd49fa4..37a903d85c 100644 --- a/media/base/video_stream_info.cc +++ b/media/base/video_stream_info.cc @@ -6,21 +6,29 @@ #include +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_util.h" #include "media/base/limits.h" namespace media { VideoStreamInfo::VideoStreamInfo(int track_id, - int time_scale, + uint32 time_scale, + uint64 duration, VideoCodec codec, - int width, - int height, + const std::string& codec_string, + const std::string& language, + uint16 width, + uint16 height, const uint8* extra_data, size_t extra_data_size, bool is_encrypted) : StreamInfo(kStreamVideo, track_id, time_scale, + duration, + codec_string, + language, extra_data, extra_data_size, is_encrypted), @@ -36,7 +44,7 @@ bool VideoStreamInfo::IsValidConfig() const { height_ > 0 && height_ <= limits::kMaxDimension; } -std::string VideoStreamInfo::ToString() { +std::string VideoStreamInfo::ToString() const { std::ostringstream s; s << "codec: " << codec_ << " width: " << width_ @@ -45,4 +53,24 @@ std::string VideoStreamInfo::ToString() { return s.str(); } +std::string VideoStreamInfo::GetCodecString(VideoCodec codec, + uint8 profile, + uint8 compatible_profiles, + uint8 level) { + switch (codec) { + case kCodecVP8: + return "vp8"; + case kCodecVP9: + return "vp9"; + case kCodecH264: { + const uint8 bytes[] = {profile, compatible_profiles, level}; + return "avc1." + + StringToLowerASCII(base::HexEncode(bytes, arraysize(bytes))); + } + default: + NOTIMPLEMENTED() << "Codec: " << codec; + return "unknown"; + } +} + } // namespace media diff --git a/media/base/video_stream_info.h b/media/base/video_stream_info.h index 9e5e7d5c1b..4772d921c9 100644 --- a/media/base/video_stream_info.h +++ b/media/base/video_stream_info.h @@ -27,10 +27,13 @@ class VideoStreamInfo : public StreamInfo { // Constructs an initialized object. 
It is acceptable to pass in NULL for // |extra_data|, otherwise the memory is copied. VideoStreamInfo(int track_id, - int time_scale, + uint32 time_scale, + uint64 duration, VideoCodec codec, - int width, - int height, + const std::string& codec_string, + const std::string& language, + uint16 width, + uint16 height, const uint8* extra_data, size_t extra_data_size, bool is_encrypted); @@ -39,19 +42,24 @@ class VideoStreamInfo : public StreamInfo { // Returns true if this object has appropriate configuration values, false // otherwise. - virtual bool IsValidConfig() const; + virtual bool IsValidConfig() const OVERRIDE; // Returns a human-readable string describing |*this|. - virtual std::string ToString(); + virtual std::string ToString() const OVERRIDE; VideoCodec codec() const { return codec_; } - int width() const { return width_; } - int height() const { return height_; } + uint16 width() const { return width_; } + uint16 height() const { return height_; } + + // Returns the codec string. The parameters beyond codec are only used by + // H.264 codec. + static std::string GetCodecString(VideoCodec codec, uint8 profile, + uint8 compatible_profiles, uint8 level); private: VideoCodec codec_; - int width_; - int height_; + uint16 width_; + uint16 height_; // Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler // generated copy constructor and assignment operator. 
Since the extra data is diff --git a/media/mp4/aac.cc b/media/mp4/aac.cc index 87866c71fe..311c5ee0b0 100644 --- a/media/mp4/aac.cc +++ b/media/mp4/aac.cc @@ -13,13 +13,13 @@ namespace { // Sampling Frequency Index table, from ISO 14496-3 Table 1.16 -static const int kSampleRates[] = { +static const uint32 kSampleRates[] = { 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350 }; // Channel Configuration table, from ISO 14496-3 Table 1.17 -const uint32 kChannelConfigs[] = {0, 1, 2, 3, 4, 5, 6, 8}; +const uint8 kChannelConfigs[] = {0, 1, 2, 3, 4, 5, 6, 8}; } // namespace @@ -28,46 +28,51 @@ namespace media { namespace mp4 { AAC::AAC() - : profile_(0), frequency_index_(0), channel_config_(0), frequency_(0), - extension_frequency_(0), num_channels_(0) { + : audio_object_type_(0), frequency_index_(0), channel_config_(0), + ps_present_(false), frequency_(0), extension_frequency_(0), + num_channels_(0) { } AAC::~AAC() { } bool AAC::Parse(const std::vector& data) { -#if defined(OS_ANDROID) codec_specific_data_ = data; -#endif + if (data.empty()) return false; BitReader reader(&data[0], data.size()); uint8 extension_type = 0; - bool ps_present = false; uint8 extension_frequency_index = 0xff; + ps_present_ = false; frequency_ = 0; extension_frequency_ = 0; // The following code is written according to ISO 14496 Part 3 Table 1.13 - // Syntax of AudioSpecificConfig. - // Read base configuration - RCHECK(reader.ReadBits(5, &profile_)); + // Read base configuration. + // Audio Object Types specified in ISO 14496-3, Table 1.15. + RCHECK(reader.ReadBits(5, &audio_object_type_)); + // Audio objects type >=31 is not supported yet. + RCHECK(audio_object_type_ < 31); RCHECK(reader.ReadBits(4, &frequency_index_)); if (frequency_index_ == 0xf) RCHECK(reader.ReadBits(24, &frequency_)); RCHECK(reader.ReadBits(4, &channel_config_)); // Read extension configuration. 
- if (profile_ == 5 || profile_ == 29) { - ps_present = (profile_ == 29); + if (audio_object_type_ == 5 || audio_object_type_ == 29) { + ps_present_ = (audio_object_type_ == 29); extension_type = 5; RCHECK(reader.ReadBits(4, &extension_frequency_index)); if (extension_frequency_index == 0xf) RCHECK(reader.ReadBits(24, &extension_frequency_)); - RCHECK(reader.ReadBits(5, &profile_)); + RCHECK(reader.ReadBits(5, &audio_object_type_)); + // Audio objects type >=31 is not supported yet. + RCHECK(audio_object_type_ < 31); } RCHECK(SkipDecoderGASpecificConfig(&reader)); @@ -96,7 +101,7 @@ bool AAC::Parse(const std::vector& data) { RCHECK(reader.ReadBits(11, &sync_extension_type)); if (sync_extension_type == 0x548) { RCHECK(reader.ReadBits(1, &ps_present_flag)); - ps_present = ps_present_flag != 0; + ps_present_ = ps_present_flag != 0; } } } @@ -114,20 +119,15 @@ bool AAC::Parse(const std::vector& data) { extension_frequency_ = kSampleRates[extension_frequency_index]; } - // TODO(kqyang): should we care about whether Parametric Stereo is on? - // When Parametric Stereo is on, mono will be played as stereo. 
- if (ps_present && channel_config_ == 1) - num_channels_ = 2; // CHANNEL_LAYOUT_STEREO - else { - RCHECK(channel_config_ < arraysize(kChannelConfigs)); - num_channels_ = kChannelConfigs[channel_config_]; - } + RCHECK(channel_config_ < arraysize(kChannelConfigs)); + num_channels_ = kChannelConfigs[channel_config_]; - return frequency_ != 0 && num_channels_ != 0 && profile_ >= 1 && - profile_ <= 4 && frequency_index_ != 0xf && channel_config_ <= 7; + return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 && + audio_object_type_ <= 4 && frequency_index_ != 0xf && + channel_config_ <= 7; } -int AAC::GetOutputSamplesPerSecond(bool sbr_in_mimetype) const { +uint32 AAC::GetOutputSamplesPerSecond(bool sbr_in_mimetype) const { if (extension_frequency_ > 0) return extension_frequency_; @@ -139,24 +139,28 @@ int AAC::GetOutputSamplesPerSecond(bool sbr_in_mimetype) const { // to SBR doubling the AAC sample rate.) // TODO(acolwell) : Extend sample rate cap to 96kHz for Level 5 content. DCHECK_GT(frequency_, 0); - return std::min(2 * frequency_, 48000); + return std::min(2 * frequency_, 48000u); } -int AAC::GetNumChannels(bool sbr_in_mimetype) const { +uint8 AAC::GetNumChannels(bool sbr_in_mimetype) const { // Check for implicit signalling of HE-AAC and indicate stereo output // if the mono channel configuration is signalled. // See ISO-14496-3 Section 1.6.6.1.2 for details about this special casing. if (sbr_in_mimetype && channel_config_ == 1) return 2; // CHANNEL_LAYOUT_STEREO + // When Parametric Stereo is on, mono will be played as stereo. 
+ if (ps_present_ && channel_config_ == 1) + return 2; // CHANNEL_LAYOUT_STEREO + return num_channels_; } bool AAC::ConvertToADTS(std::vector* buffer) const { size_t size = buffer->size() + kADTSHeaderSize; - DCHECK(profile_ >= 1 && profile_ <= 4 && frequency_index_ != 0xf && - channel_config_ <= 7); + DCHECK(audio_object_type_ >= 1 && audio_object_type_ <= 4 && + frequency_index_ != 0xf && channel_config_ <= 7); // ADTS header uses 13 bits for packet size. if (size >= (1 << 13)) @@ -167,7 +171,7 @@ bool AAC::ConvertToADTS(std::vector* buffer) const { adts.insert(buffer->begin(), kADTSHeaderSize, 0); adts[0] = 0xff; adts[1] = 0xf1; - adts[2] = ((profile_ - 1) << 6) + (frequency_index_ << 2) + + adts[2] = ((audio_object_type_ - 1) << 6) + (frequency_index_ << 2) + (channel_config_ >> 2); adts[3] = ((channel_config_ & 0x3) << 6) + (size >> 11); adts[4] = (size & 0x7ff) >> 3; @@ -180,7 +184,7 @@ bool AAC::ConvertToADTS(std::vector* buffer) const { // Currently this function only support GASpecificConfig defined in // ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig() bool AAC::SkipDecoderGASpecificConfig(BitReader* bit_reader) const { - switch (profile_) { + switch (audio_object_type_) { case 1: case 2: case 3: @@ -202,7 +206,7 @@ bool AAC::SkipDecoderGASpecificConfig(BitReader* bit_reader) const { } bool AAC::SkipErrorSpecificConfig() const { - switch (profile_) { + switch (audio_object_type_) { case 17: case 19: case 20: @@ -236,16 +240,17 @@ bool AAC::SkipGASpecificConfig(BitReader* bit_reader) const { RCHECK(bit_reader->ReadBits(1, &extension_flag)); RCHECK(channel_config_ != 0); - if (profile_ == 6 || profile_ == 20) + if (audio_object_type_ == 6 || audio_object_type_ == 20) RCHECK(bit_reader->ReadBits(3, &dummy)); // layerNr if (extension_flag) { - if (profile_ == 22) { + if (audio_object_type_ == 22) { RCHECK(bit_reader->ReadBits(5, &dummy)); // numOfSubFrame RCHECK(bit_reader->ReadBits(11, &dummy)); // layer_length } - if (profile_ == 17 || profile_ == 
19 || profile_ == 20 || profile_ == 23) { + if (audio_object_type_ == 17 || audio_object_type_ == 19 || + audio_object_type_ == 20 || audio_object_type_ == 23) { RCHECK(bit_reader->ReadBits(3, &dummy)); // resilience flags } diff --git a/media/mp4/aac.h b/media/mp4/aac.h index 19fa9c51b6..af1b235a45 100644 --- a/media/mp4/aac.h +++ b/media/mp4/aac.h @@ -30,17 +30,18 @@ class AAC { // The function will parse the data and get the ElementaryStreamDescriptor, // then it will parse the ElementaryStreamDescriptor to get audio stream // configurations. + // |data| is always copied to |codec_specific_data_|. bool Parse(const std::vector& data); // Gets the output sample rate for the AAC stream. // |sbr_in_mimetype| should be set to true if the SBR mode is // signalled in the mimetype. (ie mp4a.40.5 in the codecs parameter). - int GetOutputSamplesPerSecond(bool sbr_in_mimetype) const; + uint32 GetOutputSamplesPerSecond(bool sbr_in_mimetype) const; // Gets number of channels for the AAC stream. // |sbr_in_mimetype| should be set to true if the SBR mode is // signalled in the mimetype. (ie mp4a.40.5 in the codecs parameter). - int GetNumChannels(bool sbr_in_mimetype) const; + uint8 GetNumChannels(bool sbr_in_mimetype) const; // This function converts a raw AAC frame into an AAC frame with an ADTS // header. On success, the function returns true and stores the converted data @@ -48,12 +49,22 @@ class AAC { // unchanged. bool ConvertToADTS(std::vector* buffer) const; -#if defined(OS_ANDROID) - // Returns the codec specific data needed by android MediaCodec. + uint8 audio_object_type() const { + return audio_object_type_; + } + + uint32 frequency() const { + return frequency_; + } + + uint8 num_channels() const { + return num_channels_; + } + + // Returns the codec specific data. std::vector codec_specific_data() const { return codec_specific_data_; } -#endif // Size in bytes of the ADTS header added by ConvertEsdsToADTS(). 
static const size_t kADTSHeaderSize = 7; @@ -65,21 +76,20 @@ class AAC { // The following variables store the AAC specific configuration information // that are used to generate the ADTS header. - uint8 profile_; + uint8 audio_object_type_; uint8 frequency_index_; uint8 channel_config_; + // Is Parametric Stereo on? + bool ps_present_; -#if defined(OS_ANDROID) - // The codec specific data needed by the android MediaCodec. std::vector codec_specific_data_; -#endif // The following variables store audio configuration information. // They are based on the AAC specific configuration but can be overridden // by extensions in elementary stream descriptor. - int frequency_; - int extension_frequency_; - int num_channels_; + uint32 frequency_; + uint32 extension_frequency_; + uint8 num_channels_; }; } // namespace mp4 diff --git a/media/mp4/box_definitions.cc b/media/mp4/box_definitions.cc index 370de7cfa5..08728dbde9 100644 --- a/media/mp4/box_definitions.cc +++ b/media/mp4/box_definitions.cc @@ -5,6 +5,7 @@ #include "media/mp4/box_definitions.h" #include "base/logging.h" +#include "media/base/bit_reader.h" #include "media/mp4/es_descriptor.h" #include "media/mp4/rcheck.h" @@ -506,6 +507,13 @@ AVCDecoderConfigurationRecord::~AVCDecoderConfigurationRecord() {} FourCC AVCDecoderConfigurationRecord::BoxType() const { return FOURCC_AVCC; } bool AVCDecoderConfigurationRecord::Parse(BoxReader* reader) { + RCHECK(reader->ReadVec(&data, reader->size() - 8)); + + BufferReader buffer_reader(&data[0], data.size()); + return ParseData(&buffer_reader); +} + +bool AVCDecoderConfigurationRecord::ParseData(BufferReader* reader) { RCHECK(reader->Read1(&version) && version == 1 && reader->Read1(&profile_indication) && reader->Read1(&profile_compatibility) && @@ -679,8 +687,21 @@ bool MediaHeader::Parse(BoxReader* reader) { reader->Read4(&timescale) && reader->Read4Into8(&duration)); } - // Skip language information - return reader->SkipBytes(4); + + // Read language codes into temp first then 
use BitReader to read the values. + // ISO-639-2/T language code: unsigned int(5)[3] language (2 bytes). + std::vector temp; + RCHECK(reader->ReadVec(&temp, 2)); + + BitReader bit_reader(&temp[0], 2); + bit_reader.SkipBits(1); + for (int i = 0; i < 3; ++i) { + CHECK(bit_reader.ReadBits(5, &language[i])); + language[i] += 0x60; + } + language[3] = '\0'; + + return reader->SkipBytes(2); } MediaInformation::MediaInformation() {} diff --git a/media/mp4/box_definitions.h b/media/mp4/box_definitions.h index 9d1f98b78a..b4240f1108 100644 --- a/media/mp4/box_definitions.h +++ b/media/mp4/box_definitions.h @@ -145,6 +145,13 @@ struct HandlerReference : Box { struct AVCDecoderConfigurationRecord : Box { DECLARE_BOX_METHODS(AVCDecoderConfigurationRecord); + bool ParseData(BufferReader* reader); + + // Contains full avc decoder configuration record as defined in iso14496-15 + // 5.2.4.1, including possible extension bytes described in paragraph 3. + // Known fields defined in the spec are also parsed and included in this + // structure. + std::vector data; uint8 version; uint8 profile_indication; @@ -307,6 +314,8 @@ struct MediaHeader : Box { uint64 modification_time; uint32 timescale; uint64 duration; + // 3-char language code + 1 null terminating char. + char language[4]; }; struct MediaInformation : Box { diff --git a/media/mp4/mp4_media_parser.cc b/media/mp4/mp4_media_parser.cc index 489a9e0d36..4d8ae21a89 100644 --- a/media/mp4/mp4_media_parser.cc +++ b/media/mp4/mp4_media_parser.cc @@ -8,9 +8,7 @@ #include "base/callback_helpers.h" #include "base/logging.h" #include "base/memory/ref_counted.h" -#include "base/time/time.h" #include "media/base/audio_stream_info.h" -#include "media/base/buffers.h" #include "media/base/media_sample.h" #include "media/base/video_stream_info.h" #include "media/mp4/box_definitions.h" @@ -21,11 +19,8 @@ namespace { -base::TimeDelta TimeDeltaFromRational(int64 numer, int64 denom) { - DCHECK_LT((numer > 0 ? 
numer : -numer), - kint64max / base::Time::kMicrosecondsPerSecond); - return base::TimeDelta::FromMicroseconds(base::Time::kMicrosecondsPerSecond * - numer / denom); +uint64 Rescale(uint64 time_in_old_scale, uint32 old_scale, uint32 new_scale) { + return (static_cast(time_in_old_scale) / old_scale) * new_scale; } } // namespace @@ -41,8 +36,6 @@ MP4MediaParser::MP4MediaParser() has_video_(false), audio_track_id_(0), video_track_id_(0), - // TODO(kqyang): do we need to care about it?? - has_sbr_(false), is_audio_track_encrypted_(false), is_video_track_encrypted_(false) { } @@ -150,6 +143,23 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { for (std::vector::const_iterator track = moov_->tracks.begin(); track != moov_->tracks.end(); ++track) { + const uint32 timescale = track->media.header.timescale; + + // Calculate duration (based on timescale). + uint64 duration = 0; + if (track->media.header.duration > 0) { + duration = track->media.header.duration; + } else if (moov_->extends.header.fragment_duration > 0) { + DCHECK(moov_->header.timescale != 0); + duration = Rescale(moov_->extends.header.fragment_duration, + moov_->header.timescale, timescale); + } else if (moov_->header.duration > 0 && + moov_->header.duration != kuint64max) { + DCHECK(moov_->header.timescale != 0); + duration = Rescale(moov_->header.duration, moov_->header.timescale, + timescale); + } + // TODO(strobe): Only the first audio and video track present in a file are // used. (Track selection is better accomplished via Source IDs, though, so // adding support for track selection within a stream is low-priority.) @@ -209,22 +219,22 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { } AudioCodec codec = kUnknownAudioCodec; - int num_channels = 0; - int sample_per_second = 0; + uint8 num_channels = 0; + uint32 sampling_frequency = 0; + uint8 audio_object_type = 0; std::vector extra_data; // Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or - // supported MPEG2 AAC varients. 
+ // supported MPEG2 AAC variants. if (ESDescriptor::IsAAC(audio_type)) { codec = kCodecAAC; - num_channels = aac.GetNumChannels(has_sbr_); - sample_per_second = aac.GetOutputSamplesPerSecond(has_sbr_); -#if defined(OS_ANDROID) + num_channels = aac.num_channels(); + sampling_frequency = aac.frequency(); + audio_object_type = aac.audio_object_type(); extra_data = aac.codec_specific_data(); -#endif } else if (audio_type == kEAC3) { codec = kCodecEAC3; num_channels = entry.channelcount; - sample_per_second = entry.samplerate; + sampling_frequency = entry.samplerate; } else { LOG(ERROR) << "Unsupported audio object type 0x" << std::hex << audio_type << " in esds."; @@ -234,15 +244,19 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_; streams.push_back( - new AudioStreamInfo(track->header.track_id, - track->media.header.timescale, - codec, - entry.samplesize / 8, - num_channels, - sample_per_second, - extra_data.size() ? &extra_data[0] : NULL, - extra_data.size(), - is_audio_track_encrypted_)); + new AudioStreamInfo( + track->header.track_id, + timescale, + duration, + codec, + AudioStreamInfo::GetCodecString(codec, audio_object_type), + track->media.header.language, + entry.samplesize, + num_channels, + sampling_frequency, + extra_data.size() ? 
&extra_data[0] : NULL, + extra_data.size(), + is_audio_track_encrypted_)); has_audio_ = true; audio_track_id_ = track->header.track_id; } @@ -265,35 +279,31 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { return false; } + const std::string codec_string = + VideoStreamInfo::GetCodecString( + kCodecH264, entry.avcc.profile_indication, + entry.avcc.profile_compatibility, entry.avcc.avc_level); + is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_; streams.push_back( - new VideoStreamInfo(track->header.track_id, - track->media.header.timescale, - kCodecH264, - entry.width, - entry.height, - // No decoder-specific buffer needed for AVC. - NULL, 0, - is_video_track_encrypted_)); + new VideoStreamInfo( + track->header.track_id, + timescale, + duration, + kCodecH264, + codec_string, + track->media.header.language, + entry.width, + entry.height, + &entry.avcc.data[0], + entry.avcc.data.size(), + is_video_track_encrypted_)); has_video_ = true; video_track_id_ = track->header.track_id; } } - // TODO(kqyang): figure out how to get duration for every tracks/streams. 
- base::TimeDelta duration; - if (moov_->extends.header.fragment_duration > 0) { - duration = TimeDeltaFromRational(moov_->extends.header.fragment_duration, - moov_->header.timescale); - } else if (moov_->header.duration > 0 && - moov_->header.duration != kuint64max) { - duration = TimeDeltaFromRational(moov_->header.duration, - moov_->header.timescale); - } else { - duration = kInfiniteDuration(); - } - init_cb_.Run(true, streams); EmitNeedKeyIfNecessary(moov_->pssh); diff --git a/media/mp4/mp4_media_parser.h b/media/mp4/mp4_media_parser.h index 54da502864..aeabd48a96 100644 --- a/media/mp4/mp4_media_parser.h +++ b/media/mp4/mp4_media_parser.h @@ -16,13 +16,9 @@ namespace media { -class SubsampleEntry; - namespace mp4 { struct Movie; -class AAC; -class AVCDecoderConfigurationRecord; class BoxReader; class ProtectionSystemSpecificHeader; class TrackRunIterator; @@ -92,7 +88,6 @@ class MP4MediaParser : public MediaParser { bool has_video_; uint32 audio_track_id_; uint32 video_track_id_; - bool has_sbr_; bool is_audio_track_encrypted_; bool is_video_track_encrypted_;