Handle preroll and codec_delay when transmuxing opus

Closes Issue #102

Change-Id: I26aa56a63c17c85298311cc17963dd26f26e501e
This commit is contained in:
Kongqun Yang 2016-05-05 15:26:48 -07:00
parent d4e2f3c098
commit 26cb91e29b
21 changed files with 99 additions and 36 deletions

View File

@ -129,12 +129,14 @@ class PackagerAppTest(unittest.TestCase):
def testPackageVp9Webm(self):
self.packager.Package(
self._GetStreams(['video'],
output_format='webm',
test_files=['bear-320x240-vp9.webm']),
self._GetStreams(
['audio', 'video'],
output_format='webm',
test_files=['bear-320x240-vp9-opus.webm']),
self._GetFlags())
self._DiffGold(self.output[0], 'bear-320x240-vp9-golden.webm')
self._DiffGold(self.mpd_output, 'bear-320x240-vp9-webm-golden.mpd')
self._DiffGold(self.output[0], 'bear-320x240-opus-golden.webm')
self._DiffGold(self.output[1], 'bear-320x240-vp9-golden.webm')
self._DiffGold(self.mpd_output, 'bear-320x240-vp9-opus-webm-golden.mpd')
def testPackageVorbisWebm(self):
self.packager.Package(
@ -221,11 +223,11 @@ class PackagerAppTest(unittest.TestCase):
self.packager.Package(
self._GetStreams(['video'],
output_format='mp4',
test_files=['bear-320x240-vp9.webm']),
test_files=['bear-320x240-vp9-opus.webm']),
self._GetFlags(encryption=True))
self._DiffGold(self.output[0], 'bear-320x240-vp9-cenc-golden.mp4')
self._DiffGold(self.mpd_output, 'bear-320x240-vp9-cenc-golden.mpd')
self._VerifyDecryption(self.output[0], 'bear-640x360-vp9-golden.mp4')
self._VerifyDecryption(self.output[0], 'bear-320x240-vp9-golden.mp4')
def testPackageWithEncryptionAndRandomIv(self):
self.packager.Package(

Binary file not shown.

View File

@ -1,9 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/google/edash-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7360000610351562S">
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7809998989105225S">
<Period id="0">
<AdaptationSet id="0" contentType="video" width="320" height="240" frameRate="1000000/34000" par="16:9">
<Representation id="0" bandwidth="203313" codecs="vp9" mimeType="video/webm" sar="427:320">
<AdaptationSet id="0" contentType="audio">
<Representation id="0" bandwidth="76531" codecs="opus" mimeType="audio/webm" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>output_audio.webm</BaseURL>
<SegmentBase indexRange="26555-26603" timescale="1000000">
<Initialization range="0-322"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="320" height="240" frameRate="1000000/34000" par="16:9">
<Representation id="1" bandwidth="203313" codecs="vp9" mimeType="video/webm" sar="427:320">
<BaseURL>output_video.webm</BaseURL>
<SegmentBase indexRange="69485-69532" timescale="1000000">
<Initialization range="0-286"/>

View File

@ -6,6 +6,8 @@
#include "packager/media/base/audio_stream_info.h"
#include <inttypes.h>
#include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/base/strings/stringprintf.h"
@ -55,6 +57,8 @@ AudioStreamInfo::AudioStreamInfo(int track_id,
uint8_t sample_bits,
uint8_t num_channels,
uint32_t sampling_frequency,
uint64_t seek_preroll_ns,
uint64_t codec_delay_ns,
uint32_t max_bitrate,
uint32_t avg_bitrate,
const uint8_t* extra_data,
@ -73,6 +77,8 @@ AudioStreamInfo::AudioStreamInfo(int track_id,
sample_bits_(sample_bits),
num_channels_(num_channels),
sampling_frequency_(sampling_frequency),
seek_preroll_ns_(seek_preroll_ns),
codec_delay_ns_(codec_delay_ns),
max_bitrate_(max_bitrate),
avg_bitrate_(avg_bitrate) {}
@ -87,11 +93,20 @@ bool AudioStreamInfo::IsValidConfig() const {
}
std::string AudioStreamInfo::ToString() const {
return base::StringPrintf(
std::string str = base::StringPrintf(
"%s codec: %s\n sample_bits: %d\n num_channels: %d\n "
"sampling_frequency: %d\n language: %s\n",
StreamInfo::ToString().c_str(), AudioCodecToString(codec_).c_str(),
sample_bits_, num_channels_, sampling_frequency_, language().c_str());
if (seek_preroll_ns_ != 0) {
base::StringAppendF(&str, " seek_preroll_ns: %" PRIu64 "d\n",
seek_preroll_ns_);
}
if (codec_delay_ns_ != 0) {
base::StringAppendF(&str, " codec_delay_ns: %" PRIu64 "d\n",
codec_delay_ns_);
}
return str;
}
std::string AudioStreamInfo::GetCodecString(AudioCodec codec,

View File

@ -44,6 +44,8 @@ class AudioStreamInfo : public StreamInfo {
uint8_t sample_bits,
uint8_t num_channels,
uint32_t sampling_frequency,
uint64_t seek_preroll_ns,
uint64_t codec_delay_ns,
uint32_t max_bitrate,
uint32_t avg_bitrate,
const uint8_t* extra_data,
@ -64,6 +66,8 @@ class AudioStreamInfo : public StreamInfo {
uint32_t bytes_per_frame() const {
return static_cast<uint32_t>(num_channels_) * sample_bits_ / 8;
}
uint64_t seek_preroll_ns() const { return seek_preroll_ns_; }
uint64_t codec_delay_ns() const { return codec_delay_ns_; }
uint32_t max_bitrate() const { return max_bitrate_; }
uint32_t avg_bitrate() const { return avg_bitrate_; }
@ -84,6 +88,8 @@ class AudioStreamInfo : public StreamInfo {
uint8_t sample_bits_;
uint8_t num_channels_;
uint32_t sampling_frequency_;
uint64_t seek_preroll_ns_;
uint64_t codec_delay_ns_;
uint32_t max_bitrate_;
uint32_t avg_bitrate_;

View File

@ -233,8 +233,10 @@ bool EsParserAdts::UpdateAudioConfiguration(const uint8_t* adts_frame,
kAacSampleSizeBits,
adts_header.GetNumChannels(),
extended_samples_per_second,
0,
0,
0 /* seek preroll */,
0 /* codec delay */,
0 /* max bitrate */,
0 /* avg bitrate */,
audio_specific_config.data(),
audio_specific_config.size(),
false));

View File

@ -78,6 +78,8 @@ const bool kIsEncrypted = false;
const uint8_t kSampleBits = 16;
const uint8_t kNumChannels = 2;
const uint32_t kSamplingFrequency = 44100;
const uint64_t kSeekPreroll = 0;
const uint64_t kCodecDelay = 0;
const uint32_t kMaxBitrate = 320000;
const uint32_t kAverageBitrate = 256000;
@ -111,9 +113,9 @@ scoped_refptr<VideoStreamInfo> CreateVideoStreamInfo(VideoCodec codec) {
scoped_refptr<AudioStreamInfo> CreateAudioStreamInfo(AudioCodec codec) {
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
kTrackId, kTimeScale, kDuration, codec, kCodecString,
kLanguage, kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
kAverageBitrate, kAudioExtraData, arraysize(kAudioExtraData),
kTrackId, kTimeScale, kDuration, codec, kCodecString, kLanguage,
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
kMaxBitrate, kAverageBitrate, kAudioExtraData, arraysize(kAudioExtraData),
kIsEncrypted));
return stream_info;
}

View File

@ -46,6 +46,8 @@ const bool kIsEncrypted = false;
const uint8_t kSampleBits = 16;
const uint8_t kNumChannels = 2;
const uint32_t kSamplingFrequency = 44100;
const uint64_t kSeekPreroll = 0;
const uint64_t kCodecDelay = 0;
const uint32_t kMaxBitrate = 320000;
const uint32_t kAverageBitrate = 256000;
@ -175,16 +177,18 @@ TEST_F(TsWriterTest, InitializeVideoNonH264) {
TEST_F(TsWriterTest, InitializeAudioAac) {
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
kAverageBitrate, kExtraData, arraysize(kExtraData), kIsEncrypted));
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
kMaxBitrate, kAverageBitrate, kExtraData, arraysize(kExtraData),
kIsEncrypted));
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, !kWillBeEncrypted));
}
TEST_F(TsWriterTest, InitializeAudioNonAac) {
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
kTrackId, kTimeScale, kDuration, AudioCodec::kCodecOpus, kCodecString,
kLanguage, kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
kAverageBitrate, kExtraData, arraysize(kExtraData), kIsEncrypted));
kLanguage, kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll,
kCodecDelay, kMaxBitrate, kAverageBitrate, kExtraData,
arraysize(kExtraData), kIsEncrypted));
EXPECT_FALSE(ts_writer_.Initialize(*stream_info, !kWillBeEncrypted));
}
@ -252,8 +256,8 @@ TEST_F(TsWriterTest, ClearAacPmt) {
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
kAverageBitrate, kAacBasicProfileExtraData,
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
kMaxBitrate, kAverageBitrate, kAacBasicProfileExtraData,
arraysize(kAacBasicProfileExtraData), kIsEncrypted));
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, !kWillBeEncrypted));
@ -340,8 +344,8 @@ TEST_F(TsWriterTest, ClearLeadAacPmt) {
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
kAverageBitrate, kAacBasicProfileExtraData,
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
kMaxBitrate, kAverageBitrate, kAacBasicProfileExtraData,
arraysize(kAacBasicProfileExtraData), kIsEncrypted));
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, kWillBeEncrypted));
@ -370,8 +374,8 @@ TEST_F(TsWriterTest, EncryptedSegmentsAacPmt) {
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
kAverageBitrate, kAacBasicProfileExtraData,
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
kMaxBitrate, kAverageBitrate, kAacBasicProfileExtraData,
arraysize(kAacBasicProfileExtraData), kIsEncrypted));
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, kWillBeEncrypted));

View File

@ -443,6 +443,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
entry.samplesize,
num_channels,
sampling_frequency,
0 /* seek preroll */,
0 /* codec delay */,
max_bitrate,
avg_bitrate,
extra_data.data(),

View File

@ -323,6 +323,8 @@ Status Segmenter::CreateAudioTrack(AudioStreamInfo* info) {
track->set_type(mkvmuxer::Tracks::kAudio);
track->set_sample_rate(info->sampling_frequency());
track->set_channels(info->num_channels());
track->set_seek_pre_roll(info->seek_preroll_ns());
track->set_codec_delay(info->codec_delay_ns());
if (encryptor_)
encryptor_->AddTrackInfo(track);

View File

@ -32,6 +32,8 @@ scoped_refptr<AudioStreamInfo> WebMAudioClient::GetAudioStreamInfo(
int64_t track_num,
const std::string& codec_id,
const std::vector<uint8_t>& codec_private,
int64_t seek_preroll,
int64_t codec_delay,
const std::string& language,
bool is_encrypted) {
AudioCodec audio_codec = kUnknownAudioCodec;
@ -69,8 +71,9 @@ scoped_refptr<AudioStreamInfo> WebMAudioClient::GetAudioStreamInfo(
return scoped_refptr<AudioStreamInfo>(new AudioStreamInfo(
track_num, kWebMTimeScale, 0, audio_codec,
AudioStreamInfo::GetCodecString(audio_codec, 0), language,
kSampleSizeInBits, channels_, sampling_frequency, 0, 0, extra_data,
extra_data_size, is_encrypted));
kSampleSizeInBits, channels_, sampling_frequency,
seek_preroll < 0 ? 0 : seek_preroll, codec_delay < 0 ? 0 : codec_delay, 0,
0, extra_data, extra_data_size, is_encrypted));
}
bool WebMAudioClient::OnUInt(int id, int64_t val) {

View File

@ -25,9 +25,18 @@ class WebMAudioClient : public WebMParserClient {
/// Reset this object's state so it can process a new audio track element.
void Reset();
/// Create an AudioStreamInfo with the data in |track_num|, |codec_id|,
/// |codec_private|, |is_encrypted| and the fields parsed from the last audio
/// track element this object was used to parse.
/// Create an AudioStreamInfo with the parameters specified.
/// @param track_num indicates the track number.
/// @param codec_id is the codec identifier.
/// @param codec_private contains codec specific data.
/// @param seek_preroll indicates seek preroll in nanoseconds. A negative
/// value means that the value is not set; in this case, a default
/// value of 0 is used.
/// @param codec_delay indicates codec delay in nanoseconds. A negative
/// value means that the value is not set; in this case, a default
/// value of 0 is used.
/// @param language indicates the language for the track.
/// @param is_encrypted indicates whether the stream is encrypted.
/// @return An AudioStreamInfo scoped_refptr if successful.
/// @return An empty scoped_refptr if there were unexpected values in the
/// provided parameters or audio track element fields.
@ -35,6 +44,8 @@ class WebMAudioClient : public WebMParserClient {
int64_t track_num,
const std::string& codec_id,
const std::vector<uint8_t>& codec_private,
int64_t seek_preroll,
int64_t codec_delay,
const std::string& language,
bool is_encrypted);

View File

@ -82,6 +82,8 @@ const char kLanguage[] = "eng";
const uint8_t kBitsPerSample = 8u;
const uint8_t kNumChannels = 2u;
const uint32_t kSamplingFrequency = 48000u;
const uint64_t kSeekPreroll = 0u;
const uint64_t kCodecDelay = 0u;
const size_t kExtraDataSize = 0u;
const bool kEncrypted = true;
const uint16_t kWidth = 320u;
@ -323,6 +325,8 @@ class WebMClusterParserTest : public testing::Test {
kBitsPerSample,
kNumChannels,
kSamplingFrequency,
kSeekPreroll,
kCodecDelay,
0,
0,
NULL,

View File

@ -203,8 +203,8 @@ bool WebMTracksParser::OnListEnd(int id) {
DCHECK(!audio_stream_info_);
audio_stream_info_ = audio_client_.GetAudioStreamInfo(
audio_track_num_, codec_id_, codec_private_, track_language_,
!audio_encryption_key_id_.empty());
audio_track_num_, codec_id_, codec_private_, seek_preroll_,
codec_delay_, track_language_, !audio_encryption_key_id_.empty());
if (!audio_stream_info_)
return false;
} else {

View File

@ -752,12 +752,13 @@ bool WvmMediaParser::ParseIndexEntry() {
stream_id_count_++;
}
if (has_audio) {
AudioCodec audio_codec = kCodecAAC;
const AudioCodec audio_codec = kCodecAAC;
// TODO(beil): Pass in max and average bitrate in wvm container.
stream_infos_.push_back(new AudioStreamInfo(
stream_id_count_, time_scale, track_duration, audio_codec,
std::string(), std::string(), kAacSampleSizeBits, num_channels,
sampling_frequency, 0, 0, audio_codec_config.data(),
sampling_frequency, 0 /* seek preroll */, 0 /* codec delay */,
0 /* max bitrate */, 0 /* avg bitrate */, audio_codec_config.data(),
audio_codec_config.size(), true));
program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
base::UintToString(audio_pes_stream_id ?

View File

@ -3,7 +3,7 @@
// found in the LICENSE file.
bear-320x240.webm - WebM encode of bear.1280x720.mp4 resized to 320x240.
bear-320x240-vp9.webm - Same as above, but with vp9 codec.
bear-320x240-vp9-opus.webm - Same as above, but with vp9 and opus codecs.
no_streams.webm - Header, Info, & Tracks element from bear-320x240.webm slightly corrupted so it looks
like there are no tracks.
nonzero-start-time.webm - Has the same headers as bear-320x240.webm but the first cluster of this file

Binary file not shown.