Handle preroll and codec_delay when transmuxing opus
Closes Issue #102 Change-Id: I26aa56a63c17c85298311cc17963dd26f26e501e
This commit is contained in:
parent
d4e2f3c098
commit
26cb91e29b
|
@ -129,12 +129,14 @@ class PackagerAppTest(unittest.TestCase):
|
|||
|
||||
def testPackageVp9Webm(self):
|
||||
self.packager.Package(
|
||||
self._GetStreams(['video'],
|
||||
self._GetStreams(
|
||||
['audio', 'video'],
|
||||
output_format='webm',
|
||||
test_files=['bear-320x240-vp9.webm']),
|
||||
test_files=['bear-320x240-vp9-opus.webm']),
|
||||
self._GetFlags())
|
||||
self._DiffGold(self.output[0], 'bear-320x240-vp9-golden.webm')
|
||||
self._DiffGold(self.mpd_output, 'bear-320x240-vp9-webm-golden.mpd')
|
||||
self._DiffGold(self.output[0], 'bear-320x240-opus-golden.webm')
|
||||
self._DiffGold(self.output[1], 'bear-320x240-vp9-golden.webm')
|
||||
self._DiffGold(self.mpd_output, 'bear-320x240-vp9-opus-webm-golden.mpd')
|
||||
|
||||
def testPackageVorbisWebm(self):
|
||||
self.packager.Package(
|
||||
|
@ -221,11 +223,11 @@ class PackagerAppTest(unittest.TestCase):
|
|||
self.packager.Package(
|
||||
self._GetStreams(['video'],
|
||||
output_format='mp4',
|
||||
test_files=['bear-320x240-vp9.webm']),
|
||||
test_files=['bear-320x240-vp9-opus.webm']),
|
||||
self._GetFlags(encryption=True))
|
||||
self._DiffGold(self.output[0], 'bear-320x240-vp9-cenc-golden.mp4')
|
||||
self._DiffGold(self.mpd_output, 'bear-320x240-vp9-cenc-golden.mpd')
|
||||
self._VerifyDecryption(self.output[0], 'bear-640x360-vp9-golden.mp4')
|
||||
self._VerifyDecryption(self.output[0], 'bear-320x240-vp9-golden.mp4')
|
||||
|
||||
def testPackageWithEncryptionAndRandomIv(self):
|
||||
self.packager.Package(
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,9 +1,18 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--Generated with https://github.com/google/edash-packager version <tag>-<hash>-<test>-->
|
||||
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7360000610351562S">
|
||||
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7809998989105225S">
|
||||
<Period id="0">
|
||||
<AdaptationSet id="0" contentType="video" width="320" height="240" frameRate="1000000/34000" par="16:9">
|
||||
<Representation id="0" bandwidth="203313" codecs="vp9" mimeType="video/webm" sar="427:320">
|
||||
<AdaptationSet id="0" contentType="audio">
|
||||
<Representation id="0" bandwidth="76531" codecs="opus" mimeType="audio/webm" audioSamplingRate="48000">
|
||||
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
|
||||
<BaseURL>output_audio.webm</BaseURL>
|
||||
<SegmentBase indexRange="26555-26603" timescale="1000000">
|
||||
<Initialization range="0-322"/>
|
||||
</SegmentBase>
|
||||
</Representation>
|
||||
</AdaptationSet>
|
||||
<AdaptationSet id="1" contentType="video" width="320" height="240" frameRate="1000000/34000" par="16:9">
|
||||
<Representation id="1" bandwidth="203313" codecs="vp9" mimeType="video/webm" sar="427:320">
|
||||
<BaseURL>output_video.webm</BaseURL>
|
||||
<SegmentBase indexRange="69485-69532" timescale="1000000">
|
||||
<Initialization range="0-286"/>
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#include "packager/media/base/audio_stream_info.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "packager/base/logging.h"
|
||||
#include "packager/base/strings/string_number_conversions.h"
|
||||
#include "packager/base/strings/stringprintf.h"
|
||||
|
@ -55,6 +57,8 @@ AudioStreamInfo::AudioStreamInfo(int track_id,
|
|||
uint8_t sample_bits,
|
||||
uint8_t num_channels,
|
||||
uint32_t sampling_frequency,
|
||||
uint64_t seek_preroll_ns,
|
||||
uint64_t codec_delay_ns,
|
||||
uint32_t max_bitrate,
|
||||
uint32_t avg_bitrate,
|
||||
const uint8_t* extra_data,
|
||||
|
@ -73,6 +77,8 @@ AudioStreamInfo::AudioStreamInfo(int track_id,
|
|||
sample_bits_(sample_bits),
|
||||
num_channels_(num_channels),
|
||||
sampling_frequency_(sampling_frequency),
|
||||
seek_preroll_ns_(seek_preroll_ns),
|
||||
codec_delay_ns_(codec_delay_ns),
|
||||
max_bitrate_(max_bitrate),
|
||||
avg_bitrate_(avg_bitrate) {}
|
||||
|
||||
|
@ -87,11 +93,20 @@ bool AudioStreamInfo::IsValidConfig() const {
|
|||
}
|
||||
|
||||
std::string AudioStreamInfo::ToString() const {
|
||||
return base::StringPrintf(
|
||||
std::string str = base::StringPrintf(
|
||||
"%s codec: %s\n sample_bits: %d\n num_channels: %d\n "
|
||||
"sampling_frequency: %d\n language: %s\n",
|
||||
StreamInfo::ToString().c_str(), AudioCodecToString(codec_).c_str(),
|
||||
sample_bits_, num_channels_, sampling_frequency_, language().c_str());
|
||||
if (seek_preroll_ns_ != 0) {
|
||||
base::StringAppendF(&str, " seek_preroll_ns: %" PRIu64 "d\n",
|
||||
seek_preroll_ns_);
|
||||
}
|
||||
if (codec_delay_ns_ != 0) {
|
||||
base::StringAppendF(&str, " codec_delay_ns: %" PRIu64 "d\n",
|
||||
codec_delay_ns_);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
std::string AudioStreamInfo::GetCodecString(AudioCodec codec,
|
||||
|
|
|
@ -44,6 +44,8 @@ class AudioStreamInfo : public StreamInfo {
|
|||
uint8_t sample_bits,
|
||||
uint8_t num_channels,
|
||||
uint32_t sampling_frequency,
|
||||
uint64_t seek_preroll_ns,
|
||||
uint64_t codec_delay_ns,
|
||||
uint32_t max_bitrate,
|
||||
uint32_t avg_bitrate,
|
||||
const uint8_t* extra_data,
|
||||
|
@ -64,6 +66,8 @@ class AudioStreamInfo : public StreamInfo {
|
|||
uint32_t bytes_per_frame() const {
|
||||
return static_cast<uint32_t>(num_channels_) * sample_bits_ / 8;
|
||||
}
|
||||
uint64_t seek_preroll_ns() const { return seek_preroll_ns_; }
|
||||
uint64_t codec_delay_ns() const { return codec_delay_ns_; }
|
||||
uint32_t max_bitrate() const { return max_bitrate_; }
|
||||
uint32_t avg_bitrate() const { return avg_bitrate_; }
|
||||
|
||||
|
@ -84,6 +88,8 @@ class AudioStreamInfo : public StreamInfo {
|
|||
uint8_t sample_bits_;
|
||||
uint8_t num_channels_;
|
||||
uint32_t sampling_frequency_;
|
||||
uint64_t seek_preroll_ns_;
|
||||
uint64_t codec_delay_ns_;
|
||||
uint32_t max_bitrate_;
|
||||
uint32_t avg_bitrate_;
|
||||
|
||||
|
|
|
@ -233,8 +233,10 @@ bool EsParserAdts::UpdateAudioConfiguration(const uint8_t* adts_frame,
|
|||
kAacSampleSizeBits,
|
||||
adts_header.GetNumChannels(),
|
||||
extended_samples_per_second,
|
||||
0,
|
||||
0,
|
||||
0 /* seek preroll */,
|
||||
0 /* codec delay */,
|
||||
0 /* max bitrate */,
|
||||
0 /* avg bitrate */,
|
||||
audio_specific_config.data(),
|
||||
audio_specific_config.size(),
|
||||
false));
|
||||
|
|
|
@ -78,6 +78,8 @@ const bool kIsEncrypted = false;
|
|||
const uint8_t kSampleBits = 16;
|
||||
const uint8_t kNumChannels = 2;
|
||||
const uint32_t kSamplingFrequency = 44100;
|
||||
const uint64_t kSeekPreroll = 0;
|
||||
const uint64_t kCodecDelay = 0;
|
||||
const uint32_t kMaxBitrate = 320000;
|
||||
const uint32_t kAverageBitrate = 256000;
|
||||
|
||||
|
@ -111,9 +113,9 @@ scoped_refptr<VideoStreamInfo> CreateVideoStreamInfo(VideoCodec codec) {
|
|||
|
||||
scoped_refptr<AudioStreamInfo> CreateAudioStreamInfo(AudioCodec codec) {
|
||||
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, codec, kCodecString,
|
||||
kLanguage, kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
|
||||
kAverageBitrate, kAudioExtraData, arraysize(kAudioExtraData),
|
||||
kTrackId, kTimeScale, kDuration, codec, kCodecString, kLanguage,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
|
||||
kMaxBitrate, kAverageBitrate, kAudioExtraData, arraysize(kAudioExtraData),
|
||||
kIsEncrypted));
|
||||
return stream_info;
|
||||
}
|
||||
|
|
|
@ -46,6 +46,8 @@ const bool kIsEncrypted = false;
|
|||
const uint8_t kSampleBits = 16;
|
||||
const uint8_t kNumChannels = 2;
|
||||
const uint32_t kSamplingFrequency = 44100;
|
||||
const uint64_t kSeekPreroll = 0;
|
||||
const uint64_t kCodecDelay = 0;
|
||||
const uint32_t kMaxBitrate = 320000;
|
||||
const uint32_t kAverageBitrate = 256000;
|
||||
|
||||
|
@ -175,16 +177,18 @@ TEST_F(TsWriterTest, InitializeVideoNonH264) {
|
|||
TEST_F(TsWriterTest, InitializeAudioAac) {
|
||||
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
|
||||
kAverageBitrate, kExtraData, arraysize(kExtraData), kIsEncrypted));
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
|
||||
kMaxBitrate, kAverageBitrate, kExtraData, arraysize(kExtraData),
|
||||
kIsEncrypted));
|
||||
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, !kWillBeEncrypted));
|
||||
}
|
||||
|
||||
TEST_F(TsWriterTest, InitializeAudioNonAac) {
|
||||
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, AudioCodec::kCodecOpus, kCodecString,
|
||||
kLanguage, kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
|
||||
kAverageBitrate, kExtraData, arraysize(kExtraData), kIsEncrypted));
|
||||
kLanguage, kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll,
|
||||
kCodecDelay, kMaxBitrate, kAverageBitrate, kExtraData,
|
||||
arraysize(kExtraData), kIsEncrypted));
|
||||
EXPECT_FALSE(ts_writer_.Initialize(*stream_info, !kWillBeEncrypted));
|
||||
}
|
||||
|
||||
|
@ -252,8 +256,8 @@ TEST_F(TsWriterTest, ClearAacPmt) {
|
|||
|
||||
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
|
||||
kAverageBitrate, kAacBasicProfileExtraData,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
|
||||
kMaxBitrate, kAverageBitrate, kAacBasicProfileExtraData,
|
||||
arraysize(kAacBasicProfileExtraData), kIsEncrypted));
|
||||
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, !kWillBeEncrypted));
|
||||
|
||||
|
@ -340,8 +344,8 @@ TEST_F(TsWriterTest, ClearLeadAacPmt) {
|
|||
|
||||
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
|
||||
kAverageBitrate, kAacBasicProfileExtraData,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
|
||||
kMaxBitrate, kAverageBitrate, kAacBasicProfileExtraData,
|
||||
arraysize(kAacBasicProfileExtraData), kIsEncrypted));
|
||||
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, kWillBeEncrypted));
|
||||
|
||||
|
@ -370,8 +374,8 @@ TEST_F(TsWriterTest, EncryptedSegmentsAacPmt) {
|
|||
|
||||
scoped_refptr<AudioStreamInfo> stream_info(new AudioStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, kAacAudioCodec, kCodecString, kLanguage,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kMaxBitrate,
|
||||
kAverageBitrate, kAacBasicProfileExtraData,
|
||||
kSampleBits, kNumChannels, kSamplingFrequency, kSeekPreroll, kCodecDelay,
|
||||
kMaxBitrate, kAverageBitrate, kAacBasicProfileExtraData,
|
||||
arraysize(kAacBasicProfileExtraData), kIsEncrypted));
|
||||
EXPECT_TRUE(ts_writer_.Initialize(*stream_info, kWillBeEncrypted));
|
||||
|
||||
|
|
|
@ -443,6 +443,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
|
|||
entry.samplesize,
|
||||
num_channels,
|
||||
sampling_frequency,
|
||||
0 /* seek preroll */,
|
||||
0 /* codec delay */,
|
||||
max_bitrate,
|
||||
avg_bitrate,
|
||||
extra_data.data(),
|
||||
|
|
|
@ -323,6 +323,8 @@ Status Segmenter::CreateAudioTrack(AudioStreamInfo* info) {
|
|||
track->set_type(mkvmuxer::Tracks::kAudio);
|
||||
track->set_sample_rate(info->sampling_frequency());
|
||||
track->set_channels(info->num_channels());
|
||||
track->set_seek_pre_roll(info->seek_preroll_ns());
|
||||
track->set_codec_delay(info->codec_delay_ns());
|
||||
|
||||
if (encryptor_)
|
||||
encryptor_->AddTrackInfo(track);
|
||||
|
|
|
@ -32,6 +32,8 @@ scoped_refptr<AudioStreamInfo> WebMAudioClient::GetAudioStreamInfo(
|
|||
int64_t track_num,
|
||||
const std::string& codec_id,
|
||||
const std::vector<uint8_t>& codec_private,
|
||||
int64_t seek_preroll,
|
||||
int64_t codec_delay,
|
||||
const std::string& language,
|
||||
bool is_encrypted) {
|
||||
AudioCodec audio_codec = kUnknownAudioCodec;
|
||||
|
@ -69,8 +71,9 @@ scoped_refptr<AudioStreamInfo> WebMAudioClient::GetAudioStreamInfo(
|
|||
return scoped_refptr<AudioStreamInfo>(new AudioStreamInfo(
|
||||
track_num, kWebMTimeScale, 0, audio_codec,
|
||||
AudioStreamInfo::GetCodecString(audio_codec, 0), language,
|
||||
kSampleSizeInBits, channels_, sampling_frequency, 0, 0, extra_data,
|
||||
extra_data_size, is_encrypted));
|
||||
kSampleSizeInBits, channels_, sampling_frequency,
|
||||
seek_preroll < 0 ? 0 : seek_preroll, codec_delay < 0 ? 0 : codec_delay, 0,
|
||||
0, extra_data, extra_data_size, is_encrypted));
|
||||
}
|
||||
|
||||
bool WebMAudioClient::OnUInt(int id, int64_t val) {
|
||||
|
|
|
@ -25,9 +25,18 @@ class WebMAudioClient : public WebMParserClient {
|
|||
/// Reset this object's state so it can process a new audio track element.
|
||||
void Reset();
|
||||
|
||||
/// Create an AudioStreamInfo with the data in |track_num|, |codec_id|,
|
||||
/// |codec_private|, |is_encrypted| and the fields parsed from the last audio
|
||||
/// track element this object was used to parse.
|
||||
/// Create an AudioStreamInfo with the parameters specified.
|
||||
/// @param track_num indicates the track number.
|
||||
/// @param codec_id is the codec identifier.
|
||||
/// @param codec_private contains codec specific data.
|
||||
/// @param seek_preroll indicates seek preroll in nanoseconds. A negative
|
||||
/// value means that the value is not set; in this case, a default
|
||||
/// value of 0 is used.
|
||||
/// @param codec_delay indicates codec delay in nanoseconds. A negative
|
||||
/// value means that the value is not set; in this case, a default
|
||||
/// value of 0 is used.
|
||||
/// @param language indicates the language for the track.
|
||||
/// @param is_encrypted indicates whether the stream is encrypted.
|
||||
/// @return An AudioStreamInfo scoped_refptr if successful.
|
||||
/// @return An empty scoped_refptr if there were unexpected values in the
|
||||
/// provided parameters or audio track element fields.
|
||||
|
@ -35,6 +44,8 @@ class WebMAudioClient : public WebMParserClient {
|
|||
int64_t track_num,
|
||||
const std::string& codec_id,
|
||||
const std::vector<uint8_t>& codec_private,
|
||||
int64_t seek_preroll,
|
||||
int64_t codec_delay,
|
||||
const std::string& language,
|
||||
bool is_encrypted);
|
||||
|
||||
|
|
|
@ -82,6 +82,8 @@ const char kLanguage[] = "eng";
|
|||
const uint8_t kBitsPerSample = 8u;
|
||||
const uint8_t kNumChannels = 2u;
|
||||
const uint32_t kSamplingFrequency = 48000u;
|
||||
const uint64_t kSeekPreroll = 0u;
|
||||
const uint64_t kCodecDelay = 0u;
|
||||
const size_t kExtraDataSize = 0u;
|
||||
const bool kEncrypted = true;
|
||||
const uint16_t kWidth = 320u;
|
||||
|
@ -323,6 +325,8 @@ class WebMClusterParserTest : public testing::Test {
|
|||
kBitsPerSample,
|
||||
kNumChannels,
|
||||
kSamplingFrequency,
|
||||
kSeekPreroll,
|
||||
kCodecDelay,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
|
|
|
@ -203,8 +203,8 @@ bool WebMTracksParser::OnListEnd(int id) {
|
|||
|
||||
DCHECK(!audio_stream_info_);
|
||||
audio_stream_info_ = audio_client_.GetAudioStreamInfo(
|
||||
audio_track_num_, codec_id_, codec_private_, track_language_,
|
||||
!audio_encryption_key_id_.empty());
|
||||
audio_track_num_, codec_id_, codec_private_, seek_preroll_,
|
||||
codec_delay_, track_language_, !audio_encryption_key_id_.empty());
|
||||
if (!audio_stream_info_)
|
||||
return false;
|
||||
} else {
|
||||
|
|
|
@ -752,12 +752,13 @@ bool WvmMediaParser::ParseIndexEntry() {
|
|||
stream_id_count_++;
|
||||
}
|
||||
if (has_audio) {
|
||||
AudioCodec audio_codec = kCodecAAC;
|
||||
const AudioCodec audio_codec = kCodecAAC;
|
||||
// TODO(beil): Pass in max and average bitrate in wvm container.
|
||||
stream_infos_.push_back(new AudioStreamInfo(
|
||||
stream_id_count_, time_scale, track_duration, audio_codec,
|
||||
std::string(), std::string(), kAacSampleSizeBits, num_channels,
|
||||
sampling_frequency, 0, 0, audio_codec_config.data(),
|
||||
sampling_frequency, 0 /* seek preroll */, 0 /* codec delay */,
|
||||
0 /* max bitrate */, 0 /* avg bitrate */, audio_codec_config.data(),
|
||||
audio_codec_config.size(), true));
|
||||
program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
|
||||
base::UintToString(audio_pes_stream_id ?
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
// found in the LICENSE file.
|
||||
|
||||
bear-320x240.webm - WebM encode of bear.1280x720.mp4 resized to 320x240.
|
||||
bear-320x240-vp9.webm - Same as above, but with vp9 codec.
|
||||
bear-320x240-vp9-opus.webm - Same as above, but with vp9 and opus codecs.
|
||||
no_streams.webm - Header, Info, & Tracks element from bear-320x240.webm slightly corrupted so it looks
|
||||
like there are no tracks.
|
||||
nonzero-start-time.webm - Has the same headers as bear-320x240.webm but the first cluster of this file
|
||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue