Always set ES_ID to 0 when writing ES Descriptor in mp4 (#798)

Required by ISO/IEC 14496-14:2018 and ISO/IEC 23000-19:2018.

Issue #755.
This commit is contained in:
koln67 2020-07-15 21:14:15 +00:00 committed by GitHub
parent db5413ed7a
commit a8ea7fd085
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
74 changed files with 66 additions and 67 deletions

View File

@ -15,18 +15,18 @@
<AdaptationSet id="1" contentType="video" maxWidth="1280" maxHeight="720" frameRate="30000/1001" subsegmentAlignment="true" par="16:9">
<SupplementalProperty schemeIdUri="urn:mpeg:dash:adaptation-set-switching:2016" value="0"/>
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main"/>
<Representation id="1" bandwidth="2627285" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<BaseURL>bear-1280x720-video.mp4</BaseURL>
<SegmentBase indexRange="858-925" timescale="30000">
<Initialization range="0-857"/>
</SegmentBase>
</Representation>
<Representation id="2" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<Representation id="1" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<BaseURL>bear-640x360-video.mp4</BaseURL>
<SegmentBase indexRange="859-926" timescale="30000">
<Initialization range="0-858"/>
</SegmentBase>
</Representation>
<Representation id="2" bandwidth="2627285" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<BaseURL>bear-1280x720-video.mp4</BaseURL>
<SegmentBase indexRange="858-925" timescale="30000">
<Initialization range="0-857"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="2" contentType="audio" subsegmentAlignment="true">
<Representation id="3" bandwidth="133334" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">

View File

@ -7,15 +7,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="0" bandwidth="2632184" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<SegmentTemplate timescale="30000" initialization="bear-1280x720-video-init.mp4" media="bear-1280x720-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
<S t="60060" d="22022"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
<Representation id="1" bandwidth="978382" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<Representation id="0" bandwidth="978382" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<SegmentTemplate timescale="30000" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
@ -23,7 +15,7 @@
</SegmentTimeline>
</SegmentTemplate>
</Representation>
<Representation id="2" bandwidth="383593" codecs="avc1.64000d" mimeType="video/mp4" sar="1:1" width="320" height="180">
<Representation id="1" bandwidth="383593" codecs="avc1.64000d" mimeType="video/mp4" sar="1:1" width="320" height="180">
<SegmentTemplate timescale="30000" initialization="bear-320x180-video-init.mp4" media="bear-320x180-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
@ -31,6 +23,14 @@
</SegmentTimeline>
</SegmentTemplate>
</Representation>
<Representation id="2" bandwidth="2632184" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<SegmentTemplate timescale="30000" initialization="bear-1280x720-video-init.mp4" media="bear-1280x720-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
<S t="60060" d="22022"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="audio" segmentAlignment="true">
<ContentProtection value="cenc" schemeIdUri="urn:mpeg:dash:mp4protection:2011" cenc:default_KID="31323334-3536-3738-3930-313233343536"/>
@ -39,7 +39,7 @@
</ContentProtection>
<Representation id="3" bandwidth="134272" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<SegmentTemplate timescale="44100" initialization="bear-1280x720-audio-init.mp4" media="bear-1280x720-audio-$Number$.m4s" startNumber="1">
<SegmentTemplate timescale="44100" initialization="bear-640x360-audio-init.mp4" media="bear-640x360-audio-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="45056"/>
<S t="45056" d="44032"/>
@ -49,7 +49,7 @@
</Representation>
<Representation id="4" bandwidth="134272" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<SegmentTemplate timescale="44100" initialization="bear-640x360-audio-init.mp4" media="bear-640x360-audio-$Number$.m4s" startNumber="1">
<SegmentTemplate timescale="44100" initialization="bear-1280x720-audio-init.mp4" media="bear-1280x720-audio-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="45056"/>
<S t="45056" d="44032"/>

View File

@ -2,22 +2,22 @@
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.7360668182373047S">
<Period id="0" duration="PT2.002S">
<AdaptationSet id="0" contentType="text" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="0" bandwidth="1912" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
<AdaptationSet id="0" contentType="video" width="640" height="360" frameRate="30000/1001" segmentAlignment="true" par="16:9">
<Representation id="0" bandwidth="974122" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<SegmentTemplate timescale="30000" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="1000" r="1"/>
<S t="2000" d="1"/>
<S t="0" d="30030" r="1"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="640" height="360" frameRate="30000/1001" segmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="974122" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<SegmentTemplate timescale="30000" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="1">
<AdaptationSet id="1" contentType="text" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="1" bandwidth="1912" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
<S t="0" d="1000" r="1"/>
<S t="2000" d="1"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
@ -35,21 +35,21 @@
</AdaptationSet>
</Period>
<Period id="1" duration="PT.7340666666666671S">
<AdaptationSet id="0" contentType="text" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="0" bandwidth="2024" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" presentationTimeOffset="2001" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="4">
<AdaptationSet id="0" contentType="video" width="640" height="360" frameRate="30000/1001" segmentAlignment="true" par="16:9">
<Representation id="0" bandwidth="869044" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<SegmentTemplate timescale="30000" presentationTimeOffset="60059" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="3">
<SegmentTimeline>
<S t="2001" d="1000" r="2"/>
<S t="60060" d="22022"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="640" height="360" frameRate="30000/1001" segmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="869044" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<SegmentTemplate timescale="30000" presentationTimeOffset="60059" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="3">
<AdaptationSet id="1" contentType="text" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="1" bandwidth="2024" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" presentationTimeOffset="2001" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="4">
<SegmentTimeline>
<S t="60060" d="22022"/>
<S t="2001" d="1000" r="2"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>

View File

@ -219,7 +219,10 @@ bool ESDescriptor::ReadData(BitReader* reader) {
void ESDescriptor::WriteInternal(BufferWriter* writer) {
WriteHeader(writer);
writer->AppendInt(esid_);
// According to ISO/IEC 14496-14:2018 Section 4.1.2, ES_ID is always set to 0
// when the ES Descriptor is stored in an MP4 file.
const uint16_t kEsid = 0;
writer->AppendInt(kEsid);
const uint8_t kNoEsFlags = 0;
writer->AppendInt(kNoEsFlags);

View File

@ -175,7 +175,6 @@ class ESDescriptor : public BaseDescriptor {
ESDescriptor() : BaseDescriptor(DescriptorTag::kES) {}
uint16_t esid() const { return esid_; }
void set_esid(uint16_t esid) { esid_ = esid; }
const DecoderConfigDescriptor& decoder_config_descriptor() const {
return decoder_config_descriptor_;

View File

@ -21,7 +21,7 @@ TEST(ESDescriptorTest, SingleByteLengthTest) {
// ESDescriptor tag with one byte size.
0x03, 0x19,
// ESDescriptor fields.
0x00, 0x01, 0x00,
0x00, 0x00, 0x00,
// DecoderConfigDescriptor tag with one byte size.
0x04, 0x11,
// Object Type.
@ -56,6 +56,19 @@ TEST(ESDescriptorTest, SingleByteLengthTest) {
EXPECT_THAT(
std::vector<uint8_t>(writer.Buffer(), writer.Buffer() + writer.Size()),
ElementsAreArray(kBuffer));
EXPECT_EQ(0u, es_desc.esid());
const size_t kEsIdOffset = 3;
const uint8_t kEsId = 5;
data[kEsIdOffset] = kEsId;
ASSERT_TRUE(es_desc.Parse(data));
EXPECT_EQ(kEsId, es_desc.esid());
writer.Clear();
es_desc.Write(&writer);
EXPECT_THAT(
std::vector<uint8_t>(writer.Buffer(), writer.Buffer() + writer.Size()),
ElementsAreArray(kBuffer));
}
TEST(ESDescriptorTest, NonAACTest) {
@ -64,7 +77,7 @@ TEST(ESDescriptorTest, NonAACTest) {
// ESDescriptor tag with one byte size.
0x03, 0x19,
// ESDescriptor fields.
0x00, 0x01, 0x00,
0x00, 0x00, 0x00,
// DecoderConfigDescriptor tag with one byte size.
0x04, 0x11,
// Object Type.

View File

@ -383,7 +383,6 @@ class BoxDefinitionsTestGeneral : public testing::Test {
void Fill(ElementaryStreamDescriptor* esds) {
const uint8_t kDecoderSpecificInfo[] = {18, 16};
esds->es_descriptor.set_esid(1);
esds->es_descriptor.mutable_decoder_config_descriptor()->set_object_type(
ObjectType::kISO_14496_3);
std::vector<uint8_t> decoder_specific_info(
@ -394,10 +393,6 @@ class BoxDefinitionsTestGeneral : public testing::Test {
->set_data(decoder_specific_info);
}
void Modify(ElementaryStreamDescriptor* esds) {
esds->es_descriptor.set_esid(2);
}
void Fill(DTSSpecific* ddts) {
const uint8_t kDdtsExtraData[] = {0xe4, 0x7c, 0, 4, 0, 0x0f, 0};
ddts->max_bitrate = 768000;

View File

@ -250,15 +250,15 @@ Status MP4Muxer::DelayInitializeMuxer() {
switch (stream->stream_type()) {
case kStreamVideo:
generate_trak_result = GenerateVideoTrak(
static_cast<const VideoStreamInfo*>(stream), &trak, i + 1);
static_cast<const VideoStreamInfo*>(stream), &trak);
break;
case kStreamAudio:
generate_trak_result = GenerateAudioTrak(
static_cast<const AudioStreamInfo*>(stream), &trak, i + 1);
static_cast<const AudioStreamInfo*>(stream), &trak);
break;
case kStreamText:
generate_trak_result = GenerateTextTrak(
static_cast<const TextStreamInfo*>(stream), &trak, i + 1);
static_cast<const TextStreamInfo*>(stream), &trak);
break;
default:
NOTIMPLEMENTED() << "Not implemented for stream type: "
@ -395,8 +395,7 @@ void MP4Muxer::InitializeTrak(const StreamInfo* info, Track* trak) {
}
bool MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
Track* trak,
uint32_t track_id) {
Track* trak) {
InitializeTrak(video_info, trak);
// width and height specify the track's visual presentation size as
@ -449,8 +448,7 @@ bool MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
}
bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
Track* trak,
uint32_t track_id) {
Track* trak) {
InitializeTrak(audio_info, trak);
trak->header.volume = 0x100;
@ -460,7 +458,6 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified);
switch(audio_info->codec()){
case kCodecAAC: {
audio.esds.es_descriptor.set_esid(track_id);
DecoderConfigDescriptor* decoder_config =
audio.esds.es_descriptor.mutable_decoder_config_descriptor();
decoder_config->set_object_type(ObjectType::kISO_14496_3); // MPEG4 AAC.
@ -494,7 +491,6 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
audio.dfla.data = audio_info->codec_config();
break;
case kCodecMP3: {
audio.esds.es_descriptor.set_esid(track_id);
DecoderConfigDescriptor* decoder_config =
audio.esds.es_descriptor.mutable_decoder_config_descriptor();
uint32_t samplerate = audio_info->sampling_frequency();
@ -567,8 +563,7 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
}
bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
Track* trak,
uint32_t track_id) {
Track* trak) {
InitializeTrak(text_info, trak);
if (text_info->codec_string() == "wvtt") {

View File

@ -48,15 +48,9 @@ class MP4Muxer : public Muxer {
// Generate Audio/Video/Text Track box.
void InitializeTrak(const StreamInfo* info, Track* trak);
bool GenerateAudioTrak(const AudioStreamInfo* audio_info,
Track* trak,
uint32_t track_id);
bool GenerateVideoTrak(const VideoStreamInfo* video_info,
Track* trak,
uint32_t track_id);
bool GenerateTextTrak(const TextStreamInfo* video_info,
Track* trak,
uint32_t track_id);
bool GenerateAudioTrak(const AudioStreamInfo* audio_info, Track* trak);
bool GenerateVideoTrak(const VideoStreamInfo* video_info, Track* trak);
bool GenerateTextTrak(const TextStreamInfo* video_info, Track* trak);
// Gets |start| and |end| initialization range. Returns true if there is an
// init range and sets start-end byte-range-spec specified in RFC2616.