7 #include "packager/media/formats/mp4/mp4_muxer.h" 11 #include "packager/base/strings/string_number_conversions.h" 12 #include "packager/base/time/clock.h" 13 #include "packager/base/time/time.h" 14 #include "packager/file/file.h" 15 #include "packager/media/base/aes_encryptor.h" 16 #include "packager/media/base/audio_stream_info.h" 17 #include "packager/media/base/fourccs.h" 18 #include "packager/media/base/key_source.h" 19 #include "packager/media/base/media_sample.h" 20 #include "packager/media/base/text_stream_info.h" 21 #include "packager/media/base/video_stream_info.h" 22 #include "packager/media/codecs/es_descriptor.h" 23 #include "packager/media/event/muxer_listener.h" 24 #include "packager/media/formats/mp4/box_definitions.h" 25 #include "packager/media/formats/mp4/multi_segment_segmenter.h" 26 #include "packager/media/formats/mp4/single_segment_segmenter.h" 27 #include "packager/status_macros.h" 37 void SetStartAndEndFromOffsetAndSize(
// Remaining lines of SetStartAndEndFromOffsetAndSize: converts a byte offset
// and a byte count into an inclusive start/end pair stored in range.
// NOTE(review): parts of the parameter list are not present in this excerpt.
size_t offset,
// The first byte of the range is the offset itself, narrowed to 32 bits.
// NOTE(review): offsets that do not fit in 32 bits are truncated by this cast;
// confirm that callers never pass such values.
41 range->start =
static_cast<uint32_t
>(offset);
// The end is inclusive, hence the minus one after adding the size.
// NOTE(review): a size of 0 makes this evaluate to start - 1; confirm that
// callers only pass non-empty ranges.
43 range->end = range->start +
static_cast<uint32_t
>(size) - 1;
// Maps a codec, together with the H.26x stream format, to a FourCC value.
// NOTE(review): only fragments of the switch are present in this excerpt; the
// FourCC values produced by each branch are not visible here.
46 FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
// The value returned by this branch is selected by comparing the stream format
// against kNalUnitStreamWithParameterSetNalus.
51 return h26x_stream_format ==
52 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
// A second branch performs the same stream format comparison.
56 return h26x_stream_format ==
57 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
// The Dolby Vision over H.265 case also selects its result with that
// comparison.
60 case kCodecH265DolbyVision:
61 return h26x_stream_format ==
62 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
// Fills in the ProtectionSchemeInfo (sinf) of an encrypted sample entry.
//   old_type:          format of the sample entry before it is replaced by the
//                      encrypted entry format (callers in this file pass
//                      entry.format and then overwrite it).
//   encryption_config: source of the scheme, pattern, IV and key id values.
//   sinf:              output structure; written in place.
94 void GenerateSinf(FourCC old_type,
95 const EncryptionConfig& encryption_config,
96 ProtectionSchemeInfo* sinf) {
// Record the original format inside the sinf.
97 sinf->format.format = old_type;
// A protection scheme must have been selected before this function is called.
99 DCHECK_NE(encryption_config.protection_scheme, FOURCC_NULL);
100 sinf->type.type = encryption_config.protection_scheme;
// The same scheme version value is written for every scheme handled here.
103 const int kCencSchemeVersion = 0x00010000;
104 sinf->type.version = kCencSchemeVersion;
106 auto& track_encryption = sinf->info.track_encryption;
// Samples of this track are marked as protected by default.
107 track_encryption.default_is_protected = 1;
// The crypt/skip block counts are copied unchanged from the configuration.
109 track_encryption.default_crypt_byte_block =
110 encryption_config.crypt_byte_block;
111 track_encryption.default_skip_byte_block = encryption_config.skip_byte_block;
// The track encryption version depends on the protection scheme.
// NOTE(review): the case labels of this switch are not present in this excerpt.
112 switch (encryption_config.protection_scheme) {
// In this branch both block counts are expected to be zero, and version 0 is
// written.
115 DCHECK_EQ(track_encryption.default_crypt_byte_block, 0u);
116 DCHECK_EQ(track_encryption.default_skip_byte_block, 0u);
119 track_encryption.version = 0;
// This branch writes version 1.
126 track_encryption.version = 1;
// Reaching this point means the scheme is not one this function handles.
129 NOTREACHED() <<
"Unexpected protection scheme " 130 << encryption_config.protection_scheme;
// IV settings and the default key id are taken directly from the configuration.
133 track_encryption.default_per_sample_iv_size =
134 encryption_config.per_sample_iv_size;
135 track_encryption.default_constant_iv = encryption_config.constant_iv;
136 track_encryption.default_kid = encryption_config.key_id;
// Converts a seek pre-roll duration into a roll distance counted in samples.
//
// seek_preroll_ns:     pre-roll duration in nanoseconds.
// sampling_frequency:  number of samples per second.
//
// Returns the pre-roll rounded to the nearest whole sample and negated. The
// result saturates at -32768, the most negative value an int16_t can hold,
// instead of converting an out-of-range double to int16_t (which is undefined
// behaviour).
int16_t GetRollDistance(uint64_t seek_preroll_ns, uint32_t sampling_frequency) {
  const double kNanosecondsPerSecond = 1000000000;
  // Magnitude of the most negative int16_t value.
  const double kMaxRollMagnitude = 32768;

  const double preroll_in_samples =
      seek_preroll_ns / kNanosecondsPerSecond * sampling_frequency;
  // Adding 0.5 before truncation rounds to the closest integer; the value is
  // never negative because both inputs are unsigned.
  const double rounded_preroll = preroll_in_samples + 0.5;
  if (rounded_preroll >= kMaxRollMagnitude)
    return static_cast<int16_t>(-32768);
  // Below the limit the truncated value is in [0, 32767], so both casts are
  // well defined.
  return static_cast<int16_t>(-static_cast<int32_t>(rounded_preroll));
}
152 MP4Muxer::~MP4Muxer() {}
// Does not build any boxes itself: it only raises the flag that makes the
// real initialization (DelayInitializeMuxer) run later, right before the call
// that forwards a sample to segmenter_->AddSample elsewhere in this file.
154 Status MP4Muxer::InitializeMuxer() {
156 to_be_initialized_ =
true;
// Finishes the output: finalizes the segmenter and, on success, notifies the
// listener that the media has ended.
160 Status MP4Muxer::Finalize() {
// NOTE(review): the condition guarding the next lines is not present in this
// excerpt; judging by the log text it appears to handle a muxer that never
// received a sample, in which case delayed initialization must still be
// pending - confirm.
165 DCHECK(to_be_initialized_);
167 <<
"' which does not contain any sample.";
// A segmenter failure is returned to the caller unchanged and no end event is
// fired.
171 Status segmenter_finalized = segmenter_->Finalize();
173 if (!segmenter_finalized.ok())
174 return segmenter_finalized;
176 FireOnMediaEndEvent();
// NOTE(review): the signature of this function is not present in this excerpt.
// When initialization is still pending, derive the edit list offset from this
// sample and build the boxes before the sample reaches the segmenter. Either
// step may fail, in which case RETURN_IF_ERROR returns that error.
182 if (to_be_initialized_) {
183 RETURN_IF_ERROR(UpdateEditListOffsetFromSample(sample));
184 RETURN_IF_ERROR(DelayInitializeMuxer());
// Clear the flag so the two steps above run only once.
185 to_be_initialized_ =
false;
// The sample itself is handled by the segmenter.
188 return segmenter_->AddSample(stream_id, sample);
// Logs the segment (or subsegment) being closed and forwards the request to
// the segmenter, returning its status.
// NOTE(review): the second parameter declaration is not present in this
// excerpt; the body refers to it as segment_info.
191 Status MP4Muxer::FinalizeSegment(
size_t stream_id,
// Verbose log: the text reads subsegment when is_subsegment is set, segment
// otherwise, followed by the start timestamp and duration.
194 VLOG(3) <<
"Finalizing " << (segment_info.is_subsegment ?
"sub" :
"")
195 <<
"segment " << segment_info.start_timestamp <<
" duration " 196 << segment_info.duration;
197 return segmenter_->FinalizeSegment(stream_id, segment_info);
// Builds the ftyp and moov structures for all streams, initializes the
// segmenter with them and fires the media start event. Returns a failure
// status when a trak cannot be generated or the segmenter fails to
// initialize.
// NOTE(review): several lines of this function are not present in this
// excerpt, including the code that creates the segmenter.
200 Status MP4Muxer::DelayInitializeMuxer() {
201 DCHECK(!streams().empty());
203 std::unique_ptr<FileType> ftyp(
new FileType);
204 std::unique_ptr<Movie> moov(
new Movie);
// Brands that are always advertised.
206 ftyp->major_brand = FOURCC_isom;
207 ftyp->compatible_brands.push_back(FOURCC_iso8);
208 ftyp->compatible_brands.push_back(FOURCC_mp41);
209 ftyp->compatible_brands.push_back(FOURCC_dash);
// With exactly one stream, extra brands are considered.
211 if (streams().size() == 1) {
212 FourCC codec_fourcc = FOURCC_NULL;
// For video the codec FourCC itself becomes a compatible brand, unless the
// codec has no FourCC mapping.
213 if (streams()[0]->stream_type() == kStreamVideo) {
215 CodecToFourCC(streams()[0]->codec(),
216 static_cast<const VideoStreamInfo*>(streams()[0].
get())
217 ->h26x_stream_format());
218 if (codec_fourcc != FOURCC_NULL)
219 ftyp->compatible_brands.push_back(codec_fourcc);
// The cmfc brand is added only when the codec FourCC is neither avc3 nor hev1.
225 if (codec_fourcc != FOURCC_avc3 && codec_fourcc != FOURCC_hev1)
226 ftyp->compatible_brands.push_back(FOURCC_cmfc);
// NOTE(review): creation and modification time come from two separate
// IsoTimeNow() calls, so they can differ if the clock advances in between.
229 moov->header.creation_time = IsoTimeNow();
230 moov->header.modification_time = IsoTimeNow();
// Track ids are 1-based, so the next free id is the stream count plus one.
231 moov->header.next_track_id =
static_cast<uint32_t
>(streams().size()) + 1;
// One trak and one track extends entry per stream.
233 moov->tracks.resize(streams().size());
234 moov->extends.tracks.resize(streams().size());
237 for (uint32_t i = 0; i < streams().size(); ++i) {
238 const StreamInfo* stream = streams()[i].get();
239 Track& trak = moov->tracks[i];
240 trak.header.track_id = i + 1;
// NOTE(review): the declaration of trex is not present in this excerpt.
243 trex.track_id = trak.header.track_id;
244 trex.default_sample_description_index = 1;
// Dispatch on the stream type to the matching trak generator; the track id
// passed along is the same 1-based id as above.
246 bool generate_trak_result =
false;
247 switch (stream->stream_type()) {
249 generate_trak_result = GenerateVideoTrak(
250 static_cast<const VideoStreamInfo*>(stream), &trak, i + 1);
253 generate_trak_result = GenerateAudioTrak(
254 static_cast<const AudioStreamInfo*>(stream), &trak, i + 1);
257 generate_trak_result = GenerateTextTrak(
258 static_cast<const TextStreamInfo*>(stream), &trak, i + 1);
261 NOTIMPLEMENTED() <<
"Not implemented for stream type: " 262 << stream->stream_type();
// The result stays false for unhandled stream types, so they fail here too.
264 if (!generate_trak_result)
265 return Status(error::MUXER_FAILURE,
"Failed to generate trak.");
// A positive edit list offset is written as an edit entry whose media time is
// that offset, played at rate 1.
269 if (edit_list_offset_.value() > 0) {
271 entry.media_time = edit_list_offset_.value();
272 entry.media_rate_integer = 1;
273 trak.edit.list.edits.push_back(entry);
// Copy the raw pssh data of each key system into the moov; key systems
// without pssh data are tested for separately.
278 const auto& key_system_info = stream->encryption_config().key_system_info;
280 if (system.psshs.empty())
283 pssh.raw_box = system.psshs;
284 moov->pssh.push_back(pssh);
// NOTE(review): the branches that follow this check are not present in this
// excerpt; presumably the segmenter kind depends on whether a segment template
// is configured - confirm.
289 if (options().segment_template.empty()) {
// A segmenter initialization failure is returned unchanged.
297 const Status segmenter_initialized =
298 segmenter_->Initialize(streams(), muxer_listener(), progress_listener());
299 if (!segmenter_initialized.ok())
300 return segmenter_initialized;
302 FireOnMediaStartEvent();
// NOTE(review): the signature of this function is not present in this excerpt;
// it is invoked as UpdateEditListOffsetFromSample(sample) elsewhere in this
// file. It derives edit_list_offset_ from the timestamps of a sample.
// An already computed offset is tested for first.
307 if (edit_list_offset_)
310 const int64_t pts = sample.pts();
311 const int64_t dts = sample.dts();
// Difference between presentation and decoding timestamp of this sample.
343 const int64_t pts_dts_offset = pts - dts;
// pts ahead of dts: the difference becomes the edit list offset.
// NOTE(review): the check that guards the error path below is not present in
// this excerpt; per the messages it rejects a negative pts - confirm.
344 if (pts_dts_offset > 0) {
346 LOG(ERROR) <<
"Negative presentation timestamp (" << pts
347 <<
") is not supported when there is an offset between " 348 "presentation timestamp and decoding timestamp (" 350 return Status(error::MUXER_FAILURE,
351 "Unsupported negative pts when there is an offset between " 354 edit_list_offset_ = pts_dts_offset;
// pts behind dts is rejected.
// NOTE(review): the log text below says greater than, while this condition and
// the returned status both describe pts being smaller than dts; the log
// wording looks inverted.
357 if (pts_dts_offset < 0) {
358 LOG(ERROR) <<
"presentation timestamp (" << pts
359 <<
") is not supposed to be greater than decoding timestamp (" 361 return Status(error::MUXER_FAILURE,
"Not expecting pts < dts.");
// Remaining case: the offset is the negated pts when pts is negative and 0
// otherwise.
363 edit_list_offset_ = std::max(-sample.pts(),
static_cast<int64_t
>(0));
// NOTE(review): the signature of this function is not present in this excerpt;
// it is invoked as InitializeTrak(info, trak) by the trak generators in this
// file. It sets the header fields that are filled the same way for every
// stream type.
// One timestamp is taken and reused for all four time fields.
368 int64_t now = IsoTimeNow();
369 trak->header.creation_time = now;
370 trak->header.modification_time = now;
// Both durations are written as 0 here.
371 trak->header.duration = 0;
372 trak->media.header.creation_time = now;
373 trak->media.header.modification_time = now;
374 trak->media.header.timescale = info->time_scale();
375 trak->media.header.duration = 0;
// The language is only processed when the stream declares one.
376 if (!info->language().empty()) {
// Keep only the part before the first dash of the language string.
378 std::string main_language = info->language();
379 size_t dash = main_language.find(
'-');
380 if (dash != std::string::npos) {
381 main_language.erase(dash);
// Anything that is not exactly three characters long is reported as invalid.
385 if (main_language.size() != 3) {
386 LOG(WARNING) <<
"'" << main_language <<
"' is not a valid ISO-639-2 " 387 <<
"language code, ignoring.";
// NOTE(review): the lines between the warning and this assignment are not
// present in this excerpt; per the warning text the assignment is presumably
// skipped for invalid codes - confirm.
389 trak->media.header.language.code = main_language;
// NOTE(review): the signature of this function is not present in this excerpt;
// it is invoked as GenerateVideoTrak(video_info, &trak, track_id) elsewhere in
// this file. It fills a trak with the video sample description.
// Header fields that do not depend on the stream type are set first.
397 InitializeTrak(video_info, trak);
// A missing pixel width or height only produces a warning.
// NOTE(review): the fallback assignments are not present in this excerpt; the
// log text says a 1:1 ratio is assumed - confirm.
403 if (pixel_width == 0 || pixel_height == 0) {
404 LOG(WARNING) <<
"pixel width/height are not set. Assuming 1:1.";
// The header width is the coded width scaled by the sample aspect ratio; both
// header dimensions are multiplied by 0x10000.
408 const double sample_aspect_ratio =
409 static_cast<double>(pixel_width) / pixel_height;
410 trak->header.width = video_info->width() * sample_aspect_ratio * 0x10000;
411 trak->header.height = video_info->height() * 0x10000;
// The sample entry carries the codec FourCC, the coded dimensions and the
// codec configuration of the stream.
415 CodecToFourCC(video_info->codec(), video_info->h26x_stream_format());
416 video.width = video_info->width();
417 video.height = video_info->height();
418 video.codec_configuration.data = video_info->codec_config();
// Extra codec configs that cannot be parsed are logged as a hex dump.
419 if (!video.ParseExtraCodecConfigsVector(video_info->extra_config())) {
420 LOG(ERROR) <<
"Malformed extra codec configs: " 421 << base::HexEncode(video_info->extra_config().data(),
422 video_info->extra_config().size());
// The pixel aspect values are only written when the ratio is not 1:1.
425 if (pixel_width != 1 || pixel_height != 1) {
426 video.pixel_aspect.h_spacing = pixel_width;
427 video.pixel_aspect.v_spacing = pixel_height;
431 trak->media.information.sample_table.description;
432 sample_description.type = kVideo;
433 sample_description.video_entries.push_back(video);
// For encrypted streams with a clear lead a second copy of the entry is
// appended; an entry then gets its sinf generated from its current format
// before that format is replaced by encv.
// NOTE(review): the declaration of entry is not present in this excerpt.
435 if (video_info->is_encrypted()) {
436 if (video_info->has_clear_lead()) {
438 sample_description.video_entries.push_back(video);
442 GenerateSinf(entry.format, video_info->encryption_config(), &entry.sinf);
443 entry.format = FOURCC_encv;
// NOTE(review): the signature of this function is not present in this excerpt;
// it is invoked as GenerateAudioTrak(audio_info, &trak, track_id) elsewhere in
// this file. It fills a trak with the audio sample description.
// Header fields that do not depend on the stream type are set first.
451 InitializeTrak(audio_info, trak);
453 trak->header.volume = 0x100;
// The FourCC lookup passes kUnSpecified as the stream format.
457 CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified);
// Codec specific configuration.
// NOTE(review): the case labels of this switch are not present in this
// excerpt.
458 switch(audio_info->codec()){
// esds: the ES id is the track id, the object type is kISO_14496_3, and the
// bitrates and decoder specific info come from the stream info.
460 audio.esds.es_descriptor.set_esid(track_id);
462 audio.esds.es_descriptor.mutable_decoder_config_descriptor();
463 decoder_config->set_object_type(ObjectType::kISO_14496_3);
464 decoder_config->set_max_bitrate(audio_info->max_bitrate());
465 decoder_config->set_avg_bitrate(audio_info->avg_bitrate());
466 decoder_config->mutable_decoder_specific_info_descriptor()->set_data(
467 audio_info->codec_config());
// ddts: codec config, bitrates, sampling frequency and sample depth.
475 audio.ddts.extra_data = audio_info->codec_config();
476 audio.ddts.max_bitrate = audio_info->max_bitrate();
477 audio.ddts.avg_bitrate = audio_info->avg_bitrate();
478 audio.ddts.sampling_frequency = audio_info->sampling_frequency();
479 audio.ddts.pcm_sample_depth = audio_info->sample_bits();
// dac3, dec3, dfla and dops each receive the codec config unchanged.
482 audio.dac3.data = audio_info->codec_config();
485 audio.dec3.data = audio_info->codec_config();
488 audio.dfla.data = audio_info->codec_config();
491 audio.dops.opus_identification_header = audio_info->codec_config();
494 NOTIMPLEMENTED() <<
" Unsupported audio codec " << audio_info->codec();
// AC3 and EAC3 entries use fixed values of 2 channels and 16 bit samples.
// NOTE(review): the line separating the two pairs of assignments is not
// present in this excerpt; presumably the stream info values apply to all
// other codecs - confirm.
498 if (audio_info->codec() == kCodecAC3 || audio_info->codec() == kCodecEAC3) {
501 audio.channelcount = 2;
502 audio.samplesize = 16;
504 audio.channelcount = audio_info->num_channels();
505 audio.samplesize = audio_info->sample_bits();
507 audio.samplerate = audio_info->sampling_frequency();
508 SampleTable& sample_table = trak->media.information.sample_table;
510 sample_description.type = kAudio;
511 sample_description.audio_entries.push_back(audio);
// For encrypted streams with a clear lead a second copy of the entry is
// appended; an entry then gets its sinf generated from its current format
// before that format is replaced by enca.
// NOTE(review): the declaration of entry is not present in this excerpt.
513 if (audio_info->is_encrypted()) {
514 if (audio_info->has_clear_lead()) {
516 sample_description.audio_entries.push_back(audio);
520 GenerateSinf(entry.format, audio_info->encryption_config(), &entry.sinf);
521 entry.format = FOURCC_enca;
// A stream with a seek pre-roll gets one roll sample group description whose
// single entry holds the roll distance computed from the pre-roll and the
// sample rate.
524 if (audio_info->seek_preroll_ns() > 0) {
525 sample_table.sample_group_descriptions.resize(1);
527 sample_table.sample_group_descriptions.back();
528 sample_group_description.grouping_type = FOURCC_roll;
529 sample_group_description.audio_roll_recovery_entries.resize(1);
530 sample_group_description.audio_roll_recovery_entries[0].roll_distance =
531 GetRollDistance(audio_info->seek_preroll_ns(), audio.samplerate);
// NOTE(review): the signature of this function is not present in this excerpt;
// it is invoked as GenerateTextTrak(text_info, &trak, track_id) elsewhere in
// this file. It fills a trak with the text sample description.
// Header fields that do not depend on the stream type are set first.
541 InitializeTrak(text_info, trak);
// Streams whose codec string is wvtt get a sample entry built here; the line
// at the end of this block reports other codec strings as not implemented.
543 if (text_info->codec_string() ==
"wvtt") {
546 webvtt.format = FOURCC_wvtt;
// The configuration written is this fixed string.
552 webvtt.config.config =
"WEBVTT";
// A codec config supplied by the stream is not written; this is only logged.
555 if (!text_info->codec_config().empty()) {
556 LOG(INFO) <<
"Skipping possible style / region configuration as the spec " 557 "does not define a way to carry them inside ISO-BMFF files.";
// The source label is a fixed string.
563 webvtt.label.source_label =
"source_label";
565 trak->media.information.sample_table.description;
566 sample_description.type = kText;
567 sample_description.text_entries.push_back(webvtt);
570 NOTIMPLEMENTED() << text_info->codec_string()
571 <<
" handling not implemented yet.";
// Asks the segmenter for the init range as an offset and size pair and
// converts it into a Range with start and end positions. One path returns
// base::nullopt.
// NOTE(review): the check that selects the nullopt path is not present in
// this excerpt; presumably it tests has_range - confirm.
575 base::Optional<Range> MP4Muxer::GetInitRangeStartAndEnd() {
576 size_t range_offset = 0;
577 size_t range_size = 0;
578 const bool has_range = segmenter_->GetInitRange(&range_offset, &range_size);
581 return base::nullopt;
// Translate offset and size into start and end positions.
584 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
// Asks the segmenter for the index range as an offset and size pair and
// converts it into a Range with start and end positions. One path returns
// base::nullopt.
// NOTE(review): the check that selects the nullopt path is not present in
// this excerpt; presumably it tests has_range - confirm.
588 base::Optional<Range> MP4Muxer::GetIndexRangeStartAndEnd() {
589 size_t range_offset = 0;
590 size_t range_size = 0;
591 const bool has_range = segmenter_->GetIndexRange(&range_offset, &range_size);
594 return base::nullopt;
// Translate offset and size into start and end positions.
597 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
// Reports the start of the media to the muxer listener, passing the muxer
// options, the first stream, the reference time scale and the mp4 container
// type.
601 void MP4Muxer::FireOnMediaStartEvent() {
// The listener is tested for first.
// NOTE(review): the statement following this check is not present in this
// excerpt.
602 if (!muxer_listener())
// The listener call below only receives the first stream, so more than one
// stream is reported as an error.
605 if (streams().size() > 1) {
606 LOG(ERROR) <<
"MuxerListener cannot take more than 1 stream.";
609 DCHECK(!streams().empty()) <<
"Media started without a stream.";
611 const uint32_t timescale = segmenter_->GetReferenceTimeScale();
612 muxer_listener()->
OnMediaStart(options(), *streams().front(), timescale,
613 MuxerListener::kContainerMp4);
// Reports the end of the media to the muxer listener, passing the init and
// index ranges and the duration obtained from the segmenter.
616 void MP4Muxer::FireOnMediaEndEvent() {
// The listener is tested for first.
// NOTE(review): the statement following this check is not present in this
// excerpt.
617 if (!muxer_listener())
// Either range may be empty when the corresponding getter returns nullopt.
621 media_range.
init_range = GetInitRangeStartAndEnd();
622 media_range.
index_range = GetIndexRangeStartAndEnd();
// The duration from the segmenter is narrowed to float for the listener.
625 const float duration_seconds =
static_cast<float>(segmenter_->GetDuration());
626 muxer_listener()->
OnMediaEnd(media_range, duration_seconds);
629 uint64_t MP4Muxer::IsoTimeNow() {
631 const uint64_t kIsomTimeOffset = 2082844800l;
632 return kIsomTimeOffset +
633 (clock() ? clock()->Now() : base::Time::Now()).ToDoubleT();
All the methods that are virtual are virtual for mocking.
bool include_pssh_in_stream