7 #include "packager/media/formats/mp4/mp4_muxer.h"
11 #include "packager/base/strings/string_number_conversions.h"
12 #include "packager/base/time/clock.h"
13 #include "packager/base/time/time.h"
14 #include "packager/file/file.h"
15 #include "packager/media/base/aes_encryptor.h"
16 #include "packager/media/base/audio_stream_info.h"
17 #include "packager/media/base/fourccs.h"
18 #include "packager/media/base/key_source.h"
19 #include "packager/media/base/media_sample.h"
20 #include "packager/media/base/text_stream_info.h"
21 #include "packager/media/base/video_stream_info.h"
22 #include "packager/media/codecs/es_descriptor.h"
23 #include "packager/media/event/muxer_listener.h"
24 #include "packager/media/formats/mp4/box_definitions.h"
25 #include "packager/media/formats/mp4/multi_segment_segmenter.h"
26 #include "packager/media/formats/mp4/single_segment_segmenter.h"
27 #include "packager/media/formats/ttml/ttml_generator.h"
28 #include "packager/status_macros.h"
38 void SetStartAndEndFromOffsetAndSize(
size_t offset,
42 range->start =
static_cast<uint32_t
>(offset);
44 range->end = range->start +
static_cast<uint32_t
>(size) - 1;
47 FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
52 return h26x_stream_format ==
53 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
57 return h26x_stream_format ==
58 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
61 case kCodecH265DolbyVision:
62 return h26x_stream_format ==
63 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
98 void GenerateSinf(FourCC old_type,
99 const EncryptionConfig& encryption_config,
100 ProtectionSchemeInfo* sinf) {
101 sinf->format.format = old_type;
103 DCHECK_NE(encryption_config.protection_scheme, FOURCC_NULL);
104 sinf->type.type = encryption_config.protection_scheme;
107 const int kCencSchemeVersion = 0x00010000;
108 sinf->type.version = kCencSchemeVersion;
110 auto& track_encryption = sinf->info.track_encryption;
111 track_encryption.default_is_protected = 1;
113 track_encryption.default_crypt_byte_block =
114 encryption_config.crypt_byte_block;
115 track_encryption.default_skip_byte_block = encryption_config.skip_byte_block;
116 switch (encryption_config.protection_scheme) {
119 DCHECK_EQ(track_encryption.default_crypt_byte_block, 0u);
120 DCHECK_EQ(track_encryption.default_skip_byte_block, 0u);
123 track_encryption.version = 0;
130 track_encryption.version = 1;
133 NOTREACHED() <<
"Unexpected protection scheme "
134 << encryption_config.protection_scheme;
137 track_encryption.default_per_sample_iv_size =
138 encryption_config.per_sample_iv_size;
139 track_encryption.default_constant_iv = encryption_config.constant_iv;
140 track_encryption.default_kid = encryption_config.key_id;
// Converts a seek pre-roll duration into a roll distance measured in samples.
//
// |seek_preroll_ns| is the pre-roll in nanoseconds and |sampling_frequency| is
// the number of samples per second. The pre-roll is rounded to the closest
// whole sample and negated, so the result is always <= 0.
//
// Pre-rolls of 32768 samples or more cannot be represented as a negated
// int16_t; they saturate at -32768 instead of going through an out-of-range
// floating-point to integer conversion, which is undefined behaviour and in
// practice produced a wrapped (possibly positive) distance.
int16_t GetRollDistance(uint64_t seek_preroll_ns, uint32_t sampling_frequency) {
  const double kNanosecondsPerSecond = 1000000000;
  // Magnitude of the most negative int16_t value.
  const double kMaxRollSamples = 32768;
  const double preroll_in_samples =
      seek_preroll_ns / kNanosecondsPerSecond * sampling_frequency;
  // Adding 0.5 and truncating rounds a non-negative value to the closest
  // integer.
  const double rounded = preroll_in_samples + 0.5;
  if (rounded >= kMaxRollSamples)
    return static_cast<int16_t>(-32768);
  // |rounded| is in [0.5, 32768), so the truncated value is in [0, 32767] and
  // its negation fits in int16_t.
  return static_cast<int16_t>(-static_cast<int32_t>(rounded));
}
156 MP4Muxer::~MP4Muxer() {}
158 Status MP4Muxer::InitializeMuxer() {
160 to_be_initialized_ =
true;
164 Status MP4Muxer::Finalize() {
169 DCHECK(to_be_initialized_);
171 <<
"' which does not contain any sample.";
175 Status segmenter_finalized = segmenter_->Finalize();
177 if (!segmenter_finalized.ok())
178 return segmenter_finalized;
180 FireOnMediaEndEvent();
185 Status MP4Muxer::AddMediaSample(
size_t stream_id,
const MediaSample& sample) {
186 if (to_be_initialized_) {
187 RETURN_IF_ERROR(UpdateEditListOffsetFromSample(sample));
188 RETURN_IF_ERROR(DelayInitializeMuxer());
189 to_be_initialized_ =
false;
192 return segmenter_->AddSample(stream_id, sample);
195 Status MP4Muxer::FinalizeSegment(
size_t stream_id,
196 const SegmentInfo& segment_info) {
198 VLOG(3) <<
"Finalizing " << (segment_info.is_subsegment ?
"sub" :
"")
199 <<
"segment " << segment_info.start_timestamp <<
" duration "
200 << segment_info.duration;
201 return segmenter_->FinalizeSegment(stream_id, segment_info);
204 Status MP4Muxer::DelayInitializeMuxer() {
205 DCHECK(!streams().empty());
207 std::unique_ptr<FileType> ftyp(
new FileType);
208 std::unique_ptr<Movie> moov(
new Movie);
210 ftyp->major_brand = FOURCC_mp41;
211 ftyp->compatible_brands.push_back(FOURCC_iso8);
212 ftyp->compatible_brands.push_back(FOURCC_isom);
213 ftyp->compatible_brands.push_back(FOURCC_mp41);
214 ftyp->compatible_brands.push_back(FOURCC_dash);
216 if (streams().size() == 1) {
217 FourCC codec_fourcc = FOURCC_NULL;
218 if (streams()[0]->stream_type() == kStreamVideo) {
220 CodecToFourCC(streams()[0]->codec(),
221 static_cast<const VideoStreamInfo*
>(streams()[0].get())
222 ->h26x_stream_format());
223 if (codec_fourcc != FOURCC_NULL)
224 ftyp->compatible_brands.push_back(codec_fourcc);
230 if (codec_fourcc != FOURCC_avc3 && codec_fourcc != FOURCC_hev1)
231 ftyp->compatible_brands.push_back(FOURCC_cmfc);
234 moov->header.creation_time = IsoTimeNow();
235 moov->header.modification_time = IsoTimeNow();
236 moov->header.next_track_id =
static_cast<uint32_t
>(streams().size()) + 1;
238 moov->tracks.resize(streams().size());
239 moov->extends.tracks.resize(streams().size());
242 for (uint32_t i = 0; i < streams().size(); ++i) {
243 const StreamInfo* stream = streams()[i].get();
244 Track& trak = moov->tracks[i];
245 trak.header.track_id = i + 1;
247 TrackExtends& trex = moov->extends.tracks[i];
248 trex.track_id = trak.header.track_id;
249 trex.default_sample_description_index = 1;
251 bool generate_trak_result =
false;
252 switch (stream->stream_type()) {
254 generate_trak_result = GenerateVideoTrak(
255 static_cast<const VideoStreamInfo*
>(stream), &trak);
258 generate_trak_result = GenerateAudioTrak(
259 static_cast<const AudioStreamInfo*
>(stream), &trak);
262 generate_trak_result = GenerateTextTrak(
263 static_cast<const TextStreamInfo*
>(stream), &trak);
266 NOTIMPLEMENTED() <<
"Not implemented for stream type: "
267 << stream->stream_type();
269 if (!generate_trak_result)
270 return Status(error::MUXER_FAILURE,
"Failed to generate trak.");
274 if (edit_list_offset_.value() > 0) {
276 entry.media_time = edit_list_offset_.value();
277 entry.media_rate_integer = 1;
278 trak.edit.list.edits.push_back(entry);
281 if (stream->is_encrypted() && options().mp4_params.include_pssh_in_stream) {
283 const auto& key_system_info = stream->encryption_config().key_system_info;
284 for (
const ProtectionSystemSpecificInfo& system : key_system_info) {
285 if (system.psshs.empty())
287 ProtectionSystemSpecificHeader pssh;
288 pssh.raw_box = system.psshs;
289 moov->pssh.push_back(pssh);
294 if (options().segment_template.empty()) {
295 segmenter_.reset(
new SingleSegmentSegmenter(options(), std::move(ftyp),
299 new MultiSegmentSegmenter(options(), std::move(ftyp), std::move(moov)));
302 const Status segmenter_initialized =
303 segmenter_->Initialize(streams(), muxer_listener(), progress_listener());
304 if (!segmenter_initialized.ok())
305 return segmenter_initialized;
307 FireOnMediaStartEvent();
311 Status MP4Muxer::UpdateEditListOffsetFromSample(
const MediaSample& sample) {
312 if (edit_list_offset_)
315 const int64_t pts = sample.pts();
316 const int64_t dts = sample.dts();
348 const int64_t pts_dts_offset = pts - dts;
349 if (pts_dts_offset > 0) {
351 LOG(ERROR) <<
"Negative presentation timestamp (" << pts
352 <<
") is not supported when there is an offset between "
353 "presentation timestamp and decoding timestamp ("
355 return Status(error::MUXER_FAILURE,
356 "Unsupported negative pts when there is an offset between "
359 edit_list_offset_ = pts_dts_offset;
362 if (pts_dts_offset < 0) {
363 LOG(ERROR) <<
"presentation timestamp (" << pts
364 <<
") is not supposed to be greater than decoding timestamp ("
366 return Status(error::MUXER_FAILURE,
"Not expecting pts < dts.");
368 edit_list_offset_ = std::max(-sample.pts(),
static_cast<int64_t
>(0));
372 void MP4Muxer::InitializeTrak(
const StreamInfo* info, Track* trak) {
373 int64_t now = IsoTimeNow();
374 trak->header.creation_time = now;
375 trak->header.modification_time = now;
376 trak->header.duration = 0;
377 trak->media.header.creation_time = now;
378 trak->media.header.modification_time = now;
379 trak->media.header.timescale = info->time_scale();
380 trak->media.header.duration = 0;
381 if (!info->language().empty()) {
383 std::string main_language = info->language();
384 size_t dash = main_language.find(
'-');
385 if (dash != std::string::npos) {
386 main_language.erase(dash);
390 if (main_language.size() != 3) {
391 LOG(WARNING) <<
"'" << main_language <<
"' is not a valid ISO-639-2 "
392 <<
"language code, ignoring.";
394 trak->media.header.language.code = main_language;
399 bool MP4Muxer::GenerateVideoTrak(
const VideoStreamInfo* video_info,
401 InitializeTrak(video_info, trak);
405 uint32_t pixel_width = video_info->pixel_width();
406 uint32_t pixel_height = video_info->pixel_height();
407 if (pixel_width == 0 || pixel_height == 0) {
408 LOG(WARNING) <<
"pixel width/height are not set. Assuming 1:1.";
412 const double sample_aspect_ratio =
413 static_cast<double>(pixel_width) / pixel_height;
414 trak->header.width = video_info->width() * sample_aspect_ratio * 0x10000;
415 trak->header.height = video_info->height() * 0x10000;
417 VideoSampleEntry video;
419 CodecToFourCC(video_info->codec(), video_info->h26x_stream_format());
420 video.width = video_info->width();
421 video.height = video_info->height();
422 video.codec_configuration.data = video_info->codec_config();
423 if (!video.ParseExtraCodecConfigsVector(video_info->extra_config())) {
424 LOG(ERROR) <<
"Malformed extra codec configs: "
425 << base::HexEncode(video_info->extra_config().data(),
426 video_info->extra_config().size());
429 if (pixel_width != 1 || pixel_height != 1) {
430 video.pixel_aspect.h_spacing = pixel_width;
431 video.pixel_aspect.v_spacing = pixel_height;
434 SampleDescription& sample_description =
435 trak->media.information.sample_table.description;
436 sample_description.type = kVideo;
437 sample_description.video_entries.push_back(video);
439 if (video_info->is_encrypted()) {
440 if (video_info->has_clear_lead()) {
442 sample_description.video_entries.push_back(video);
445 VideoSampleEntry& entry = sample_description.video_entries[0];
446 GenerateSinf(entry.format, video_info->encryption_config(), &entry.sinf);
447 entry.format = FOURCC_encv;
452 bool MP4Muxer::GenerateAudioTrak(
const AudioStreamInfo* audio_info,
454 InitializeTrak(audio_info, trak);
456 trak->header.volume = 0x100;
458 AudioSampleEntry audio;
460 CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified);
461 switch(audio_info->codec()){
463 DecoderConfigDescriptor* decoder_config =
464 audio.esds.es_descriptor.mutable_decoder_config_descriptor();
465 decoder_config->set_object_type(ObjectType::kISO_14496_3);
466 decoder_config->set_max_bitrate(audio_info->max_bitrate());
467 decoder_config->set_avg_bitrate(audio_info->avg_bitrate());
468 decoder_config->mutable_decoder_specific_info_descriptor()->set_data(
469 audio_info->codec_config());
477 audio.ddts.extra_data = audio_info->codec_config();
478 audio.ddts.max_bitrate = audio_info->max_bitrate();
479 audio.ddts.avg_bitrate = audio_info->avg_bitrate();
480 audio.ddts.sampling_frequency = audio_info->sampling_frequency();
481 audio.ddts.pcm_sample_depth = audio_info->sample_bits();
484 audio.dac3.data = audio_info->codec_config();
487 audio.dec3.data = audio_info->codec_config();
490 audio.dac4.data = audio_info->codec_config();
493 audio.dfla.data = audio_info->codec_config();
496 DecoderConfigDescriptor* decoder_config =
497 audio.esds.es_descriptor.mutable_decoder_config_descriptor();
498 uint32_t samplerate = audio_info->sampling_frequency();
499 if (samplerate < 32000)
500 decoder_config->set_object_type(ObjectType::kISO_13818_3_MPEG1);
502 decoder_config->set_object_type(ObjectType::kISO_11172_3_MPEG1);
503 decoder_config->set_max_bitrate(audio_info->max_bitrate());
504 decoder_config->set_avg_bitrate(audio_info->avg_bitrate());
514 audio.dops.opus_identification_header = audio_info->codec_config();
517 NOTIMPLEMENTED() <<
" Unsupported audio codec " << audio_info->codec();
521 if (audio_info->codec() == kCodecAC3 || audio_info->codec() == kCodecEAC3) {
524 audio.channelcount = 2;
525 audio.samplesize = 16;
526 }
else if (audio_info->codec() == kCodecAC4) {
529 audio.channelcount = audio_info->num_channels();
531 audio.samplesize = 16;
533 audio.channelcount = audio_info->num_channels();
534 audio.samplesize = audio_info->sample_bits();
536 audio.samplerate = audio_info->sampling_frequency();
537 SampleTable& sample_table = trak->media.information.sample_table;
538 SampleDescription& sample_description = sample_table.description;
539 sample_description.type = kAudio;
540 sample_description.audio_entries.push_back(audio);
542 if (audio_info->is_encrypted()) {
543 if (audio_info->has_clear_lead()) {
545 sample_description.audio_entries.push_back(audio);
548 AudioSampleEntry& entry = sample_description.audio_entries[0];
549 GenerateSinf(entry.format, audio_info->encryption_config(), &entry.sinf);
550 entry.format = FOURCC_enca;
553 if (audio_info->seek_preroll_ns() > 0) {
554 sample_table.sample_group_descriptions.resize(1);
555 SampleGroupDescription& sample_group_description =
556 sample_table.sample_group_descriptions.back();
557 sample_group_description.grouping_type = FOURCC_roll;
558 sample_group_description.audio_roll_recovery_entries.resize(1);
559 sample_group_description.audio_roll_recovery_entries[0].roll_distance =
560 GetRollDistance(audio_info->seek_preroll_ns(), audio.samplerate);
567 bool MP4Muxer::GenerateTextTrak(
const TextStreamInfo* text_info,
569 InitializeTrak(text_info, trak);
571 if (text_info->codec_string() ==
"wvtt") {
573 TextSampleEntry webvtt;
574 webvtt.format = FOURCC_wvtt;
580 webvtt.config.config =
"WEBVTT";
583 if (!text_info->regions().empty() || !text_info->css_styles().empty()) {
584 LOG(INFO) <<
"Skipping possible style / region configuration as the spec "
585 "does not define a way to carry them inside ISO-BMFF files.";
591 webvtt.label.source_label =
"source_label";
592 SampleDescription& sample_description =
593 trak->media.information.sample_table.description;
594 sample_description.type = kText;
595 sample_description.text_entries.push_back(webvtt);
597 }
else if (text_info->codec_string() ==
"ttml") {
599 TextSampleEntry ttml;
600 ttml.format = FOURCC_stpp;
601 ttml.namespace_ = ttml::TtmlGenerator::kTtNamespace;
603 SampleDescription& sample_description =
604 trak->media.information.sample_table.description;
605 sample_description.type = kSubtitle;
606 sample_description.text_entries.push_back(ttml);
609 NOTIMPLEMENTED() << text_info->codec_string()
610 <<
" handling not implemented yet.";
614 base::Optional<Range> MP4Muxer::GetInitRangeStartAndEnd() {
615 size_t range_offset = 0;
616 size_t range_size = 0;
617 const bool has_range = segmenter_->GetInitRange(&range_offset, &range_size);
620 return base::nullopt;
623 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
627 base::Optional<Range> MP4Muxer::GetIndexRangeStartAndEnd() {
628 size_t range_offset = 0;
629 size_t range_size = 0;
630 const bool has_range = segmenter_->GetIndexRange(&range_offset, &range_size);
633 return base::nullopt;
636 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
640 void MP4Muxer::FireOnMediaStartEvent() {
641 if (!muxer_listener())
644 if (streams().size() > 1) {
645 LOG(ERROR) <<
"MuxerListener cannot take more than 1 stream.";
648 DCHECK(!streams().empty()) <<
"Media started without a stream.";
650 const uint32_t timescale = segmenter_->GetReferenceTimeScale();
651 muxer_listener()->
OnMediaStart(options(), *streams().front(), timescale,
652 MuxerListener::kContainerMp4);
655 void MP4Muxer::FireOnMediaEndEvent() {
656 if (!muxer_listener())
659 MuxerListener::MediaRanges media_range;
660 media_range.init_range = GetInitRangeStartAndEnd();
661 media_range.index_range = GetIndexRangeStartAndEnd();
662 media_range.subsegment_ranges = segmenter_->GetSegmentRanges();
664 const float duration_seconds =
static_cast<float>(segmenter_->GetDuration());
665 muxer_listener()->
OnMediaEnd(media_range, duration_seconds);
668 uint64_t MP4Muxer::IsoTimeNow() {
670 const uint64_t kIsomTimeOffset = 2082844800l;
671 return kIsomTimeOffset +
672 (clock() ? clock()->Now() : base::Time::Now()).ToDoubleT();