7 #include "packager/media/formats/mp4/mp4_muxer.h" 11 #include "packager/base/time/clock.h" 12 #include "packager/base/time/time.h" 13 #include "packager/file/file.h" 14 #include "packager/media/base/aes_encryptor.h" 15 #include "packager/media/base/audio_stream_info.h" 16 #include "packager/media/base/fourccs.h" 17 #include "packager/media/base/key_source.h" 18 #include "packager/media/base/media_sample.h" 19 #include "packager/media/base/text_stream_info.h" 20 #include "packager/media/base/video_stream_info.h" 21 #include "packager/media/codecs/es_descriptor.h" 22 #include "packager/media/event/muxer_listener.h" 23 #include "packager/media/formats/mp4/box_definitions.h" 24 #include "packager/media/formats/mp4/multi_segment_segmenter.h" 25 #include "packager/media/formats/mp4/single_segment_segmenter.h" 26 #include "packager/status_macros.h" 36 void SetStartAndEndFromOffsetAndSize(
size_t offset,
40 range->start =
static_cast<uint32_t
>(offset);
42 range->end = range->start +
static_cast<uint32_t
>(size) - 1;
45 FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
50 return h26x_stream_format ==
51 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
55 return h26x_stream_format ==
56 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
88 void GenerateSinf(FourCC old_type,
89 const EncryptionConfig& encryption_config,
90 ProtectionSchemeInfo* sinf) {
91 sinf->format.format = old_type;
93 DCHECK_NE(encryption_config.protection_scheme, FOURCC_NULL);
94 sinf->type.type = encryption_config.protection_scheme;
97 const int kCencSchemeVersion = 0x00010000;
98 sinf->type.version = kCencSchemeVersion;
100 auto& track_encryption = sinf->info.track_encryption;
101 track_encryption.default_is_protected = 1;
103 track_encryption.default_crypt_byte_block =
104 encryption_config.crypt_byte_block;
105 track_encryption.default_skip_byte_block = encryption_config.skip_byte_block;
106 switch (encryption_config.protection_scheme) {
109 DCHECK_EQ(track_encryption.default_crypt_byte_block, 0u);
110 DCHECK_EQ(track_encryption.default_skip_byte_block, 0u);
113 track_encryption.version = 0;
120 track_encryption.version = 1;
123 NOTREACHED() <<
"Unexpected protection scheme " 124 << encryption_config.protection_scheme;
127 track_encryption.default_per_sample_iv_size =
128 encryption_config.per_sample_iv_size;
129 track_encryption.default_constant_iv = encryption_config.constant_iv;
130 track_encryption.default_kid = encryption_config.key_id;
// Converts an audio seek pre-roll duration into a roll distance expressed in
// samples.
//
// |seek_preroll_ns| is the pre-roll duration in nanoseconds and
// |sampling_frequency| is the number of samples per second. The pre-roll is
// rounded to the nearest whole sample and negated, so the result is always
// zero or negative.
//
// Converting a double that does not fit in int16_t is undefined behavior, so
// pre-rolls longer than 32768 samples saturate at -32768 (the most negative
// int16_t value) instead of producing an unspecified result.
int16_t GetRollDistance(uint64_t seek_preroll_ns, uint32_t sampling_frequency) {
  const double kNanosecondsPerSecond = 1000000000;
  // Magnitude of the most negative value representable by the return type.
  const double kMaxRollDistanceMagnitude = 32768;

  const double preroll_in_samples =
      seek_preroll_ns / kNanosecondsPerSecond * sampling_frequency;
  // Adding 0.5 before truncation rounds to the nearest sample.
  const double rounded_preroll = preroll_in_samples + 0.5;
  if (rounded_preroll >= kMaxRollDistanceMagnitude)
    return static_cast<int16_t>(-32768);

  // Roll distance is negative: the samples precede the sync point.
  return -static_cast<int16_t>(rounded_preroll);
}
146 MP4Muxer::~MP4Muxer() {}
148 Status MP4Muxer::InitializeMuxer() {
150 to_be_initialized_ =
true;
154 Status MP4Muxer::Finalize() {
159 DCHECK(to_be_initialized_);
161 <<
"' which does not contain any sample.";
165 Status segmenter_finalized = segmenter_->Finalize();
167 if (!segmenter_finalized.ok())
168 return segmenter_finalized;
170 FireOnMediaEndEvent();
176 if (to_be_initialized_) {
177 RETURN_IF_ERROR(UpdateEditListOffsetFromSample(sample));
178 RETURN_IF_ERROR(DelayInitializeMuxer());
179 to_be_initialized_ =
false;
182 return segmenter_->AddSample(stream_id, sample);
185 Status MP4Muxer::FinalizeSegment(
size_t stream_id,
188 VLOG(3) <<
"Finalizing " << (segment_info.is_subsegment ?
"sub" :
"")
189 <<
"segment " << segment_info.start_timestamp <<
" duration " 190 << segment_info.duration;
191 return segmenter_->FinalizeSegment(stream_id, segment_info);
194 Status MP4Muxer::DelayInitializeMuxer() {
195 DCHECK(!streams().empty());
197 std::unique_ptr<FileType> ftyp(
new FileType);
198 std::unique_ptr<Movie> moov(
new Movie);
200 ftyp->major_brand = FOURCC_isom;
201 ftyp->compatible_brands.push_back(FOURCC_iso8);
202 ftyp->compatible_brands.push_back(FOURCC_mp41);
203 ftyp->compatible_brands.push_back(FOURCC_dash);
205 if (streams().size() == 1) {
206 FourCC codec_fourcc = FOURCC_NULL;
207 if (streams()[0]->stream_type() == kStreamVideo) {
209 CodecToFourCC(streams()[0]->codec(),
210 static_cast<const VideoStreamInfo*>(streams()[0].
get())
211 ->h26x_stream_format());
212 if (codec_fourcc != FOURCC_NULL)
213 ftyp->compatible_brands.push_back(codec_fourcc);
219 if (codec_fourcc != FOURCC_avc3 && codec_fourcc != FOURCC_hev1)
220 ftyp->compatible_brands.push_back(FOURCC_cmfc);
223 moov->header.creation_time = IsoTimeNow();
224 moov->header.modification_time = IsoTimeNow();
225 moov->header.next_track_id =
static_cast<uint32_t
>(streams().size()) + 1;
227 moov->tracks.resize(streams().size());
228 moov->extends.tracks.resize(streams().size());
231 for (uint32_t i = 0; i < streams().size(); ++i) {
232 const StreamInfo* stream = streams()[i].get();
233 Track& trak = moov->tracks[i];
234 trak.header.track_id = i + 1;
237 trex.track_id = trak.header.track_id;
238 trex.default_sample_description_index = 1;
240 bool generate_trak_result =
false;
241 switch (stream->stream_type()) {
243 generate_trak_result = GenerateVideoTrak(
244 static_cast<const VideoStreamInfo*>(stream), &trak, i + 1);
247 generate_trak_result = GenerateAudioTrak(
248 static_cast<const AudioStreamInfo*>(stream), &trak, i + 1);
251 generate_trak_result = GenerateTextTrak(
252 static_cast<const TextStreamInfo*>(stream), &trak, i + 1);
255 NOTIMPLEMENTED() <<
"Not implemented for stream type: " 256 << stream->stream_type();
258 if (!generate_trak_result)
259 return Status(error::MUXER_FAILURE,
"Failed to generate trak.");
263 if (edit_list_offset_.value() > 0) {
265 entry.media_time = edit_list_offset_.value();
266 entry.media_rate_integer = 1;
267 trak.edit.list.edits.push_back(entry);
272 const auto& key_system_info = stream->encryption_config().key_system_info;
274 if (system.psshs.empty())
277 pssh.raw_box = system.psshs;
278 moov->pssh.push_back(pssh);
283 if (options().segment_template.empty()) {
291 const Status segmenter_initialized =
292 segmenter_->Initialize(streams(), muxer_listener(), progress_listener());
293 if (!segmenter_initialized.ok())
294 return segmenter_initialized;
296 FireOnMediaStartEvent();
301 if (edit_list_offset_)
304 const int64_t pts = sample.pts();
305 const int64_t dts = sample.dts();
337 const int64_t pts_dts_offset = pts - dts;
338 if (pts_dts_offset > 0) {
340 LOG(ERROR) <<
"Negative presentation timestamp (" << pts
341 <<
") is not supported when there is an offset between " 342 "presentation timestamp and decoding timestamp (" 344 return Status(error::MUXER_FAILURE,
345 "Unsupported negative pts when there is an offset between " 348 edit_list_offset_ = pts_dts_offset;
351 if (pts_dts_offset < 0) {
352 LOG(ERROR) <<
"presentation timestamp (" << pts
353 <<
") is not supposed to be greater than decoding timestamp (" 355 return Status(error::MUXER_FAILURE,
"Not expecting pts < dts.");
357 edit_list_offset_ = std::max(-sample.pts(),
static_cast<int64_t
>(0));
362 int64_t now = IsoTimeNow();
363 trak->header.creation_time = now;
364 trak->header.modification_time = now;
365 trak->header.duration = 0;
366 trak->media.header.creation_time = now;
367 trak->media.header.modification_time = now;
368 trak->media.header.timescale = info->time_scale();
369 trak->media.header.duration = 0;
370 if (!info->language().empty()) {
372 std::string main_language = info->language();
373 size_t dash = main_language.find(
'-');
374 if (dash != std::string::npos) {
375 main_language.erase(dash);
379 if (main_language.size() != 3) {
380 LOG(WARNING) <<
"'" << main_language <<
"' is not a valid ISO-639-2 " 381 <<
"language code, ignoring.";
383 trak->media.header.language.code = main_language;
391 InitializeTrak(video_info, trak);
397 if (pixel_width == 0 || pixel_height == 0) {
398 LOG(WARNING) <<
"pixel width/height are not set. Assuming 1:1.";
402 const double sample_aspect_ratio =
403 static_cast<double>(pixel_width) / pixel_height;
404 trak->header.width = video_info->width() * sample_aspect_ratio * 0x10000;
405 trak->header.height = video_info->height() * 0x10000;
409 CodecToFourCC(video_info->codec(), video_info->h26x_stream_format());
410 video.width = video_info->width();
411 video.height = video_info->height();
412 video.codec_configuration.data = video_info->codec_config();
413 if (pixel_width != 1 || pixel_height != 1) {
414 video.pixel_aspect.h_spacing = pixel_width;
415 video.pixel_aspect.v_spacing = pixel_height;
419 trak->media.information.sample_table.description;
420 sample_description.type = kVideo;
421 sample_description.video_entries.push_back(video);
423 if (video_info->is_encrypted()) {
424 if (video_info->has_clear_lead()) {
426 sample_description.video_entries.push_back(video);
430 GenerateSinf(entry.format, video_info->encryption_config(), &entry.sinf);
431 entry.format = FOURCC_encv;
439 InitializeTrak(audio_info, trak);
441 trak->header.volume = 0x100;
445 CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified);
446 switch(audio_info->codec()){
448 audio.esds.es_descriptor.set_object_type(
449 ObjectType::kISO_14496_3);
450 audio.esds.es_descriptor.set_esid(track_id);
451 audio.esds.es_descriptor.set_decoder_specific_info(
452 audio_info->codec_config());
453 audio.esds.es_descriptor.set_max_bitrate(audio_info->max_bitrate());
454 audio.esds.es_descriptor.set_avg_bitrate(audio_info->avg_bitrate());
461 audio.ddts.extra_data = audio_info->codec_config();
462 audio.ddts.max_bitrate = audio_info->max_bitrate();
463 audio.ddts.avg_bitrate = audio_info->avg_bitrate();
464 audio.ddts.sampling_frequency = audio_info->sampling_frequency();
465 audio.ddts.pcm_sample_depth = audio_info->sample_bits();
468 audio.dac3.data = audio_info->codec_config();
471 audio.dec3.data = audio_info->codec_config();
474 audio.dfla.data = audio_info->codec_config();
477 audio.dops.opus_identification_header = audio_info->codec_config();
480 NOTIMPLEMENTED() <<
" Unsupported audio codec " << audio_info->codec();
484 if (audio_info->codec() == kCodecAC3 || audio_info->codec() == kCodecEAC3) {
487 audio.channelcount = 2;
488 audio.samplesize = 16;
490 audio.channelcount = audio_info->num_channels();
491 audio.samplesize = audio_info->sample_bits();
493 audio.samplerate = audio_info->sampling_frequency();
494 SampleTable& sample_table = trak->media.information.sample_table;
496 sample_description.type = kAudio;
497 sample_description.audio_entries.push_back(audio);
499 if (audio_info->is_encrypted()) {
500 if (audio_info->has_clear_lead()) {
502 sample_description.audio_entries.push_back(audio);
506 GenerateSinf(entry.format, audio_info->encryption_config(), &entry.sinf);
507 entry.format = FOURCC_enca;
510 if (audio_info->seek_preroll_ns() > 0) {
511 sample_table.sample_group_descriptions.resize(1);
513 sample_table.sample_group_descriptions.back();
514 sample_group_description.grouping_type = FOURCC_roll;
515 sample_group_description.audio_roll_recovery_entries.resize(1);
516 sample_group_description.audio_roll_recovery_entries[0].roll_distance =
517 GetRollDistance(audio_info->seek_preroll_ns(), audio.samplerate);
527 InitializeTrak(text_info, trak);
529 if (text_info->codec_string() ==
"wvtt") {
532 webvtt.format = FOURCC_wvtt;
538 webvtt.config.config =
"WEBVTT";
541 if (!text_info->codec_config().empty()) {
542 LOG(INFO) <<
"Skipping possible style / region configuration as the spec " 543 "does not define a way to carry them inside ISO-BMFF files.";
549 webvtt.label.source_label =
"source_label";
551 trak->media.information.sample_table.description;
552 sample_description.type = kText;
553 sample_description.text_entries.push_back(webvtt);
556 NOTIMPLEMENTED() << text_info->codec_string()
557 <<
" handling not implemented yet.";
561 base::Optional<Range> MP4Muxer::GetInitRangeStartAndEnd() {
562 size_t range_offset = 0;
563 size_t range_size = 0;
564 const bool has_range = segmenter_->GetInitRange(&range_offset, &range_size);
567 return base::nullopt;
570 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
574 base::Optional<Range> MP4Muxer::GetIndexRangeStartAndEnd() {
575 size_t range_offset = 0;
576 size_t range_size = 0;
577 const bool has_range = segmenter_->GetIndexRange(&range_offset, &range_size);
580 return base::nullopt;
583 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
587 void MP4Muxer::FireOnMediaStartEvent() {
588 if (!muxer_listener())
591 if (streams().size() > 1) {
592 LOG(ERROR) <<
"MuxerListener cannot take more than 1 stream.";
595 DCHECK(!streams().empty()) <<
"Media started without a stream.";
597 const uint32_t timescale = segmenter_->GetReferenceTimeScale();
598 muxer_listener()->
OnMediaStart(options(), *streams().front(), timescale,
599 MuxerListener::kContainerMp4);
602 void MP4Muxer::FireOnMediaEndEvent() {
603 if (!muxer_listener())
607 media_range.
init_range = GetInitRangeStartAndEnd();
608 media_range.
index_range = GetIndexRangeStartAndEnd();
611 const float duration_seconds =
static_cast<float>(segmenter_->GetDuration());
612 muxer_listener()->
OnMediaEnd(media_range, duration_seconds);
615 uint64_t MP4Muxer::IsoTimeNow() {
617 const uint64_t kIsomTimeOffset = 2082844800l;
618 return kIsomTimeOffset +
619 (clock() ? clock()->Now() : base::Time::Now()).ToDoubleT();
All the methods that are virtual are virtual for mocking.
bool include_pssh_in_stream