From 92280bf21490f7c9ca9647e3fc5dd170f7f70837 Mon Sep 17 00:00:00 2001 From: Rintaro Kuroiwa Date: Mon, 23 Nov 2015 15:12:04 -0800 Subject: [PATCH] Full vtt in mp4 path Change-Id: I7f49dbfed188ff7a451b0b4fbd6947590e8935d1 --- packager/app/packager_main.cc | 6 ++-- packager/media/base/demuxer.cc | 14 ++++++++- .../media/event/muxer_listener_internal.cc | 19 ++++++++++++ packager/media/formats/mp4/mp4_muxer.cc | 30 +++++++++++++++++++ packager/media/formats/mp4/mp4_muxer.h | 4 +++ packager/mpd/base/mpd_builder.cc | 6 +++- packager/mpd/base/mpd_utils.cc | 4 +++ 7 files changed, 78 insertions(+), 5 deletions(-) diff --git a/packager/app/packager_main.cc b/packager/app/packager_main.cc index b97fcfb45f..6c68868e09 100644 --- a/packager/app/packager_main.cc +++ b/packager/app/packager_main.cc @@ -82,7 +82,7 @@ const char kUsage[] = " - input (in): Required input/source media file path or network stream\n" " URL.\n" " - stream_selector (stream): Required field with value 'audio',\n" - " 'video', or stream number (zero based).\n" + " 'video', 'text', or stream number (zero based).\n" " - output (out): Required output file (single file) or initialization\n" " file path (multiple file).\n" " - segment_template (segment): Optional value which specifies the\n" @@ -253,8 +253,8 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors, } stream_muxer_options.bandwidth = stream_iter->bandwidth; - // Handle text input. 
- if (stream_iter->stream_selector == "text") { + if (stream_iter->stream_selector == "text" && + stream_iter->output_format != CONTAINER_MOV) { MediaInfo text_media_info; if (!StreamInfoToTextMediaInfo(*stream_iter, stream_muxer_options, &text_media_info)) { diff --git a/packager/media/base/demuxer.cc b/packager/media/base/demuxer.cc index ad6f2c98e3..8f59c4676b 100644 --- a/packager/media/base/demuxer.cc +++ b/packager/media/base/demuxer.cc @@ -33,6 +33,7 @@ const size_t kQueuedSamplesLimit = 10000; const size_t kInvalidStreamIndex = static_cast(-1); const size_t kBaseVideoOutputStreamIndex = 0x100; const size_t kBaseAudioOutputStreamIndex = 0x200; +const size_t kBaseTextOutputStreamIndex = 0x300; std::string GetStreamLabel(size_t stream_index) { switch (stream_index) { @@ -40,6 +41,8 @@ std::string GetStreamLabel(size_t stream_index) { return "video"; case kBaseAudioOutputStreamIndex: return "audio"; + case kBaseTextOutputStreamIndex: + return "text"; default: return base::SizeTToString(stream_index); } @@ -51,11 +54,13 @@ bool GetStreamIndex(const std::string& stream_label, size_t* stream_index) { *stream_index = kBaseVideoOutputStreamIndex; } else if (stream_label == "audio") { *stream_index = kBaseAudioOutputStreamIndex; + } else if (stream_label == "text") { + *stream_index = kBaseTextOutputStreamIndex; } else { // Expect stream_label to be a zero based stream id. 
if (!base::StringToSizeT(stream_label, stream_index)) { LOG(ERROR) << "Invalid argument --stream=" << stream_label << "; " - << "should be 'audio', 'video', or a number"; + << "should be 'audio', 'video', 'text', or a number"; return false; } } @@ -221,6 +226,9 @@ void Demuxer::ParserInitEvent( bool audio_handler_set = output_handlers().find(kBaseAudioOutputStreamIndex) != output_handlers().end(); + bool text_handler_set = + output_handlers().find(kBaseTextOutputStreamIndex) != + output_handlers().end(); for (const std::shared_ptr<StreamInfo>& stream_info : stream_infos) { size_t stream_index = base_stream_index; if (video_handler_set && stream_info->stream_type() == kStreamVideo) { @@ -233,6 +241,10 @@ void Demuxer::ParserInitEvent( // Only for the first audio stream. audio_handler_set = false; } + if (text_handler_set && stream_info->stream_type() == kStreamText) { + stream_index = kBaseTextOutputStreamIndex; + text_handler_set = false; + } const bool handler_set = output_handlers().find(stream_index) != output_handlers().end(); diff --git a/packager/media/event/muxer_listener_internal.cc b/packager/media/event/muxer_listener_internal.cc index 48c74740c0..23f949befd 100644 --- a/packager/media/event/muxer_listener_internal.cc +++ b/packager/media/event/muxer_listener_internal.cc @@ -14,6 +14,7 @@ #include "packager/media/base/audio_stream_info.h" #include "packager/media/base/muxer_options.h" #include "packager/media/base/protection_system_specific_info.h" +#include "packager/media/base/text_stream_info.h" #include "packager/media/base/video_stream_info.h" #include "packager/media/codecs/ec3_audio_util.h" #include "packager/mpd/base/media_info.pb.h" @@ -134,11 +135,29 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info, } } +void AddTextInfo(const TextStreamInfo& text_stream_info, + MediaInfo* media_info) { + MediaInfo::TextInfo* text_info = media_info->mutable_text_info(); + // For now, set everything as subtitle. 
+ text_info->set_type(MediaInfo::TextInfo::SUBTITLE); + if (text_stream_info.codec_string() == "wvtt") { + text_info->set_format("vtt"); + } else { + LOG(WARNING) << "Unhandled codec " << text_stream_info.codec_string() + << " copying it as format."; + text_info->set_format(text_stream_info.codec_string()); + } + + text_info->set_language(text_stream_info.language()); +} + void SetMediaInfoStreamInfo(const StreamInfo& stream_info, MediaInfo* media_info) { if (stream_info.stream_type() == kStreamAudio) { AddAudioInfo(static_cast<const AudioStreamInfo*>(&stream_info), media_info); + } else if (stream_info.stream_type() == kStreamText) { + AddTextInfo(static_cast<const TextStreamInfo&>(stream_info), media_info); } else { DCHECK_EQ(stream_info.stream_type(), kStreamVideo); AddVideoInfo(static_cast<const VideoStreamInfo*>(&stream_info), diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 8b90fb4d90..af10f4a06f 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -13,6 +13,7 @@ #include "packager/media/base/fourccs.h" #include "packager/media/base/key_source.h" #include "packager/media/base/media_sample.h" +#include "packager/media/base/text_stream_info.h" #include "packager/media/base/video_stream_info.h" #include "packager/media/codecs/es_descriptor.h" #include "packager/media/event/muxer_listener.h" @@ -150,6 +151,10 @@ Status MP4Muxer::InitializeMuxer() { GenerateAudioTrak(static_cast<const AudioStreamInfo*>(streams()[i].get()), &trak, i + 1); break; + case kStreamText: + GenerateTextTrak(static_cast<const TextStreamInfo*>(streams()[i].get()), + &trak, i + 1); + break; default: NOTIMPLEMENTED() << "Not implemented for stream type: " << streams()[i]->stream_type(); @@ -373,6 +378,31 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, } } +void MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info, + Track* trak, + uint32_t track_id) { + InitializeTrak(text_info, trak); + + if (text_info->codec_string() == "wvtt") { + // Handle WebVTT. 
+ TextSampleEntry webvtt; + webvtt.format = FOURCC_wvtt; + webvtt.config.config.assign(text_info->codec_config().begin(), + text_info->codec_config().end()); + // TODO(rkuroiwa): This should be the source file URI(s). Putting bogus + // string for now so that the box will be there for samples with overlapping + // cues. + webvtt.label.source_label = "source_label"; + SampleDescription& sample_description = + trak->media.information.sample_table.description; + sample_description.type = kText; + sample_description.text_entries.push_back(webvtt); + return; + } + NOTIMPLEMENTED() << text_info->codec_string() + << " handling not implemented yet."; +} + bool MP4Muxer::GetInitRangeStartAndEnd(uint32_t* start, uint32_t* end) { DCHECK(start && end); size_t range_offset = 0; diff --git a/packager/media/formats/mp4/mp4_muxer.h b/packager/media/formats/mp4/mp4_muxer.h index 5f60ea611c..8d9865ef82 100644 --- a/packager/media/formats/mp4/mp4_muxer.h +++ b/packager/media/formats/mp4/mp4_muxer.h @@ -16,6 +16,7 @@ namespace media { class AudioStreamInfo; class StreamInfo; +class TextStreamInfo; class VideoStreamInfo; namespace mp4 { @@ -50,6 +51,9 @@ class MP4Muxer : public Muxer { void GenerateVideoTrak(const VideoStreamInfo* video_info, Track* trak, uint32_t track_id); + void GenerateTextTrak(const TextStreamInfo* video_info, + Track* trak, + uint32_t track_id); // Gets |start| and |end| initialization range. Returns true if there is an // init range and sets start-end byte-range-spec specified in RFC2616. 
diff --git a/packager/mpd/base/mpd_builder.cc b/packager/mpd/base/mpd_builder.cc index 69cb769624..d92406e614 100644 --- a/packager/mpd/base/mpd_builder.cc +++ b/packager/mpd/base/mpd_builder.cc @@ -707,8 +707,10 @@ Representation* AdaptationSet::AddRepresentation(const MediaInfo& media_info) { std::unique_ptr<Representation> representation(new Representation( media_info, mpd_options_, representation_id, std::move(listener))); - if (!representation->Init()) + if (!representation->Init()) { + LOG(ERROR) << "Failed to initialize Representation."; return NULL; + } // For videos, record the width, height, and the frame rate to calculate the // max {width,height,framerate} required for DASH IOP. @@ -1383,6 +1385,8 @@ std::string Representation::GetTextMimeType() const { if (media_info_.text_info().format() == "vtt") { if (media_info_.container_type() == MediaInfo::CONTAINER_TEXT) { return "text/vtt"; + } else if (media_info_.container_type() == MediaInfo::CONTAINER_MP4) { + return "application/mp4"; } LOG(ERROR) << "Failed to determine MIME type for VTT container: " << media_info_.container_type(); diff --git a/packager/mpd/base/mpd_utils.cc b/packager/mpd/base/mpd_utils.cc index a3cd39d81a..6fa388ed1f 100644 --- a/packager/mpd/base/mpd_utils.cc +++ b/packager/mpd/base/mpd_utils.cc @@ -23,6 +23,10 @@ std::string TextCodecString(const MediaInfo& media_info) { (media_info.container_type() == MediaInfo::CONTAINER_MP4)) { return "stpp"; } + if (format == "vtt" && + (media_info.container_type() == MediaInfo::CONTAINER_MP4)) { + return "wvtt"; + } // Otherwise codec doesn't need to be specified, e.g. vtt and ttml+xml are // obvious from the mime type.