Full vtt in mp4 path

Change-Id: I7f49dbfed188ff7a451b0b4fbd6947590e8935d1
This commit is contained in:
Rintaro Kuroiwa 2015-11-23 15:12:04 -08:00
parent ec904f3f79
commit 92280bf214
7 changed files with 78 additions and 5 deletions

View File

@ -82,7 +82,7 @@ const char kUsage[] =
" - input (in): Required input/source media file path or network stream\n" " - input (in): Required input/source media file path or network stream\n"
" URL.\n" " URL.\n"
" - stream_selector (stream): Required field with value 'audio',\n" " - stream_selector (stream): Required field with value 'audio',\n"
" 'video', or stream number (zero based).\n" " 'video', 'text', or stream number (zero based).\n"
" - output (out): Required output file (single file) or initialization\n" " - output (out): Required output file (single file) or initialization\n"
" file path (multiple file).\n" " file path (multiple file).\n"
" - segment_template (segment): Optional value which specifies the\n" " - segment_template (segment): Optional value which specifies the\n"
@ -253,8 +253,8 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors,
} }
stream_muxer_options.bandwidth = stream_iter->bandwidth; stream_muxer_options.bandwidth = stream_iter->bandwidth;
// Handle text input. if (stream_iter->stream_selector == "text" &&
if (stream_iter->stream_selector == "text") { stream_iter->output_format != CONTAINER_MOV) {
MediaInfo text_media_info; MediaInfo text_media_info;
if (!StreamInfoToTextMediaInfo(*stream_iter, stream_muxer_options, if (!StreamInfoToTextMediaInfo(*stream_iter, stream_muxer_options,
&text_media_info)) { &text_media_info)) {

View File

@ -33,6 +33,7 @@ const size_t kQueuedSamplesLimit = 10000;
const size_t kInvalidStreamIndex = static_cast<size_t>(-1); const size_t kInvalidStreamIndex = static_cast<size_t>(-1);
const size_t kBaseVideoOutputStreamIndex = 0x100; const size_t kBaseVideoOutputStreamIndex = 0x100;
const size_t kBaseAudioOutputStreamIndex = 0x200; const size_t kBaseAudioOutputStreamIndex = 0x200;
const size_t kBaseTextOutputStreamIndex = 0x300;
std::string GetStreamLabel(size_t stream_index) { std::string GetStreamLabel(size_t stream_index) {
switch (stream_index) { switch (stream_index) {
@ -40,6 +41,8 @@ std::string GetStreamLabel(size_t stream_index) {
return "video"; return "video";
case kBaseAudioOutputStreamIndex: case kBaseAudioOutputStreamIndex:
return "audio"; return "audio";
case kBaseTextOutputStreamIndex:
return "text";
default: default:
return base::SizeTToString(stream_index); return base::SizeTToString(stream_index);
} }
@ -51,11 +54,13 @@ bool GetStreamIndex(const std::string& stream_label, size_t* stream_index) {
*stream_index = kBaseVideoOutputStreamIndex; *stream_index = kBaseVideoOutputStreamIndex;
} else if (stream_label == "audio") { } else if (stream_label == "audio") {
*stream_index = kBaseAudioOutputStreamIndex; *stream_index = kBaseAudioOutputStreamIndex;
} else if (stream_label == "text") {
*stream_index = kBaseTextOutputStreamIndex;
} else { } else {
// Expect stream_label to be a zero based stream id. // Expect stream_label to be a zero based stream id.
if (!base::StringToSizeT(stream_label, stream_index)) { if (!base::StringToSizeT(stream_label, stream_index)) {
LOG(ERROR) << "Invalid argument --stream=" << stream_label << "; " LOG(ERROR) << "Invalid argument --stream=" << stream_label << "; "
<< "should be 'audio', 'video', or a number"; << "should be 'audio', 'video', 'text', or a number";
return false; return false;
} }
} }
@ -221,6 +226,9 @@ void Demuxer::ParserInitEvent(
bool audio_handler_set = bool audio_handler_set =
output_handlers().find(kBaseAudioOutputStreamIndex) != output_handlers().find(kBaseAudioOutputStreamIndex) !=
output_handlers().end(); output_handlers().end();
bool text_handler_set =
output_handlers().find(kBaseTextOutputStreamIndex) !=
output_handlers().end();
for (const std::shared_ptr<StreamInfo>& stream_info : stream_infos) { for (const std::shared_ptr<StreamInfo>& stream_info : stream_infos) {
size_t stream_index = base_stream_index; size_t stream_index = base_stream_index;
if (video_handler_set && stream_info->stream_type() == kStreamVideo) { if (video_handler_set && stream_info->stream_type() == kStreamVideo) {
@ -233,6 +241,10 @@ void Demuxer::ParserInitEvent(
// Only for the first audio stream. // Only for the first audio stream.
audio_handler_set = false; audio_handler_set = false;
} }
if (text_handler_set && stream_info->stream_type() == kStreamText) {
stream_index = kBaseTextOutputStreamIndex;
text_handler_set = false;
}
const bool handler_set = const bool handler_set =
output_handlers().find(stream_index) != output_handlers().end(); output_handlers().find(stream_index) != output_handlers().end();

View File

@ -14,6 +14,7 @@
#include "packager/media/base/audio_stream_info.h" #include "packager/media/base/audio_stream_info.h"
#include "packager/media/base/muxer_options.h" #include "packager/media/base/muxer_options.h"
#include "packager/media/base/protection_system_specific_info.h" #include "packager/media/base/protection_system_specific_info.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/base/video_stream_info.h" #include "packager/media/base/video_stream_info.h"
#include "packager/media/codecs/ec3_audio_util.h" #include "packager/media/codecs/ec3_audio_util.h"
#include "packager/mpd/base/media_info.pb.h" #include "packager/mpd/base/media_info.pb.h"
@ -134,11 +135,29 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info,
} }
} }
void AddTextInfo(const TextStreamInfo& text_stream_info,
MediaInfo* media_info) {
MediaInfo::TextInfo* text_info = media_info->mutable_text_info();
// For now, set everything as subtitle.
text_info->set_type(MediaInfo::TextInfo::SUBTITLE);
if (text_stream_info.codec_string() == "wvtt") {
text_info->set_format("vtt");
} else {
LOG(WARNING) << "Unhandled codec " << text_stream_info.codec_string()
<< " copying it as format.";
text_info->set_format(text_stream_info.codec_string());
}
text_info->set_language(text_stream_info.language());
}
void SetMediaInfoStreamInfo(const StreamInfo& stream_info, void SetMediaInfoStreamInfo(const StreamInfo& stream_info,
MediaInfo* media_info) { MediaInfo* media_info) {
if (stream_info.stream_type() == kStreamAudio) { if (stream_info.stream_type() == kStreamAudio) {
AddAudioInfo(static_cast<const AudioStreamInfo*>(&stream_info), AddAudioInfo(static_cast<const AudioStreamInfo*>(&stream_info),
media_info); media_info);
} else if (stream_info.stream_type() == kStreamText) {
AddTextInfo(static_cast<const TextStreamInfo&>(stream_info), media_info);
} else { } else {
DCHECK_EQ(stream_info.stream_type(), kStreamVideo); DCHECK_EQ(stream_info.stream_type(), kStreamVideo);
AddVideoInfo(static_cast<const VideoStreamInfo*>(&stream_info), AddVideoInfo(static_cast<const VideoStreamInfo*>(&stream_info),

View File

@ -13,6 +13,7 @@
#include "packager/media/base/fourccs.h" #include "packager/media/base/fourccs.h"
#include "packager/media/base/key_source.h" #include "packager/media/base/key_source.h"
#include "packager/media/base/media_sample.h" #include "packager/media/base/media_sample.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/base/video_stream_info.h" #include "packager/media/base/video_stream_info.h"
#include "packager/media/codecs/es_descriptor.h" #include "packager/media/codecs/es_descriptor.h"
#include "packager/media/event/muxer_listener.h" #include "packager/media/event/muxer_listener.h"
@ -150,6 +151,10 @@ Status MP4Muxer::InitializeMuxer() {
GenerateAudioTrak(static_cast<AudioStreamInfo*>(streams()[i].get()), GenerateAudioTrak(static_cast<AudioStreamInfo*>(streams()[i].get()),
&trak, i + 1); &trak, i + 1);
break; break;
case kStreamText:
GenerateTextTrak(static_cast<TextStreamInfo*>(streams()[i].get()),
&trak, i + 1);
break;
default: default:
NOTIMPLEMENTED() << "Not implemented for stream type: " NOTIMPLEMENTED() << "Not implemented for stream type: "
<< streams()[i]->stream_type(); << streams()[i]->stream_type();
@ -373,6 +378,31 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
} }
} }
void MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
Track* trak,
uint32_t track_id) {
InitializeTrak(text_info, trak);
if (text_info->codec_string() == "wvtt") {
// Handle WebVTT.
TextSampleEntry webvtt;
webvtt.format = FOURCC_wvtt;
webvtt.config.config.assign(text_info->codec_config().begin(),
text_info->codec_config().end());
// TODO(rkuroiwa): This should be the source file URI(s). Putting bogus
// string for now so that the box will be there for samples with overlapping
// cues.
webvtt.label.source_label = "source_label";
SampleDescription& sample_description =
trak->media.information.sample_table.description;
sample_description.type = kText;
sample_description.text_entries.push_back(webvtt);
return;
}
NOTIMPLEMENTED() << text_info->codec_string()
<< " handling not implemented yet.";
}
bool MP4Muxer::GetInitRangeStartAndEnd(uint32_t* start, uint32_t* end) { bool MP4Muxer::GetInitRangeStartAndEnd(uint32_t* start, uint32_t* end) {
DCHECK(start && end); DCHECK(start && end);
size_t range_offset = 0; size_t range_offset = 0;

View File

@ -16,6 +16,7 @@ namespace media {
class AudioStreamInfo; class AudioStreamInfo;
class StreamInfo; class StreamInfo;
class TextStreamInfo;
class VideoStreamInfo; class VideoStreamInfo;
namespace mp4 { namespace mp4 {
@ -50,6 +51,9 @@ class MP4Muxer : public Muxer {
void GenerateVideoTrak(const VideoStreamInfo* video_info, void GenerateVideoTrak(const VideoStreamInfo* video_info,
Track* trak, Track* trak,
uint32_t track_id); uint32_t track_id);
// Generates the track box |trak| for the text stream described by
// |text_info|. |track_id| is the id passed by the caller for this track.
void GenerateTextTrak(const TextStreamInfo* text_info,
                      Track* trak,
                      uint32_t track_id);
// Gets |start| and |end| initialization range. Returns true if there is an // Gets |start| and |end| initialization range. Returns true if there is an
// init range and sets start-end byte-range-spec specified in RFC2616. // init range and sets start-end byte-range-spec specified in RFC2616.

View File

@ -707,8 +707,10 @@ Representation* AdaptationSet::AddRepresentation(const MediaInfo& media_info) {
std::unique_ptr<Representation> representation(new Representation( std::unique_ptr<Representation> representation(new Representation(
media_info, mpd_options_, representation_id, std::move(listener))); media_info, mpd_options_, representation_id, std::move(listener)));
if (!representation->Init()) if (!representation->Init()) {
LOG(ERROR) << "Failed to initialize Representation.";
return NULL; return NULL;
}
// For videos, record the width, height, and the frame rate to calculate the // For videos, record the width, height, and the frame rate to calculate the
// max {width,height,framerate} required for DASH IOP. // max {width,height,framerate} required for DASH IOP.
@ -1383,6 +1385,8 @@ std::string Representation::GetTextMimeType() const {
if (media_info_.text_info().format() == "vtt") { if (media_info_.text_info().format() == "vtt") {
if (media_info_.container_type() == MediaInfo::CONTAINER_TEXT) { if (media_info_.container_type() == MediaInfo::CONTAINER_TEXT) {
return "text/vtt"; return "text/vtt";
} else if (media_info_.container_type() == MediaInfo::CONTAINER_MP4) {
return "application/mp4";
} }
LOG(ERROR) << "Failed to determine MIME type for VTT container: " LOG(ERROR) << "Failed to determine MIME type for VTT container: "
<< media_info_.container_type(); << media_info_.container_type();

View File

@ -23,6 +23,10 @@ std::string TextCodecString(const MediaInfo& media_info) {
(media_info.container_type() == MediaInfo::CONTAINER_MP4)) { (media_info.container_type() == MediaInfo::CONTAINER_MP4)) {
return "stpp"; return "stpp";
} }
if (format == "vtt" &&
(media_info.container_type() == MediaInfo::CONTAINER_MP4)) {
return "wvtt";
}
// Otherwise codec doesn't need to be specified, e.g. vtt and ttml+xml are // Otherwise codec doesn't need to be specified, e.g. vtt and ttml+xml are
// obvious from the mime type. // obvious from the mime type.