Handle segmented text same as audio/video.

The same pipeline that handles the audio/video streams now handles the
segmented text streams too.  This applies only to the MP4 output variants,
not to the plain text output.  This also fixes a bug where we added the
X-TIMESTAMP-MAP tag even when there weren't any TS streams; this doesn't
otherwise change the behavior around that tag.

Change-Id: I03f7cea56efa42e96311c00841330629a14aa053
This commit is contained in:
Jacob Trimble 2020-07-27 12:30:37 -07:00
parent 9babfb883b
commit 2909ca0c77
16 changed files with 128 additions and 170 deletions

View File

@ -3,9 +3,9 @@
#EXT-X-INDEPENDENT-SEGMENTS #EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-MEDIA:TYPE=AUDIO,URI="stream_2.m3u8",GROUP-ID="default-audio-group",NAME="stream_2",AUTOSELECT=YES,CHANNELS="2" #EXT-X-MEDIA:TYPE=AUDIO,URI="stream_1.m3u8",GROUP-ID="default-audio-group",NAME="stream_1",AUTOSELECT=YES,CHANNELS="2"
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_1.m3u8",GROUP-ID="default-text-group",NAME="stream_1",AUTOSELECT=YES #EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_0.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES
#EXT-X-STREAM-INF:BANDWIDTH=1108115,AVERAGE-BANDWIDTH=1006069,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=29.970,AUDIO="default-audio-group",SUBTITLES="default-text-group" #EXT-X-STREAM-INF:BANDWIDTH=1108115,AVERAGE-BANDWIDTH=1006069,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=29.970,AUDIO="default-audio-group",SUBTITLES="default-text-group"
stream_3.m3u8 stream_2.m3u8

View File

@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:2
#EXT-X-PLAYLIST-TYPE:VOD
#EXTINF:1.000,
bear-english-text-1.vtt
#EXTINF:1.000,
bear-english-text-2.vtt
#EXTINF:1.000,
bear-english-text-3.vtt
#EXTINF:1.000,
bear-english-text-4.vtt
#EXTINF:1.000,
bear-english-text-5.vtt
#EXT-X-ENDLIST

View File

@ -3,14 +3,11 @@
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test> ## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:2 #EXT-X-TARGETDURATION:2
#EXT-X-PLAYLIST-TYPE:VOD #EXT-X-PLAYLIST-TYPE:VOD
#EXTINF:1.000, #EXT-X-MAP:URI="bear-640x360-audio-init.mp4"
bear-english-text-1.vtt #EXTINF:1.022,
#EXTINF:1.000, bear-640x360-audio-1.m4s
bear-english-text-2.vtt #EXTINF:0.998,
#EXTINF:1.000, bear-640x360-audio-2.m4s
bear-english-text-3.vtt #EXTINF:0.720,
#EXTINF:1.000, bear-640x360-audio-3.m4s
bear-english-text-4.vtt
#EXTINF:1.000,
bear-english-text-5.vtt
#EXT-X-ENDLIST #EXT-X-ENDLIST

View File

@ -3,11 +3,11 @@
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test> ## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:2 #EXT-X-TARGETDURATION:2
#EXT-X-PLAYLIST-TYPE:VOD #EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-audio-init.mp4" #EXT-X-MAP:URI="bear-640x360-video-init.mp4"
#EXTINF:1.022, #EXTINF:1.001,
bear-640x360-audio-1.m4s bear-640x360-video-1.m4s
#EXTINF:0.998, #EXTINF:1.001,
bear-640x360-audio-2.m4s bear-640x360-video-2.m4s
#EXTINF:0.720, #EXTINF:0.734,
bear-640x360-audio-3.m4s bear-640x360-video-3.m4s
#EXT-X-ENDLIST #EXT-X-ENDLIST

View File

@ -1,13 +0,0 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:2
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-video-init.mp4"
#EXTINF:1.001,
bear-640x360-video-1.m4s
#EXTINF:1.001,
bear-640x360-video-2.m4s
#EXTINF:0.734,
bear-640x360-video-3.m4s
#EXT-X-ENDLIST

View File

@ -1,5 +1,4 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE STYLE
::cue { color:lime } ::cue { color:lime }

View File

@ -1,5 +1,4 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE STYLE
::cue { color:lime } ::cue { color:lime }

View File

@ -1,5 +1,4 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE STYLE
::cue { color:lime } ::cue { color:lime }

View File

@ -1,5 +1,4 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE STYLE
::cue { color:lime } ::cue { color:lime }

View File

@ -1,5 +1,4 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE STYLE
::cue { color:lime } ::cue { color:lime }

View File

@ -3,4 +3,4 @@
#EXT-X-INDEPENDENT-SEGMENTS #EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_1.m3u8",GROUP-ID="default-text-group",LANGUAGE="pt",NAME="stream_1",AUTOSELECT=YES #EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_0.m3u8",GROUP-ID="default-text-group",LANGUAGE="pt",NAME="stream_0",AUTOSELECT=YES

View File

@ -3,9 +3,9 @@
#EXT-X-INDEPENDENT-SEGMENTS #EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-MEDIA:TYPE=AUDIO,URI="bear-640x360-audio.m3u8",GROUP-ID="default-audio-group",NAME="stream_1",AUTOSELECT=YES,CHANNELS="2" #EXT-X-MEDIA:TYPE=AUDIO,URI="bear-640x360-audio.m3u8",GROUP-ID="default-audio-group",NAME="stream_0",AUTOSELECT=YES,CHANNELS="2"
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="bear-english-text.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES #EXT-X-MEDIA:TYPE=SUBTITLES,URI="bear-english-text.m3u8",GROUP-ID="default-text-group",NAME="stream_2",AUTOSELECT=YES
#EXT-X-STREAM-INF:BANDWIDTH=1108115,AVERAGE-BANDWIDTH=1006069,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=29.970,AUDIO="default-audio-group",SUBTITLES="default-text-group" #EXT-X-STREAM-INF:BANDWIDTH=1108115,AVERAGE-BANDWIDTH=1006069,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=29.970,AUDIO="default-audio-group",SUBTITLES="default-text-group"
bear-640x360-video.m3u8 bear-640x360-video.m3u8

View File

@ -61,10 +61,10 @@ Status MediaHandler::Initialize() {
} }
Status MediaHandler::Chain( Status MediaHandler::Chain(
std::initializer_list<std::shared_ptr<MediaHandler>> list) { const std::vector<std::shared_ptr<MediaHandler>>& list) {
std::shared_ptr<MediaHandler> previous; std::shared_ptr<MediaHandler> previous;
for (auto& next : list) { for (const auto& next : list) {
// Skip null entries. // Skip null entries.
if (!next) { if (!next) {
continue; continue;

View File

@ -172,8 +172,7 @@ class MediaHandler {
/// Validate if the handler is connected to its upstream handler. /// Validate if the handler is connected to its upstream handler.
bool IsConnected() { return num_input_streams_ > 0; } bool IsConnected() { return num_input_streams_ > 0; }
static Status Chain( static Status Chain(const std::vector<std::shared_ptr<MediaHandler>>& list);
std::initializer_list<std::shared_ptr<MediaHandler>> list);
protected: protected:
/// Internal implementation of initialize. Note that it should only initialize /// Internal implementation of initialize. Note that it should only initialize

View File

@ -478,11 +478,11 @@ std::shared_ptr<MediaHandler> CreateEncryptionHandler(
return std::make_shared<EncryptionHandler>(encryption_params, key_source); return std::make_shared<EncryptionHandler>(encryption_params, key_source);
} }
std::unique_ptr<TextChunker> CreateTextChunker( std::unique_ptr<MediaHandler> CreateTextChunker(
const ChunkingParams& chunking_params) { const ChunkingParams& chunking_params) {
const float segment_length_in_seconds = const float segment_length_in_seconds =
chunking_params.segment_duration_in_seconds; chunking_params.segment_duration_in_seconds;
return std::unique_ptr<TextChunker>( return std::unique_ptr<MediaHandler>(
new TextChunker(segment_length_in_seconds)); new TextChunker(segment_length_in_seconds));
} }
@ -527,39 +527,6 @@ Status CreateHlsTextJob(const StreamDescriptor& stream,
std::move(chunker), std::move(output)}); std::move(chunker), std::move(output)});
} }
Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
const PackagingParams& packaging_params,
std::unique_ptr<MuxerListener> muxer_listener,
SyncPointQueue* sync_points,
MuxerFactory* muxer_factory,
std::shared_ptr<OriginHandler>* root) {
std::shared_ptr<Demuxer> demuxer;
RETURN_IF_ERROR(CreateDemuxer(stream, packaging_params, &demuxer));
if (!stream.language.empty())
demuxer->SetLanguageOverride(stream.stream_selector, stream.language);
auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs);
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, padder));
auto text_to_mp4 = std::make_shared<WebVttToMp4Handler>();
auto muxer = muxer_factory->CreateMuxer(GetOutputFormat(stream), stream);
muxer->SetMuxerListener(std::move(muxer_listener));
// Optional Cue Alignment Handler
std::shared_ptr<MediaHandler> cue_aligner;
if (sync_points) {
cue_aligner = std::make_shared<CueAlignmentHandler>(sync_points);
}
std::shared_ptr<MediaHandler> chunker =
CreateTextChunker(packaging_params.chunking_params);
*root = demuxer;
return MediaHandler::Chain({std::move(padder), std::move(cue_aligner),
std::move(chunker), std::move(text_to_mp4),
std::move(muxer)});
}
Status CreateTextJobs( Status CreateTextJobs(
const std::vector<std::reference_wrapper<const StreamDescriptor>>& streams, const std::vector<std::reference_wrapper<const StreamDescriptor>>& streams,
const PackagingParams& packaging_params, const PackagingParams& packaging_params,
@ -578,7 +545,6 @@ Status CreateTextJobs(
// MP4 WEBVTT --> MP4 WEBVTT [ unsupported ] // MP4 WEBVTT --> MP4 WEBVTT [ unsupported ]
// MP4 WEBVTT --> TEXT WEBVTT [ unsupported ] // MP4 WEBVTT --> TEXT WEBVTT [ unsupported ]
const auto input_container = DetermineContainerFromFileName(stream.input); const auto input_container = DetermineContainerFromFileName(stream.input);
const auto output_container = GetOutputFormat(stream);
if (input_container != CONTAINER_WEBVTT && if (input_container != CONTAINER_WEBVTT &&
input_container != CONTAINER_TTML) { input_container != CONTAINER_TTML) {
@ -586,27 +552,8 @@ Status CreateTextJobs(
"Text output format is not support for " + stream.input); "Text output format is not support for " + stream.input);
} }
if (output_container == CONTAINER_MOV) {
if (input_container == CONTAINER_TTML) {
return Status(error::INVALID_ARGUMENT,
"TTML in MP4 is not supported yet. Please follow "
"https://github.com/google/shaka-packager/issues/87 for "
"the updates.");
}
std::unique_ptr<MuxerListener> muxer_listener =
muxer_listener_factory->CreateListener(ToMuxerListenerData(stream));
std::shared_ptr<OriginHandler> root;
RETURN_IF_ERROR(CreateWebVttToMp4TextJob(
stream, packaging_params, std::move(muxer_listener), sync_points,
muxer_factory, &root));
job_manager->Add("MP4 text job", std::move(root));
} else {
std::unique_ptr<MuxerListener> hls_listener = std::unique_ptr<MuxerListener> hls_listener =
muxer_listener_factory->CreateHlsListener( muxer_listener_factory->CreateHlsListener(ToMuxerListenerData(stream));
ToMuxerListenerData(stream));
// Check input to ensure that output is possible. // Check input to ensure that output is possible.
if (hls_listener && !stream.dash_only) { if (hls_listener && !stream.dash_only) {
@ -621,8 +568,7 @@ Status CreateTextJobs(
} }
} }
if (mpd_notifier && !stream.segment_template.empty() && if (mpd_notifier && !stream.segment_template.empty() && !stream.hls_only) {
!stream.hls_only) {
return Status(error::INVALID_ARGUMENT, return Status(error::INVALID_ARGUMENT,
"Cannot create text output for MPD with segment output."); "Cannot create text output for MPD with segment output.");
} }
@ -668,7 +614,6 @@ Status CreateTextJobs(
} }
} }
} }
}
return Status::OK; return Status::OK;
} }
@ -722,6 +667,8 @@ Status CreateAudioVideoJobs(
const bool new_input_file = stream.input != previous_input; const bool new_input_file = stream.input != previous_input;
const bool new_stream = const bool new_stream =
new_input_file || previous_selector != stream.stream_selector; new_input_file || previous_selector != stream.stream_selector;
// TODO(modmaker): Use a better detector of text streams.
const bool is_text = stream.stream_selector == "text";
previous_input = stream.input; previous_input = stream.input;
previous_selector = stream.stream_selector; previous_selector = stream.stream_selector;
@ -739,27 +686,35 @@ Status CreateAudioVideoJobs(
demuxer->SetLanguageOverride(stream.stream_selector, stream.language); demuxer->SetLanguageOverride(stream.stream_selector, stream.language);
} }
replicator = std::make_shared<Replicator>(); std::vector<std::shared_ptr<MediaHandler>> handlers;
auto chunker = if (is_text) {
std::make_shared<ChunkingHandler>(packaging_params.chunking_params); handlers.emplace_back(
auto encryptor = CreateEncryptionHandler(packaging_params, stream, std::make_shared<TextPadder>(kDefaultTextZeroBiasMs));
encryption_key_source);
// TODO(vaage) : Create a nicer way to connect handlers to demuxers.
if (sync_points) {
RETURN_IF_ERROR(
MediaHandler::Chain({cue_aligner, chunker, encryptor, replicator}));
RETURN_IF_ERROR(
demuxer->SetHandler(stream.stream_selector, cue_aligner));
} else {
RETURN_IF_ERROR(MediaHandler::Chain({chunker, encryptor, replicator}));
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, chunker));
} }
if (sync_points) {
handlers.emplace_back(cue_aligner);
}
if (is_text) {
handlers.emplace_back(
CreateTextChunker(packaging_params.chunking_params));
} else {
handlers.emplace_back(std::make_shared<ChunkingHandler>(
packaging_params.chunking_params));
}
handlers.emplace_back(CreateEncryptionHandler(packaging_params, stream,
encryption_key_source));
replicator = std::make_shared<Replicator>();
handlers.emplace_back(replicator);
RETURN_IF_ERROR(MediaHandler::Chain(handlers));
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, handlers[0]));
} }
// Create the muxer (output) for this track. // Create the muxer (output) for this track.
const auto output_format = GetOutputFormat(stream);
std::shared_ptr<Muxer> muxer = std::shared_ptr<Muxer> muxer =
muxer_factory->CreateMuxer(GetOutputFormat(stream), stream); muxer_factory->CreateMuxer(output_format, stream);
if (!muxer) { if (!muxer) {
return Status(error::INVALID_ARGUMENT, "Failed to create muxer for " + return Status(error::INVALID_ARGUMENT, "Failed to create muxer for " +
stream.input + ":" + stream.input + ":" +
@ -776,7 +731,15 @@ Status CreateAudioVideoJobs(
? std::make_shared<TrickPlayHandler>(stream.trick_play_factor) ? std::make_shared<TrickPlayHandler>(stream.trick_play_factor)
: nullptr; : nullptr;
RETURN_IF_ERROR(MediaHandler::Chain({replicator, trick_play, muxer})); // TODO(modmaker): Move to MOV muxer?
const auto input_container = DetermineContainerFromFileName(stream.input);
auto text_to_mp4 =
input_container == CONTAINER_WEBVTT && output_format == CONTAINER_MOV
? std::make_shared<WebVttToMp4Handler>()
: nullptr;
RETURN_IF_ERROR(
MediaHandler::Chain({replicator, trick_play, text_to_mp4, muxer}));
} }
return Status::OK; return Status::OK;
@ -806,12 +769,13 @@ Status CreateAllJobs(const std::vector<StreamDescriptor>& stream_descriptors,
// TODO: Find a better way to determine what stream type a stream // TODO: Find a better way to determine what stream type a stream
// descriptor is as |stream_selector| may use an index. This would // descriptor is as |stream_selector| may use an index. This would
// also allow us to use a simpler audio pipeline. // also allow us to use a simpler audio pipeline.
if (stream.stream_selector == "text") { const auto output_format = GetOutputFormat(stream);
if (stream.stream_selector == "text" && output_format != CONTAINER_MOV) {
text_streams.push_back(stream); text_streams.push_back(stream);
} else { } else {
audio_video_streams.push_back(stream); audio_video_streams.push_back(stream);
switch (GetOutputFormat(stream)) { switch (output_format) {
case CONTAINER_MPEG2TS: case CONTAINER_MPEG2TS:
case CONTAINER_AAC: case CONTAINER_AAC:
case CONTAINER_MP3: case CONTAINER_MP3: