diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py index 779f5eceb7..4605bbd658 100755 --- a/packager/app/test/packager_test.py +++ b/packager/app/test/packager_test.py @@ -243,7 +243,9 @@ def _UpdateMpdTimes(mpd_filepath): def GetExtension(input_file_path, output_format): - if output_format: + if output_format in {'vtt+mp4', 'ttml+mp4'}: + return 'mp4' + elif output_format: return output_format # Otherwise use the same extension as the input. ext = os.path.splitext(input_file_path)[1] @@ -857,6 +859,14 @@ class PackagerFunctionalTest(PackagerAppTest): self.assertPackageSuccess(streams, flags) self._CheckTestResults('segmented-ttml-text') + def testSegmentedTtmlMp4(self): + streams = self._GetStreams(['text'], test_files=['bear-english.vtt'], + output_format='ttml+mp4', segmented=True) + flags = self._GetFlags(output_hls=True, output_dash=True) + + self.assertPackageSuccess(streams, flags) + self._CheckTestResults('segmented-ttml-mp4') + def testMp4TrailingMoov(self): self.assertPackageSuccess( self._GetStreams(['audio', 'video'], diff --git a/packager/app/test/testdata/hls-only-dash-only-captions/bear-english-text-init.mp4 b/packager/app/test/testdata/hls-only-dash-only-captions/bear-english-text-init.mp4 index 8d60268521..1ae945d7fb 100644 Binary files a/packager/app/test/testdata/hls-only-dash-only-captions/bear-english-text-init.mp4 and b/packager/app/test/testdata/hls-only-dash-only-captions/bear-english-text-init.mp4 differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-1.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-1.m4s new file mode 100644 index 0000000000..9cba9773d2 Binary files /dev/null and b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-1.m4s differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-2.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-2.m4s new file mode 100644 index 0000000000..473e7de4ba Binary files /dev/null and b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-2.m4s differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-3.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-3.m4s new file mode 100644 index 0000000000..f015e810cf Binary files /dev/null and b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-3.m4s differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-4.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-4.m4s new file mode 100644 index 0000000000..4253cd74b7 Binary files /dev/null and b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-4.m4s differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-5.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-5.m4s new file mode 100644 index 0000000000..8232dc03b5 Binary files /dev/null and b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-5.m4s differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-init.mp4 b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-init.mp4 new file mode 100644 index 0000000000..464efde3eb Binary files /dev/null and b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-init.mp4 differ diff --git a/packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 b/packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 new file mode 100644 index 0000000000..ad00c97c90 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 @@ -0,0 +1,6 @@ +#EXTM3U +## Generated with https://github.com/google/shaka-packager version -- + +#EXT-X-INDEPENDENT-SEGMENTS + +#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_0.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES diff --git a/packager/app/test/testdata/segmented-ttml-mp4/output.mpd b/packager/app/test/testdata/segmented-ttml-mp4/output.mpd new file mode 100644 index 0000000000..1455948e9a --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-mp4/output.mpd @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 b/packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 new file mode 100644 index 0000000000..a6990bdc74 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 @@ -0,0 +1,17 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:1 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-MAP:URI="bear-english-text-init.mp4" +#EXTINF:1.000, +bear-english-text-1.m4s +#EXTINF:1.000, +bear-english-text-2.m4s +#EXTINF:1.000, +bear-english-text-3.m4s +#EXTINF:1.000, +bear-english-text-4.m4s +#EXTINF:1.000, +bear-english-text-5.m4s +#EXT-X-ENDLIST diff --git a/packager/app/test/testdata/segmented-webvtt-mp4/bear-english-text-init.mp4 b/packager/app/test/testdata/segmented-webvtt-mp4/bear-english-text-init.mp4 index 8d60268521..1ae945d7fb 100644 Binary files a/packager/app/test/testdata/segmented-webvtt-mp4/bear-english-text-init.mp4 and b/packager/app/test/testdata/segmented-webvtt-mp4/bear-english-text-init.mp4 differ diff --git a/packager/app/test/testdata/segmented-webvtt-with-language-override/bear-english-text-init.mp4 b/packager/app/test/testdata/segmented-webvtt-with-language-override/bear-english-text-init.mp4 index a61c68a57e..4cf90547bc 100644 Binary files a/packager/app/test/testdata/segmented-webvtt-with-language-override/bear-english-text-init.mp4 and b/packager/app/test/testdata/segmented-webvtt-with-language-override/bear-english-text-init.mp4 differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-english-text-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-english-text-init.mp4 index 56d2ac6ca3..f0c01ecc4b 100644 Binary files a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-english-text-init.mp4 and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-english-text-init.mp4 differ diff --git a/packager/media/base/buffer_reader.cc b/packager/media/base/buffer_reader.cc index 456c67d6dd..fcc066cf05 100644 --- a/packager/media/base/buffer_reader.cc +++ b/packager/media/base/buffer_reader.cc @@ -62,6 +62,18 @@ bool BufferReader::ReadToString(std::string* str, size_t size) { return true; } +bool BufferReader::ReadCString(std::string* str) { + DCHECK(str); + for (size_t count = 0; pos_ + count < size_; count++) { + if (buf_[pos_ + count] == 0) { + str->assign(buf_ + pos_, buf_ + pos_ + count); + pos_ += count + 1; + return true; + } + } + return false; // EOF +} + bool BufferReader::SkipBytes(size_t num_bytes) { if (!HasBytes(num_bytes)) return false; diff --git a/packager/media/base/buffer_reader.h b/packager/media/base/buffer_reader.h index d4aa735aba..bb3c69c0fa 100644 --- a/packager/media/base/buffer_reader.h +++ b/packager/media/base/buffer_reader.h @@ -56,6 +56,9 @@ class BufferReader { bool ReadToVector(std::vector* t, size_t count) WARN_UNUSED_RESULT; bool ReadToString(std::string* str, size_t size) WARN_UNUSED_RESULT; + /// Reads a null-terminated string. + bool ReadCString(std::string* str) WARN_UNUSED_RESULT; + /// Advance the stream by this many bytes. /// @return false if there are not enough bytes in the buffer, true otherwise. bool SkipBytes(size_t num_bytes) WARN_UNUSED_RESULT; diff --git a/packager/media/base/container_names.cc b/packager/media/base/container_names.cc index d18f35cfd3..4007879332 100644 --- a/packager/media/base/container_names.cc +++ b/packager/media/base/container_names.cc @@ -1745,7 +1745,10 @@ MediaContainerName DetermineContainerFromFormatName( base::EqualsCaseInsensitiveASCII(format_name, "m4s") || base::EqualsCaseInsensitiveASCII(format_name, "m4v") || base::EqualsCaseInsensitiveASCII(format_name, "mov") || - base::EqualsCaseInsensitiveASCII(format_name, "mp4")) { + base::EqualsCaseInsensitiveASCII(format_name, "mp4") || + base::EqualsCaseInsensitiveASCII(format_name, "ttml+mp4") || + base::EqualsCaseInsensitiveASCII(format_name, "webvtt+mp4") || + base::EqualsCaseInsensitiveASCII(format_name, "vtt+mp4")) { return CONTAINER_MOV; } else if (base::EqualsCaseInsensitiveASCII(format_name, "ts") || base::EqualsCaseInsensitiveASCII(format_name, "mpeg2ts")) { diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h index 0b6b29b71c..5fb11fb20d 100644 --- a/packager/media/base/fourccs.h +++ b/packager/media/base/fourccs.h @@ -98,6 +98,7 @@ enum FourCC : uint32_t { FOURCC_mp4v = 0x6d703476, FOURCC_mvex = 0x6d766578, FOURCC_mvhd = 0x6d766864, + FOURCC_nmhd = 0x6e6d6864, FOURCC_pasp = 0x70617370, FOURCC_payl = 0x7061796c, FOURCC_pdin = 0x7064696e, @@ -122,6 +123,7 @@ enum FourCC : uint32_t { FOURCC_stbl = 0x7374626c, FOURCC_stco = 0x7374636f, FOURCC_sthd = 0x73746864, + FOURCC_stpp = 0x73747070, FOURCC_stsc = 0x73747363, FOURCC_stsd = 0x73747364, FOURCC_stss = 0x73747373, diff --git a/packager/media/formats/mp4/box_buffer.h b/packager/media/formats/mp4/box_buffer.h index a9bad80d9d..6bf3efc300 100644 --- a/packager/media/formats/mp4/box_buffer.h +++ b/packager/media/formats/mp4/box_buffer.h @@ -145,6 +145,16 @@ class BoxBuffer { return true; } + bool ReadWriteCString(std::string* str) { + if (reader_) + return reader_->ReadCString(str); + // Cannot contain embedded nulls. + DCHECK_EQ(str->find('\0'), std::string::npos); + writer_->AppendString(*str); + writer_->AppendInt(static_cast('\0')); + return true; + } + bool ReadWriteFourCC(FourCC* fourcc) { if (reader_) return reader_->ReadFourCC(fourcc); diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 7bc205d276..e42df84e74 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -33,6 +33,7 @@ const uint8_t kUnityMatrix[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, const char kVideoHandlerName[] = "VideoHandler"; const char kAudioHandlerName[] = "SoundHandler"; const char kTextHandlerName[] = "TextHandler"; +const char kSubtitleHandlerName[] = "SubtitleHandler"; // Default values for VideoSampleEntry box. const uint32_t kVideoResolution = 0x00480000; // 72 dpi. @@ -106,6 +107,8 @@ TrackType FourCCToTrackType(FourCC fourcc) { return kAudio; case FOURCC_text: return kText; + case FOURCC_subt: + return kSubtitle; default: return kInvalid; } @@ -119,6 +122,8 @@ FourCC TrackTypeToFourCC(TrackType track_type) { return FOURCC_soun; case kText: return FOURCC_text; + case kSubtitle: + return FOURCC_subt; default: return FOURCC_NULL; } @@ -628,6 +633,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) { count = static_cast(audio_entries.size()); break; case kText: + case kSubtitle: count = static_cast(text_entries.size()); break; default: @@ -649,7 +655,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) { } else if (type == kAudio) { RCHECK(reader->ReadAllChildren(&audio_entries)); RCHECK(audio_entries.size() == count); - } else if (type == kText) { + } else if (type == kText || type == kSubtitle) { RCHECK(reader->ReadAllChildren(&text_entries)); RCHECK(text_entries.size() == count); } @@ -661,7 +667,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) { } else if (type == kAudio) { for (uint32_t i = 0; i < count; ++i) RCHECK(buffer->ReadWriteChild(&audio_entries[i])); - } else if (type == kText) { + } else if (type == kText || type == kSubtitle) { for (uint32_t i = 0; i < count; ++i) RCHECK(buffer->ReadWriteChild(&text_entries[i])); } else { @@ -679,7 +685,7 @@ size_t SampleDescription::ComputeSizeInternal() { } else if (type == kAudio) { for (uint32_t i = 0; i < audio_entries.size(); ++i) box_size += audio_entries[i].ComputeSize(); - } else if (type == kText) { + } else if (type == kText || type == kSubtitle) { for (uint32_t i = 0; i < text_entries.size(); ++i) box_size += text_entries[i].ComputeSize(); } @@ -1293,6 +1299,11 @@ bool HandlerReference::ReadWriteInternal(BoxBuffer* buffer) { handler_name.assign(kTextHandlerName, kTextHandlerName + arraysize(kTextHandlerName)); break; + case FOURCC_subt: + handler_name.assign( + kSubtitleHandlerName, + kSubtitleHandlerName + arraysize(kSubtitleHandlerName)); + break; case FOURCC_ID32: break; default: @@ -1322,6 +1333,9 @@ size_t HandlerReference::ComputeSizeInternal() { case FOURCC_text: box_size += sizeof(kTextHandlerName); break; + case FOURCC_subt: + box_size += sizeof(kSubtitleHandlerName); + break; case FOURCC_ID32: break; default: @@ -2000,14 +2014,25 @@ bool TextSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { // TODO(rkuroiwa): Handle the optional MPEG4BitRateBox. RCHECK(buffer->PrepareChildren() && buffer->ReadWriteChild(&config) && buffer->ReadWriteChild(&label)); + } else if (format == FOURCC_stpp) { + // These are marked as "optional"; but they should still have the + // null-terminator, so this should still work. + RCHECK(buffer->ReadWriteCString(&namespace_) && + buffer->ReadWriteCString(&schema_location)); } return true; } size_t TextSampleEntry::ComputeSizeInternal() { // 6 for the (anonymous) reserved bytes for SampleEntry class. - return HeaderSize() + 6 + sizeof(data_reference_index) + - config.ComputeSize() + label.ComputeSize(); + size_t ret = HeaderSize() + 6 + sizeof(data_reference_index); + if (format == FOURCC_wvtt) { + ret += config.ComputeSize() + label.ComputeSize(); + } else if (format == FOURCC_stpp) { + // +2 for the two null terminators for these strings. + ret += namespace_.size() + schema_location.size() + 2; + } + return ret; } MediaHeader::MediaHeader() = default; @@ -2079,6 +2104,21 @@ size_t SoundMediaHeader::ComputeSizeInternal() { return HeaderSize() + sizeof(balance) + sizeof(uint16_t); } +NullMediaHeader::NullMediaHeader() = default; +NullMediaHeader::~NullMediaHeader() = default; + +FourCC NullMediaHeader::BoxType() const { + return FOURCC_nmhd; +} + +bool NullMediaHeader::ReadWriteInternal(BoxBuffer* buffer) { + return ReadWriteHeaderInternal(buffer); +} + +size_t NullMediaHeader::ComputeSizeInternal() { + return HeaderSize(); +} + SubtitleMediaHeader::SubtitleMediaHeader() = default; SubtitleMediaHeader::~SubtitleMediaHeader() = default; @@ -2178,6 +2218,9 @@ bool MediaInformation::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(buffer->ReadWriteChild(&smhd)); break; case kText: + RCHECK(buffer->TryReadWriteChild(&nmhd)); + break; + case kSubtitle: RCHECK(buffer->TryReadWriteChild(&sthd)); break; default: @@ -2198,6 +2241,9 @@ size_t MediaInformation::ComputeSizeInternal() { box_size += smhd.ComputeSize(); break; case kText: + box_size += nmhd.ComputeSize(); + break; + case kSubtitle: box_size += sthd.ComputeSize(); break; default: diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index 5c577b7116..b4494ef178 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -26,6 +26,7 @@ enum TrackType { kAudio, kHint, kText, + kSubtitle, }; class BoxBuffer; @@ -407,6 +408,11 @@ struct TextSampleEntry : Box { // always present. uint16_t data_reference_index = 1u; + // Sub fields for ttml text sample entry. + std::string namespace_; + std::string schema_location; + // Optional MPEG4BitRateBox. + // Sub boxes for wvtt text sample entry. WebVTTConfigurationBox config; WebVTTSourceLabelBox label; @@ -597,6 +603,10 @@ struct SoundMediaHeader : FullBox { uint16_t balance = 0u; }; +struct NullMediaHeader : FullBox { + DECLARE_BOX_METHODS(NullMediaHeader); +}; + struct SubtitleMediaHeader : FullBox { DECLARE_BOX_METHODS(SubtitleMediaHeader); }; @@ -628,6 +638,7 @@ struct MediaInformation : Box { // Exactly one specific meida header shall be present, vmhd, smhd, hmhd, nmhd. VideoMediaHeader vmhd; SoundMediaHeader smhd; + NullMediaHeader nmhd; SubtitleMediaHeader sthd; }; diff --git a/packager/media/formats/mp4/mp4.gyp b/packager/media/formats/mp4/mp4.gyp index 0707fd5aff..604e5979d1 100644 --- a/packager/media/formats/mp4/mp4.gyp +++ b/packager/media/formats/mp4/mp4.gyp @@ -50,6 +50,7 @@ '../../base/media_base.gyp:media_base', '../../codecs/codecs.gyp:codecs', '../../event/media_event.gyp:media_event', + '../../formats/ttml/ttml.gyp:ttml', ], }, { diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 4e4b9c31a8..23ca8273ca 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -24,6 +24,7 @@ #include "packager/media/formats/mp4/box_definitions.h" #include "packager/media/formats/mp4/multi_segment_segmenter.h" #include "packager/media/formats/mp4/single_segment_segmenter.h" +#include "packager/media/formats/ttml/ttml_generator.h" #include "packager/status_macros.h" namespace shaka { @@ -593,6 +594,17 @@ bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info, sample_description.type = kText; sample_description.text_entries.push_back(webvtt); return true; + } else if (text_info->codec_string() == "ttml") { + // Handle TTML. + TextSampleEntry ttml; + ttml.format = FOURCC_stpp; + ttml.namespace_ = ttml::TtmlGenerator::kTtNamespace; + + SampleDescription& sample_description = + trak->media.information.sample_table.description; + sample_description.type = kSubtitle; + sample_description.text_entries.push_back(ttml); + return true; } NOTIMPLEMENTED() << text_info->codec_string() << " handling not implemented yet."; diff --git a/packager/media/formats/ttml/ttml.gyp b/packager/media/formats/ttml/ttml.gyp index 7a9b02bcb1..224de428a9 100644 --- a/packager/media/formats/ttml/ttml.gyp +++ b/packager/media/formats/ttml/ttml.gyp @@ -17,6 +17,8 @@ 'ttml_generator.h', 'ttml_muxer.cc', 'ttml_muxer.h', + 'ttml_to_mp4_handler.cc', + 'ttml_to_mp4_handler.h', ], 'dependencies': [ '../../base/media_base.gyp:media_base', diff --git a/packager/media/formats/ttml/ttml_generator.cc b/packager/media/formats/ttml/ttml_generator.cc index 61d8b92085..cc4d228cc1 100644 --- a/packager/media/formats/ttml/ttml_generator.cc +++ b/packager/media/formats/ttml/ttml_generator.cc @@ -38,6 +38,8 @@ std::string ToTtmlSize(const TextNumber& x, const TextNumber& y) { } // namespace +const char* TtmlGenerator::kTtNamespace = "http://www.w3.org/ns/ttml"; + TtmlGenerator::TtmlGenerator() {} TtmlGenerator::~TtmlGenerator() {} @@ -60,7 +62,7 @@ void TtmlGenerator::Reset() { bool TtmlGenerator::Dump(std::string* result) const { xml::XmlNode root("tt"); - RCHECK(root.SetStringAttribute("xmlns", "http://www.w3.org/ns/ttml")); + RCHECK(root.SetStringAttribute("xmlns", kTtNamespace)); RCHECK(root.SetStringAttribute("xmlns:tts", "http://www.w3.org/ns/ttml#styling")); diff --git a/packager/media/formats/ttml/ttml_generator.h b/packager/media/formats/ttml/ttml_generator.h index 3ac003a084..817339f01d 100644 --- a/packager/media/formats/ttml/ttml_generator.h +++ b/packager/media/formats/ttml/ttml_generator.h @@ -24,6 +24,8 @@ class TtmlGenerator { explicit TtmlGenerator(); ~TtmlGenerator(); + static const char* kTtNamespace; + void Initialize(const std::map& regions, const std::string& language, uint32_t time_scale); diff --git a/packager/media/formats/ttml/ttml_to_mp4_handler.cc b/packager/media/formats/ttml/ttml_to_mp4_handler.cc new file mode 100644 index 0000000000..80d315bb0b --- /dev/null +++ b/packager/media/formats/ttml/ttml_to_mp4_handler.cc @@ -0,0 +1,123 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/ttml/ttml_to_mp4_handler.h" + +#include "packager/status_macros.h" + +namespace shaka { +namespace media { +namespace ttml { + +namespace { + +size_t kTrackId = 0; + +std::shared_ptr CreateMediaSample(const std::string& data, + int64_t start_time, + int64_t duration) { + DCHECK_GE(start_time, 0); + DCHECK_GT(duration, 0); + + const bool kIsKeyFrame = true; + + std::shared_ptr sample = MediaSample::CopyFrom( + reinterpret_cast(data.data()), data.size(), kIsKeyFrame); + sample->set_pts(start_time); + sample->set_dts(start_time); + sample->set_duration(duration); + + return sample; +} + +} // namespace + +Status TtmlToMp4Handler::InitializeInternal() { + return Status::OK; +} + +Status TtmlToMp4Handler::Process(std::unique_ptr stream_data) { + switch (stream_data->stream_data_type) { + case StreamDataType::kStreamInfo: + return OnStreamInfo(std::move(stream_data)); + case StreamDataType::kCueEvent: + return OnCueEvent(std::move(stream_data)); + case StreamDataType::kSegmentInfo: + return OnSegmentInfo(std::move(stream_data)); + case StreamDataType::kTextSample: + return OnTextSample(std::move(stream_data)); + default: + return Status(error::INTERNAL_ERROR, + "Invalid stream data type (" + + StreamDataTypeToString(stream_data->stream_data_type) + + ") for this TtmlToMp4 handler"); + } +} + +Status TtmlToMp4Handler::OnStreamInfo(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->stream_info); + + auto clone = stream_data->stream_info->Clone(); + clone->set_codec(kCodecTtml); + clone->set_codec_string("ttml"); + + if (clone->stream_type() != kStreamText) + return Status(error::MUXER_FAILURE, "Incorrect stream type"); + auto* text_stream = static_cast(clone.get()); + generator_.Initialize(text_stream->regions(), text_stream->language(), + text_stream->time_scale()); + + return Dispatch( + StreamData::FromStreamInfo(stream_data->stream_index, std::move(clone))); +} + +Status TtmlToMp4Handler::OnCueEvent(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->cue_event); + return Dispatch(std::move(stream_data)); +} + +Status TtmlToMp4Handler::OnSegmentInfo( + std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->segment_info); + + const auto& segment = stream_data->segment_info; + + std::string data; + if (!generator_.Dump(&data)) + return Status(error::INTERNAL_ERROR, "Error generating XML"); + generator_.Reset(); + + RETURN_IF_ERROR(DispatchMediaSample( + kTrackId, + CreateMediaSample(data, segment->start_timestamp, segment->duration))); + + return Dispatch(std::move(stream_data)); +} + +Status TtmlToMp4Handler::OnTextSample(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->text_sample); + + auto& sample = stream_data->text_sample; + + // Ignore empty samples. This will create gaps, but we will handle that + // later. + if (sample->body().is_empty()) { + return Status::OK; + } + + // Add the new text sample to the cache of samples that belong in the + // current segment. + generator_.AddSample(*sample); + return Status::OK; +} + +} // namespace ttml +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/ttml/ttml_to_mp4_handler.h b/packager/media/formats/ttml/ttml_to_mp4_handler.h new file mode 100644 index 0000000000..48798a64f9 --- /dev/null +++ b/packager/media/formats/ttml/ttml_to_mp4_handler.h @@ -0,0 +1,43 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_ +#define PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_ + +#include + +#include "packager/media/base/media_handler.h" +#include "packager/media/formats/ttml/ttml_generator.h" + +namespace shaka { +namespace media { +namespace ttml { + +// A media handler that should come after the cue aligner and segmenter and +// should come before the muxer. This handler is to convert text samples +// to media samples so that they can be sent to a mp4 muxer. +class TtmlToMp4Handler : public MediaHandler { + public: + TtmlToMp4Handler() = default; + ~TtmlToMp4Handler() override = default; + + private: + Status InitializeInternal() override; + Status Process(std::unique_ptr stream_data) override; + + Status OnStreamInfo(std::unique_ptr stream_data); + Status OnCueEvent(std::unique_ptr stream_data); + Status OnSegmentInfo(std::unique_ptr stream_data); + Status OnTextSample(std::unique_ptr stream_data); + + TtmlGenerator generator_; +}; + +} // namespace ttml +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_ diff --git a/packager/packager.cc b/packager/packager.cc index edb7ef18d5..9e9a1b6cb6 100644 --- a/packager/packager.cc +++ b/packager/packager.cc @@ -40,6 +40,7 @@ #include "packager/media/demuxer/demuxer.h" #include "packager/media/event/muxer_listener_factory.h" #include "packager/media/event/vod_media_info_dump_muxer_listener.h" +#include "packager/media/formats/ttml/ttml_to_mp4_handler.h" #include "packager/media/formats/webvtt/text_padder.h" #include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h" #include "packager/media/replicator/replicator.h" @@ -161,6 +162,27 @@ MediaContainerName GetOutputFormat(const StreamDescriptor& descriptor) { return CONTAINER_UNKNOWN; } +MediaContainerName GetTextOutputCodec(const StreamDescriptor& descriptor) { + const auto output_container = GetOutputFormat(descriptor); + if (output_container != CONTAINER_MOV) + return output_container; + + const auto input_container = DetermineContainerFromFileName(descriptor.input); + if (base::EqualsCaseInsensitiveASCII(descriptor.output_format, "vtt+mp4") || + base::EqualsCaseInsensitiveASCII(descriptor.output_format, + "webvtt+mp4")) { + return CONTAINER_WEBVTT; + } else if (!base::EqualsCaseInsensitiveASCII(descriptor.output_format, + "ttml+mp4") && + input_container == CONTAINER_WEBVTT) { + // With WebVTT input, default to WebVTT output. + return CONTAINER_WEBVTT; + } else { + // Otherwise default to TTML since it has more features. + return CONTAINER_TTML; + } +} + Status ValidateStreamDescriptor(bool dump_stream_info, const StreamDescriptor& stream) { if (stream.input.empty()) { @@ -640,27 +662,32 @@ Status CreateAudioVideoJobs( muxer_listener_factory->CreateListener(ToMuxerListenerData(stream)); muxer->SetMuxerListener(std::move(muxer_listener)); + std::vector> handlers; + handlers.emplace_back(replicator); + // Trick play is optional. - std::shared_ptr trick_play = - stream.trick_play_factor - ? std::make_shared(stream.trick_play_factor) - : nullptr; + if (stream.trick_play_factor) { + handlers.emplace_back( + std::make_shared(stream.trick_play_factor)); + } - std::shared_ptr chunker = - is_text && (!stream.segment_template.empty() || - output_format == CONTAINER_MOV) - ? CreateTextChunker(packaging_params.chunking_params) - : nullptr; + if (is_text && + (!stream.segment_template.empty() || output_format == CONTAINER_MOV)) { + handlers.emplace_back( + CreateTextChunker(packaging_params.chunking_params)); + } - // TODO(modmaker): Move to MOV muxer? - const auto input_container = DetermineContainerFromFileName(stream.input); - auto text_to_mp4 = - input_container == CONTAINER_WEBVTT && output_format == CONTAINER_MOV - ? std::make_shared() - : nullptr; + if (is_text && output_format == CONTAINER_MOV) { + const auto output_codec = GetTextOutputCodec(stream); + if (output_codec == CONTAINER_WEBVTT) { + handlers.emplace_back(std::make_shared()); + } else if (output_codec == CONTAINER_TTML) { + handlers.emplace_back(std::make_shared()); + } + } - RETURN_IF_ERROR(MediaHandler::Chain( - {replicator, trick_play, chunker, text_to_mp4, muxer})); + handlers.emplace_back(muxer); + RETURN_IF_ERROR(MediaHandler::Chain(handlers)); } return Status::OK;