From a93eeca5db4f506112430bb3bb2ce95421c8b55a Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Tue, 13 Oct 2020 14:43:18 -0700 Subject: [PATCH] Add TTML-in-MP4 output support. This changes the default MP4 output to use TTML and adds a way to choose which one is used. This is done with 'format=ttml+mp4' or 'format=vtt+mp4'. This also fixes the boxes output in WebVTT in MP4. Change-Id: Ieaa7fc44fbf4dc020a5bb70cfa3578ec10e088ce --- packager/app/test/packager_test.py | 12 +- .../bear-english-text-init.mp4 | Bin 728 -> 728 bytes .../bear-english-text-1.m4s | Bin 0 -> 500 bytes .../bear-english-text-2.m4s | Bin 0 -> 514 bytes .../bear-english-text-3.m4s | Bin 0 -> 514 bytes .../bear-english-text-4.m4s | Bin 0 -> 514 bytes .../bear-english-text-5.m4s | Bin 0 -> 514 bytes .../bear-english-text-init.mp4 | Bin 0 -> 725 bytes .../testdata/segmented-ttml-mp4/output.m3u8 | 6 + .../testdata/segmented-ttml-mp4/output.mpd | 16 +++ .../testdata/segmented-ttml-mp4/stream_0.m3u8 | 17 +++ .../bear-english-text-init.mp4 | Bin 728 -> 728 bytes .../bear-english-text-init.mp4 | Bin 728 -> 728 bytes .../bear-english-text-init.mp4 | Bin 728 -> 728 bytes packager/media/base/buffer_reader.cc | 12 ++ packager/media/base/buffer_reader.h | 3 + packager/media/base/container_names.cc | 5 +- packager/media/base/fourccs.h | 2 + packager/media/formats/mp4/box_buffer.h | 10 ++ packager/media/formats/mp4/box_definitions.cc | 56 +++++++- packager/media/formats/mp4/box_definitions.h | 11 ++ packager/media/formats/mp4/mp4.gyp | 1 + packager/media/formats/mp4/mp4_muxer.cc | 12 ++ packager/media/formats/ttml/ttml.gyp | 2 + packager/media/formats/ttml/ttml_generator.cc | 4 +- packager/media/formats/ttml/ttml_generator.h | 2 + .../media/formats/ttml/ttml_to_mp4_handler.cc | 123 ++++++++++++++++++ .../media/formats/ttml/ttml_to_mp4_handler.h | 43 ++++++ packager/packager.cc | 61 ++++++--- 29 files changed, 373 insertions(+), 25 deletions(-) create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-1.m4s create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-2.m4s create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-3.m4s create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-4.m4s create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-5.m4s create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-init.mp4 create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/output.mpd create mode 100644 packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 create mode 100644 packager/media/formats/ttml/ttml_to_mp4_handler.cc create mode 100644 packager/media/formats/ttml/ttml_to_mp4_handler.h diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py index 779f5eceb7..4605bbd658 100755 --- a/packager/app/test/packager_test.py +++ b/packager/app/test/packager_test.py @@ -243,7 +243,9 @@ def _UpdateMpdTimes(mpd_filepath): def GetExtension(input_file_path, output_format): - if output_format: + if output_format in {'vtt+mp4', 'ttml+mp4'}: + return 'mp4' + elif output_format: return output_format # Otherwise use the same extension as the input. ext = os.path.splitext(input_file_path)[1] @@ -857,6 +859,14 @@ class PackagerFunctionalTest(PackagerAppTest): self.assertPackageSuccess(streams, flags) self._CheckTestResults('segmented-ttml-text') + def testSegmentedTtmlMp4(self): + streams = self._GetStreams(['text'], test_files=['bear-english.vtt'], + output_format='ttml+mp4', segmented=True) + flags = self._GetFlags(output_hls=True, output_dash=True) + + self.assertPackageSuccess(streams, flags) + self._CheckTestResults('segmented-ttml-mp4') + def testMp4TrailingMoov(self): self.assertPackageSuccess( self._GetStreams(['audio', 'video'], diff --git a/packager/app/test/testdata/hls-only-dash-only-captions/bear-english-text-init.mp4 b/packager/app/test/testdata/hls-only-dash-only-captions/bear-english-text-init.mp4 index 8d60268521f6ec0efd5531645467682400a388b9..1ae945d7fb6f10d6679562d21ea863a0bfe3422f 100644 GIT binary patch delta 14 Vcmcb?dV_Vt3?`<$+{v?;>;Nm>1y29~ delta 14 Vcmcb?dV_Vt3?`=HlF74}>;Nnt1zP|B diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-1.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-1.m4s new file mode 100644 index 0000000000000000000000000000000000000000..9cba9773d22091518761c02a7089f6074d08dd94 GIT binary patch literal 500 zcmaKoJx;_h5QV+aZAHNWEQ3U|#92WRMM)MKDoQ9Qy|MEn#XquVH@mms5FCzUFiwI* z#VD~pKfla8hY)fb;Ow+}+!Cxl*bHAC_fmwe)=fZp6MA`y=U6L3F285|-LJl)_Bo4E zTiYOB>86v555FO=;6>CaME!!~$WdSVtuo|TSI|h9aaymz9}LpsB?ID zeCVwy*)~ZTRi?JGH*Lw@-(K!BwyV|!07at-CF=lO&iOcw$@q|1-*OW;fF9WF$N~Oy z-Qb@Zu=(W92NB1xYE5a;DIxhpdn?ZoP?Y^~CS(@}?n literal 0 HcmV?d00001 diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-2.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-2.m4s new file mode 100644 index 0000000000000000000000000000000000000000..473e7de4bada9c88fbf6d6d50539590200dbc75f GIT binary patch literal 514 zcmaJ-OHRWu5OrbKRTmtz zl94}eo}Xskm=JOo;MCb}wImoixHWDYb6JF@v{gX*A!vDw?-&z8u7AdT$tT}2{~A&v zt#cK^nXMW*QsfHnSMWvTDMbE?B#5!rrQT$VIK~Vr2_uYs$PM^?i}zVe48k`n3rIId zYv@7wpk14>C0{VA+R{ng)*1VF-_F-;lTT9sio!NztO4i}A^N`O{Udk27Htp!*052L z0Q{%A#h)@*d{8H*h;3Ltr8I3+A;o}pPM#t_QtHEq5O=Xb61t*P8SA_X${!S?J5_76 zzE~u2;RM47@{d{Zr%0i#1k@3pjjmB%suq-I`HQ0WfpbpxHlEU TPeph3BhH3WF`7(q^nCgY+cSf_ literal 0 HcmV?d00001 diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-3.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-3.m4s new file mode 100644 index 0000000000000000000000000000000000000000..f015e810cf64a30cefc9a611533087aec58b4852 GIT binary patch literal 514 zcmaJ-K~BRk5OnDQsiz)zfMt-lP`eeCsG=klmx=@81P@4KC$Wg*$lf;XFL(kQ)!@*0k#t!O-4raGQ|J%vZT7Jkobw%TxG|F(G97O~~cfkcNEn z8Mu#ukjB~q;oKCJ95quspTT8;rx5rH5+TOBzx5_N#3ANTNEl$>;|koN!F<$`8sUqP z8N}O@sp(NUuWggC6<;!{n%qj=lnHx#-7Pk3o6ceYio(VxtO95wA-b;P-95Li6pa@E zre=d80{BmLjW1PW@m?L(S!l!38KrThGAVkrxAGhSl2RWBgt!e2BHw1YN?7ZZSMI17 z-K$cg_2n`O3n%DDkblgYKLiSGMW9abSnCqyxoSW;mOd$Z>pAE2V8T~g^oAufx2W~I Xq8<)K!DG>me#B@f7K6zcM^9%zosNT* literal 0 HcmV?d00001 diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-4.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-4.m4s new file mode 100644 index 0000000000000000000000000000000000000000..4253cd74b70017e5407f541b68de4b1b81a3e556 GIT binary patch literal 514 zcmaJ-K~BRk5Ojf4x%9vTEQ7>_+O4QW6(y;-R2&c|ct9FEiA5Yo_O@w%!CN@-B%Z-K z2^?A3@$6*0J8MG7jfYcf+SQU^Xm8iJO~_^DtK1YG={v9GG5p7v5OV%a$mQ3NhJ5lF zxQ~I5#@Yhm%oLRzH5Yh3gUbR>A@FA;LX3BR>rJ+ZL(HI%Fu=aY6}Wwa`KTo|!WSbm zh&M-5(}Qwe+a_U4zF<@}xs|#p6ZZDHov+y@oyGtZg^f>G1<*!9bX~{0dv0AR8ZQ7$ z%?3pT@So}$U#iC9y*jG1(1xW`O5;jpQuJtN8}hi$xR`PSB4a|Ckkj2o&0iKpo++)+New)qrv=eNyz+bI$4Bgs-&d4NGQjUh8*7 XJ?x8u$D$qmh|y3i29q(4o=$%NU+IHx literal 0 HcmV?d00001 diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-5.m4s b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-5.m4s new file mode 100644 index 0000000000000000000000000000000000000000..8232dc03b592b701225fb394fec1b8b6ae108998 GIT binary patch literal 514 zcmaJ-OHRWu5OqOpx}ggWU>PJf)NVy3swhdtrec9u!2#0PNi5K!kH;5IchHOeg>BXo#TAz^@hk1KHd2J=x%YJ@LF zW)N?VrltqwytYlkmVCjeYH}-eQzq=~bvs|PO*)MMC<+^&unM4!gy_1CclX@7QZ!xw zn3@fW2;e`}HU3nM#d~#BXQ2&Cr>K6y$PqZ=nYF|ZeHtm YMLq0`g2$pA{fN;}EC!P?j-F0`03t+#M*si- literal 0 HcmV?d00001 diff --git a/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-init.mp4 b/packager/app/test/testdata/segmented-ttml-mp4/bear-english-text-init.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..464efde3eb0e2cdccb4bfa203d5e1349f2bcec87 GIT binary patch literal 725 zcmZuvJx{|h5OoU@ONUB;SQs*t3AF*rP(f97K!|P#1QSAXV>gKs*O7f~_!`(i{2KTL zEJ*wnegmayyOHdR>ii#N&BF{dA=>Q&2L1j zRy{qx7+fRj5)uF;`p9+t5Q53DbwDH9OsE%8pDWS@>i3%60G(q3uAzs% z9zoFvPr8A~x?h{SH@=s{ii?#6-v859*%B@tKjEt{+pt%jgq{qce9R^C5Mq8t z$1FxW7@r~>k1#h_mbaPWhMO>7RGX;S$?=PZVT|}#gSLM;*LO7}31Sl!GfNz0nx?I^ z-IB_8$JzxDrjd}k2GAB|FXS38&@48PKg?JLaE~WTT1`VX0@ONU%I`4O1uwMqSL6;T OtD81s16O_#k^BNE8Fcvo literal 0 HcmV?d00001 diff --git a/packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 b/packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 new file mode 100644 index 0000000000..ad00c97c90 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-mp4/output.m3u8 @@ -0,0 +1,6 @@ +#EXTM3U +## Generated with https://github.com/google/shaka-packager version -- + +#EXT-X-INDEPENDENT-SEGMENTS + +#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_0.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES diff --git a/packager/app/test/testdata/segmented-ttml-mp4/output.mpd b/packager/app/test/testdata/segmented-ttml-mp4/output.mpd new file mode 100644 index 0000000000..1455948e9a --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-mp4/output.mpd @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 b/packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 new file mode 100644 index 0000000000..a6990bdc74 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-mp4/stream_0.m3u8 @@ -0,0 +1,17 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:1 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-MAP:URI="bear-english-text-init.mp4" +#EXTINF:1.000, +bear-english-text-1.m4s +#EXTINF:1.000, +bear-english-text-2.m4s +#EXTINF:1.000, +bear-english-text-3.m4s +#EXTINF:1.000, +bear-english-text-4.m4s +#EXTINF:1.000, +bear-english-text-5.m4s +#EXT-X-ENDLIST diff --git a/packager/app/test/testdata/segmented-webvtt-mp4/bear-english-text-init.mp4 b/packager/app/test/testdata/segmented-webvtt-mp4/bear-english-text-init.mp4 index 8d60268521f6ec0efd5531645467682400a388b9..1ae945d7fb6f10d6679562d21ea863a0bfe3422f 100644 GIT binary patch delta 14 Vcmcb?dV_Vt3?`<$+{v?;>;Nm>1y29~ delta 14 Vcmcb?dV_Vt3?`=HlF74}>;Nnt1zP|B diff --git a/packager/app/test/testdata/segmented-webvtt-with-language-override/bear-english-text-init.mp4 b/packager/app/test/testdata/segmented-webvtt-with-language-override/bear-english-text-init.mp4 index a61c68a57e2ce04bc38bc50af6508ab122c4c970..4cf90547bc6c2291b61fead9911fcb7a0cb8dfbd 100644 GIT binary patch delta 14 Vcmcb?dV_Vt3?`<$+{v?;>;Nm>1y29~ delta 14 Vcmcb?dV_Vt3?`=HlF74}>;Nnt1zP|B diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-english-text-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-english-text-init.mp4 index 56d2ac6ca34f99c504e42dc0fbe1d2fadf7cfba5..f0c01ecc4bc12715e991beb2f902b0700b47a73e 100644 GIT binary patch delta 14 Vcmcb?dV_Vt3?`<$+{v?;>;Nm>1y29~ delta 14 Vcmcb?dV_Vt3?`=HlF74}>;Nnt1zP|B diff --git a/packager/media/base/buffer_reader.cc b/packager/media/base/buffer_reader.cc index 456c67d6dd..fcc066cf05 100644 --- a/packager/media/base/buffer_reader.cc +++ b/packager/media/base/buffer_reader.cc @@ -62,6 +62,18 @@ bool BufferReader::ReadToString(std::string* str, size_t size) { return true; } +bool BufferReader::ReadCString(std::string* str) { + DCHECK(str); + for (size_t count = 0; pos_ + count < size_; count++) { + if (buf_[pos_ + count] == 0) { + str->assign(buf_ + pos_, buf_ + pos_ + count); + pos_ += count + 1; + return true; + } + } + return false; // EOF +} + bool BufferReader::SkipBytes(size_t num_bytes) { if (!HasBytes(num_bytes)) return false; diff --git a/packager/media/base/buffer_reader.h b/packager/media/base/buffer_reader.h index d4aa735aba..bb3c69c0fa 100644 --- a/packager/media/base/buffer_reader.h +++ b/packager/media/base/buffer_reader.h @@ -56,6 +56,9 @@ class BufferReader { bool ReadToVector(std::vector* t, size_t count) WARN_UNUSED_RESULT; bool ReadToString(std::string* str, size_t size) WARN_UNUSED_RESULT; + /// Reads a null-terminated string. + bool ReadCString(std::string* str) WARN_UNUSED_RESULT; + /// Advance the stream by this many bytes. /// @return false if there are not enough bytes in the buffer, true otherwise. bool SkipBytes(size_t num_bytes) WARN_UNUSED_RESULT; diff --git a/packager/media/base/container_names.cc b/packager/media/base/container_names.cc index d18f35cfd3..4007879332 100644 --- a/packager/media/base/container_names.cc +++ b/packager/media/base/container_names.cc @@ -1745,7 +1745,10 @@ MediaContainerName DetermineContainerFromFormatName( base::EqualsCaseInsensitiveASCII(format_name, "m4s") || base::EqualsCaseInsensitiveASCII(format_name, "m4v") || base::EqualsCaseInsensitiveASCII(format_name, "mov") || - base::EqualsCaseInsensitiveASCII(format_name, "mp4")) { + base::EqualsCaseInsensitiveASCII(format_name, "mp4") || + base::EqualsCaseInsensitiveASCII(format_name, "ttml+mp4") || + base::EqualsCaseInsensitiveASCII(format_name, "webvtt+mp4") || + base::EqualsCaseInsensitiveASCII(format_name, "vtt+mp4")) { return CONTAINER_MOV; } else if (base::EqualsCaseInsensitiveASCII(format_name, "ts") || base::EqualsCaseInsensitiveASCII(format_name, "mpeg2ts")) { diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h index 0b6b29b71c..5fb11fb20d 100644 --- a/packager/media/base/fourccs.h +++ b/packager/media/base/fourccs.h @@ -98,6 +98,7 @@ enum FourCC : uint32_t { FOURCC_mp4v = 0x6d703476, FOURCC_mvex = 0x6d766578, FOURCC_mvhd = 0x6d766864, + FOURCC_nmhd = 0x6e6d6864, FOURCC_pasp = 0x70617370, FOURCC_payl = 0x7061796c, FOURCC_pdin = 0x7064696e, @@ -122,6 +123,7 @@ enum FourCC : uint32_t { FOURCC_stbl = 0x7374626c, FOURCC_stco = 0x7374636f, FOURCC_sthd = 0x73746864, + FOURCC_stpp = 0x73747070, FOURCC_stsc = 0x73747363, FOURCC_stsd = 0x73747364, FOURCC_stss = 0x73747373, diff --git a/packager/media/formats/mp4/box_buffer.h b/packager/media/formats/mp4/box_buffer.h index a9bad80d9d..6bf3efc300 100644 --- a/packager/media/formats/mp4/box_buffer.h +++ b/packager/media/formats/mp4/box_buffer.h @@ -145,6 +145,16 @@ class BoxBuffer { return true; } + bool ReadWriteCString(std::string* str) { + if (reader_) + return reader_->ReadCString(str); + // Cannot contain embedded nulls. + DCHECK_EQ(str->find('\0'), std::string::npos); + writer_->AppendString(*str); + writer_->AppendInt(static_cast('\0')); + return true; + } + bool ReadWriteFourCC(FourCC* fourcc) { if (reader_) return reader_->ReadFourCC(fourcc); diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 7bc205d276..e42df84e74 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -33,6 +33,7 @@ const uint8_t kUnityMatrix[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, const char kVideoHandlerName[] = "VideoHandler"; const char kAudioHandlerName[] = "SoundHandler"; const char kTextHandlerName[] = "TextHandler"; +const char kSubtitleHandlerName[] = "SubtitleHandler"; // Default values for VideoSampleEntry box. const uint32_t kVideoResolution = 0x00480000; // 72 dpi. @@ -106,6 +107,8 @@ TrackType FourCCToTrackType(FourCC fourcc) { return kAudio; case FOURCC_text: return kText; + case FOURCC_subt: + return kSubtitle; default: return kInvalid; } @@ -119,6 +122,8 @@ FourCC TrackTypeToFourCC(TrackType track_type) { return FOURCC_soun; case kText: return FOURCC_text; + case kSubtitle: + return FOURCC_subt; default: return FOURCC_NULL; } @@ -628,6 +633,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) { count = static_cast(audio_entries.size()); break; case kText: + case kSubtitle: count = static_cast(text_entries.size()); break; default: @@ -649,7 +655,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) { } else if (type == kAudio) { RCHECK(reader->ReadAllChildren(&audio_entries)); RCHECK(audio_entries.size() == count); - } else if (type == kText) { + } else if (type == kText || type == kSubtitle) { RCHECK(reader->ReadAllChildren(&text_entries)); RCHECK(text_entries.size() == count); } @@ -661,7 +667,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) { } else if (type == kAudio) { for (uint32_t i = 0; i < count; ++i) RCHECK(buffer->ReadWriteChild(&audio_entries[i])); - } else if (type == kText) { + } else if (type == kText || type == kSubtitle) { for (uint32_t i = 0; i < count; ++i) RCHECK(buffer->ReadWriteChild(&text_entries[i])); } else { @@ -679,7 +685,7 @@ size_t SampleDescription::ComputeSizeInternal() { } else if (type == kAudio) { for (uint32_t i = 0; i < audio_entries.size(); ++i) box_size += audio_entries[i].ComputeSize(); - } else if (type == kText) { + } else if (type == kText || type == kSubtitle) { for (uint32_t i = 0; i < text_entries.size(); ++i) box_size += text_entries[i].ComputeSize(); } @@ -1293,6 +1299,11 @@ bool HandlerReference::ReadWriteInternal(BoxBuffer* buffer) { handler_name.assign(kTextHandlerName, kTextHandlerName + arraysize(kTextHandlerName)); break; + case FOURCC_subt: + handler_name.assign( + kSubtitleHandlerName, + kSubtitleHandlerName + arraysize(kSubtitleHandlerName)); + break; case FOURCC_ID32: break; default: @@ -1322,6 +1333,9 @@ size_t HandlerReference::ComputeSizeInternal() { case FOURCC_text: box_size += sizeof(kTextHandlerName); break; + case FOURCC_subt: + box_size += sizeof(kSubtitleHandlerName); + break; case FOURCC_ID32: break; default: @@ -2000,14 +2014,25 @@ bool TextSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { // TODO(rkuroiwa): Handle the optional MPEG4BitRateBox. RCHECK(buffer->PrepareChildren() && buffer->ReadWriteChild(&config) && buffer->ReadWriteChild(&label)); + } else if (format == FOURCC_stpp) { + // These are marked as "optional"; but they should still have the + // null-terminator, so this should still work. + RCHECK(buffer->ReadWriteCString(&namespace_) && + buffer->ReadWriteCString(&schema_location)); } return true; } size_t TextSampleEntry::ComputeSizeInternal() { // 6 for the (anonymous) reserved bytes for SampleEntry class. - return HeaderSize() + 6 + sizeof(data_reference_index) + - config.ComputeSize() + label.ComputeSize(); + size_t ret = HeaderSize() + 6 + sizeof(data_reference_index); + if (format == FOURCC_wvtt) { + ret += config.ComputeSize() + label.ComputeSize(); + } else if (format == FOURCC_stpp) { + // +2 for the two null terminators for these strings. + ret += namespace_.size() + schema_location.size() + 2; + } + return ret; } MediaHeader::MediaHeader() = default; @@ -2079,6 +2104,21 @@ size_t SoundMediaHeader::ComputeSizeInternal() { return HeaderSize() + sizeof(balance) + sizeof(uint16_t); } +NullMediaHeader::NullMediaHeader() = default; +NullMediaHeader::~NullMediaHeader() = default; + +FourCC NullMediaHeader::BoxType() const { + return FOURCC_nmhd; +} + +bool NullMediaHeader::ReadWriteInternal(BoxBuffer* buffer) { + return ReadWriteHeaderInternal(buffer); +} + +size_t NullMediaHeader::ComputeSizeInternal() { + return HeaderSize(); +} + SubtitleMediaHeader::SubtitleMediaHeader() = default; SubtitleMediaHeader::~SubtitleMediaHeader() = default; @@ -2178,6 +2218,9 @@ bool MediaInformation::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(buffer->ReadWriteChild(&smhd)); break; case kText: + RCHECK(buffer->TryReadWriteChild(&nmhd)); + break; + case kSubtitle: RCHECK(buffer->TryReadWriteChild(&sthd)); break; default: @@ -2198,6 +2241,9 @@ size_t MediaInformation::ComputeSizeInternal() { box_size += smhd.ComputeSize(); break; case kText: + box_size += nmhd.ComputeSize(); + break; + case kSubtitle: box_size += sthd.ComputeSize(); break; default: diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index 5c577b7116..b4494ef178 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -26,6 +26,7 @@ enum TrackType { kAudio, kHint, kText, + kSubtitle, }; class BoxBuffer; @@ -407,6 +408,11 @@ struct TextSampleEntry : Box { // always present. uint16_t data_reference_index = 1u; + // Sub fields for ttml text sample entry. + std::string namespace_; + std::string schema_location; + // Optional MPEG4BitRateBox. + // Sub boxes for wvtt text sample entry. WebVTTConfigurationBox config; WebVTTSourceLabelBox label; @@ -597,6 +603,10 @@ struct SoundMediaHeader : FullBox { uint16_t balance = 0u; }; +struct NullMediaHeader : FullBox { + DECLARE_BOX_METHODS(NullMediaHeader); +}; + struct SubtitleMediaHeader : FullBox { DECLARE_BOX_METHODS(SubtitleMediaHeader); }; @@ -628,6 +638,7 @@ struct MediaInformation : Box { // Exactly one specific meida header shall be present, vmhd, smhd, hmhd, nmhd. VideoMediaHeader vmhd; SoundMediaHeader smhd; + NullMediaHeader nmhd; SubtitleMediaHeader sthd; }; diff --git a/packager/media/formats/mp4/mp4.gyp b/packager/media/formats/mp4/mp4.gyp index 0707fd5aff..604e5979d1 100644 --- a/packager/media/formats/mp4/mp4.gyp +++ b/packager/media/formats/mp4/mp4.gyp @@ -50,6 +50,7 @@ '../../base/media_base.gyp:media_base', '../../codecs/codecs.gyp:codecs', '../../event/media_event.gyp:media_event', + '../../formats/ttml/ttml.gyp:ttml', ], }, { diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 4e4b9c31a8..23ca8273ca 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -24,6 +24,7 @@ #include "packager/media/formats/mp4/box_definitions.h" #include "packager/media/formats/mp4/multi_segment_segmenter.h" #include "packager/media/formats/mp4/single_segment_segmenter.h" +#include "packager/media/formats/ttml/ttml_generator.h" #include "packager/status_macros.h" namespace shaka { @@ -593,6 +594,17 @@ bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info, sample_description.type = kText; sample_description.text_entries.push_back(webvtt); return true; + } else if (text_info->codec_string() == "ttml") { + // Handle TTML. + TextSampleEntry ttml; + ttml.format = FOURCC_stpp; + ttml.namespace_ = ttml::TtmlGenerator::kTtNamespace; + + SampleDescription& sample_description = + trak->media.information.sample_table.description; + sample_description.type = kSubtitle; + sample_description.text_entries.push_back(ttml); + return true; } NOTIMPLEMENTED() << text_info->codec_string() << " handling not implemented yet."; diff --git a/packager/media/formats/ttml/ttml.gyp b/packager/media/formats/ttml/ttml.gyp index 7a9b02bcb1..224de428a9 100644 --- a/packager/media/formats/ttml/ttml.gyp +++ b/packager/media/formats/ttml/ttml.gyp @@ -17,6 +17,8 @@ 'ttml_generator.h', 'ttml_muxer.cc', 'ttml_muxer.h', + 'ttml_to_mp4_handler.cc', + 'ttml_to_mp4_handler.h', ], 'dependencies': [ '../../base/media_base.gyp:media_base', diff --git a/packager/media/formats/ttml/ttml_generator.cc b/packager/media/formats/ttml/ttml_generator.cc index 61d8b92085..cc4d228cc1 100644 --- a/packager/media/formats/ttml/ttml_generator.cc +++ b/packager/media/formats/ttml/ttml_generator.cc @@ -38,6 +38,8 @@ std::string ToTtmlSize(const TextNumber& x, const TextNumber& y) { } // namespace +const char* TtmlGenerator::kTtNamespace = "http://www.w3.org/ns/ttml"; + TtmlGenerator::TtmlGenerator() {} TtmlGenerator::~TtmlGenerator() {} @@ -60,7 +62,7 @@ void TtmlGenerator::Reset() { bool TtmlGenerator::Dump(std::string* result) const { xml::XmlNode root("tt"); - RCHECK(root.SetStringAttribute("xmlns", "http://www.w3.org/ns/ttml")); + RCHECK(root.SetStringAttribute("xmlns", kTtNamespace)); RCHECK(root.SetStringAttribute("xmlns:tts", "http://www.w3.org/ns/ttml#styling")); diff --git a/packager/media/formats/ttml/ttml_generator.h b/packager/media/formats/ttml/ttml_generator.h index 3ac003a084..817339f01d 100644 --- a/packager/media/formats/ttml/ttml_generator.h +++ b/packager/media/formats/ttml/ttml_generator.h @@ -24,6 +24,8 @@ class TtmlGenerator { explicit TtmlGenerator(); ~TtmlGenerator(); + static const char* kTtNamespace; + void Initialize(const std::map& regions, const std::string& language, uint32_t time_scale); diff --git a/packager/media/formats/ttml/ttml_to_mp4_handler.cc b/packager/media/formats/ttml/ttml_to_mp4_handler.cc new file mode 100644 index 0000000000..80d315bb0b --- /dev/null +++ b/packager/media/formats/ttml/ttml_to_mp4_handler.cc @@ -0,0 +1,123 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/ttml/ttml_to_mp4_handler.h" + +#include "packager/status_macros.h" + +namespace shaka { +namespace media { +namespace ttml { + +namespace { + +size_t kTrackId = 0; + +std::shared_ptr CreateMediaSample(const std::string& data, + int64_t start_time, + int64_t duration) { + DCHECK_GE(start_time, 0); + DCHECK_GT(duration, 0); + + const bool kIsKeyFrame = true; + + std::shared_ptr sample = MediaSample::CopyFrom( + reinterpret_cast(data.data()), data.size(), kIsKeyFrame); + sample->set_pts(start_time); + sample->set_dts(start_time); + sample->set_duration(duration); + + return sample; +} + +} // namespace + +Status TtmlToMp4Handler::InitializeInternal() { + return Status::OK; +} + +Status TtmlToMp4Handler::Process(std::unique_ptr stream_data) { + switch (stream_data->stream_data_type) { + case StreamDataType::kStreamInfo: + return OnStreamInfo(std::move(stream_data)); + case StreamDataType::kCueEvent: + return OnCueEvent(std::move(stream_data)); + case StreamDataType::kSegmentInfo: + return OnSegmentInfo(std::move(stream_data)); + case StreamDataType::kTextSample: + return OnTextSample(std::move(stream_data)); + default: + return Status(error::INTERNAL_ERROR, + "Invalid stream data type (" + + StreamDataTypeToString(stream_data->stream_data_type) + + ") for this TtmlToMp4 handler"); + } +} + +Status TtmlToMp4Handler::OnStreamInfo(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->stream_info); + + auto clone = stream_data->stream_info->Clone(); + clone->set_codec(kCodecTtml); + clone->set_codec_string("ttml"); + + if (clone->stream_type() != kStreamText) + return Status(error::MUXER_FAILURE, "Incorrect stream type"); + auto* text_stream = static_cast(clone.get()); + generator_.Initialize(text_stream->regions(), text_stream->language(), + text_stream->time_scale()); + + return Dispatch( + StreamData::FromStreamInfo(stream_data->stream_index, std::move(clone))); +} + +Status TtmlToMp4Handler::OnCueEvent(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->cue_event); + return Dispatch(std::move(stream_data)); +} + +Status TtmlToMp4Handler::OnSegmentInfo( + std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->segment_info); + + const auto& segment = stream_data->segment_info; + + std::string data; + if (!generator_.Dump(&data)) + return Status(error::INTERNAL_ERROR, "Error generating XML"); + generator_.Reset(); + + RETURN_IF_ERROR(DispatchMediaSample( + kTrackId, + CreateMediaSample(data, segment->start_timestamp, segment->duration))); + + return Dispatch(std::move(stream_data)); +} + +Status TtmlToMp4Handler::OnTextSample(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->text_sample); + + auto& sample = stream_data->text_sample; + + // Ignore empty samples. This will create gaps, but we will handle that + // later. + if (sample->body().is_empty()) { + return Status::OK; + } + + // Add the new text sample to the cache of samples that belong in the + // current segment. + generator_.AddSample(*sample); + return Status::OK; +} + +} // namespace ttml +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/ttml/ttml_to_mp4_handler.h b/packager/media/formats/ttml/ttml_to_mp4_handler.h new file mode 100644 index 0000000000..48798a64f9 --- /dev/null +++ b/packager/media/formats/ttml/ttml_to_mp4_handler.h @@ -0,0 +1,43 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_ +#define PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_ + +#include + +#include "packager/media/base/media_handler.h" +#include "packager/media/formats/ttml/ttml_generator.h" + +namespace shaka { +namespace media { +namespace ttml { + +// A media handler that should come after the cue aligner and segmenter and +// should come before the muxer. This handler is to convert text samples +// to media samples so that they can be sent to a mp4 muxer. +class TtmlToMp4Handler : public MediaHandler { + public: + TtmlToMp4Handler() = default; + ~TtmlToMp4Handler() override = default; + + private: + Status InitializeInternal() override; + Status Process(std::unique_ptr stream_data) override; + + Status OnStreamInfo(std::unique_ptr stream_data); + Status OnCueEvent(std::unique_ptr stream_data); + Status OnSegmentInfo(std::unique_ptr stream_data); + Status OnTextSample(std::unique_ptr stream_data); + + TtmlGenerator generator_; +}; + +} // namespace ttml +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_ diff --git a/packager/packager.cc b/packager/packager.cc index edb7ef18d5..9e9a1b6cb6 100644 --- a/packager/packager.cc +++ b/packager/packager.cc @@ -40,6 +40,7 @@ #include "packager/media/demuxer/demuxer.h" #include "packager/media/event/muxer_listener_factory.h" #include "packager/media/event/vod_media_info_dump_muxer_listener.h" +#include "packager/media/formats/ttml/ttml_to_mp4_handler.h" #include "packager/media/formats/webvtt/text_padder.h" #include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h" #include "packager/media/replicator/replicator.h" @@ -161,6 +162,27 @@ MediaContainerName GetOutputFormat(const StreamDescriptor& descriptor) { return CONTAINER_UNKNOWN; } +MediaContainerName GetTextOutputCodec(const StreamDescriptor& descriptor) { + const auto output_container = GetOutputFormat(descriptor); + if (output_container != CONTAINER_MOV) + return output_container; + + const auto input_container = DetermineContainerFromFileName(descriptor.input); + if (base::EqualsCaseInsensitiveASCII(descriptor.output_format, "vtt+mp4") || + base::EqualsCaseInsensitiveASCII(descriptor.output_format, + "webvtt+mp4")) { + return CONTAINER_WEBVTT; + } else if (!base::EqualsCaseInsensitiveASCII(descriptor.output_format, + "ttml+mp4") && + input_container == CONTAINER_WEBVTT) { + // With WebVTT input, default to WebVTT output. + return CONTAINER_WEBVTT; + } else { + // Otherwise default to TTML since it has more features. + return CONTAINER_TTML; + } +} + Status ValidateStreamDescriptor(bool dump_stream_info, const StreamDescriptor& stream) { if (stream.input.empty()) { @@ -640,27 +662,32 @@ Status CreateAudioVideoJobs( muxer_listener_factory->CreateListener(ToMuxerListenerData(stream)); muxer->SetMuxerListener(std::move(muxer_listener)); + std::vector> handlers; + handlers.emplace_back(replicator); + // Trick play is optional. - std::shared_ptr trick_play = - stream.trick_play_factor - ? std::make_shared(stream.trick_play_factor) - : nullptr; + if (stream.trick_play_factor) { + handlers.emplace_back( + std::make_shared(stream.trick_play_factor)); + } - std::shared_ptr chunker = - is_text && (!stream.segment_template.empty() || - output_format == CONTAINER_MOV) - ? CreateTextChunker(packaging_params.chunking_params) - : nullptr; + if (is_text && + (!stream.segment_template.empty() || output_format == CONTAINER_MOV)) { + handlers.emplace_back( + CreateTextChunker(packaging_params.chunking_params)); + } - // TODO(modmaker): Move to MOV muxer? - const auto input_container = DetermineContainerFromFileName(stream.input); - auto text_to_mp4 = - input_container == CONTAINER_WEBVTT && output_format == CONTAINER_MOV - ? std::make_shared() - : nullptr; + if (is_text && output_format == CONTAINER_MOV) { + const auto output_codec = GetTextOutputCodec(stream); + if (output_codec == CONTAINER_WEBVTT) { + handlers.emplace_back(std::make_shared()); + } else if (output_codec == CONTAINER_TTML) { + handlers.emplace_back(std::make_shared()); + } + } - RETURN_IF_ERROR(MediaHandler::Chain( - {replicator, trick_play, chunker, text_to_mp4, muxer})); + handlers.emplace_back(muxer); + RETURN_IF_ERROR(MediaHandler::Chain(handlers)); } return Status::OK;