From ef7b136116513754b1a247e4ce97fc6ced7aa09b Mon Sep 17 00:00:00 2001 From: Rintaro Kuroiwa Date: Mon, 16 Nov 2015 15:47:39 -0800 Subject: [PATCH] Better TTML input check - Check whether it can be parsed by libxml2 and check if the root element is a 'tt' element. - Added unit tests for determining TTML and WebVTT. Change-Id: I4fea1881f0ab70970700601d8d7cea32cc04752c --- packager/media/base/container_names.cc | 29 +++++++++---- .../media/base/container_names_unittest.cc | 43 +++++++++++++++++++ packager/media/base/media_base.gyp | 1 + 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/packager/media/base/container_names.cc b/packager/media/base/container_names.cc index cbd7c3fc74..ab0fcd7667 100644 --- a/packager/media/base/container_names.cc +++ b/packager/media/base/container_names.cc @@ -4,6 +4,8 @@ #include "packager/media/base/container_names.h" +#include +#include #include #include @@ -11,6 +13,7 @@ #include "packager/base/logging.h" #include "packager/media/base/bit_reader.h" +#include "packager/mpd/base/xml/scoped_xml_ptr.h" namespace edash_packager { namespace media { @@ -1630,15 +1633,25 @@ bool CheckWebVtt(const uint8_t* buffer, int buffer_size) { arraysize(kWebVtt) - 1); } -// TODO(rkuroiwa): This check is a very simple check to see if it is UTF-8 or -// UTF-16, which is not sufficient to determine whether it is TTML. Check if the -// entire buffer is a valid TTML. bool CheckTtml(const uint8_t* buffer, int buffer_size) { - return StartsWith(buffer, buffer_size, - "") || - StartsWith(buffer, buffer_size, - ""); + // Sanity check first before reading the entire thing. + if (!StartsWith(buffer, buffer_size, " doc( + xmlParseMemory(reinterpret_cast(buffer), buffer_size)); + if (!doc) + return false; + + xmlNodePtr root_node = xmlDocGetRootElement(doc.get()); + std::string root_node_name(reinterpret_cast(root_node->name)); + // "tt" is supposed to be the top level element for ttml. 
+ return root_node_name == "tt"; } + } // namespace // Attempt to determine the container name from the buffer provided. @@ -1693,7 +1706,7 @@ MediaContainerName DetermineContainer(const uint8_t* buffer, int buffer_size) { return CONTAINER_EAC3; } - // To do a TTML check, it (should) do a schema check which requires scanning + // To do a TTML check, it parses the XML which requires scanning // the whole content. if (CheckTtml(buffer, buffer_size)) return CONTAINER_TTML; diff --git a/packager/media/base/container_names_unittest.cc b/packager/media/base/container_names_unittest.cc index 8ef93fb9d7..8af3192c79 100644 --- a/packager/media/base/container_names_unittest.cc +++ b/packager/media/base/container_names_unittest.cc @@ -5,6 +5,7 @@ #include #include "packager/base/files/file_util.h" +#include "packager/base/stl_util.h" #include "packager/media/base/container_names.h" #include "packager/media/test/test_data_util.h" @@ -120,6 +121,48 @@ void TestFile(MediaContainerName expected, const base::FilePath& filename) { << "Failure with file " << filename.value(); } +TEST(ContainerNamesTest, Ttml) { + // One of the actual TTMLs from the TTML spec page. + const char kTtml[] = + "\n" + "\n" + " \n" + "
\n" + "

\n" + " Some subtitle.\n" + "

\n" + "
\n" + " \n" + "
\n"; + + EXPECT_EQ(CONTAINER_TTML, + DetermineContainer(reinterpret_cast(kTtml), + arraysize(kTtml))); +} + +TEST(ContainerNamesTest, WebVtt) { + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "00:1.000 --> 00:2.000\n" + "Subtitle"; + EXPECT_EQ(CONTAINER_WEBVTT, + DetermineContainer(reinterpret_cast(kWebVtt), + arraysize(kWebVtt))); + + const uint8_t kUtf8ByteOrderMark[] = {0xef, 0xbb, 0xbf}; + std::vector webvtt_with_utf8_byte_order_mark( + kUtf8ByteOrderMark, kUtf8ByteOrderMark + arraysize(kUtf8ByteOrderMark)); + webvtt_with_utf8_byte_order_mark.insert( + webvtt_with_utf8_byte_order_mark.end(), kWebVtt, + kWebVtt + arraysize(kWebVtt)); + + EXPECT_EQ( + CONTAINER_WEBVTT, + DetermineContainer(vector_as_array(&webvtt_with_utf8_byte_order_mark), + webvtt_with_utf8_byte_order_mark.size())); +} + TEST(ContainerNamesTest, FileCheckOGG) { TestFile(CONTAINER_OGG, GetTestDataFilePath("bear.ogv")); TestFile(CONTAINER_OGG, GetTestDataFilePath("9ch.ogg")); diff --git a/packager/media/base/media_base.gyp b/packager/media/base/media_base.gyp index 36996adb66..44b38ee551 100644 --- a/packager/media/base/media_base.gyp +++ b/packager/media/base/media_base.gyp @@ -80,6 +80,7 @@ '../../base/base.gyp:base', '../../third_party/boringssl/boringssl.gyp:boringssl', '../../third_party/curl/curl.gyp:libcurl', + '../../third_party/libxml/libxml.gyp:libxml', ], }, {