Better TTML input check

- Check whether the input can be parsed by libxml2 and whether the root
  element is a 'tt' element.
- Added unit tests for determining TTML and WebVTT.

Change-Id: I4fea1881f0ab70970700601d8d7cea32cc04752c
This commit is contained in:
Rintaro Kuroiwa 2015-11-16 15:47:39 -08:00 committed by Gerrit Code Review
parent c393b71b6f
commit ef7b136116
3 changed files with 65 additions and 8 deletions

View File

@ -4,6 +4,8 @@
#include "packager/media/base/container_names.h"
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <stdint.h>
#include <cctype>
@ -11,6 +13,7 @@
#include "packager/base/logging.h"
#include "packager/media/base/bit_reader.h"
#include "packager/mpd/base/xml/scoped_xml_ptr.h"
namespace edash_packager {
namespace media {
@ -1630,15 +1633,25 @@ bool CheckWebVtt(const uint8_t* buffer, int buffer_size) {
arraysize(kWebVtt) - 1);
}
// TODO(rkuroiwa): This check is a very simple check to see if it is UTF-8 or
// UTF-16, which is not sufficient to determine whether it is TTML. Check if the
// entire buffer is a valid TTML.
bool CheckTtml(const uint8_t* buffer, int buffer_size) {
return StartsWith(buffer, buffer_size,
"<?xml version='1.0' encoding='UTF-8'?>") ||
StartsWith(buffer, buffer_size,
"<?xml version='1.0' encoding='UTF-16'?>");
// Sanity check first before reading the entire thing.
if (!StartsWith(buffer, buffer_size, "<?xml"))
return false;
// Make sure that it can be parsed so that it doesn't error later in the
// process. Not doing a schema check to allow TTMLs that makes some sense but
// not necessarily compliant to the schema.
xml::scoped_xml_ptr<xmlDoc> doc(
xmlParseMemory(reinterpret_cast<const char*>(buffer), buffer_size));
if (!doc)
return false;
xmlNodePtr root_node = xmlDocGetRootElement(doc.get());
std::string root_node_name(reinterpret_cast<const char*>(root_node->name));
// "tt" is supposed to be the top level element for ttml.
return root_node_name == "tt";
}
} // namespace
// Attempt to determine the container name from the buffer provided.
@ -1693,7 +1706,7 @@ MediaContainerName DetermineContainer(const uint8_t* buffer, int buffer_size) {
return CONTAINER_EAC3;
}
// To do a TTML check, it parses the XML, which requires scanning
// the whole content.
if (CheckTtml(buffer, buffer_size))
return CONTAINER_TTML;

View File

@ -5,6 +5,7 @@
#include <gtest/gtest.h>
#include "packager/base/files/file_util.h"
#include "packager/base/stl_util.h"
#include "packager/media/base/container_names.h"
#include "packager/media/test/test_data_util.h"
@ -120,6 +121,48 @@ void TestFile(MediaContainerName expected, const base::FilePath& filename) {
<< "Failure with file " << filename.value();
}
// Verifies that an in-memory XML document whose root element is <tt> is
// detected as CONTAINER_TTML.
TEST(ContainerNamesTest, Ttml) {
  // Sample document; per the original note, one of the actual TTMLs from the
  // TTML spec page.
  const char kTtml[] =
      "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
      "<tt xml:lang=\"en\" xmlns=\"http://www.w3.org/ns/ttml\">\n"
      " <body>\n"
      " <div>\n"
      " <p dur=\"10s\">\n"
      " Some subtitle.\n"
      " </p>\n"
      " </div>\n"
      " </body>\n"
      "</tt>\n";

  // The size passed is the full array size, i.e. it includes the literal's
  // terminating NUL.
  const uint8_t* const ttml_bytes = reinterpret_cast<const uint8_t*>(kTtml);
  const MediaContainerName detected =
      DetermineContainer(ttml_bytes, arraysize(kTtml));
  EXPECT_EQ(CONTAINER_TTML, detected);
}
// Verifies that WebVTT content is detected as CONTAINER_WEBVTT, both as-is
// and when prefixed with a UTF-8 byte order mark.
TEST(ContainerNamesTest, WebVtt) {
  const char kWebVtt[] =
      "WEBVTT\n"
      "\n"
      "00:1.000 --> 00:2.000\n"
      "Subtitle";

  // Case 1: the raw text (the size includes the literal's terminating NUL).
  const uint8_t* const plain_bytes = reinterpret_cast<const uint8_t*>(kWebVtt);
  EXPECT_EQ(CONTAINER_WEBVTT,
            DetermineContainer(plain_bytes, arraysize(kWebVtt)));

  // Case 2: the same text preceded by the three-byte UTF-8 BOM.
  const uint8_t kUtf8ByteOrderMark[] = {0xef, 0xbb, 0xbf};
  std::vector<uint8_t> bom_prefixed;
  bom_prefixed.insert(bom_prefixed.end(), kUtf8ByteOrderMark,
                      kUtf8ByteOrderMark + arraysize(kUtf8ByteOrderMark));
  bom_prefixed.insert(bom_prefixed.end(), kWebVtt,
                      kWebVtt + arraysize(kWebVtt));

  const MediaContainerName detected_with_bom =
      DetermineContainer(vector_as_array(&bom_prefixed), bom_prefixed.size());
  EXPECT_EQ(CONTAINER_WEBVTT, detected_with_bom);
}
TEST(ContainerNamesTest, FileCheckOGG) {
TestFile(CONTAINER_OGG, GetTestDataFilePath("bear.ogv"));
TestFile(CONTAINER_OGG, GetTestDataFilePath("9ch.ogg"));

View File

@ -80,6 +80,7 @@
'../../base/base.gyp:base',
'../../third_party/boringssl/boringssl.gyp:boringssl',
'../../third_party/curl/curl.gyp:libcurl',
'../../third_party/libxml/libxml.gyp:libxml',
],
},
{