Better TTML input check
- Check whether the input can be parsed by libxml2 and whether the root element is a 'tt' element. - Add unit tests for detecting TTML and WebVTT. Change-Id: I4fea1881f0ab70970700601d8d7cea32cc04752c
This commit is contained in:
parent
c393b71b6f
commit
ef7b136116
|
@ -4,6 +4,8 @@
|
|||
|
||||
#include "packager/media/base/container_names.h"
|
||||
|
||||
#include <libxml/parser.h>
|
||||
#include <libxml/tree.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cctype>
|
||||
|
@ -11,6 +13,7 @@
|
|||
|
||||
#include "packager/base/logging.h"
|
||||
#include "packager/media/base/bit_reader.h"
|
||||
#include "packager/mpd/base/xml/scoped_xml_ptr.h"
|
||||
|
||||
namespace edash_packager {
|
||||
namespace media {
|
||||
|
@ -1630,15 +1633,25 @@ bool CheckWebVtt(const uint8_t* buffer, int buffer_size) {
|
|||
arraysize(kWebVtt) - 1);
|
||||
}
|
||||
|
||||
// TODO(rkuroiwa): This check is a very simple check to see if it is UTF-8 or
|
||||
// UTF-16, which is not sufficient to determine whether it is TTML. Check if the
|
||||
// entire buffer is a valid TTML.
|
||||
bool CheckTtml(const uint8_t* buffer, int buffer_size) {
|
||||
return StartsWith(buffer, buffer_size,
|
||||
"<?xml version='1.0' encoding='UTF-8'?>") ||
|
||||
StartsWith(buffer, buffer_size,
|
||||
"<?xml version='1.0' encoding='UTF-16'?>");
|
||||
// Sanity check first before reading the entire thing.
|
||||
if (!StartsWith(buffer, buffer_size, "<?xml"))
|
||||
return false;
|
||||
|
||||
// Make sure that it can be parsed so that it doesn't error later in the
|
||||
// process. Not doing a schema check to allow TTMLs that makes some sense but
|
||||
// not necessarily compliant to the schema.
|
||||
xml::scoped_xml_ptr<xmlDoc> doc(
|
||||
xmlParseMemory(reinterpret_cast<const char*>(buffer), buffer_size));
|
||||
if (!doc)
|
||||
return false;
|
||||
|
||||
xmlNodePtr root_node = xmlDocGetRootElement(doc.get());
|
||||
std::string root_node_name(reinterpret_cast<const char*>(root_node->name));
|
||||
// "tt" is supposed to be the top level element for ttml.
|
||||
return root_node_name == "tt";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Attempt to determine the container name from the buffer provided.
|
||||
|
@ -1693,7 +1706,7 @@ MediaContainerName DetermineContainer(const uint8_t* buffer, int buffer_size) {
|
|||
return CONTAINER_EAC3;
|
||||
}
|
||||
|
||||
// To do a TTML check, it (should) do a schema check which requires scanning
|
||||
// To do a TTML check, it parses the XML which requires scanning
|
||||
// the whole content.
|
||||
if (CheckTtml(buffer, buffer_size))
|
||||
return CONTAINER_TTML;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <gtest/gtest.h>
|
||||
|
||||
#include "packager/base/files/file_util.h"
|
||||
#include "packager/base/stl_util.h"
|
||||
#include "packager/media/base/container_names.h"
|
||||
#include "packager/media/test/test_data_util.h"
|
||||
|
||||
|
@ -120,6 +121,48 @@ void TestFile(MediaContainerName expected, const base::FilePath& filename) {
|
|||
<< "Failure with file " << filename.value();
|
||||
}
|
||||
|
||||
// Verifies that a minimal, well-formed TTML document is detected as
// CONTAINER_TTML.
TEST(ContainerNamesTest, Ttml) {
  // One of the actual TTMLs from the TTML spec page.
  const char kTtml[] =
      "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
      "<tt xml:lang=\"en\" xmlns=\"http://www.w3.org/ns/ttml\">\n"
      "  <body>\n"
      "    <div>\n"
      "      <p dur=\"10s\">\n"
      "        Some subtitle.\n"
      "      </p>\n"
      "    </div>\n"
      "  </body>\n"
      "</tt>\n";
  // The terminating NUL of the literal is not part of the document, so it is
  // excluded from the buffer length handed to the detector.
  const size_t kTtmlSize = arraysize(kTtml) - 1;

  EXPECT_EQ(CONTAINER_TTML,
            DetermineContainer(reinterpret_cast<const uint8_t*>(kTtml),
                               kTtmlSize));
}
|
||||
|
||||
// Verifies that WebVTT content is detected as CONTAINER_WEBVTT, both as plain
// text and when preceded by a UTF-8 byte order mark.
TEST(ContainerNamesTest, WebVtt) {
  const char kWebVtt[] =
      "WEBVTT\n"
      "\n"
      "00:1.000 --> 00:2.000\n"
      "Subtitle";
  // The terminating NUL of the literal is not part of the content, so it is
  // excluded from every length used below.
  const size_t kWebVttSize = arraysize(kWebVtt) - 1;

  EXPECT_EQ(CONTAINER_WEBVTT,
            DetermineContainer(reinterpret_cast<const uint8_t*>(kWebVtt),
                               kWebVttSize));

  // Same content again, this time prefixed with the UTF-8 byte order mark.
  const uint8_t kUtf8ByteOrderMark[] = {0xef, 0xbb, 0xbf};
  std::vector<uint8_t> webvtt_with_utf8_byte_order_mark(
      kUtf8ByteOrderMark, kUtf8ByteOrderMark + arraysize(kUtf8ByteOrderMark));
  webvtt_with_utf8_byte_order_mark.insert(
      webvtt_with_utf8_byte_order_mark.end(), kWebVtt, kWebVtt + kWebVttSize);

  EXPECT_EQ(
      CONTAINER_WEBVTT,
      DetermineContainer(vector_as_array(&webvtt_with_utf8_byte_order_mark),
                         webvtt_with_utf8_byte_order_mark.size()));
}
|
||||
|
||||
TEST(ContainerNamesTest, FileCheckOGG) {
|
||||
TestFile(CONTAINER_OGG, GetTestDataFilePath("bear.ogv"));
|
||||
TestFile(CONTAINER_OGG, GetTestDataFilePath("9ch.ogg"));
|
||||
|
|
|
@ -80,6 +80,7 @@
|
|||
'../../base/base.gyp:base',
|
||||
'../../third_party/boringssl/boringssl.gyp:boringssl',
|
||||
'../../third_party/curl/curl.gyp:libcurl',
|
||||
'../../third_party/libxml/libxml.gyp:libxml',
|
||||
],
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue