Better TTML input check
- Check whether the input can be parsed by libxml2 and whether the root element is a 'tt' element. - Add unit tests for determining TTML and WebVTT. Change-Id: I4fea1881f0ab70970700601d8d7cea32cc04752c
This commit is contained in:
parent
c393b71b6f
commit
ef7b136116
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
#include "packager/media/base/container_names.h"
|
#include "packager/media/base/container_names.h"
|
||||||
|
|
||||||
|
#include <libxml/parser.h>
|
||||||
|
#include <libxml/tree.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
@ -11,6 +13,7 @@
|
||||||
|
|
||||||
#include "packager/base/logging.h"
|
#include "packager/base/logging.h"
|
||||||
#include "packager/media/base/bit_reader.h"
|
#include "packager/media/base/bit_reader.h"
|
||||||
|
#include "packager/mpd/base/xml/scoped_xml_ptr.h"
|
||||||
|
|
||||||
namespace edash_packager {
|
namespace edash_packager {
|
||||||
namespace media {
|
namespace media {
|
||||||
|
@ -1630,15 +1633,25 @@ bool CheckWebVtt(const uint8_t* buffer, int buffer_size) {
|
||||||
arraysize(kWebVtt) - 1);
|
arraysize(kWebVtt) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(rkuroiwa): This check is a very simple check to see if it is UTF-8 or
|
|
||||||
// UTF-16, which is not sufficient to determine whether it is TTML. Check if the
|
|
||||||
// entire buffer is a valid TTML.
|
|
||||||
bool CheckTtml(const uint8_t* buffer, int buffer_size) {
|
bool CheckTtml(const uint8_t* buffer, int buffer_size) {
|
||||||
return StartsWith(buffer, buffer_size,
|
// Sanity check first before reading the entire thing.
|
||||||
"<?xml version='1.0' encoding='UTF-8'?>") ||
|
if (!StartsWith(buffer, buffer_size, "<?xml"))
|
||||||
StartsWith(buffer, buffer_size,
|
return false;
|
||||||
"<?xml version='1.0' encoding='UTF-16'?>");
|
|
||||||
|
// Make sure that it can be parsed so that it doesn't error later in the
|
||||||
|
// process. Not doing a schema check to allow TTMLs that make some sense but
|
||||||
|
// are not necessarily compliant with the schema.
|
||||||
|
xml::scoped_xml_ptr<xmlDoc> doc(
|
||||||
|
xmlParseMemory(reinterpret_cast<const char*>(buffer), buffer_size));
|
||||||
|
if (!doc)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
xmlNodePtr root_node = xmlDocGetRootElement(doc.get());
|
||||||
|
std::string root_node_name(reinterpret_cast<const char*>(root_node->name));
|
||||||
|
// "tt" is supposed to be the top level element for ttml.
|
||||||
|
return root_node_name == "tt";
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// Attempt to determine the container name from the buffer provided.
|
// Attempt to determine the container name from the buffer provided.
|
||||||
|
@ -1693,7 +1706,7 @@ MediaContainerName DetermineContainer(const uint8_t* buffer, int buffer_size) {
|
||||||
return CONTAINER_EAC3;
|
return CONTAINER_EAC3;
|
||||||
}
|
}
|
||||||
|
|
||||||
// To do a TTML check, it (should) do a schema check which requires scanning
|
// To do a TTML check, it parses the XML which requires scanning
|
||||||
// the whole content.
|
// the whole content.
|
||||||
if (CheckTtml(buffer, buffer_size))
|
if (CheckTtml(buffer, buffer_size))
|
||||||
return CONTAINER_TTML;
|
return CONTAINER_TTML;
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include "packager/base/files/file_util.h"
|
#include "packager/base/files/file_util.h"
|
||||||
|
#include "packager/base/stl_util.h"
|
||||||
#include "packager/media/base/container_names.h"
|
#include "packager/media/base/container_names.h"
|
||||||
#include "packager/media/test/test_data_util.h"
|
#include "packager/media/test/test_data_util.h"
|
||||||
|
|
||||||
|
@ -120,6 +121,48 @@ void TestFile(MediaContainerName expected, const base::FilePath& filename) {
|
||||||
<< "Failure with file " << filename.value();
|
<< "Failure with file " << filename.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(ContainerNamesTest, Ttml) {
|
||||||
|
// One of the actual TTMLs from the TTML spec page.
|
||||||
|
const char kTtml[] =
|
||||||
|
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
|
||||||
|
"<tt xml:lang=\"en\" xmlns=\"http://www.w3.org/ns/ttml\">\n"
|
||||||
|
" <body>\n"
|
||||||
|
" <div>\n"
|
||||||
|
" <p dur=\"10s\">\n"
|
||||||
|
" Some subtitle.\n"
|
||||||
|
" </p>\n"
|
||||||
|
" </div>\n"
|
||||||
|
" </body>\n"
|
||||||
|
"</tt>\n";
|
||||||
|
|
||||||
|
EXPECT_EQ(CONTAINER_TTML,
|
||||||
|
DetermineContainer(reinterpret_cast<const uint8_t*>(kTtml),
|
||||||
|
arraysize(kTtml)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ContainerNamesTest, WebVtt) {
|
||||||
|
const char kWebVtt[] =
|
||||||
|
"WEBVTT\n"
|
||||||
|
"\n"
|
||||||
|
"00:1.000 --> 00:2.000\n"
|
||||||
|
"Subtitle";
|
||||||
|
EXPECT_EQ(CONTAINER_WEBVTT,
|
||||||
|
DetermineContainer(reinterpret_cast<const uint8_t*>(kWebVtt),
|
||||||
|
arraysize(kWebVtt)));
|
||||||
|
|
||||||
|
const uint8_t kUtf8ByteOrderMark[] = {0xef, 0xbb, 0xbf};
|
||||||
|
std::vector<uint8_t> webvtt_with_utf8_byte_order_mark(
|
||||||
|
kUtf8ByteOrderMark, kUtf8ByteOrderMark + arraysize(kUtf8ByteOrderMark));
|
||||||
|
webvtt_with_utf8_byte_order_mark.insert(
|
||||||
|
webvtt_with_utf8_byte_order_mark.end(), kWebVtt,
|
||||||
|
kWebVtt + arraysize(kWebVtt));
|
||||||
|
|
||||||
|
EXPECT_EQ(
|
||||||
|
CONTAINER_WEBVTT,
|
||||||
|
DetermineContainer(vector_as_array(&webvtt_with_utf8_byte_order_mark),
|
||||||
|
webvtt_with_utf8_byte_order_mark.size()));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(ContainerNamesTest, FileCheckOGG) {
|
TEST(ContainerNamesTest, FileCheckOGG) {
|
||||||
TestFile(CONTAINER_OGG, GetTestDataFilePath("bear.ogv"));
|
TestFile(CONTAINER_OGG, GetTestDataFilePath("bear.ogv"));
|
||||||
TestFile(CONTAINER_OGG, GetTestDataFilePath("9ch.ogg"));
|
TestFile(CONTAINER_OGG, GetTestDataFilePath("9ch.ogg"));
|
||||||
|
|
|
@ -80,6 +80,7 @@
|
||||||
'../../base/base.gyp:base',
|
'../../base/base.gyp:base',
|
||||||
'../../third_party/boringssl/boringssl.gyp:boringssl',
|
'../../third_party/boringssl/boringssl.gyp:boringssl',
|
||||||
'../../third_party/curl/curl.gyp:libcurl',
|
'../../third_party/curl/curl.gyp:libcurl',
|
||||||
|
'../../third_party/libxml/libxml.gyp:libxml',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue