From 1cdce293431c12f646f6cb185b5b06f6400a2352 Mon Sep 17 00:00:00 2001 From: Rintaro Kuroiwa Date: Thu, 19 Nov 2015 15:58:29 -0800 Subject: [PATCH] WebVttMediaParser and TextStreamInfo - Add WebVttMediaParser which parses and creates MediaSamples from text WebVtt input Change-Id: Ia7bb7474df7f15e454e887b8c291fdfdc3195e46 --- packager/media/base/demuxer.cc | 4 + packager/media/base/media_base.gyp | 4 +- packager/media/base/media_sample.cc | 9 +- packager/media/base/media_sample.h | 21 +- packager/media/base/text_stream_info.cc | 39 ++ packager/media/base/text_stream_info.h | 57 +++ packager/media/formats/webvtt/webvtt.gyp | 38 ++ .../formats/webvtt/webvtt_media_parser.cc | 380 ++++++++++++++++++ .../formats/webvtt/webvtt_media_parser.h | 84 ++++ .../webvtt/webvtt_media_parser_unittest.cc | 318 +++++++++++++++ packager/packager.gyp | 2 + 11 files changed, 950 insertions(+), 6 deletions(-) create mode 100644 packager/media/base/text_stream_info.cc create mode 100644 packager/media/base/text_stream_info.h create mode 100644 packager/media/formats/webvtt/webvtt.gyp create mode 100644 packager/media/formats/webvtt/webvtt_media_parser.cc create mode 100644 packager/media/formats/webvtt/webvtt_media_parser.h create mode 100644 packager/media/formats/webvtt/webvtt_media_parser_unittest.cc diff --git a/packager/media/base/demuxer.cc b/packager/media/base/demuxer.cc index 00a2ff7dff..0979f497c7 100644 --- a/packager/media/base/demuxer.cc +++ b/packager/media/base/demuxer.cc @@ -18,6 +18,7 @@ #include "packager/media/formats/mp2t/mp2t_media_parser.h" #include "packager/media/formats/mp4/mp4_media_parser.h" #include "packager/media/formats/webm/webm_media_parser.h" +#include "packager/media/formats/webvtt/webvtt_media_parser.h" #include "packager/media/formats/wvm/wvm_media_parser.h" namespace { @@ -87,6 +88,9 @@ Status Demuxer::Initialize() { case CONTAINER_WEBM: parser_.reset(new WebMMediaParser()); break; + case CONTAINER_WEBVTT: + parser_.reset(new WebVttMediaParser()); + 
break; default: NOTIMPLEMENTED(); return Status(error::UNIMPLEMENTED, "Container not supported."); diff --git a/packager/media/base/media_base.gyp b/packager/media/base/media_base.gyp index 44b38ee551..caf5e093e3 100644 --- a/packager/media/base/media_base.gyp +++ b/packager/media/base/media_base.gyp @@ -67,10 +67,12 @@ 'status.h', 'stream_info.cc', 'stream_info.h', + 'text_stream_info.cc', + 'text_stream_info.h', 'text_track.h', - 'timestamp.h', 'text_track_config.cc', 'text_track_config.h', + 'timestamp.h', 'video_stream_info.cc', 'video_stream_info.h', 'widevine_key_source.cc', diff --git a/packager/media/base/media_sample.cc b/packager/media/base/media_sample.cc index c16cef8f4c..119115f5ff 100644 --- a/packager/media/base/media_sample.cc +++ b/packager/media/base/media_sample.cc @@ -26,8 +26,6 @@ MediaSample::MediaSample(const uint8_t* data, is_encrypted_(false) { if (!data) { CHECK_EQ(size, 0u); - CHECK(!side_data); - return; } data_.assign(data, data + size); @@ -65,6 +63,13 @@ scoped_refptr MediaSample::CopyFrom(const uint8_t* data, data, data_size, side_data, side_data_size, is_key_frame)); } +// static +scoped_refptr MediaSample::FromMetadata(const uint8_t* metadata, + size_t metadata_size) { + return make_scoped_refptr( + new MediaSample(nullptr, 0, metadata, metadata_size, false)); +} + // static scoped_refptr MediaSample::CreateEmptyMediaSample() { MediaSample* media_sample = new MediaSample(); diff --git a/packager/media/base/media_sample.h b/packager/media/base/media_sample.h index f3983e9e4d..f0c61cf659 100644 --- a/packager/media/base/media_sample.h +++ b/packager/media/base/media_sample.h @@ -38,7 +38,6 @@ class MediaSample : public base::RefCountedThreadSafe { /// Must not be NULL. /// @param size indicates sample size in bytes. Must not be negative. /// @param side_data_size indicates additional sample data size in bytes. - /// Must not be negative. /// @param is_key_frame indicates whether the sample is a key frame. 
static scoped_refptr CopyFrom(const uint8_t* data, size_t size, @@ -46,6 +45,15 @@ class MediaSample : public base::RefCountedThreadSafe { size_t side_data_size, bool is_key_frame); + /// Create a MediaSample object from metadata. + /// Unlike other factory methods, this cannot be a key frame. It must be only + /// for metadata. + /// @param metadata points to the buffer containing metadata. + /// Must not be NULL. + /// @param metadata_size is the size of metadata in bytes. + static scoped_refptr FromMetadata(const uint8_t* metadata, + size_t metadata_size); + /// Create a MediaSample object with default members. static scoped_refptr CreateEmptyMediaSample(); @@ -103,12 +111,10 @@ class MediaSample : public base::RefCountedThreadSafe { } const uint8_t* side_data() const { - DCHECK(!end_of_stream()); return &side_data_[0]; } size_t side_data_size() const { - DCHECK(!end_of_stream()); return side_data_.size(); } @@ -127,6 +133,11 @@ class MediaSample : public base::RefCountedThreadSafe { // If there's no data in this buffer, it represents end of stream. bool end_of_stream() const { return data_.size() == 0; } + const std::string& config_id() const { return config_id_; } + void set_config_id(const std::string& config_id) { + config_id_ = config_id; + } + /// @return a human-readable string describing |*this|. std::string ToString() const; @@ -160,6 +171,10 @@ class MediaSample : public base::RefCountedThreadSafe { // Not used by mp4 and other containers. std::vector side_data_; + // Text specific fields. + // For now this is the cue identifier for WebVTT. + std::string config_id_; + DISALLOW_COPY_AND_ASSIGN(MediaSample); }; diff --git a/packager/media/base/text_stream_info.cc b/packager/media/base/text_stream_info.cc new file mode 100644 index 0000000000..59509996c9 --- /dev/null +++ b/packager/media/base/text_stream_info.cc @@ -0,0 +1,39 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/base/text_stream_info.h" + +namespace edash_packager { +namespace media { + +TextStreamInfo::TextStreamInfo(int track_id, + uint32_t time_scale, + uint64_t duration, + const std::string& codec_string, + const std::string& language, + const std::string& extra_data, + uint16_t width, + uint16_t height) + : StreamInfo(kStreamText, + track_id, + time_scale, + duration, + codec_string, + language, + reinterpret_cast(extra_data.data()), + extra_data.size(), + false), + width_(width), + height_(height) {} + +TextStreamInfo::~TextStreamInfo() {} + +bool TextStreamInfo::IsValidConfig() const { + return true; +} + +} // namespace media +} // namespace edash_packager diff --git a/packager/media/base/text_stream_info.h b/packager/media/base/text_stream_info.h new file mode 100644 index 0000000000..e968de0798 --- /dev/null +++ b/packager/media/base/text_stream_info.h @@ -0,0 +1,57 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_BASE_TEXT_STREAM_INFO_H_ +#define PACKAGER_MEDIA_BASE_TEXT_STREAM_INFO_H_ + +#include "packager/media/base/stream_info.h" + +#include + +namespace edash_packager { +namespace media { + +class TextStreamInfo : public StreamInfo { + public: + /// No encryption supported. + /// @param track_id is the track ID of this stream. + /// @param time_scale is the time scale of this stream. + /// @param duration is the duration of this stream. + /// @param codec_string is the codec. + /// @param language is the language of this stream. This may be empty. + /// @param extra_data is extra data for this text stream. 
This could be the + /// metadata that applies to all the samples of this stream. This may + /// be empty. + /// @param width of the text. This may be 0. + /// @param height of the text. This may be 0. + TextStreamInfo(int track_id, + uint32_t time_scale, + uint64_t duration, + const std::string& codec_string, + const std::string& language, + const std::string& extra_data, + uint16_t width, + uint16_t height); + + bool IsValidConfig() const override; + + uint16_t width() const { return width_; } + uint16_t height() const { return height_; } + + protected: + ~TextStreamInfo() override; + + private: + uint16_t width_; + uint16_t height_; + + // Allow copying. This is very light weight. +}; + +} // namespace media +} // namespace edash_packager + +#endif // PACKAGER_MEDIA_BASE_TEXT_STREAM_INFO_H_ diff --git a/packager/media/formats/webvtt/webvtt.gyp b/packager/media/formats/webvtt/webvtt.gyp new file mode 100644 index 0000000000..c2f88bde8a --- /dev/null +++ b/packager/media/formats/webvtt/webvtt.gyp @@ -0,0 +1,38 @@ +# Copyright 2015 Google Inc. All rights reserved. 
+# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file or at +# https://developers.google.com/open-source/licenses/bsd + +{ + 'includes': [ + '../../../common.gypi', + ], + 'targets': [ + { + 'target_name': 'webvtt', + 'type': '<(component)', + 'sources': [ + 'webvtt_media_parser.cc', + 'webvtt_media_parser.h', + ], + 'dependencies': [ + '../../../base/base.gyp:base', + '../../base/media_base.gyp:base', + ], + }, + { + 'target_name': 'webvtt_unittest', + 'type': '<(gtest_target_type)', + 'sources': [ + 'webvtt_media_parser_unittest.cc', + ], + 'dependencies': [ + '../../../testing/gmock.gyp:gmock', + '../../../testing/gtest.gyp:gtest', + '../../test/media_test.gyp:media_test_support', + 'webvtt', + ] + }, + ], +} diff --git a/packager/media/formats/webvtt/webvtt_media_parser.cc b/packager/media/formats/webvtt/webvtt_media_parser.cc new file mode 100644 index 0000000000..45f731f0c4 --- /dev/null +++ b/packager/media/formats/webvtt/webvtt_media_parser.cc @@ -0,0 +1,380 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/webvtt/webvtt_media_parser.h" + +#include +#include + +#include "packager/base/logging.h" +#include "packager/base/strings/string_number_conversions.h" +#include "packager/base/strings/string_split.h" +#include "packager/base/strings/string_util.h" +#include "packager/media/base/media_sample.h" +#include "packager/media/base/text_stream_info.h" + +namespace edash_packager { +namespace media { + +namespace { + +// There's only one track in a WebVTT file. +const int kTrackId = 0; + +const char kCR = 0x0D; +const char kLF = 0x0A; + +// Reads the first line from |data| and removes the line. Returns false if there +// isn't a line break. 
Sets |line| with the content of the first line without +// the line break. +bool ReadLine(std::string* data, std::string* line) { + if (data->size() == 0) { + return false; + } + size_t string_position = 0; + // Length of the line break mark. 1 for LF and CR, 2 for CRLF. + int line_break_length = 1; + bool found_line_break = false; + while (string_position < data->size()) { + if (data->at(string_position) == kLF) { + found_line_break = true; + break; + } + + if (data->at(string_position) == kCR) { + found_line_break = true; + if (string_position + 1 >= data->size()) + break; + + if (data->at(string_position + 1) == kLF) + line_break_length = 2; + break; + } + + ++string_position; + } + + if (!found_line_break) + return false; + + *line = data->substr(0, string_position); + data->erase(0, string_position + line_break_length); + return true; +} + +bool TimestampToMilliseconds(const std::string& original_str, + uint64_t* time_ms) { + const size_t kMinutesLength = 2; + const size_t kSecondsLength = 2; + const size_t kMillisecondsLength = 3; + + // +2 for a colon and a dot for splitting minutes and seconds AND seconds and + // milliseconds, respectively. + const size_t kMinimalLength = + kMinutesLength + kSecondsLength + kMillisecondsLength + 2; + + base::StringPiece str(original_str); + if (str.size() < kMinimalLength) + return false; + + int hours = 0; + int minutes = 0; + int seconds = 0; + int milliseconds = 0; + + size_t str_index = 0; + if (str.size() > kMinimalLength) { + // Check if hours is in the right format, if so get the number. + // -1 for excluding colon for splitting hours and minutes. 
+ const size_t hours_length = str.size() - kMinimalLength - 1; + if (!base::StringToInt(str.substr(0, hours_length), &hours)) + return false; + // Hours have no upper bound but must not be negative; a negative value + // would wrap around when stored into the unsigned |time_ms|. + if (hours < 0) + return false; + str_index += hours_length; + + if (str[str_index] != ':') + return false; + ++str_index; + } + + DCHECK_EQ(str.size() - str_index, kMinimalLength); + + if (!base::StringToInt(str.substr(str_index, kMinutesLength), &minutes)) + return false; + // Minutes are restricted to 00-59. + if (minutes < 0 || minutes > 59) + return false; + + str_index += kMinutesLength; + if (str[str_index] != ':') + return false; + ++str_index; + + if (!base::StringToInt(str.substr(str_index, kSecondsLength), &seconds)) + return false; + // Seconds are restricted to 00-59. + if (seconds < 0 || seconds > 59) + return false; + + str_index += kSecondsLength; + if (str[str_index] != '.') + return false; + ++str_index; + + if (!base::StringToInt(str.substr(str_index, kMillisecondsLength), + &milliseconds)) { + return false; + } + str_index += kMillisecondsLength; + + if (milliseconds < 0 || milliseconds > 999) + return false; + + DCHECK_EQ(str.size(), str_index); + // Accumulate in 64 bits: with a 32-bit int the hour term alone overflows + // once hours reaches 597. + *time_ms = static_cast<uint64_t>(milliseconds) + + static_cast<uint64_t>(seconds) * 1000 + + static_cast<uint64_t>(minutes) * 60 * 1000 + + static_cast<uint64_t>(hours) * 60 * 60 * 1000; + return true; +} + +// Clears |settings| and 0s |start_time| and |duration| regardless of the +// parsing result. +bool ParseTimingAndSettingsLine(const std::string& line, + uint64_t* start_time, + uint64_t* duration, + std::string* settings) { + *start_time = 0; + *duration = 0; + settings->clear(); + std::vector entries = base::SplitString( + line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); + if (entries.size() < 3) { + // The timing is time1 --> time2 so if there aren't 3 entries, this is parse + // error. 
+ LOG(ERROR) << "Not enough tokens to be a timing " << line; + return false; + } + + if (entries[1] != "-->") { + LOG(ERROR) << "Cannot find an arrow at the right place " << line; + return false; + } + + const std::string& start_time_str = entries[0]; + if (!TimestampToMilliseconds(start_time_str, start_time)) { + LOG(ERROR) << "Failed to parse " << start_time_str << " in " << line; + return false; + } + + const std::string& end_time_str = entries[2]; + uint64_t end_time = 0; + if (!TimestampToMilliseconds(end_time_str, &end_time)) { + LOG(ERROR) << "Failed to parse " << end_time_str << " in " << line; + return false; + } + // Both times are unsigned, so an end time before the start time would wrap + // the subtraction below into a huge duration; reject the cue instead. + if (end_time < *start_time) { + LOG(ERROR) << "End time is before start time in " << line; + return false; + } + *duration = end_time - *start_time; + + entries.erase(entries.begin(), entries.begin() + 3); + *settings = base::JoinString(entries, " "); + return true; +} + +// Mapping: +// comment --> side data (and side data only sample) +// settings --> side data +// start_time --> pts +scoped_refptr CueToMediaSample(const Cue& cue) { + const bool kKeyFrame = true; + if (!cue.comment.empty()) { + const std::string comment = base::JoinString(cue.comment, "\n"); + return MediaSample::FromMetadata( + reinterpret_cast(comment.data()), comment.size()); + } + + const std::string payload = base::JoinString(cue.payload, "\n"); + scoped_refptr media_sample = MediaSample::CopyFrom( + reinterpret_cast(payload.data()), + payload.size(), + reinterpret_cast(cue.settings.data()), + cue.settings.size(), + !kKeyFrame); + + media_sample->set_config_id(cue.identifier); + media_sample->set_pts(cue.start_time); + media_sample->set_duration(cue.duration); + return media_sample; +} + +} // namespace + +Cue::Cue() : start_time(0), duration(0) {} +Cue::~Cue() {} + +WebVttMediaParser::WebVttMediaParser() : state_(kHeader) {} +WebVttMediaParser::~WebVttMediaParser() {} + +void WebVttMediaParser::Init(const InitCB& init_cb, + const NewSampleCB& new_sample_cb, + KeySource* decryption_key_source) { + init_cb_ = init_cb; + new_sample_cb_ = new_sample_cb; +} + +void 
WebVttMediaParser::Flush() { + // If not in one of these states just be ready for more data. + if (state_ != kCuePayload && state_ != kComment) + return; + + if (!data_.empty()) { + // If it was in the middle of the payload and the stream finished, then this + // is an end of the payload. The rest of the data is part of the payload. + if (state_ == kCuePayload) { + current_cue_.payload.push_back(data_); + } else { + current_cue_.comment.push_back(data_); + } + data_.clear(); + } + + new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_)); + current_cue_ = Cue(); + state_ = kCueIdentifierOrTimingOrComment; +} + +bool WebVttMediaParser::Parse(const uint8_t* buf, int size) { + if (state_ == kParseError) { + LOG(WARNING) << "The parser is in an error state, ignoring input."; + return false; + } + + data_.insert(data_.end(), buf, buf + size); + + std::string line; + while (ReadLine(&data_, &line)) { + // Only kCueIdentifierOrTimingOrComment and kCueTiming states accept -->. + // Error otherwise. + const bool has_arrow = line.find("-->") != std::string::npos; + if (state_ == kCueTiming) { + if (!has_arrow) { + LOG(ERROR) << "Expected --> in: " << line; + state_ = kParseError; + return false; + } + } else if (state_ != kCueIdentifierOrTimingOrComment) { + if (has_arrow) { + LOG(ERROR) << "Unexpected --> in " << line; + state_ = kParseError; + return false; + } + } + + switch (state_) { + case kHeader: + // No check. This should be WEBVTT when this object was created. + header_.push_back(line); + state_ = kMetadata; + break; + case kMetadata: { + if (line.empty()) { + std::vector > streams; + // The resolution of timings are in milliseconds. + const int kTimescale = 1000; + + // The duration passed here is not very important. Also the whole file + // must be read before determining the real duration which doesn't + // work nicely with the current demuxer. + const int kDuration = 0; + + // There is no one metadata to determine what the language is. 
Parts + // of the text may be annotated as some specific language. + const char kLanguage[] = ""; + streams.push_back(new TextStreamInfo( + kTrackId, + kTimescale, + kDuration, + "wvtt", + kLanguage, + base::JoinString(header_, "\n"), + 0, // Not necessary. + 0)); // Not necessary. + + init_cb_.Run(streams); + state_ = kCueIdentifierOrTimingOrComment; + break; + } + + header_.push_back(line); + break; + } + case kCueIdentifierOrTimingOrComment: { + // Note that there can be one or more line breaks before a cue starts; + // skip this line. + // Or the file could end without a new cue. + if (line.empty()) + break; + + if (!has_arrow) { + if (base::StartsWith(line, "NOTE", + base::CompareCase::INSENSITIVE_ASCII)) { + state_ = kComment; + current_cue_.comment.push_back(line); + } else { + // A cue can start from a cue identifier. + // https://w3c.github.io/webvtt/#webvtt-cue-identifier + current_cue_.identifier = line; + // The next line must be a timing. + state_ = kCueTiming; + } + break; + } + + // No break statement if the line has an arrow; it should be a WebVTT + // timing, so fall thru. Setting state_ to kCueTiming so that the state + // always matches the case. 
+ state_ = kCueTiming; + } + case kCueTiming: { + DCHECK(has_arrow); + if (!ParseTimingAndSettingsLine(line, &current_cue_.start_time, + &current_cue_.duration, + &current_cue_.settings)) { + state_ = kParseError; + return false; + } + state_ = kCuePayload; + break; + } + case kCuePayload: { + if (line.empty()) { + state_ = kCueIdentifierOrTimingOrComment; + new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_)); + current_cue_ = Cue(); + break; + } + + current_cue_.payload.push_back(line); + break; + } + case kComment: { + if (line.empty()) { + state_ = kCueIdentifierOrTimingOrComment; + new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_)); + current_cue_ = Cue(); + break; + } + + current_cue_.comment.push_back(line); + break; + } + case kParseError: + NOTREACHED(); + return false; + } + } + + return true; +} + +} // namespace media +} // namespace edash_packager diff --git a/packager/media/formats/webvtt/webvtt_media_parser.h b/packager/media/formats/webvtt/webvtt_media_parser.h new file mode 100644 index 0000000000..49446ba653 --- /dev/null +++ b/packager/media/formats/webvtt/webvtt_media_parser.h @@ -0,0 +1,84 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef MEDIA_FORMATS_WEBVTT_WEBVTT_MEDIA_PARSER_H_ +#define MEDIA_FORMATS_WEBVTT_WEBVTT_MEDIA_PARSER_H_ + +#include "packager/media/base/media_parser.h" + +#include +#include +#include + +namespace edash_packager { +namespace media { + +// If comment is not empty, then this is metadata and other fields must +// be empty. +// Data that can be multiline are vector of strings. +struct Cue { + Cue(); + ~Cue(); + + std::string identifier; + uint64_t start_time; + uint64_t duration; + std::string settings; + std::vector payload; + std::vector comment; +}; + +// WebVTT parser. 
+// The input may not be encrypted so decryption_key_source is ignored. +class WebVttMediaParser : public MediaParser { + public: + WebVttMediaParser(); + ~WebVttMediaParser() override; + + /// @name MediaParser implementation overrides. + /// @{ + void Init(const InitCB& init_cb, + const NewSampleCB& new_sample_cb, + KeySource* decryption_key_source) override; + void Flush() override; + bool Parse(const uint8_t* buf, int size) override; + /// @} + + private: + enum WebVttReadingState { + kHeader, + kMetadata, + kCueIdentifierOrTimingOrComment, + kCueTiming, + kCuePayload, + kComment, + kParseError, + }; + + InitCB init_cb_; + NewSampleCB new_sample_cb_; + + // All the unprocessed data passed to this parser. + std::string data_; + + // The WEBVTT text + metadata header (global settings) for this webvtt. + // One element per line. + std::vector header_; + + // This is set to what the parser is expecting. For example, if the parse is + // expecting a kCueTiming, then the next line that it parses should be a + // WebVTT timing line or an empty line. + WebVttReadingState state_; + + Cue current_cue_; + + DISALLOW_COPY_AND_ASSIGN(WebVttMediaParser); +}; + +} // namespace media +} // namespace edash_packager + +#endif // MEDIA_FORMATS_WEBVTT_WEBVTT_MEDIA_PARSER_H_ diff --git a/packager/media/formats/webvtt/webvtt_media_parser_unittest.cc b/packager/media/formats/webvtt/webvtt_media_parser_unittest.cc new file mode 100644 index 0000000000..16b73bd9b5 --- /dev/null +++ b/packager/media/formats/webvtt/webvtt_media_parser_unittest.cc @@ -0,0 +1,318 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include +#include + +#include "packager/base/bind.h" +#include "packager/media/base/media_sample.h" +#include "packager/media/base/stream_info.h" +#include "packager/media/formats/webvtt/webvtt_media_parser.h" + +namespace edash_packager { +namespace media { + +typedef testing::MockFunction>& + stream_info)> MockInitCallback; +typedef testing::MockFunction& media_sample)> MockNewSampleCallback; + +using testing::_; +using testing::InSequence; +using testing::Return; + +class WebVttMediaParserTest : public ::testing::Test { + public: + WebVttMediaParserTest() {} + ~WebVttMediaParserTest() override {} + + void InitializeParser() { + parser_.Init( + base::Bind(&MockInitCallback::Call, base::Unretained(&init_callback_)), + base::Bind(&MockNewSampleCallback::Call, + base::Unretained(&new_sample_callback_)), + nullptr); + } + + MockInitCallback init_callback_; + MockNewSampleCallback new_sample_callback_; + + WebVttMediaParser parser_; +}; + +TEST_F(WebVttMediaParserTest, Init) { + InitializeParser(); +} + +TEST_F(WebVttMediaParserTest, ParseOneCue) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true)); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "00:01:00.000 --> 01:00:00.000\n" + "subtitle"; + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + + parser_.Flush(); +} + +// Verify that different types of line breaks work. 
+TEST_F(WebVttMediaParserTest, DifferentLineBreaks) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true)); + + const char kWebVtt[] = + "WEBVTT\r\n" + "\r\n" + "00:01:00.000 --> 01:00:00.000\n" + "subtitle\r"; + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + + parser_.Flush(); +} + +TEST_F(WebVttMediaParserTest, ParseMultpleCues) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, _)) + .Times(2) + .WillRepeatedly(Return(true)); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "00:01:00.000 --> 01:00:00.000\n" + "subtitle\n" + "\n" + "02:01:00.000 --> 02:02:00.000\n" + "more subtitle"; + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + + parser_.Flush(); +} + +MATCHER_P2(MatchesStartTimeAndDuration, start_time, duration, "") { + return arg->pts() == start_time && arg->duration() == duration; +} + +// Verify that the timing parsing is done correctly and gets the right start +// time and duration in milliseconds. +TEST_F(WebVttMediaParserTest, VerifyTimingParsing) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, + Call(_, MatchesStartTimeAndDuration(61004, 204088))) + .WillOnce(Return(true)); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "00:01:01.004 --> 00:04:25.092\n" + "subtitle"; + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + + parser_.Flush(); +} + +// Expect parse failure if hour part of the timestamp is too short. 
+TEST_F(WebVttMediaParserTest, MalformedHourTimestamp) { + EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0); + + const char kHourStringTooShort[] = + "WEBVTT\n" + "\n" + "0:01:01.004 --> 00:04:25.092\n" + "subtitle\n"; + InitializeParser(); + + EXPECT_FALSE( + parser_.Parse(reinterpret_cast(kHourStringTooShort), + arraysize(kHourStringTooShort) - 1)); +} + +// Each component of the timestamp is correct but contains spaces. +TEST_F(WebVttMediaParserTest, SpacesInTimestamp) { + EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0); + + const char kHourStringTooShort[] = + "WEBVTT\n" + "\n" + "0:01: 1.004 --> 0 :04:25.092\n" + "subtitle\n"; + InitializeParser(); + + EXPECT_FALSE( + parser_.Parse(reinterpret_cast(kHourStringTooShort), + arraysize(kHourStringTooShort) - 1)); +} + +MATCHER_P(MatchesPayload, data, "") { + std::vector arg_data(arg->data(), arg->data() + arg->data_size()); + return arg_data == data; +} + +TEST_F(WebVttMediaParserTest, VerifyCuePayload) { + const char kExpectedPayload1[] = "subtitle"; + const char kExpectedPayload2[] = "hello"; + std::vector expected_payload( + kExpectedPayload1, kExpectedPayload1 + arraysize(kExpectedPayload1) - 1); + + InSequence s; + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, MatchesPayload(expected_payload))) + .WillOnce(Return(true)); + + expected_payload.assign(kExpectedPayload2, + kExpectedPayload2 + arraysize(kExpectedPayload2) - 1); + EXPECT_CALL(new_sample_callback_, Call(_, MatchesPayload(expected_payload))) + .WillOnce(Return(true)); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "00:01:01.004 --> 00:01:22.088\n" + "subtitle\n" + "\n" + "02:06:00.000 --> 02:30:02.006\n" + "hello"; + + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + + parser_.Flush(); +} + +// Verify that a sample can be created from multiple calls to Parse(), i.e. one +// Parse() is not a full sample. 
+TEST_F(WebVttMediaParserTest, PartialParse) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true)); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "00:01:01.004 --> 00:04:25.092\n" + "subtitle"; + InitializeParser(); + // Pass in the first 8 bytes, i.e. right before the first cue. + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), 8)); + // Pass in the rest of the cue. + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt) + 8, + arraysize(kWebVtt) - 1 - 8)); + + parser_.Flush(); +} + +// Verify that metadata header with --> is rejected. +TEST_F(WebVttMediaParserTest, BadMetadataHeader) { + EXPECT_CALL(init_callback_, Call(_)).Times(0); + EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0); + const char kBadWebVtt[] = + "WEBVTT\n" + "00:01:01.004 --> 00:04:25.092\n"; + InitializeParser(); + EXPECT_FALSE(parser_.Parse(reinterpret_cast(kBadWebVtt), + arraysize(kBadWebVtt) - 1)); + parser_.Flush(); +} + +MATCHER_P(MatchesComment, comment, "") { + std::vector arg_comment(arg->side_data(), + arg->side_data() + arg->side_data_size()); + return arg_comment == comment; +} + +// Verify that comment is parsed. +TEST_F(WebVttMediaParserTest, Comment) { + const char kExpectedComment[] = "NOTE This is a comment"; + std::vector expected_comment( + kExpectedComment, kExpectedComment + arraysize(kExpectedComment) - 1); + + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, MatchesComment(expected_comment))) + .WillOnce(Return(true)); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "NOTE This is a comment\n"; + + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + parser_.Flush(); +} + +// Verify that comment with --> is rejected. 
+TEST_F(WebVttMediaParserTest, BadComment) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "NOTE BAD Comment -->.\n"; + + InitializeParser(); + EXPECT_FALSE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + parser_.Flush(); +} + +MATCHER_P(HeaderMatches, header, "") { + const std::vector& extra_data = arg.at(0)->extra_data(); + return extra_data == header; +} + +// Verify that the metadata header and the WEBVTT magic string is there. +TEST_F(WebVttMediaParserTest, Header) { + const char kHeader[] = "WEBVTT\nRegion: id=anything width=40%"; + std::vector expected_header(kHeader, + kHeader + arraysize(kHeader) - 1); + + EXPECT_CALL(init_callback_, Call(HeaderMatches(expected_header))); + ON_CALL(new_sample_callback_, Call(_, _)).WillByDefault(Return(true)); + const char kWebVtt[] = + "WEBVTT\n" + "Region: id=anything width=40%\n" + "\n" + "00:01:01.004 --> 00:04:25.092\n" + "subtitle"; + + InitializeParser(); + EXPECT_TRUE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + parser_.Flush(); +} + +// Verify that if timing is not present after an identifier, the parser errors. +TEST_F(WebVttMediaParserTest, NoTimingAfterIdentifier) { + EXPECT_CALL(init_callback_, Call(_)); + EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0); + + const char kWebVtt[] = + "WEBVTT\n" + "\n" + "anyid\n" + "00:12.000 00:13.000\n"; // This line doesn't have -->, so error. 
+ InitializeParser(); + EXPECT_FALSE(parser_.Parse(reinterpret_cast(kWebVtt), + arraysize(kWebVtt) - 1)); + parser_.Flush(); +} + +} // namespace media +} // namespace edash_packager diff --git a/packager/packager.gyp b/packager/packager.gyp index 6c77b0ee3c..d2b116baf7 100644 --- a/packager/packager.gyp +++ b/packager/packager.gyp @@ -41,6 +41,7 @@ 'media/formats/mp4/mp4.gyp:mp4', 'media/formats/mpeg/mpeg.gyp:mpeg', 'media/formats/webm/webm.gyp:webm', + 'media/formats/webvtt/webvtt.gyp:webvtt', 'media/formats/wvm/wvm.gyp:wvm', 'mpd/mpd.gyp:mpd_builder', 'third_party/boringssl/boringssl.gyp:boringssl', @@ -75,6 +76,7 @@ 'media/formats/mp4/mp4.gyp:mp4', 'media/formats/mpeg/mpeg.gyp:mpeg', 'media/formats/webm/webm.gyp:webm', + 'media/formats/webvtt/webvtt.gyp:webvtt', 'media/formats/wvm/wvm.gyp:wvm', 'media/test/media_test.gyp:media_test_support', 'testing/gtest.gyp:gtest',