Make WebVttMediaParser use WebVttSampleConverter

- WebVttMediaParser uses WebVttSampleConverter to generate
  non-overlapping media samples.
- The media samples contain ISO BMFF boxes.
- Add kCodecWebVtt to signal that the media is webvtt and
  the samples will be in ISO BMFF boxes.

Change-Id: I639902cdba7b04af75428bc20622e26b8203cfb2
This commit is contained in:
Rintaro Kuroiwa 2017-02-14 13:40:09 -08:00
parent 924d6d4693
commit a3ce51785a
12 changed files with 273 additions and 288 deletions

View File

@ -51,6 +51,7 @@ enum Codec {
kCodecAudioMaxPlusOne, kCodecAudioMaxPlusOne,
kCodecText = 300, kCodecText = 300,
kCodecWebVtt = kCodecText,
}; };
/// Abstract class holds stream information. /// Abstract class holds stream information.

View File

@ -11,10 +11,11 @@ namespace media {
TextStreamInfo::TextStreamInfo(int track_id, uint32_t time_scale, TextStreamInfo::TextStreamInfo(int track_id, uint32_t time_scale,
uint64_t duration, uint64_t duration,
Codec codec,
const std::string& codec_string, const std::string& codec_string,
const std::string& codec_config, uint16_t width, const std::string& codec_config, uint16_t width,
uint16_t height, const std::string& language) uint16_t height, const std::string& language)
: StreamInfo(kStreamText, track_id, time_scale, duration, kCodecText, : StreamInfo(kStreamText, track_id, time_scale, duration, codec,
codec_string, codec_string,
reinterpret_cast<const uint8_t*>(codec_config.data()), reinterpret_cast<const uint8_t*>(codec_config.data()),
codec_config.size(), language, false), codec_config.size(), language, false),

View File

@ -20,7 +20,8 @@ class TextStreamInfo : public StreamInfo {
/// @param track_id is the track ID of this stream. /// @param track_id is the track ID of this stream.
/// @param time_scale is the time scale of this stream. /// @param time_scale is the time scale of this stream.
/// @param duration is the duration of this stream. /// @param duration is the duration of this stream.
/// @param codec_string is the codec. /// @param codec is the media codec.
/// @param codec_string is the codec in string format.
/// @param codec_config is configuration for this text stream. This could be /// @param codec_config is configuration for this text stream. This could be
/// the metadata that applies to all the samples of this stream. This /// the metadata that applies to all the samples of this stream. This
/// may be empty. /// may be empty.
@ -28,6 +29,7 @@ class TextStreamInfo : public StreamInfo {
/// @param height of the text. This may be 0. /// @param height of the text. This may be 0.
/// @param language is the language of this stream. This may be empty. /// @param language is the language of this stream. This may be empty.
TextStreamInfo(int track_id, uint32_t time_scale, uint64_t duration, TextStreamInfo(int track_id, uint32_t time_scale, uint64_t duration,
Codec codec,
const std::string& codec_string, const std::string& codec_string,
const std::string& codec_config, uint16_t width, const std::string& codec_config, uint16_t width,
uint16_t height, const std::string& language); uint16_t height, const std::string& language);

View File

@ -247,9 +247,9 @@ TEST_F(PesPacketGeneratorTest, InitializeAudioNonAac) {
// Text is not supported yet. // Text is not supported yet.
TEST_F(PesPacketGeneratorTest, InitializeTextInfo) { TEST_F(PesPacketGeneratorTest, InitializeTextInfo) {
std::shared_ptr<TextStreamInfo> stream_info( std::shared_ptr<TextStreamInfo> stream_info(new TextStreamInfo(
new TextStreamInfo(kTrackId, kTimeScale, kDuration, kCodecString, kTrackId, kTimeScale, kDuration, kCodecText, kCodecString, std::string(),
std::string(), kWidth, kHeight, kLanguage)); kWidth, kHeight, kLanguage));
EXPECT_FALSE(generator_.Initialize(*stream_info)); EXPECT_FALSE(generator_.Initialize(*stream_info));
} }

View File

@ -8,53 +8,5 @@ namespace media {
Cue::Cue() : start_time(0), duration(0) {} Cue::Cue() : start_time(0), duration(0) {}
Cue::~Cue() {} Cue::~Cue() {}
// Mapping:
// comment --> side data (and side data only sample)
// settings --> side data
// start_time --> pts
std::shared_ptr<MediaSample> CueToMediaSample(const Cue& cue) {
const bool kKeyFrame = true;
if (!cue.comment.empty()) {
const std::string comment = base::JoinString(cue.comment, "\n");
return MediaSample::FromMetadata(
reinterpret_cast<const uint8_t*>(comment.data()), comment.size());
}
const std::string payload = base::JoinString(cue.payload, "\n");
std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
reinterpret_cast<const uint8_t*>(payload.data()), payload.size(),
reinterpret_cast<const uint8_t*>(cue.settings.data()),
cue.settings.size(), !kKeyFrame);
media_sample->set_config_id(cue.identifier);
media_sample->set_pts(cue.start_time);
media_sample->set_duration(cue.duration);
return media_sample;
}
// TODO(rkuroiwa): Cue gets converted to MediaSample in WebVttMediaParser and
// then back to Cue in the muxer. Consider making MediaSample a protobuf or make
// Cue a protobuf and (ab)use MediaSample::data() to store serialized Cue.
Cue MediaSampleToCue(const MediaSample& sample) {
Cue cue;
if (sample.data_size() == 0) {
std::string comment(sample.side_data(),
sample.side_data() + sample.side_data_size());
cue.comment.push_back(comment);
return cue;
}
std::string payload(sample.data(), sample.data() + sample.data_size());
cue.payload.push_back(payload);
cue.identifier.assign(sample.config_id());
cue.start_time = sample.pts();
cue.duration = sample.duration();
if (sample.side_data_size() != 0) {
cue.settings.assign(sample.side_data(),
sample.side_data() + sample.side_data_size());
}
return cue;
}
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -1,3 +1,6 @@
#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_CUE_H_
#define PACKAGER_MEDIA_FORMATS_WEBVTT_CUE_H_
#include <stdint.h> #include <stdint.h>
#include <memory> #include <memory>
@ -20,19 +23,13 @@ struct Cue {
uint64_t start_time; uint64_t start_time;
uint64_t duration; uint64_t duration;
std::string settings; std::string settings;
std::vector<std::string> payload;
std::vector<std::string> comment; // |payload| and |comment| may have trailing "\n" character.
std::string payload;
std::string comment;
}; };
/// Convert Cue to MediaSample.
/// @param cue data.
/// @return @a cue converted to a MediaSample.
std::shared_ptr<MediaSample> CueToMediaSample(const Cue& cue);
/// Convert MediaSample to Cue.
/// @param sample to be converted.
/// @return @a sample converted to Cue.
Cue MediaSampleToCue(const MediaSample& sample);
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka
#endif // PACKAGER_MEDIA_FORMATS_WEBVTT_CUE_H_

View File

@ -22,6 +22,8 @@ namespace media {
namespace { namespace {
const bool kFlush = true;
// There's only one track in a WebVTT file. // There's only one track in a WebVTT file.
const int kTrackId = 0; const int kTrackId = 0;
@ -186,7 +188,8 @@ bool ParseTimingAndSettingsLine(const std::string& line,
} // namespace } // namespace
WebVttMediaParser::WebVttMediaParser() : state_(kHeader) {} WebVttMediaParser::WebVttMediaParser()
: state_(kHeader), sample_converter_(new WebVttSampleConverter()) {}
WebVttMediaParser::~WebVttMediaParser() {} WebVttMediaParser::~WebVttMediaParser() {}
void WebVttMediaParser::Init(const InitCB& init_cb, void WebVttMediaParser::Init(const InitCB& init_cb,
@ -205,17 +208,20 @@ bool WebVttMediaParser::Flush() {
// If it was in the middle of the payload and the stream finished, then this // If it was in the middle of the payload and the stream finished, then this
// is an end of the payload. The rest of the data is part of the payload. // is an end of the payload. The rest of the data is part of the payload.
if (state_ == kCuePayload) { if (state_ == kCuePayload) {
current_cue_.payload.push_back(data_); current_cue_.payload += data_ + "\n";
} else { } else {
current_cue_.comment.push_back(data_); current_cue_.comment += data_ + "\n";
} }
data_.clear(); data_.clear();
} }
bool result = new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_)); if (!ProcessCurrentCue(kFlush)) {
current_cue_ = Cue(); state_ = kParseError;
return false;
}
state_ = kCueIdentifierOrTimingOrComment; state_ = kCueIdentifierOrTimingOrComment;
return result; return true;
} }
bool WebVttMediaParser::Parse(const uint8_t* buf, int size) { bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
@ -265,8 +271,11 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
// There is no one metadata to determine what the language is. Parts // There is no one metadata to determine what the language is. Parts
// of the text may be annotated as some specific language. // of the text may be annotated as some specific language.
const char kLanguage[] = ""; const char kLanguage[] = "";
const char kWebVttCodecString[] = "wvtt";
streams.emplace_back( streams.emplace_back(
new TextStreamInfo(kTrackId, kTimescale, kDuration, "wvtt", new TextStreamInfo(kTrackId, kTimescale, kDuration,
kCodecWebVtt, kWebVttCodecString,
base::JoinString(header_, "\n"), base::JoinString(header_, "\n"),
0, // Not necessary. 0, // Not necessary.
0, 0,
@ -291,7 +300,7 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
if (base::StartsWith(line, "NOTE", if (base::StartsWith(line, "NOTE",
base::CompareCase::INSENSITIVE_ASCII)) { base::CompareCase::INSENSITIVE_ASCII)) {
state_ = kComment; state_ = kComment;
current_cue_.comment.push_back(line); current_cue_.comment += line + "\n";
} else { } else {
// A cue can start from a cue identifier. // A cue can start from a cue identifier.
// https://w3c.github.io/webvtt/#webvtt-cue-identifier // https://w3c.github.io/webvtt/#webvtt-cue-identifier
@ -322,29 +331,27 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
case kCuePayload: { case kCuePayload: {
if (line.empty()) { if (line.empty()) {
state_ = kCueIdentifierOrTimingOrComment; state_ = kCueIdentifierOrTimingOrComment;
if (!new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_))) { if (!ProcessCurrentCue(!kFlush)) {
state_ = kParseError; state_ = kParseError;
return false; return false;
} }
current_cue_ = Cue();
break; break;
} }
current_cue_.payload.push_back(line); current_cue_.payload += line + "\n";
break; break;
} }
case kComment: { case kComment: {
if (line.empty()) { if (line.empty()) {
state_ = kCueIdentifierOrTimingOrComment; state_ = kCueIdentifierOrTimingOrComment;
if (!new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_))) { if (!ProcessCurrentCue(!kFlush)) {
state_ = kParseError; state_ = kParseError;
return false; return false;
} }
current_cue_ = Cue();
break; break;
} }
current_cue_.comment.push_back(line); current_cue_.comment += line + "\n";
break; break;
} }
case kParseError: case kParseError:
@ -356,5 +363,25 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
return true; return true;
} }
// Replaces the sample converter used by this parser with |converter|.
// Ownership of |converter| is transferred to the parser; the converter that
// was previously held in |sample_converter_| is released by the assignment.
// As the name indicates, this hook exists so tests can substitute their own
// converter.
void WebVttMediaParser::InjectWebVttSampleConvertForTesting(
std::unique_ptr<WebVttSampleConverter> converter) {
sample_converter_ = std::move(converter);
}
// Hands |current_cue_| to the sample converter and then resets it to a
// default-constructed Cue. If |flush| is true the converter is flushed as
// well. Afterwards every sample the converter reports as ready is popped and
// passed to |new_sample_cb_|.
// Returns false as soon as the callback rejects a sample, true otherwise.
bool WebVttMediaParser::ProcessCurrentCue(bool flush) {
  sample_converter_->PushCue(current_cue_);
  current_cue_ = Cue();

  if (flush) {
    sample_converter_->Flush();
  }

  // The ready count is re-queried on every iteration because each pop
  // changes it.
  while (sample_converter_->ReadySamplesSize() != 0) {
    const bool accepted =
        new_sample_cb_.Run(kTrackId, sample_converter_->PopSample());
    if (accepted) {
      continue;
    }
    LOG(ERROR) << "New sample callback failed.";
    return false;
  }
  return true;
}
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -8,12 +8,15 @@
#define MEDIA_FORMATS_WEBVTT_WEBVTT_MEDIA_PARSER_H_ #define MEDIA_FORMATS_WEBVTT_WEBVTT_MEDIA_PARSER_H_
#include <stdint.h> #include <stdint.h>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "packager/base/compiler_specific.h" #include "packager/base/compiler_specific.h"
#include "packager/media/base/media_parser.h" #include "packager/media/base/media_parser.h"
#include "packager/media/formats/webvtt/cue.h" #include "packager/media/formats/webvtt/cue.h"
#include "packager/media/formats/webvtt/webvtt_sample_converter.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
@ -34,6 +37,9 @@ class WebVttMediaParser : public MediaParser {
bool Parse(const uint8_t* buf, int size) override WARN_UNUSED_RESULT; bool Parse(const uint8_t* buf, int size) override WARN_UNUSED_RESULT;
/// @} /// @}
void InjectWebVttSampleConvertForTesting(
std::unique_ptr<WebVttSampleConverter> converter);
private: private:
enum WebVttReadingState { enum WebVttReadingState {
kHeader, kHeader,
@ -45,6 +51,11 @@ class WebVttMediaParser : public MediaParser {
kParseError, kParseError,
}; };
// Sends current cue to sample converter, and dispatches any ready samples to
// the callback.
// current_cue_ is always cleared.
bool ProcessCurrentCue(bool flush);
InitCB init_cb_; InitCB init_cb_;
NewSampleCB new_sample_cb_; NewSampleCB new_sample_cb_;
@ -62,6 +73,8 @@ class WebVttMediaParser : public MediaParser {
Cue current_cue_; Cue current_cue_;
std::unique_ptr<WebVttSampleConverter> sample_converter_;
DISALLOW_COPY_AND_ASSIGN(WebVttMediaParser); DISALLOW_COPY_AND_ASSIGN(WebVttMediaParser);
}; };

View File

@ -8,13 +8,30 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "packager/base/bind.h" #include "packager/base/bind.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/media/base/media_sample.h" #include "packager/media/base/media_sample.h"
#include "packager/media/base/stream_info.h" #include "packager/media/base/stream_info.h"
#include "packager/media/formats/mp4/box_definitions.h"
#include "packager/media/formats/webvtt/webvtt_media_parser.h" #include "packager/media/formats/webvtt/webvtt_media_parser.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
using mp4::VTTCueBox;
namespace {
// gmock matcher for a pointer-like sample argument (it is accessed with
// |arg->|). The match succeeds when all of the following hold:
//   - arg->pts() equals |start_time|,
//   - arg->pts() + arg->duration() equals |end_time|,
//   - arg->data_size() equals |data|.size() and the bytes compare equal.
// On evaluation the actual start time, end time and hex-encoded data are
// written to the result listener to make mismatches easier to diagnose.
// Data is a vector and must not be empty (the comparison takes &data[0]).
MATCHER_P3(MatchesStartTimeEndTimeAndData, start_time, end_time, data, "") {
*result_listener << "which is (" << arg->pts() << ", "
<< (arg->pts() + arg->duration()) << ", "
<< base::HexEncode(arg->data(), arg->data_size()) << ")";
return arg->pts() == start_time &&
(arg->pts() + arg->duration() == end_time) &&
arg->data_size() == data.size() &&
(memcmp(&data[0], arg->data(), arg->data_size()) == 0);
}
} // namespace
typedef testing::MockFunction<void( typedef testing::MockFunction<void(
const std::vector<std::shared_ptr<StreamInfo>>& stream_info)> const std::vector<std::shared_ptr<StreamInfo>>& stream_info)>
MockInitCallback; MockInitCallback;
@ -22,15 +39,13 @@ typedef testing::MockFunction<
bool(uint32_t track_id, const std::shared_ptr<MediaSample>& media_sample)> bool(uint32_t track_id, const std::shared_ptr<MediaSample>& media_sample)>
MockNewSampleCallback; MockNewSampleCallback;
using testing::_; using testing::AtLeast;
using testing::InSequence; using testing::InSequence;
using testing::Return; using testing::Return;
using testing::_;
class WebVttMediaParserTest : public ::testing::Test { class WebVttMediaParserTest : public ::testing::Test {
public: public:
WebVttMediaParserTest() {}
~WebVttMediaParserTest() override {}
void InitializeParser() { void InitializeParser() {
parser_.Init( parser_.Init(
base::Bind(&MockInitCallback::Call, base::Unretained(&init_callback_)), base::Bind(&MockInitCallback::Call, base::Unretained(&init_callback_)),
@ -51,13 +66,21 @@ TEST_F(WebVttMediaParserTest, Init) {
TEST_F(WebVttMediaParserTest, ParseOneCue) { TEST_F(WebVttMediaParserTest, ParseOneCue) {
EXPECT_CALL(init_callback_, Call(_)); EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true));
VTTCueBox cue_box;
cue_box.cue_payload.cue_text = "subtitle";
std::vector<uint8_t> expected;
AppendBoxToVector(&cue_box, &expected);
EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeEndTimeAndData(60000, 3600000, expected)))
.WillOnce(Return(true));
const char kWebVtt[] = const char kWebVtt[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"subtitle"; "subtitle\n";
InitializeParser(); InitializeParser();
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt), EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt),
arraysize(kWebVtt) - 1)); arraysize(kWebVtt) - 1));
@ -82,20 +105,63 @@ TEST_F(WebVttMediaParserTest, DifferentLineBreaks) {
EXPECT_TRUE(parser_.Flush()); EXPECT_TRUE(parser_.Flush());
} }
TEST_F(WebVttMediaParserTest, ParseMultpleCues) { // Verify that a typical case with multiple cues works.
TEST_F(WebVttMediaParserTest, ParseMultipleCues) {
EXPECT_CALL(init_callback_, Call(_)); EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, Call(_, _))
.Times(2)
.WillRepeatedly(Return(true)); VTTCueBox first_cue_box;
first_cue_box.cue_payload.cue_text = "subtitle";
VTTCueBox second_cue_data;
second_cue_data.cue_payload.cue_text = "more subtitle";
VTTCueBox third_cue_data;
third_cue_data.cue_payload.cue_text = "more text";
std::vector<uint8_t> expected;
AppendBoxToVector(&first_cue_box, &expected);
EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeEndTimeAndData(1000, 2321, expected)))
.WillOnce(Return(true));
expected.clear();
AppendBoxToVector(&first_cue_box, &expected);
AppendBoxToVector(&second_cue_data, &expected);
EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeEndTimeAndData(2321, 5200, expected)))
.WillOnce(Return(true));
expected.clear();
AppendBoxToVector(&second_cue_data, &expected);
EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeEndTimeAndData(5200, 5800, expected)))
.WillOnce(Return(true));
expected.clear();
AppendBoxToVector(&second_cue_data, &expected);
AppendBoxToVector(&third_cue_data, &expected);
EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeEndTimeAndData(5800, 7000, expected)))
.WillOnce(Return(true));
expected.clear();
AppendBoxToVector(&third_cue_data, &expected);
EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeEndTimeAndData(7000, 8000, expected)))
.WillOnce(Return(true));
const char kWebVtt[] = const char kWebVtt[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:01:00.000 --> 01:00:00.000\n" "00:00:01.000 --> 00:00:05.200\n"
"subtitle\n" "subtitle\n"
"\n" "\n"
"02:01:00.000 --> 02:02:00.000\n" "00:00:02.321 --> 00:00:07.000\n"
"more subtitle"; "more subtitle\n"
"\n"
"00:00:05.800 --> 00:00:08.000\n"
"more text\n" ;
InitializeParser(); InitializeParser();
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt), EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt),
arraysize(kWebVtt) - 1)); arraysize(kWebVtt) - 1));
@ -112,9 +178,8 @@ MATCHER_P2(MatchesStartTimeAndDuration, start_time, duration, "") {
TEST_F(WebVttMediaParserTest, VerifyTimingParsing) { TEST_F(WebVttMediaParserTest, VerifyTimingParsing) {
EXPECT_CALL(init_callback_, Call(_)); EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, EXPECT_CALL(new_sample_callback_,
Call(_, MatchesStartTimeAndDuration(61004, 204088))) Call(_, MatchesStartTimeAndDuration(61004u, 204088u)))
.WillOnce(Return(true)); .WillOnce(Return(true));
const char kWebVtt[] = const char kWebVtt[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
@ -159,48 +224,15 @@ TEST_F(WebVttMediaParserTest, SpacesInTimestamp) {
arraysize(kSpacesInTimestamp) - 1)); arraysize(kSpacesInTimestamp) - 1));
} }
MATCHER_P(MatchesPayload, data, "") { MATCHER_P(MatchesPayload, payload, "") {
std::vector<uint8_t> arg_data(arg->data(), arg->data() + arg->data_size()); return arg.payload.front() == std::string(payload);
return arg_data == data;
}
TEST_F(WebVttMediaParserTest, VerifyCuePayload) {
const char kExpectedPayload1[] = "subtitle";
const char kExpectedPayload2[] = "hello";
std::vector<uint8_t> expected_payload(
kExpectedPayload1, kExpectedPayload1 + arraysize(kExpectedPayload1) - 1);
InSequence s;
EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, Call(_, MatchesPayload(expected_payload)))
.WillOnce(Return(true));
expected_payload.assign(kExpectedPayload2,
kExpectedPayload2 + arraysize(kExpectedPayload2) - 1);
EXPECT_CALL(new_sample_callback_, Call(_, MatchesPayload(expected_payload)))
.WillOnce(Return(true));
const char kWebVtt[] =
"WEBVTT\n"
"\n"
"00:01:01.004 --> 00:01:22.088\n"
"subtitle\n"
"\n"
"02:06:00.000 --> 02:30:02.006\n"
"hello";
InitializeParser();
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt),
arraysize(kWebVtt) - 1));
EXPECT_TRUE(parser_.Flush());
} }
// Verify that a sample can be created from multiple calls to Parse(), i.e. one // Verify that a sample can be created from multiple calls to Parse(), i.e. one
// Parse() is not a full sample. // Parse() is not a full sample.
TEST_F(WebVttMediaParserTest, PartialParse) { TEST_F(WebVttMediaParserTest, PartialParse) {
EXPECT_CALL(init_callback_, Call(_)); EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true)); EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0);
const char kWebVtt[] = const char kWebVtt[] =
"WEBVTT\n" "WEBVTT\n"
@ -210,7 +242,8 @@ TEST_F(WebVttMediaParserTest, PartialParse) {
InitializeParser(); InitializeParser();
// Pass in the first 8 bytes, i.e. right before the first cue. // Pass in the first 8 bytes, i.e. right before the first cue.
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt), 8)); EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt), 8));
// Pass in the rest of the cue.
EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true));
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt) + 8, EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt) + 8,
arraysize(kWebVtt) - 1 - 8)); arraysize(kWebVtt) - 1 - 8));
@ -221,6 +254,7 @@ TEST_F(WebVttMediaParserTest, PartialParse) {
TEST_F(WebVttMediaParserTest, BadMetadataHeader) { TEST_F(WebVttMediaParserTest, BadMetadataHeader) {
EXPECT_CALL(init_callback_, Call(_)).Times(0); EXPECT_CALL(init_callback_, Call(_)).Times(0);
EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0); EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0);
const char kBadWebVtt[] = const char kBadWebVtt[] =
"WEBVTT\n" "WEBVTT\n"
"00:01:01.004 --> 00:04:25.092\n"; "00:01:01.004 --> 00:04:25.092\n";
@ -230,12 +264,8 @@ TEST_F(WebVttMediaParserTest, BadMetadataHeader) {
EXPECT_TRUE(parser_.Flush()); EXPECT_TRUE(parser_.Flush());
} }
MATCHER_P(MatchesComment, comment, "") { // TODO(rkuroiwa): WebVttSampleConverter doesn't handle comments yet. Once it's
std::vector<uint8_t> arg_comment(arg->side_data(), // implemented, this should verify that comment is in the sample.
arg->side_data() + arg->side_data_size());
return arg_comment == comment;
}
// Verify that comment is parsed. // Verify that comment is parsed.
TEST_F(WebVttMediaParserTest, Comment) { TEST_F(WebVttMediaParserTest, Comment) {
const char kExpectedComment[] = "NOTE This is a comment"; const char kExpectedComment[] = "NOTE This is a comment";
@ -243,8 +273,6 @@ TEST_F(WebVttMediaParserTest, Comment) {
kExpectedComment, kExpectedComment + arraysize(kExpectedComment) - 1); kExpectedComment, kExpectedComment + arraysize(kExpectedComment) - 1);
EXPECT_CALL(init_callback_, Call(_)); EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, Call(_, MatchesComment(expected_comment)))
.WillOnce(Return(true));
const char kWebVtt[] = const char kWebVtt[] =
"WEBVTT\n" "WEBVTT\n"
@ -260,7 +288,6 @@ TEST_F(WebVttMediaParserTest, Comment) {
// Verify that comment with --> is rejected. // Verify that comment with --> is rejected.
TEST_F(WebVttMediaParserTest, BadComment) { TEST_F(WebVttMediaParserTest, BadComment) {
EXPECT_CALL(init_callback_, Call(_)); EXPECT_CALL(init_callback_, Call(_));
EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0);
const char kWebVtt[] = const char kWebVtt[] =
"WEBVTT\n" "WEBVTT\n"

View File

@ -22,7 +22,7 @@ namespace media {
namespace { namespace {
std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time, std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time,
uint64_t end_time) { uint64_t end_time) {
DCHECK_GT(end_time, start_time); DCHECK_GT(end_time, start_time);
mp4::VTTEmptyCueBox empty_cue_box; mp4::VTTEmptyCueBox empty_cue_box;
@ -36,6 +36,15 @@ std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time,
return empty_cue_sample; return empty_cue_sample;
} }
// Copies |input| into |output| with every trailing '\n' character removed.
// Newlines that are followed by other characters are preserved.
// @param input is the text to strip.
// @param output receives the stripped text; must not be null. It may refer
//        to the same string as |input|.
// An empty |input|, or one that consists solely of '\n' characters, yields an
// empty |output|.
void StripTrailingNewlines(const std::string& input, std::string* output) {
  const std::string::size_type last_kept = input.find_last_not_of('\n');
  // npos means there is no non-newline character at all, so nothing is kept.
  const std::string::size_type kept_length =
      (last_kept == std::string::npos) ? 0 : last_kept + 1;
  // substr() builds a temporary first, which keeps this safe when |output|
  // aliases |input|.
  *output = input.substr(0, kept_length);
}
mp4::VTTCueBox CueBoxFromCue(const Cue& cue) { mp4::VTTCueBox CueBoxFromCue(const Cue& cue) {
mp4::VTTCueBox cue_box; mp4::VTTCueBox cue_box;
if (!cue.identifier.empty()) { if (!cue.identifier.empty()) {
@ -46,7 +55,7 @@ mp4::VTTCueBox CueBoxFromCue(const Cue& cue) {
cue_box.cue_settings.settings = cue.settings; cue_box.cue_settings.settings = cue.settings;
} }
cue_box.cue_payload.cue_text = cue.payload.front(); StripTrailingNewlines(cue.payload, &cue_box.cue_payload.cue_text);
return cue_box; return cue_box;
} }
@ -127,19 +136,18 @@ WebVttSampleConverter::~WebVttSampleConverter() {}
// Note that this |sample| is either a cue or a comment. It does not have any // Note that this |sample| is either a cue or a comment. It does not have any
// info on whether the next cue is overlapping or not. // info on whether the next cue is overlapping or not.
void WebVttSampleConverter::PushSample(std::shared_ptr<MediaSample> sample) { void WebVttSampleConverter::PushCue(const Cue& cue) {
if (sample->data_size() == 0u) { if (!cue.comment.empty()) {
// A comment. Put it in the buffer and skip. // A comment. Put it in the buffer and skip.
mp4::VTTAdditionalTextBox comment; mp4::VTTAdditionalTextBox comment;
comment.cue_additional_text.assign( StripTrailingNewlines(cue.comment, &comment.cue_additional_text);
sample->side_data(), sample->side_data() + sample->side_data_size());
additional_texts_.push_back(comment); additional_texts_.push_back(comment);
// TODO(rkuroiwa): Handle comments as samples. // TODO(rkuroiwa): Handle comments as samples.
return; return;
} }
cues_.push_back(MediaSampleToCue(*sample)); cues_.push_back(cue);
if (cues_.size() == 1) { if (cues_.size() == 1) {
// Cannot make a decision with just one sample. Cache it and wait for // Cannot make a decision with just one sample. Cache it and wait for
// another one. // another one.

View File

@ -51,28 +51,29 @@ void AppendBoxToVector(mp4::Box* box, std::vector<uint8_t>* output_vector);
///\n ///\n
/// This class buffers the samples that are passed to AddSample() and creates /// This class buffers the samples that are passed to AddSample() and creates
/// more samples as necessary. /// more samples as necessary.
/// Methods are virtual only for mocking, not intended for inheritance.
class WebVttSampleConverter { class WebVttSampleConverter {
public: public:
WebVttSampleConverter(); WebVttSampleConverter();
~WebVttSampleConverter(); virtual ~WebVttSampleConverter();
/// Add a sample. /// Add a webvtt cue.
/// @param sample is the sample to be added. It should contain one VTT cue. /// @param cue is a webvtt cue.
void PushSample(std::shared_ptr<MediaSample> sample); virtual void PushCue(const Cue& cue);
/// Process all the buffered samples. /// Process all the buffered samples.
/// This finalizes the object and further calls to PushSample() may result in /// This finalizes the object and further calls to PushSample() may result in
/// an undefined behavior. /// an undefined behavior.
void Flush(); virtual void Flush();
/// @return The number of samples that are processed and ready to be popped. /// @return The number of samples that are processed and ready to be popped.
size_t ReadySamplesSize(); virtual size_t ReadySamplesSize();
/// Returns a MediaSample that is non-overlapping with the previous samples /// Returns a MediaSample that is non-overlapping with the previous samples
/// that it has output. The data in the sample is one or more ISO-BMFF boxes /// that it has output. The data in the sample is one or more ISO-BMFF boxes
/// for the duration of the sample. /// for the duration of the sample.
/// @return The first sample that is ready to be processed. /// @return The first sample that is ready to be processed.
std::shared_ptr<MediaSample> PopSample(); virtual std::shared_ptr<MediaSample> PopSample();
private: private:
// Handle |cues_| except the last item, and create samples from them. // Handle |cues_| except the last item, and create samples from them.

View File

@ -82,23 +82,18 @@ TEST_F(WebVttFragmenterTest, AppendBoxToVector) {
// |-- cue2 --| // |-- cue2 --|
TEST_F(WebVttFragmenterTest, NoOverlapContiguous) { TEST_F(WebVttFragmenterTest, NoOverlapContiguous) {
std::shared_ptr<MediaSample> sample1 = Cue cue1;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue1.payload = kCueMessage1;
arraysize(kCueMessage1) - 1, true); cue1.start_time = 0;
sample1->set_pts(0); cue1.duration = 2000;
sample1->set_dts(0); webvtt_sample_converter_.PushCue(cue1);
sample1->set_duration(2000);
webvtt_sample_converter_.PushSample(sample1); Cue cue2;
cue2.payload = kCueMessage2;
cue2.start_time = 2000;
cue2.duration = 1000;
std::shared_ptr<MediaSample> sample2 = webvtt_sample_converter_.PushCue(cue2);
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
arraysize(kCueMessage2) - 1, true);
sample2->set_pts(2000);
sample2->set_dts(2000);
sample2->set_duration(1000);
webvtt_sample_converter_.PushSample(sample2);
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize()); EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize());
@ -119,23 +114,18 @@ TEST_F(WebVttFragmenterTest, NoOverlapContiguous) {
// Verify that if there is a gap, then a sample is created for the gap. // Verify that if there is a gap, then a sample is created for the gap.
TEST_F(WebVttFragmenterTest, Gap) { TEST_F(WebVttFragmenterTest, Gap) {
std::shared_ptr<MediaSample> sample1 = Cue cue1;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue1.payload = kCueMessage1;
arraysize(kCueMessage1) - 1, true); cue1.start_time = 0;
sample1->set_pts(0); cue1.duration = 1000;
sample1->set_dts(0); webvtt_sample_converter_.PushCue(cue1);
sample1->set_duration(1000);
webvtt_sample_converter_.PushSample(sample1); Cue cue2;
cue2.payload = kCueMessage2;
cue2.start_time = 2000;
cue2.duration = 1000;
webvtt_sample_converter_.PushCue(cue2);
std::shared_ptr<MediaSample> sample2 =
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
arraysize(kCueMessage2) - 1, true);
sample2->set_pts(2000);
sample2->set_dts(2000);
sample2->set_duration(1000);
webvtt_sample_converter_.PushSample(sample2);
EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize()); EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize());
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
@ -165,30 +155,23 @@ TEST_F(WebVttFragmenterTest, Gap) {
// The previous cue always ends before the current cue ends. // The previous cue always ends before the current cue ends.
// Cues are overlapping, no samples should be created in PushSample(). // Cues are overlapping, no samples should be created in PushSample().
TEST_F(WebVttFragmenterTest, OverlappingCuesSequential) { TEST_F(WebVttFragmenterTest, OverlappingCuesSequential) {
std::shared_ptr<MediaSample> sample1 = Cue cue1;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue1.payload = kCueMessage1;
arraysize(kCueMessage1) - 1, true); cue1.start_time = 0;
sample1->set_pts(0); cue1.duration = 2000;
sample1->set_dts(0); webvtt_sample_converter_.PushCue(cue1);
sample1->set_duration(2000);
webvtt_sample_converter_.PushSample(sample1); Cue cue2;
cue2.payload = kCueMessage2;
cue2.start_time = 1000;
cue2.duration = 2000;
webvtt_sample_converter_.PushCue(cue2);
std::shared_ptr<MediaSample> sample2 = Cue cue3;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2), cue3.payload = kCueMessage3;
arraysize(kCueMessage2) - 1, true); cue3.start_time = 1500;
sample2->set_pts(1000); cue3.duration = 4000;
sample2->set_dts(1000); webvtt_sample_converter_.PushCue(cue3);
sample2->set_duration(2000);
webvtt_sample_converter_.PushSample(sample2);
std::shared_ptr<MediaSample> sample3 =
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage3),
arraysize(kCueMessage3) - 1, true);
sample3->set_pts(1500);
sample3->set_dts(1500);
sample3->set_duration(4000);
webvtt_sample_converter_.PushSample(sample3);
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
// There should be 5 samples for [0,1000], [1000,1500], [1500,2000], // There should be 5 samples for [0,1000], [1000,1500], [1500,2000],
@ -232,38 +215,29 @@ TEST_F(WebVttFragmenterTest, OverlappingCuesSequential) {
} }
TEST_F(WebVttFragmenterTest, OverlappingLongCue) { TEST_F(WebVttFragmenterTest, OverlappingLongCue) {
std::shared_ptr<MediaSample> sample1 = Cue cue1;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue1.payload = kCueMessage1;
arraysize(kCueMessage1) - 1, true); cue1.start_time = 0;
sample1->set_pts(0); cue1.duration = 10000;
sample1->set_dts(0); webvtt_sample_converter_.PushCue(cue1);
sample1->set_duration(10000);
webvtt_sample_converter_.PushSample(sample1); Cue cue2;
cue2.payload = kCueMessage2;
cue2.start_time = 1000;
cue2.duration = 5000;
webvtt_sample_converter_.PushCue(cue2);
std::shared_ptr<MediaSample> sample2 = Cue cue3;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2), cue3.payload = kCueMessage3;
arraysize(kCueMessage2) - 1, true); cue3.start_time = 2000;
sample2->set_pts(1000); cue3.duration = 1000;
sample2->set_dts(1000); webvtt_sample_converter_.PushCue(cue3);
sample2->set_duration(5000);
webvtt_sample_converter_.PushSample(sample2);
std::shared_ptr<MediaSample> sample3 = Cue cue4;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage3), cue4.payload = kCueMessage4;
arraysize(kCueMessage3) - 1, true); cue4.start_time = 8000;
sample3->set_pts(2000); cue4.duration = 1000;
sample3->set_dts(2000); webvtt_sample_converter_.PushCue(cue4);
sample3->set_duration(1000);
webvtt_sample_converter_.PushSample(sample3);
std::shared_ptr<MediaSample> sample4 =
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage4),
arraysize(kCueMessage4) - 1, true);
sample4->set_pts(8000);
sample4->set_dts(8000);
sample4->set_duration(1000);
webvtt_sample_converter_.PushSample(sample4);
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
// There should be 7 samples for [0,1000], [1000,2000], [2000,3000], // There should be 7 samples for [0,1000], [1000,2000], [2000,3000],
@ -320,13 +294,11 @@ TEST_F(WebVttFragmenterTest, OverlappingLongCue) {
} }
TEST_F(WebVttFragmenterTest, GapAtBeginning) { TEST_F(WebVttFragmenterTest, GapAtBeginning) {
std::shared_ptr<MediaSample> sample1 = Cue cue;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue.payload = kCueMessage1;
arraysize(kCueMessage1) - 1, true); cue.start_time = 1200;
sample1->set_pts(1200); cue.duration = 2000;
sample1->set_dts(1200); webvtt_sample_converter_.PushCue(cue);
sample1->set_duration(2000);
webvtt_sample_converter_.PushSample(sample1);
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
EXPECT_EQ(1u, webvtt_sample_converter_.ReadySamplesSize()); EXPECT_EQ(1u, webvtt_sample_converter_.ReadySamplesSize());
@ -340,24 +312,18 @@ TEST_F(WebVttFragmenterTest, GapAtBeginning) {
} }
TEST_F(WebVttFragmenterTest, SameStartTime) { TEST_F(WebVttFragmenterTest, SameStartTime) {
// TODO(rkuroiwa): This should be std::shared_ptr if this is applied on HEAD. Cue cue1;
std::shared_ptr<MediaSample> sample1 = cue1.payload = kCueMessage1;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue1.start_time = 0;
arraysize(kCueMessage1) - 1, true); cue1.duration = 2000;
sample1->set_pts(0); webvtt_sample_converter_.PushCue(cue1);
sample1->set_dts(0);
sample1->set_duration(2000);
webvtt_sample_converter_.PushSample(sample1); Cue cue2;
cue2.payload = kCueMessage2;
cue2.start_time = 0;
cue2.duration = 1500;
webvtt_sample_converter_.PushCue(cue2);
std::shared_ptr<MediaSample> sample2 =
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
arraysize(kCueMessage2) - 1, true);
sample2->set_pts(0);
sample2->set_dts(0);
sample2->set_duration(1500);
webvtt_sample_converter_.PushSample(sample2);
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize()); EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize());
@ -380,39 +346,29 @@ TEST_F(WebVttFragmenterTest, SameStartTime) {
// This test is a combination of the test cases above. // This test is a combination of the test cases above.
TEST_F(WebVttFragmenterTest, MoreCases) { TEST_F(WebVttFragmenterTest, MoreCases) {
std::shared_ptr<MediaSample> sample1 = Cue cue1;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1), cue1.payload = kCueMessage1;
arraysize(kCueMessage1) - 1, true); cue1.start_time = 0;
sample1->set_pts(0); cue1.duration = 2000;
sample1->set_dts(0); webvtt_sample_converter_.PushCue(cue1);
sample1->set_duration(2000);
webvtt_sample_converter_.PushSample(sample1); Cue cue2;
cue2.payload = kCueMessage2;
cue2.start_time = 100;
cue2.duration = 100;
webvtt_sample_converter_.PushCue(cue2);
std::shared_ptr<MediaSample> sample2 = Cue cue3;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2), cue3.payload = kCueMessage3;
arraysize(kCueMessage2) - 1, true); cue3.start_time = 1500;
sample2->set_pts(100); cue3.duration = 1000;
sample2->set_dts(100); webvtt_sample_converter_.PushCue(cue3);
sample2->set_duration(100);
webvtt_sample_converter_.PushSample(sample2); Cue cue4;
cue4.payload = kCueMessage4;
std::shared_ptr<MediaSample> sample3 = cue4.start_time = 1500;
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage3), cue4.duration = 800;
arraysize(kCueMessage3) - 1, true); webvtt_sample_converter_.PushCue(cue4);
sample3->set_pts(1500);
sample3->set_dts(1500);
sample3->set_duration(1000);
webvtt_sample_converter_.PushSample(sample3);
std::shared_ptr<MediaSample> sample4 =
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage4),
arraysize(kCueMessage4) - 1, true);
sample4->set_pts(1500);
sample4->set_dts(1500);
sample4->set_duration(800);
webvtt_sample_converter_.PushSample(sample4);
webvtt_sample_converter_.Flush(); webvtt_sample_converter_.Flush();
EXPECT_EQ(6u, webvtt_sample_converter_.ReadySamplesSize()); EXPECT_EQ(6u, webvtt_sample_converter_.ReadySamplesSize());