Make WebVttMediaParser use WebVttSampleConverter
- WebVttMediaParser uses WebVttSampleConverter to generate non-overlapping media samples. - The media samples contain ISO BMFF boxes. - Add kCodecWebVtt to signal that the media is webvtt and the samples will be in ISO BMFF boxes. Change-Id: I639902cdba7b04af75428bc20622e26b8203cfb2
This commit is contained in:
parent
924d6d4693
commit
a3ce51785a
|
@ -51,6 +51,7 @@ enum Codec {
|
|||
kCodecAudioMaxPlusOne,
|
||||
|
||||
kCodecText = 300,
|
||||
kCodecWebVtt = kCodecText,
|
||||
};
|
||||
|
||||
/// Abstract class holds stream information.
|
||||
|
|
|
@ -11,10 +11,11 @@ namespace media {
|
|||
|
||||
TextStreamInfo::TextStreamInfo(int track_id, uint32_t time_scale,
|
||||
uint64_t duration,
|
||||
Codec codec,
|
||||
const std::string& codec_string,
|
||||
const std::string& codec_config, uint16_t width,
|
||||
uint16_t height, const std::string& language)
|
||||
: StreamInfo(kStreamText, track_id, time_scale, duration, kCodecText,
|
||||
: StreamInfo(kStreamText, track_id, time_scale, duration, codec,
|
||||
codec_string,
|
||||
reinterpret_cast<const uint8_t*>(codec_config.data()),
|
||||
codec_config.size(), language, false),
|
||||
|
|
|
@ -20,7 +20,8 @@ class TextStreamInfo : public StreamInfo {
|
|||
/// @param track_id is the track ID of this stream.
|
||||
/// @param time_scale is the time scale of this stream.
|
||||
/// @param duration is the duration of this stream.
|
||||
/// @param codec_string is the codec.
|
||||
/// @param codec is the media codec.
|
||||
/// @param codec_string is the codec in string format.
|
||||
/// @param codec_config is configuration for this text stream. This could be
|
||||
/// the metadata that applies to all the samples of this stream. This
|
||||
/// may be empty.
|
||||
|
@ -28,6 +29,7 @@ class TextStreamInfo : public StreamInfo {
|
|||
/// @param height of the text. This may be 0.
|
||||
/// @param language is the language of this stream. This may be empty.
|
||||
TextStreamInfo(int track_id, uint32_t time_scale, uint64_t duration,
|
||||
Codec codec,
|
||||
const std::string& codec_string,
|
||||
const std::string& codec_config, uint16_t width,
|
||||
uint16_t height, const std::string& language);
|
||||
|
|
|
@ -247,9 +247,9 @@ TEST_F(PesPacketGeneratorTest, InitializeAudioNonAac) {
|
|||
|
||||
// Text is not supported yet.
|
||||
TEST_F(PesPacketGeneratorTest, InitializeTextInfo) {
|
||||
std::shared_ptr<TextStreamInfo> stream_info(
|
||||
new TextStreamInfo(kTrackId, kTimeScale, kDuration, kCodecString,
|
||||
std::string(), kWidth, kHeight, kLanguage));
|
||||
std::shared_ptr<TextStreamInfo> stream_info(new TextStreamInfo(
|
||||
kTrackId, kTimeScale, kDuration, kCodecText, kCodecString, std::string(),
|
||||
kWidth, kHeight, kLanguage));
|
||||
EXPECT_FALSE(generator_.Initialize(*stream_info));
|
||||
}
|
||||
|
||||
|
|
|
@ -8,53 +8,5 @@ namespace media {
|
|||
Cue::Cue() : start_time(0), duration(0) {}
|
||||
Cue::~Cue() {}
|
||||
|
||||
// Mapping:
|
||||
// comment --> side data (and side data only sample)
|
||||
// settings --> side data
|
||||
// start_time --> pts
|
||||
std::shared_ptr<MediaSample> CueToMediaSample(const Cue& cue) {
|
||||
const bool kKeyFrame = true;
|
||||
if (!cue.comment.empty()) {
|
||||
const std::string comment = base::JoinString(cue.comment, "\n");
|
||||
return MediaSample::FromMetadata(
|
||||
reinterpret_cast<const uint8_t*>(comment.data()), comment.size());
|
||||
}
|
||||
|
||||
const std::string payload = base::JoinString(cue.payload, "\n");
|
||||
std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
|
||||
reinterpret_cast<const uint8_t*>(payload.data()), payload.size(),
|
||||
reinterpret_cast<const uint8_t*>(cue.settings.data()),
|
||||
cue.settings.size(), !kKeyFrame);
|
||||
|
||||
media_sample->set_config_id(cue.identifier);
|
||||
media_sample->set_pts(cue.start_time);
|
||||
media_sample->set_duration(cue.duration);
|
||||
return media_sample;
|
||||
}
|
||||
|
||||
// TODO(rkuroiwa): Cue gets converted to MediaSample in WebVttMediaParser and
|
||||
// then back to Cue in the muxer. Consider making MediaSample a protobuf or make
|
||||
// Cue a protobuf and (ab)use MediaSample::data() to store serialized Cue.
|
||||
Cue MediaSampleToCue(const MediaSample& sample) {
|
||||
Cue cue;
|
||||
if (sample.data_size() == 0) {
|
||||
std::string comment(sample.side_data(),
|
||||
sample.side_data() + sample.side_data_size());
|
||||
cue.comment.push_back(comment);
|
||||
return cue;
|
||||
}
|
||||
|
||||
std::string payload(sample.data(), sample.data() + sample.data_size());
|
||||
cue.payload.push_back(payload);
|
||||
cue.identifier.assign(sample.config_id());
|
||||
cue.start_time = sample.pts();
|
||||
cue.duration = sample.duration();
|
||||
if (sample.side_data_size() != 0) {
|
||||
cue.settings.assign(sample.side_data(),
|
||||
sample.side_data() + sample.side_data_size());
|
||||
}
|
||||
return cue;
|
||||
}
|
||||
|
||||
} // namespace media
|
||||
} // namespace shaka
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_CUE_H_
|
||||
#define PACKAGER_MEDIA_FORMATS_WEBVTT_CUE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
|
@ -20,19 +23,13 @@ struct Cue {
|
|||
uint64_t start_time;
|
||||
uint64_t duration;
|
||||
std::string settings;
|
||||
std::vector<std::string> payload;
|
||||
std::vector<std::string> comment;
|
||||
|
||||
// |payload| and |comment| may have trailing "\n" character.
|
||||
std::string payload;
|
||||
std::string comment;
|
||||
};
|
||||
|
||||
/// Convert Cue to MediaSample.
|
||||
/// @param cue data.
|
||||
/// @return @a cue converted to a MediaSample.
|
||||
std::shared_ptr<MediaSample> CueToMediaSample(const Cue& cue);
|
||||
|
||||
/// Convert MediaSample to Cue.
|
||||
/// @param sample to be converted.
|
||||
/// @return @a sample converted to Cue.
|
||||
Cue MediaSampleToCue(const MediaSample& sample);
|
||||
|
||||
} // namespace media
|
||||
} // namespace shaka
|
||||
|
||||
#endif // PACKAGER_MEDIA_FORMATS_WEBVTT_CUE_H_
|
||||
|
|
|
@ -22,6 +22,8 @@ namespace media {
|
|||
|
||||
namespace {
|
||||
|
||||
const bool kFlush = true;
|
||||
|
||||
// There's only one track in a WebVTT file.
|
||||
const int kTrackId = 0;
|
||||
|
||||
|
@ -186,7 +188,8 @@ bool ParseTimingAndSettingsLine(const std::string& line,
|
|||
|
||||
} // namespace
|
||||
|
||||
WebVttMediaParser::WebVttMediaParser() : state_(kHeader) {}
|
||||
WebVttMediaParser::WebVttMediaParser()
|
||||
: state_(kHeader), sample_converter_(new WebVttSampleConverter()) {}
|
||||
WebVttMediaParser::~WebVttMediaParser() {}
|
||||
|
||||
void WebVttMediaParser::Init(const InitCB& init_cb,
|
||||
|
@ -205,17 +208,20 @@ bool WebVttMediaParser::Flush() {
|
|||
// If it was in the middle of the payload and the stream finished, then this
|
||||
// is an end of the payload. The rest of the data is part of the payload.
|
||||
if (state_ == kCuePayload) {
|
||||
current_cue_.payload.push_back(data_);
|
||||
current_cue_.payload += data_ + "\n";
|
||||
} else {
|
||||
current_cue_.comment.push_back(data_);
|
||||
current_cue_.comment += data_ + "\n";
|
||||
}
|
||||
data_.clear();
|
||||
}
|
||||
|
||||
bool result = new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_));
|
||||
current_cue_ = Cue();
|
||||
if (!ProcessCurrentCue(kFlush)) {
|
||||
state_ = kParseError;
|
||||
return false;
|
||||
}
|
||||
|
||||
state_ = kCueIdentifierOrTimingOrComment;
|
||||
return result;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
|
||||
|
@ -265,8 +271,11 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
|
|||
// There is no one metadata to determine what the language is. Parts
|
||||
// of the text may be annotated as some specific language.
|
||||
const char kLanguage[] = "";
|
||||
|
||||
const char kWebVttCodecString[] = "wvtt";
|
||||
streams.emplace_back(
|
||||
new TextStreamInfo(kTrackId, kTimescale, kDuration, "wvtt",
|
||||
new TextStreamInfo(kTrackId, kTimescale, kDuration,
|
||||
kCodecWebVtt, kWebVttCodecString,
|
||||
base::JoinString(header_, "\n"),
|
||||
0, // Not necessary.
|
||||
0,
|
||||
|
@ -291,7 +300,7 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
|
|||
if (base::StartsWith(line, "NOTE",
|
||||
base::CompareCase::INSENSITIVE_ASCII)) {
|
||||
state_ = kComment;
|
||||
current_cue_.comment.push_back(line);
|
||||
current_cue_.comment += line + "\n";
|
||||
} else {
|
||||
// A cue can start from a cue identifier.
|
||||
// https://w3c.github.io/webvtt/#webvtt-cue-identifier
|
||||
|
@ -322,29 +331,27 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
|
|||
case kCuePayload: {
|
||||
if (line.empty()) {
|
||||
state_ = kCueIdentifierOrTimingOrComment;
|
||||
if (!new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_))) {
|
||||
if (!ProcessCurrentCue(!kFlush)) {
|
||||
state_ = kParseError;
|
||||
return false;
|
||||
}
|
||||
current_cue_ = Cue();
|
||||
break;
|
||||
}
|
||||
|
||||
current_cue_.payload.push_back(line);
|
||||
current_cue_.payload += line + "\n";
|
||||
break;
|
||||
}
|
||||
case kComment: {
|
||||
if (line.empty()) {
|
||||
state_ = kCueIdentifierOrTimingOrComment;
|
||||
if (!new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_))) {
|
||||
if (!ProcessCurrentCue(!kFlush)) {
|
||||
state_ = kParseError;
|
||||
return false;
|
||||
}
|
||||
current_cue_ = Cue();
|
||||
break;
|
||||
}
|
||||
|
||||
current_cue_.comment.push_back(line);
|
||||
current_cue_.comment += line + "\n";
|
||||
break;
|
||||
}
|
||||
case kParseError:
|
||||
|
@ -356,5 +363,25 @@ bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Test-only hook: replaces the parser's sample converter with |converter| and
// takes ownership of it. The converter held before the call is destroyed by
// the unique_ptr move assignment.
void WebVttMediaParser::InjectWebVttSampleConvertForTesting(
|
||||
std::unique_ptr<WebVttSampleConverter> converter) {
|
||||
sample_converter_ = std::move(converter);
|
||||
}
|
||||
|
||||
// Hands |current_cue_| to the sample converter and resets |current_cue_| to a
// default-constructed Cue. When |flush| is true the converter is flushed as
// well. Every sample the converter then reports as ready is popped and passed
// to |new_sample_cb_| with kTrackId.
// Returns false (after logging an error) as soon as the callback returns
// false; any samples still queued in the converter are left there. Returns
// true once no ready samples remain.
bool WebVttMediaParser::ProcessCurrentCue(bool flush) {
|
||||
sample_converter_->PushCue(current_cue_);
|
||||
current_cue_ = Cue();
|
||||
if (flush)
|
||||
sample_converter_->Flush();
|
||||
|
||||
while (sample_converter_->ReadySamplesSize() > 0) {
|
||||
if (!new_sample_cb_.Run(kTrackId, sample_converter_->PopSample())) {
|
||||
LOG(ERROR) << "New sample callback failed.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace media
|
||||
} // namespace shaka
|
||||
|
|
|
@ -8,12 +8,15 @@
|
|||
#define MEDIA_FORMATS_WEBVTT_WEBVTT_MEDIA_PARSER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "packager/base/compiler_specific.h"
|
||||
#include "packager/media/base/media_parser.h"
|
||||
#include "packager/media/formats/webvtt/cue.h"
|
||||
#include "packager/media/formats/webvtt/webvtt_sample_converter.h"
|
||||
|
||||
namespace shaka {
|
||||
namespace media {
|
||||
|
@ -34,6 +37,9 @@ class WebVttMediaParser : public MediaParser {
|
|||
bool Parse(const uint8_t* buf, int size) override WARN_UNUSED_RESULT;
|
||||
/// @}
|
||||
|
||||
void InjectWebVttSampleConvertForTesting(
|
||||
std::unique_ptr<WebVttSampleConverter> converter);
|
||||
|
||||
private:
|
||||
enum WebVttReadingState {
|
||||
kHeader,
|
||||
|
@ -45,6 +51,11 @@ class WebVttMediaParser : public MediaParser {
|
|||
kParseError,
|
||||
};
|
||||
|
||||
// Sends current cue to sample converter, and dispatches any ready samples to
|
||||
// the callback.
|
||||
// current_cue_ is always cleared.
|
||||
bool ProcessCurrentCue(bool flush);
|
||||
|
||||
InitCB init_cb_;
|
||||
NewSampleCB new_sample_cb_;
|
||||
|
||||
|
@ -62,6 +73,8 @@ class WebVttMediaParser : public MediaParser {
|
|||
|
||||
Cue current_cue_;
|
||||
|
||||
std::unique_ptr<WebVttSampleConverter> sample_converter_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(WebVttMediaParser);
|
||||
};
|
||||
|
||||
|
|
|
@ -8,13 +8,30 @@
|
|||
#include <gtest/gtest.h>
|
||||
|
||||
#include "packager/base/bind.h"
|
||||
#include "packager/base/strings/string_number_conversions.h"
|
||||
#include "packager/media/base/media_sample.h"
|
||||
#include "packager/media/base/stream_info.h"
|
||||
#include "packager/media/formats/mp4/box_definitions.h"
|
||||
#include "packager/media/formats/webvtt/webvtt_media_parser.h"
|
||||
|
||||
namespace shaka {
|
||||
namespace media {
|
||||
|
||||
using mp4::VTTCueBox;
|
||||
|
||||
namespace {
|
||||
// Data is a vector and must not be empty.
|
||||
MATCHER_P3(MatchesStartTimeEndTimeAndData, start_time, end_time, data, "") {
|
||||
*result_listener << "which is (" << arg->pts() << ", "
|
||||
<< (arg->pts() + arg->duration()) << ", "
|
||||
<< base::HexEncode(arg->data(), arg->data_size()) << ")";
|
||||
return arg->pts() == start_time &&
|
||||
(arg->pts() + arg->duration() == end_time) &&
|
||||
arg->data_size() == data.size() &&
|
||||
(memcmp(&data[0], arg->data(), arg->data_size()) == 0);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
typedef testing::MockFunction<void(
|
||||
const std::vector<std::shared_ptr<StreamInfo>>& stream_info)>
|
||||
MockInitCallback;
|
||||
|
@ -22,15 +39,13 @@ typedef testing::MockFunction<
|
|||
bool(uint32_t track_id, const std::shared_ptr<MediaSample>& media_sample)>
|
||||
MockNewSampleCallback;
|
||||
|
||||
using testing::_;
|
||||
using testing::AtLeast;
|
||||
using testing::InSequence;
|
||||
using testing::Return;
|
||||
using testing::_;
|
||||
|
||||
class WebVttMediaParserTest : public ::testing::Test {
|
||||
public:
|
||||
WebVttMediaParserTest() {}
|
||||
~WebVttMediaParserTest() override {}
|
||||
|
||||
void InitializeParser() {
|
||||
parser_.Init(
|
||||
base::Bind(&MockInitCallback::Call, base::Unretained(&init_callback_)),
|
||||
|
@ -51,13 +66,21 @@ TEST_F(WebVttMediaParserTest, Init) {
|
|||
|
||||
TEST_F(WebVttMediaParserTest, ParseOneCue) {
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true));
|
||||
|
||||
VTTCueBox cue_box;
|
||||
cue_box.cue_payload.cue_text = "subtitle";
|
||||
std::vector<uint8_t> expected;
|
||||
AppendBoxToVector(&cue_box, &expected);
|
||||
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeEndTimeAndData(60000, 3600000, expected)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
"00:01:00.000 --> 01:00:00.000\n"
|
||||
"subtitle";
|
||||
"subtitle\n";
|
||||
InitializeParser();
|
||||
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt),
|
||||
arraysize(kWebVtt) - 1));
|
||||
|
@ -82,20 +105,63 @@ TEST_F(WebVttMediaParserTest, DifferentLineBreaks) {
|
|||
EXPECT_TRUE(parser_.Flush());
|
||||
}
|
||||
|
||||
TEST_F(WebVttMediaParserTest, ParseMultpleCues) {
|
||||
// Verify that a typical case with multiple cues works.
|
||||
TEST_F(WebVttMediaParserTest, ParseMultipleCues) {
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _))
|
||||
.Times(2)
|
||||
.WillRepeatedly(Return(true));
|
||||
|
||||
|
||||
VTTCueBox first_cue_box;
|
||||
first_cue_box.cue_payload.cue_text = "subtitle";
|
||||
|
||||
VTTCueBox second_cue_data;
|
||||
second_cue_data.cue_payload.cue_text = "more subtitle";
|
||||
|
||||
VTTCueBox third_cue_data;
|
||||
third_cue_data.cue_payload.cue_text = "more text";
|
||||
|
||||
std::vector<uint8_t> expected;
|
||||
AppendBoxToVector(&first_cue_box, &expected);
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeEndTimeAndData(1000, 2321, expected)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
expected.clear();
|
||||
AppendBoxToVector(&first_cue_box, &expected);
|
||||
AppendBoxToVector(&second_cue_data, &expected);
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeEndTimeAndData(2321, 5200, expected)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
expected.clear();
|
||||
AppendBoxToVector(&second_cue_data, &expected);
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeEndTimeAndData(5200, 5800, expected)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
expected.clear();
|
||||
AppendBoxToVector(&second_cue_data, &expected);
|
||||
AppendBoxToVector(&third_cue_data, &expected);
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeEndTimeAndData(5800, 7000, expected)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
expected.clear();
|
||||
AppendBoxToVector(&third_cue_data, &expected);
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeEndTimeAndData(7000, 8000, expected)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
"00:01:00.000 --> 01:00:00.000\n"
|
||||
"00:00:01.000 --> 00:00:05.200\n"
|
||||
"subtitle\n"
|
||||
"\n"
|
||||
"02:01:00.000 --> 02:02:00.000\n"
|
||||
"more subtitle";
|
||||
"00:00:02.321 --> 00:00:07.000\n"
|
||||
"more subtitle\n"
|
||||
"\n"
|
||||
"00:00:05.800 --> 00:00:08.000\n"
|
||||
"more text\n" ;
|
||||
InitializeParser();
|
||||
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt),
|
||||
arraysize(kWebVtt) - 1));
|
||||
|
@ -112,9 +178,8 @@ MATCHER_P2(MatchesStartTimeAndDuration, start_time, duration, "") {
|
|||
TEST_F(WebVttMediaParserTest, VerifyTimingParsing) {
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_,
|
||||
Call(_, MatchesStartTimeAndDuration(61004, 204088)))
|
||||
Call(_, MatchesStartTimeAndDuration(61004u, 204088u)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
|
@ -159,48 +224,15 @@ TEST_F(WebVttMediaParserTest, SpacesInTimestamp) {
|
|||
arraysize(kSpacesInTimestamp) - 1));
|
||||
}
|
||||
|
||||
MATCHER_P(MatchesPayload, data, "") {
|
||||
std::vector<uint8_t> arg_data(arg->data(), arg->data() + arg->data_size());
|
||||
return arg_data == data;
|
||||
}
|
||||
|
||||
TEST_F(WebVttMediaParserTest, VerifyCuePayload) {
|
||||
const char kExpectedPayload1[] = "subtitle";
|
||||
const char kExpectedPayload2[] = "hello";
|
||||
std::vector<uint8_t> expected_payload(
|
||||
kExpectedPayload1, kExpectedPayload1 + arraysize(kExpectedPayload1) - 1);
|
||||
|
||||
InSequence s;
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, MatchesPayload(expected_payload)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
expected_payload.assign(kExpectedPayload2,
|
||||
kExpectedPayload2 + arraysize(kExpectedPayload2) - 1);
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, MatchesPayload(expected_payload)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
"00:01:01.004 --> 00:01:22.088\n"
|
||||
"subtitle\n"
|
||||
"\n"
|
||||
"02:06:00.000 --> 02:30:02.006\n"
|
||||
"hello";
|
||||
|
||||
InitializeParser();
|
||||
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt),
|
||||
arraysize(kWebVtt) - 1));
|
||||
|
||||
EXPECT_TRUE(parser_.Flush());
|
||||
MATCHER_P(MatchesPayload, payload, "") {
|
||||
return arg.payload.front() == std::string(payload);
|
||||
}
|
||||
|
||||
// Verify that a sample can be created from multiple calls to Parse(), i.e. one
|
||||
// Parse() is not a full sample.
|
||||
TEST_F(WebVttMediaParserTest, PartialParse) {
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0);
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
|
@ -210,7 +242,8 @@ TEST_F(WebVttMediaParserTest, PartialParse) {
|
|||
InitializeParser();
|
||||
// Pass in the first 8 bytes, i.e. right before the first cue.
|
||||
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt), 8));
|
||||
// Pass in the rest of the cue.
|
||||
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _)).WillOnce(Return(true));
|
||||
EXPECT_TRUE(parser_.Parse(reinterpret_cast<const uint8_t*>(kWebVtt) + 8,
|
||||
arraysize(kWebVtt) - 1 - 8));
|
||||
|
||||
|
@ -221,6 +254,7 @@ TEST_F(WebVttMediaParserTest, PartialParse) {
|
|||
TEST_F(WebVttMediaParserTest, BadMetadataHeader) {
|
||||
EXPECT_CALL(init_callback_, Call(_)).Times(0);
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0);
|
||||
|
||||
const char kBadWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
"00:01:01.004 --> 00:04:25.092\n";
|
||||
|
@ -230,12 +264,8 @@ TEST_F(WebVttMediaParserTest, BadMetadataHeader) {
|
|||
EXPECT_TRUE(parser_.Flush());
|
||||
}
|
||||
|
||||
MATCHER_P(MatchesComment, comment, "") {
|
||||
std::vector<uint8_t> arg_comment(arg->side_data(),
|
||||
arg->side_data() + arg->side_data_size());
|
||||
return arg_comment == comment;
|
||||
}
|
||||
|
||||
// TODO(rkuroiwa): WebVttSampleConverter doesn't handle comments yet. Once it's
|
||||
// implemented, this should verify that comment is in the sample.
|
||||
// Verify that comment is parsed.
|
||||
TEST_F(WebVttMediaParserTest, Comment) {
|
||||
const char kExpectedComment[] = "NOTE This is a comment";
|
||||
|
@ -243,8 +273,6 @@ TEST_F(WebVttMediaParserTest, Comment) {
|
|||
kExpectedComment, kExpectedComment + arraysize(kExpectedComment) - 1);
|
||||
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, MatchesComment(expected_comment)))
|
||||
.WillOnce(Return(true));
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
|
@ -260,7 +288,6 @@ TEST_F(WebVttMediaParserTest, Comment) {
|
|||
// Verify that comment with --> is rejected.
|
||||
TEST_F(WebVttMediaParserTest, BadComment) {
|
||||
EXPECT_CALL(init_callback_, Call(_));
|
||||
EXPECT_CALL(new_sample_callback_, Call(_, _)).Times(0);
|
||||
|
||||
const char kWebVtt[] =
|
||||
"WEBVTT\n"
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace media {
|
|||
namespace {
|
||||
|
||||
std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time,
|
||||
uint64_t end_time) {
|
||||
uint64_t end_time) {
|
||||
DCHECK_GT(end_time, start_time);
|
||||
mp4::VTTEmptyCueBox empty_cue_box;
|
||||
|
||||
|
@ -36,6 +36,15 @@ std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time,
|
|||
return empty_cue_sample;
|
||||
}
|
||||
|
||||
// Copies |input| into |output| with every trailing '\n' character removed.
// Newlines that are followed by other characters are kept.
// @param input is the text to strip; it is not modified.
// @param output receives the stripped text; its previous content is
//        overwritten. Must not be null.
// If |input| is empty or consists only of '\n' characters, |output| becomes
// the empty string.
void StripTrailingNewlines(const std::string& input, std::string* output) {
  const size_t last_kept = input.find_last_not_of('\n');
  if (last_kept == std::string::npos) {
    // Nothing but newlines (or nothing at all): every character is trailing,
    // so the stripped result is empty rather than a copy of |input|.
    output->clear();
    return;
  }
  // |last_kept| indexes the final non-newline character; keep up to and
  // including it.
  *output = input.substr(0, last_kept + 1);
}
|
||||
|
||||
mp4::VTTCueBox CueBoxFromCue(const Cue& cue) {
|
||||
mp4::VTTCueBox cue_box;
|
||||
if (!cue.identifier.empty()) {
|
||||
|
@ -46,7 +55,7 @@ mp4::VTTCueBox CueBoxFromCue(const Cue& cue) {
|
|||
cue_box.cue_settings.settings = cue.settings;
|
||||
}
|
||||
|
||||
cue_box.cue_payload.cue_text = cue.payload.front();
|
||||
StripTrailingNewlines(cue.payload, &cue_box.cue_payload.cue_text);
|
||||
return cue_box;
|
||||
}
|
||||
|
||||
|
@ -127,19 +136,18 @@ WebVttSampleConverter::~WebVttSampleConverter() {}
|
|||
|
||||
// Note that this |sample| is either a cue or a comment. It does not have any
|
||||
// info on whether the next cue is overlapping or not.
|
||||
void WebVttSampleConverter::PushSample(std::shared_ptr<MediaSample> sample) {
|
||||
if (sample->data_size() == 0u) {
|
||||
void WebVttSampleConverter::PushCue(const Cue& cue) {
|
||||
if (!cue.comment.empty()) {
|
||||
// A comment. Put it in the buffer and skip.
|
||||
mp4::VTTAdditionalTextBox comment;
|
||||
comment.cue_additional_text.assign(
|
||||
sample->side_data(), sample->side_data() + sample->side_data_size());
|
||||
StripTrailingNewlines(cue.comment, &comment.cue_additional_text);
|
||||
additional_texts_.push_back(comment);
|
||||
// TODO(rkuroiwa): Handle comments as samples.
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
cues_.push_back(MediaSampleToCue(*sample));
|
||||
cues_.push_back(cue);
|
||||
if (cues_.size() == 1) {
|
||||
// Cannot make a decision with just one sample. Cache it and wait for
|
||||
// another one.
|
||||
|
|
|
@ -51,28 +51,29 @@ void AppendBoxToVector(mp4::Box* box, std::vector<uint8_t>* output_vector);
|
|||
///\n
|
||||
/// This class buffers the cues that are passed to PushCue() and creates
|
||||
/// more samples as necessary.
|
||||
/// Methods are virtual only for mocking, not intended for inheritance.
|
||||
class WebVttSampleConverter {
|
||||
public:
|
||||
WebVttSampleConverter();
|
||||
~WebVttSampleConverter();
|
||||
virtual ~WebVttSampleConverter();
|
||||
|
||||
/// Add a sample.
|
||||
/// @param sample is the sample to be added. It should contain one VTT cue.
|
||||
void PushSample(std::shared_ptr<MediaSample> sample);
|
||||
/// Add a webvtt cue.
|
||||
/// @param cue is a webvtt cue.
|
||||
virtual void PushCue(const Cue& cue);
|
||||
|
||||
/// Process all the buffered samples.
|
||||
/// This finalizes the object and further calls to PushCue() may result in
|
||||
/// an undefined behavior.
|
||||
void Flush();
|
||||
virtual void Flush();
|
||||
|
||||
/// @return The number of samples that are processed and ready to be popped.
|
||||
size_t ReadySamplesSize();
|
||||
virtual size_t ReadySamplesSize();
|
||||
|
||||
/// Returns a MediaSample that is non-overlapping with the previous samples
|
||||
/// that it has output. The data in the sample is one or more ISO-BMFF boxes
|
||||
/// for the duration of the sample.
|
||||
/// @return The first sample that is ready to be processed.
|
||||
std::shared_ptr<MediaSample> PopSample();
|
||||
virtual std::shared_ptr<MediaSample> PopSample();
|
||||
|
||||
private:
|
||||
// Handle |cues_| except the last item, and create samples from them.
|
||||
|
|
|
@ -82,23 +82,18 @@ TEST_F(WebVttFragmenterTest, AppendBoxToVector) {
|
|||
// |-- cue2 --|
|
||||
|
||||
TEST_F(WebVttFragmenterTest, NoOverlapContiguous) {
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(0);
|
||||
sample1->set_dts(0);
|
||||
sample1->set_duration(2000);
|
||||
Cue cue1;
|
||||
cue1.payload = kCueMessage1;
|
||||
cue1.start_time = 0;
|
||||
cue1.duration = 2000;
|
||||
webvtt_sample_converter_.PushCue(cue1);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue2;
|
||||
cue2.payload = kCueMessage2;
|
||||
cue2.start_time = 2000;
|
||||
cue2.duration = 1000;
|
||||
|
||||
std::shared_ptr<MediaSample> sample2 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
|
||||
arraysize(kCueMessage2) - 1, true);
|
||||
sample2->set_pts(2000);
|
||||
sample2->set_dts(2000);
|
||||
sample2->set_duration(1000);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample2);
|
||||
webvtt_sample_converter_.PushCue(cue2);
|
||||
webvtt_sample_converter_.Flush();
|
||||
EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize());
|
||||
|
||||
|
@ -119,23 +114,18 @@ TEST_F(WebVttFragmenterTest, NoOverlapContiguous) {
|
|||
|
||||
// Verify that if there is a gap, then a sample is created for the gap.
|
||||
TEST_F(WebVttFragmenterTest, Gap) {
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(0);
|
||||
sample1->set_dts(0);
|
||||
sample1->set_duration(1000);
|
||||
Cue cue1;
|
||||
cue1.payload = kCueMessage1;
|
||||
cue1.start_time = 0;
|
||||
cue1.duration = 1000;
|
||||
webvtt_sample_converter_.PushCue(cue1);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue2;
|
||||
cue2.payload = kCueMessage2;
|
||||
cue2.start_time = 2000;
|
||||
cue2.duration = 1000;
|
||||
webvtt_sample_converter_.PushCue(cue2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample2 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
|
||||
arraysize(kCueMessage2) - 1, true);
|
||||
sample2->set_pts(2000);
|
||||
sample2->set_dts(2000);
|
||||
sample2->set_duration(1000);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample2);
|
||||
EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize());
|
||||
|
||||
webvtt_sample_converter_.Flush();
|
||||
|
@ -165,30 +155,23 @@ TEST_F(WebVttFragmenterTest, Gap) {
|
|||
// The previous cue always ends before the current cue ends.
|
||||
// Cues are overlapping, no samples should be created in PushCue().
|
||||
TEST_F(WebVttFragmenterTest, OverlappingCuesSequential) {
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(0);
|
||||
sample1->set_dts(0);
|
||||
sample1->set_duration(2000);
|
||||
Cue cue1;
|
||||
cue1.payload = kCueMessage1;
|
||||
cue1.start_time = 0;
|
||||
cue1.duration = 2000;
|
||||
webvtt_sample_converter_.PushCue(cue1);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue2;
|
||||
cue2.payload = kCueMessage2;
|
||||
cue2.start_time = 1000;
|
||||
cue2.duration = 2000;
|
||||
webvtt_sample_converter_.PushCue(cue2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample2 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
|
||||
arraysize(kCueMessage2) - 1, true);
|
||||
sample2->set_pts(1000);
|
||||
sample2->set_dts(1000);
|
||||
sample2->set_duration(2000);
|
||||
webvtt_sample_converter_.PushSample(sample2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample3 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage3),
|
||||
arraysize(kCueMessage3) - 1, true);
|
||||
sample3->set_pts(1500);
|
||||
sample3->set_dts(1500);
|
||||
sample3->set_duration(4000);
|
||||
webvtt_sample_converter_.PushSample(sample3);
|
||||
Cue cue3;
|
||||
cue3.payload = kCueMessage3;
|
||||
cue3.start_time = 1500;
|
||||
cue3.duration = 4000;
|
||||
webvtt_sample_converter_.PushCue(cue3);
|
||||
|
||||
webvtt_sample_converter_.Flush();
|
||||
// There should be 5 samples for [0,1000], [1000,1500], [1500,2000],
|
||||
|
@ -232,38 +215,29 @@ TEST_F(WebVttFragmenterTest, OverlappingCuesSequential) {
|
|||
}
|
||||
|
||||
TEST_F(WebVttFragmenterTest, OverlappingLongCue) {
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(0);
|
||||
sample1->set_dts(0);
|
||||
sample1->set_duration(10000);
|
||||
Cue cue1;
|
||||
cue1.payload = kCueMessage1;
|
||||
cue1.start_time = 0;
|
||||
cue1.duration = 10000;
|
||||
webvtt_sample_converter_.PushCue(cue1);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue2;
|
||||
cue2.payload = kCueMessage2;
|
||||
cue2.start_time = 1000;
|
||||
cue2.duration = 5000;
|
||||
webvtt_sample_converter_.PushCue(cue2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample2 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
|
||||
arraysize(kCueMessage2) - 1, true);
|
||||
sample2->set_pts(1000);
|
||||
sample2->set_dts(1000);
|
||||
sample2->set_duration(5000);
|
||||
webvtt_sample_converter_.PushSample(sample2);
|
||||
Cue cue3;
|
||||
cue3.payload = kCueMessage3;
|
||||
cue3.start_time = 2000;
|
||||
cue3.duration = 1000;
|
||||
webvtt_sample_converter_.PushCue(cue3);
|
||||
|
||||
std::shared_ptr<MediaSample> sample3 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage3),
|
||||
arraysize(kCueMessage3) - 1, true);
|
||||
sample3->set_pts(2000);
|
||||
sample3->set_dts(2000);
|
||||
sample3->set_duration(1000);
|
||||
webvtt_sample_converter_.PushSample(sample3);
|
||||
|
||||
std::shared_ptr<MediaSample> sample4 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage4),
|
||||
arraysize(kCueMessage4) - 1, true);
|
||||
sample4->set_pts(8000);
|
||||
sample4->set_dts(8000);
|
||||
sample4->set_duration(1000);
|
||||
webvtt_sample_converter_.PushSample(sample4);
|
||||
Cue cue4;
|
||||
cue4.payload = kCueMessage4;
|
||||
cue4.start_time = 8000;
|
||||
cue4.duration = 1000;
|
||||
webvtt_sample_converter_.PushCue(cue4);
|
||||
webvtt_sample_converter_.Flush();
|
||||
|
||||
// There should be 7 samples for [0,1000], [1000,2000], [2000,3000],
|
||||
|
@ -320,13 +294,11 @@ TEST_F(WebVttFragmenterTest, OverlappingLongCue) {
|
|||
}
|
||||
|
||||
TEST_F(WebVttFragmenterTest, GapAtBeginning) {
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(1200);
|
||||
sample1->set_dts(1200);
|
||||
sample1->set_duration(2000);
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue;
|
||||
cue.payload = kCueMessage1;
|
||||
cue.start_time = 1200;
|
||||
cue.duration = 2000;
|
||||
webvtt_sample_converter_.PushCue(cue);
|
||||
|
||||
webvtt_sample_converter_.Flush();
|
||||
EXPECT_EQ(1u, webvtt_sample_converter_.ReadySamplesSize());
|
||||
|
@ -340,24 +312,18 @@ TEST_F(WebVttFragmenterTest, GapAtBeginning) {
|
|||
}
|
||||
|
||||
TEST_F(WebVttFragmenterTest, SameStartTime) {
|
||||
// TODO(rkuroiwa): This should be std::shared_ptr if this is applied on HEAD.
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(0);
|
||||
sample1->set_dts(0);
|
||||
sample1->set_duration(2000);
|
||||
Cue cue1;
|
||||
cue1.payload = kCueMessage1;
|
||||
cue1.start_time = 0;
|
||||
cue1.duration = 2000;
|
||||
webvtt_sample_converter_.PushCue(cue1);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue2;
|
||||
cue2.payload = kCueMessage2;
|
||||
cue2.start_time = 0;
|
||||
cue2.duration = 1500;
|
||||
webvtt_sample_converter_.PushCue(cue2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample2 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
|
||||
arraysize(kCueMessage2) - 1, true);
|
||||
sample2->set_pts(0);
|
||||
sample2->set_dts(0);
|
||||
sample2->set_duration(1500);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample2);
|
||||
webvtt_sample_converter_.Flush();
|
||||
EXPECT_EQ(2u, webvtt_sample_converter_.ReadySamplesSize());
|
||||
|
||||
|
@ -380,39 +346,29 @@ TEST_F(WebVttFragmenterTest, SameStartTime) {
|
|||
|
||||
// This test is a combination of the test cases above.
|
||||
TEST_F(WebVttFragmenterTest, MoreCases) {
|
||||
std::shared_ptr<MediaSample> sample1 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage1),
|
||||
arraysize(kCueMessage1) - 1, true);
|
||||
sample1->set_pts(0);
|
||||
sample1->set_dts(0);
|
||||
sample1->set_duration(2000);
|
||||
Cue cue1;
|
||||
cue1.payload = kCueMessage1;
|
||||
cue1.start_time = 0;
|
||||
cue1.duration = 2000;
|
||||
webvtt_sample_converter_.PushCue(cue1);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample1);
|
||||
Cue cue2;
|
||||
cue2.payload = kCueMessage2;
|
||||
cue2.start_time = 100;
|
||||
cue2.duration = 100;
|
||||
webvtt_sample_converter_.PushCue(cue2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample2 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage2),
|
||||
arraysize(kCueMessage2) - 1, true);
|
||||
sample2->set_pts(100);
|
||||
sample2->set_dts(100);
|
||||
sample2->set_duration(100);
|
||||
Cue cue3;
|
||||
cue3.payload = kCueMessage3;
|
||||
cue3.start_time = 1500;
|
||||
cue3.duration = 1000;
|
||||
webvtt_sample_converter_.PushCue(cue3);
|
||||
|
||||
webvtt_sample_converter_.PushSample(sample2);
|
||||
|
||||
std::shared_ptr<MediaSample> sample3 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage3),
|
||||
arraysize(kCueMessage3) - 1, true);
|
||||
sample3->set_pts(1500);
|
||||
sample3->set_dts(1500);
|
||||
sample3->set_duration(1000);
|
||||
webvtt_sample_converter_.PushSample(sample3);
|
||||
|
||||
std::shared_ptr<MediaSample> sample4 =
|
||||
MediaSample::CopyFrom(reinterpret_cast<const uint8_t*>(kCueMessage4),
|
||||
arraysize(kCueMessage4) - 1, true);
|
||||
sample4->set_pts(1500);
|
||||
sample4->set_dts(1500);
|
||||
sample4->set_duration(800);
|
||||
webvtt_sample_converter_.PushSample(sample4);
|
||||
Cue cue4;
|
||||
cue4.payload = kCueMessage4;
|
||||
cue4.start_time = 1500;
|
||||
cue4.duration = 800;
|
||||
webvtt_sample_converter_.PushCue(cue4);
|
||||
|
||||
webvtt_sample_converter_.Flush();
|
||||
EXPECT_EQ(6u, webvtt_sample_converter_.ReadySamplesSize());
|
||||
|
|
Loading…
Reference in New Issue