Make the text readers use streams.

Instead of having the text readers reading from the file directly, they
now accept the data as a stream.

Change-Id: Id1b32c867a8058a68ae7aab5c568f77672a4401d
This commit is contained in:
Jacob Trimble 2020-06-30 14:31:07 -07:00
parent 5b9fd409a5
commit 748e7e0056
4 changed files with 180 additions and 233 deletions

View File

@ -6,117 +6,104 @@
#include "packager/media/formats/webvtt/text_readers.h"
#include <cstring>
#include "packager/base/logging.h"
#include "packager/file/file.h"
namespace shaka {
namespace media {
Status FileReader::Open(const std::string& filename,
std::unique_ptr<FileReader>* out) {
const char* kReadOnly = "r";
LineReader::LineReader() : should_flush_(false) {}
std::unique_ptr<File, FileCloser> file(
File::Open(filename.c_str(), kReadOnly));
if (!file) {
return Status(error::INVALID_ARGUMENT,
"Could not open input file " + filename);
}
*out = std::unique_ptr<FileReader>(new FileReader(std::move(file)));
return Status::OK;
void LineReader::PushData(const uint8_t* data, size_t data_size) {
buffer_.Push(data, static_cast<int>(data_size));
should_flush_ = false;
}
bool FileReader::Next(char* out) {
// TODO(vaage): If file reading performance is poor, change this to buffer
// data and read from the buffer.
return file_->Read(out, 1) == 1;
}
FileReader::FileReader(std::unique_ptr<File, FileCloser> file)
: file_(std::move(file)) {
DCHECK(file_);
}
PeekingReader::PeekingReader(std::unique_ptr<FileReader> source)
: source_(std::move(source)) {}
bool PeekingReader::Next(char* out) {
DCHECK(out);
if (Peek(out)) {
has_cached_next_ = false;
return true;
}
return false;
}
bool PeekingReader::Peek(char* out) {
DCHECK(out);
if (!has_cached_next_ && source_->Next(&cached_next_)) {
has_cached_next_ = true;
}
if (has_cached_next_) {
*out = cached_next_;
return true;
}
return false;
}
LineReader::LineReader(std::unique_ptr<FileReader> source)
: source_(std::move(source)) {}
// Split lines based on https://w3c.github.io/webvtt/#webvtt-line-terminator
bool LineReader::Next(std::string* out) {
DCHECK(out);
out->clear();
bool read_something = false;
char now;
while (source_.Next(&now)) {
read_something = true;
// handle \n
if (now == '\n') {
int i;
int skip = 0;
const uint8_t* data;
int data_size;
buffer_.Peek(&data, &data_size);
for (i = 0; i < data_size; i++) {
// Handle \n
if (data[i] == '\n') {
skip = 1;
break;
}
// handle \r and \r\n
if (now == '\r') {
char next;
if (source_.Peek(&next) && next == '\n') {
source_.Next(&next); // Read in the '\n' that was just seen via |Peek|
// Handle \r and \r\n
if (data[i] == '\r') {
// Only read if we can see the next character; this ensures we don't get
// the '\n' in the next PushData.
if (i + 1 == data_size) {
if (!should_flush_)
return false;
skip = 1;
} else {
if (data[i + 1] == '\n')
skip = 2;
else
skip = 1;
}
break;
}
out->push_back(now);
}
return read_something;
if (i == data_size && (!should_flush_ || i == 0)) {
return false;
}
// TODO(modmaker): Handle character encodings?
out->assign(data, data + i);
buffer_.Pop(i + skip);
return true;
}
BlockReader::BlockReader(std::unique_ptr<FileReader> source)
: source_(std::move(source)) {}
void LineReader::Flush() {
should_flush_ = true;
}
BlockReader::BlockReader() : should_flush_(false) {}
void BlockReader::PushData(const uint8_t* data, size_t data_size) {
source_.PushData(data, data_size);
should_flush_ = false;
}
bool BlockReader::Next(std::vector<std::string>* out) {
DCHECK(out);
out->clear();
bool in_block = false;
bool end_block = false;
// Read through lines until a non-empty line is found. With a non-empty
// line is found, start adding the lines to the output and once an empty
// line if found again, stop adding lines and exit.
std::string line;
while (source_.Next(&line)) {
if (in_block && line.empty()) {
if (!temp_.empty() && line.empty()) {
end_block = true;
break;
}
if (in_block || !line.empty()) {
out->push_back(line);
in_block = true;
if (!line.empty()) {
temp_.emplace_back(std::move(line));
}
}
return in_block;
if (!end_block && (!should_flush_ || temp_.empty()))
return false;
*out = std::move(temp_);
return true;
}
void BlockReader::Flush() {
source_.Flush();
should_flush_ = true;
}
} // namespace media
} // namespace shaka

View File

@ -11,7 +11,7 @@
#include <string>
#include <vector>
#include "packager/file/file_closer.h"
#include "packager/media/base/byte_queue.h"
#include "packager/status.h"
namespace shaka {
@ -19,70 +19,47 @@ class File;
namespace media {
/// Class to read character-by-character from a file.
class FileReader {
public:
/// Create a new file reader by opening a file. If the file fails to open (in
/// readonly mode) a non-ok status will be returned. If the file successfully
/// opens, |out| will be set to a new FileReader and an ok status will be
/// returned.
static Status Open(const std::string& filename,
std::unique_ptr<FileReader>* out);
/// Read the next character from the file. If there is a next character,
/// |out| will be set and true will be returned. If there is no next
/// character false will be returned.
bool Next(char* out);
private:
explicit FileReader(std::unique_ptr<File, FileCloser> file);
FileReader(const FileReader& reader) = delete;
FileReader operator=(const FileReader& reader) = delete;
std::unique_ptr<File, FileCloser> file_;
};
class PeekingReader {
public:
explicit PeekingReader(std::unique_ptr<FileReader> source);
bool Peek(char* out);
bool Next(char* out);
private:
PeekingReader(const PeekingReader&) = delete;
PeekingReader operator=(const PeekingReader&) = delete;
std::unique_ptr<FileReader> source_;
char cached_next_ = 0;
bool has_cached_next_ = false;
};
class LineReader {
public:
explicit LineReader(std::unique_ptr<FileReader> source);
LineReader();
/// Pushes data onto the end of the buffer.
void PushData(const uint8_t* data, size_t data_size);
/// Reads the next line from the buffer.
/// @return True if a line is read, false if there's no line in the buffer.
bool Next(std::string* out);
/// Indicates that no more data is coming and that calls to Next should
/// return even possibly-incomplete data.
void Flush();
private:
LineReader(const LineReader&) = delete;
LineReader operator=(const LineReader&) = delete;
PeekingReader source_;
ByteQueue buffer_;
bool should_flush_;
};
class BlockReader {
public:
explicit BlockReader(std::unique_ptr<FileReader> source);
BlockReader();
/// Pushes data onto the end of the buffer.
void PushData(const uint8_t* data, size_t data_size);
/// Reads the next block from the buffer.
/// @return True if a block is read, false if there is no block in the buffer.
bool Next(std::vector<std::string>* out);
/// Indicates that no more data is coming and that calls to Next should
/// return even possibly-incomplete data.
void Flush();
private:
BlockReader(const BlockReader&) = delete;
BlockReader operator=(const BlockReader&) = delete;
LineReader source_;
std::vector<std::string> temp_;
bool should_flush_;
};
} // namespace media

View File

@ -4,6 +4,7 @@
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "packager/file/file.h"
@ -12,66 +13,15 @@
namespace shaka {
namespace media {
namespace {
const char* kFilename = "memory://test-file";
} // namespace
TEST(TextReadersTest, ReadWholeStream) {
const char* text = "abcd";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
char c;
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'a');
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'b');
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'c');
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'd');
ASSERT_FALSE(source->Next(&c));
}
TEST(TextReadersTest, Peeking) {
const char* text = "abc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
PeekingReader reader(std::move(source));
char c;
ASSERT_TRUE(reader.Peek(&c));
ASSERT_EQ(c, 'a');
ASSERT_TRUE(reader.Next(&c));
ASSERT_EQ(c, 'a');
ASSERT_TRUE(reader.Peek(&c));
ASSERT_EQ(c, 'b');
ASSERT_TRUE(reader.Next(&c));
ASSERT_EQ(c, 'b');
ASSERT_TRUE(reader.Peek(&c));
ASSERT_EQ(c, 'c');
ASSERT_TRUE(reader.Next(&c));
ASSERT_EQ(c, 'c');
ASSERT_FALSE(reader.Peek(&c));
ASSERT_FALSE(reader.Next(&c));
}
using testing::ElementsAre;
TEST(TextReadersTest, ReadLinesWithNewLine) {
const char* text = "a\nb\nc";
const uint8_t text[] = "a\nb\nc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
LineReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::string s;
ASSERT_TRUE(reader.Next(&s));
@ -84,14 +34,11 @@ TEST(TextReadersTest, ReadLinesWithNewLine) {
}
TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) {
const char* text = "a\r\nb\r\nc";
const uint8_t text[] = "a\r\nb\r\nc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
LineReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::string s;
ASSERT_TRUE(reader.Next(&s));
@ -104,14 +51,11 @@ TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) {
}
TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) {
const char* text = "a\n\rb\n\rc";
const uint8_t text[] = "a\n\rb\n\rc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
LineReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::string s;
ASSERT_TRUE(reader.Next(&s));
@ -128,14 +72,11 @@ TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) {
}
TEST(TextReadersTest, ReadLinesWithReturnAtEnd) {
const char* text = "a\r\nb\r\nc\r";
const uint8_t text[] = "a\r\nb\r\nc\r";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
LineReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::string s;
ASSERT_TRUE(reader.Next(&s));
@ -147,30 +88,44 @@ TEST(TextReadersTest, ReadLinesWithReturnAtEnd) {
ASSERT_FALSE(reader.Next(&s));
}
TEST(TextReadersTest, ReadLinesWithMultiplePushes) {
const uint8_t text1[] = "a\nb";
const uint8_t text2[] = "c\nd";
LineReader reader;
reader.PushData(text1, sizeof(text1) - 1);
std::string s;
ASSERT_TRUE(reader.Next(&s));
ASSERT_EQ(s, "a");
ASSERT_FALSE(reader.Next(&s));
reader.PushData(text2, sizeof(text2) - 1);
reader.Flush();
ASSERT_TRUE(reader.Next(&s));
ASSERT_EQ(s, "bc");
ASSERT_TRUE(reader.Next(&s));
ASSERT_EQ(s, "d");
ASSERT_FALSE(reader.Next(&s));
}
TEST(TextReadersTest, ReadBlocksReadMultilineBlock) {
const char* text =
const uint8_t text[] =
"block 1 - line 1\n"
"block 1 - line 2";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
BlockReader reader(std::move(source));
BlockReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::vector<std::string> block;
ASSERT_TRUE(reader.Next(&block));
ASSERT_EQ(2u, block.size());
ASSERT_EQ("block 1 - line 1", block[0]);
ASSERT_EQ("block 1 - line 2", block[1]);
EXPECT_THAT(block, ElementsAre("block 1 - line 1", "block 1 - line 2"));
ASSERT_FALSE(reader.Next(&block));
}
TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) {
const char* text =
const uint8_t text[] =
"\n"
"\n"
"block 1\n"
@ -178,38 +133,50 @@ TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) {
"\n"
"block 2\n";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
BlockReader reader(std::move(source));
BlockReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::vector<std::string> block;
ASSERT_TRUE(reader.Next(&block));
ASSERT_EQ(1u, block.size());
ASSERT_EQ("block 1", block[0]);
EXPECT_THAT(block, ElementsAre("block 1"));
ASSERT_TRUE(reader.Next(&block));
ASSERT_EQ(1u, block.size());
ASSERT_EQ("block 2", block[0]);
EXPECT_THAT(block, ElementsAre("block 2"));
ASSERT_FALSE(reader.Next(&block));
}
TEST(TextReadersTest, ReadBlocksWithOnlyBlankLines) {
const char* text = "\n\n\n\n";
const uint8_t text[] = "\n\n\n\n";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
BlockReader reader(std::move(source));
BlockReader reader;
reader.PushData(text, sizeof(text) - 1);
reader.Flush();
std::vector<std::string> block;
ASSERT_FALSE(reader.Next(&block));
}
TEST(TextReadersTest, ReadBlocksMultipleReads) {
const uint8_t text1[] = "block 1\n";
const uint8_t text2[] =
"block 2\n"
"\n"
"\n"
"end";
BlockReader reader;
reader.PushData(text1, sizeof(text1) - 1);
std::vector<std::string> block;
ASSERT_FALSE(reader.Next(&block));
reader.PushData(text2, sizeof(text2) - 1);
reader.Flush();
ASSERT_TRUE(reader.Next(&block));
EXPECT_THAT(block, ElementsAre("block 1", "block 2"));
}
} // namespace media
} // namespace shaka

View File

@ -12,6 +12,8 @@
#include "packager/base/logging.h"
#include "packager/base/strings/string_split.h"
#include "packager/base/strings/string_util.h"
#include "packager/file/file.h"
#include "packager/file/file_closer.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_timestamp.h"
#include "packager/status_macros.h"
@ -19,7 +21,9 @@
namespace shaka {
namespace media {
namespace {
const uint64_t kStreamIndex = 0;
const uint64_t kBufferSize = 64 * 1024 * 1024;
std::string BlockToString(const std::string* block, size_t size) {
std::string out = " --- BLOCK START ---\n";
@ -99,9 +103,21 @@ bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
}
Status WebVttParser::Run() {
std::unique_ptr<FileReader> reader;
RETURN_IF_ERROR(FileReader::Open(input_path_, &reader));
BlockReader block_reader(std::move(reader));
BlockReader block_reader;
std::unique_ptr<File, FileCloser> file(File::Open(input_path_.c_str(), "r"));
if (!file)
return Status(error::FILE_FAILURE, "Error reading from file");
while (true) {
std::vector<uint8_t> buffer(kBufferSize);
const auto size = file->Read(buffer.data(), buffer.size());
if (size < 0)
return Status(error::FILE_FAILURE, "Error reading from file");
if (size == 0)
break;
block_reader.PushData(buffer.data(), size);
}
block_reader.Flush();
return Parse(&block_reader)
? FlushDownstream(kStreamIndex)