Make the text readers use streams.

Instead of having the text readers reading from the file directly, they
now accept the data as a stream.

Change-Id: Id1b32c867a8058a68ae7aab5c568f77672a4401d
This commit is contained in:
Jacob Trimble 2020-06-30 14:31:07 -07:00
parent 5b9fd409a5
commit 748e7e0056
4 changed files with 180 additions and 233 deletions

View File

@ -6,117 +6,104 @@
#include "packager/media/formats/webvtt/text_readers.h" #include "packager/media/formats/webvtt/text_readers.h"
#include <cstring>
#include "packager/base/logging.h" #include "packager/base/logging.h"
#include "packager/file/file.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
Status FileReader::Open(const std::string& filename, LineReader::LineReader() : should_flush_(false) {}
std::unique_ptr<FileReader>* out) {
const char* kReadOnly = "r";
std::unique_ptr<File, FileCloser> file( void LineReader::PushData(const uint8_t* data, size_t data_size) {
File::Open(filename.c_str(), kReadOnly)); buffer_.Push(data, static_cast<int>(data_size));
should_flush_ = false;
if (!file) {
return Status(error::INVALID_ARGUMENT,
"Could not open input file " + filename);
}
*out = std::unique_ptr<FileReader>(new FileReader(std::move(file)));
return Status::OK;
} }
bool FileReader::Next(char* out) {
// TODO(vaage): If file reading performance is poor, change this to buffer
// data and read from the buffer.
return file_->Read(out, 1) == 1;
}
FileReader::FileReader(std::unique_ptr<File, FileCloser> file)
: file_(std::move(file)) {
DCHECK(file_);
}
PeekingReader::PeekingReader(std::unique_ptr<FileReader> source)
: source_(std::move(source)) {}
bool PeekingReader::Next(char* out) {
DCHECK(out);
if (Peek(out)) {
has_cached_next_ = false;
return true;
}
return false;
}
bool PeekingReader::Peek(char* out) {
DCHECK(out);
if (!has_cached_next_ && source_->Next(&cached_next_)) {
has_cached_next_ = true;
}
if (has_cached_next_) {
*out = cached_next_;
return true;
}
return false;
}
LineReader::LineReader(std::unique_ptr<FileReader> source)
: source_(std::move(source)) {}
// Split lines based on https://w3c.github.io/webvtt/#webvtt-line-terminator // Split lines based on https://w3c.github.io/webvtt/#webvtt-line-terminator
bool LineReader::Next(std::string* out) { bool LineReader::Next(std::string* out) {
DCHECK(out); DCHECK(out);
out->clear();
bool read_something = false; int i;
char now; int skip = 0;
while (source_.Next(&now)) { const uint8_t* data;
read_something = true; int data_size;
// handle \n buffer_.Peek(&data, &data_size);
if (now == '\n') { for (i = 0; i < data_size; i++) {
// Handle \n
if (data[i] == '\n') {
skip = 1;
break; break;
} }
// handle \r and \r\n
if (now == '\r') { // Handle \r and \r\n
char next; if (data[i] == '\r') {
if (source_.Peek(&next) && next == '\n') { // Only read if we can see the next character; this ensures we don't get
source_.Next(&next); // Read in the '\n' that was just seen via |Peek| // the '\n' in the next PushData.
if (i + 1 == data_size) {
if (!should_flush_)
return false;
skip = 1;
} else {
if (data[i + 1] == '\n')
skip = 2;
else
skip = 1;
} }
break; break;
} }
out->push_back(now);
} }
return read_something;
if (i == data_size && (!should_flush_ || i == 0)) {
return false;
}
// TODO(modmaker): Handle character encodings?
out->assign(data, data + i);
buffer_.Pop(i + skip);
return true;
} }
BlockReader::BlockReader(std::unique_ptr<FileReader> source) void LineReader::Flush() {
: source_(std::move(source)) {} should_flush_ = true;
}
BlockReader::BlockReader() : should_flush_(false) {}
void BlockReader::PushData(const uint8_t* data, size_t data_size) {
source_.PushData(data, data_size);
should_flush_ = false;
}
bool BlockReader::Next(std::vector<std::string>* out) { bool BlockReader::Next(std::vector<std::string>* out) {
DCHECK(out); DCHECK(out);
out->clear(); bool end_block = false;
bool in_block = false;
// Read through lines until a non-empty line is found. With a non-empty // Read through lines until a non-empty line is found. With a non-empty
// line is found, start adding the lines to the output and once an empty // line is found, start adding the lines to the output and once an empty
// line if found again, stop adding lines and exit. // line if found again, stop adding lines and exit.
std::string line; std::string line;
while (source_.Next(&line)) { while (source_.Next(&line)) {
if (in_block && line.empty()) { if (!temp_.empty() && line.empty()) {
end_block = true;
break; break;
} }
if (in_block || !line.empty()) { if (!line.empty()) {
out->push_back(line); temp_.emplace_back(std::move(line));
in_block = true;
} }
} }
return in_block; if (!end_block && (!should_flush_ || temp_.empty()))
return false;
*out = std::move(temp_);
return true;
} }
void BlockReader::Flush() {
source_.Flush();
should_flush_ = true;
}
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -11,7 +11,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "packager/file/file_closer.h" #include "packager/media/base/byte_queue.h"
#include "packager/status.h" #include "packager/status.h"
namespace shaka { namespace shaka {
@ -19,70 +19,47 @@ class File;
namespace media { namespace media {
/// Class to read character-by-character from a file.
class FileReader {
public:
/// Create a new file reader by opening a file. If the file fails to open (in
/// readonly mode) a non-ok status will be returned. If the file successfully
/// opens, |out| will be set to a new FileReader and an ok status will be
/// returned.
static Status Open(const std::string& filename,
std::unique_ptr<FileReader>* out);
/// Read the next character from the file. If there is a next character,
/// |out| will be set and true will be returned. If there is no next
/// character false will be returned.
bool Next(char* out);
private:
explicit FileReader(std::unique_ptr<File, FileCloser> file);
FileReader(const FileReader& reader) = delete;
FileReader operator=(const FileReader& reader) = delete;
std::unique_ptr<File, FileCloser> file_;
};
class PeekingReader {
public:
explicit PeekingReader(std::unique_ptr<FileReader> source);
bool Peek(char* out);
bool Next(char* out);
private:
PeekingReader(const PeekingReader&) = delete;
PeekingReader operator=(const PeekingReader&) = delete;
std::unique_ptr<FileReader> source_;
char cached_next_ = 0;
bool has_cached_next_ = false;
};
class LineReader { class LineReader {
public: public:
explicit LineReader(std::unique_ptr<FileReader> source); LineReader();
/// Pushes data onto the end of the buffer.
void PushData(const uint8_t* data, size_t data_size);
/// Reads the next line from the buffer.
/// @return True if a line is read, false if there's no line in the buffer.
bool Next(std::string* out); bool Next(std::string* out);
/// Indicates that no more data is coming and that calls to Next should
/// return even possibly-incomplete data.
void Flush();
private: private:
LineReader(const LineReader&) = delete; LineReader(const LineReader&) = delete;
LineReader operator=(const LineReader&) = delete; LineReader operator=(const LineReader&) = delete;
PeekingReader source_; ByteQueue buffer_;
bool should_flush_;
}; };
class BlockReader { class BlockReader {
public: public:
explicit BlockReader(std::unique_ptr<FileReader> source); BlockReader();
/// Pushes data onto the end of the buffer.
void PushData(const uint8_t* data, size_t data_size);
/// Reads the next block from the buffer.
/// @return True if a block is read, false if there is no block in the buffer.
bool Next(std::vector<std::string>* out); bool Next(std::vector<std::string>* out);
/// Indicates that no more data is coming and that calls to Next should
/// return even possibly-incomplete data.
void Flush();
private: private:
BlockReader(const BlockReader&) = delete; BlockReader(const BlockReader&) = delete;
BlockReader operator=(const BlockReader&) = delete; BlockReader operator=(const BlockReader&) = delete;
LineReader source_; LineReader source_;
std::vector<std::string> temp_;
bool should_flush_;
}; };
} // namespace media } // namespace media

View File

@ -4,6 +4,7 @@
// license that can be found in the LICENSE file or at // license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd // https://developers.google.com/open-source/licenses/bsd
#include <gmock/gmock.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "packager/file/file.h" #include "packager/file/file.h"
@ -12,66 +13,15 @@
namespace shaka { namespace shaka {
namespace media { namespace media {
namespace {
const char* kFilename = "memory://test-file";
} // namespace
TEST(TextReadersTest, ReadWholeStream) { using testing::ElementsAre;
const char* text = "abcd";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
char c;
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'a');
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'b');
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'c');
ASSERT_TRUE(source->Next(&c));
ASSERT_EQ(c, 'd');
ASSERT_FALSE(source->Next(&c));
}
TEST(TextReadersTest, Peeking) {
const char* text = "abc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
std::unique_ptr<FileReader> source;
ASSERT_OK(FileReader::Open(kFilename, &source));
PeekingReader reader(std::move(source));
char c;
ASSERT_TRUE(reader.Peek(&c));
ASSERT_EQ(c, 'a');
ASSERT_TRUE(reader.Next(&c));
ASSERT_EQ(c, 'a');
ASSERT_TRUE(reader.Peek(&c));
ASSERT_EQ(c, 'b');
ASSERT_TRUE(reader.Next(&c));
ASSERT_EQ(c, 'b');
ASSERT_TRUE(reader.Peek(&c));
ASSERT_EQ(c, 'c');
ASSERT_TRUE(reader.Next(&c));
ASSERT_EQ(c, 'c');
ASSERT_FALSE(reader.Peek(&c));
ASSERT_FALSE(reader.Next(&c));
}
TEST(TextReadersTest, ReadLinesWithNewLine) { TEST(TextReadersTest, ReadLinesWithNewLine) {
const char* text = "a\nb\nc"; const uint8_t text[] = "a\nb\nc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); LineReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
std::string s; std::string s;
ASSERT_TRUE(reader.Next(&s)); ASSERT_TRUE(reader.Next(&s));
@ -84,14 +34,11 @@ TEST(TextReadersTest, ReadLinesWithNewLine) {
} }
TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) { TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) {
const char* text = "a\r\nb\r\nc"; const uint8_t text[] = "a\r\nb\r\nc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); LineReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
std::string s; std::string s;
ASSERT_TRUE(reader.Next(&s)); ASSERT_TRUE(reader.Next(&s));
@ -104,14 +51,11 @@ TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) {
} }
TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) { TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) {
const char* text = "a\n\rb\n\rc"; const uint8_t text[] = "a\n\rb\n\rc";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); LineReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
std::string s; std::string s;
ASSERT_TRUE(reader.Next(&s)); ASSERT_TRUE(reader.Next(&s));
@ -128,14 +72,11 @@ TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) {
} }
TEST(TextReadersTest, ReadLinesWithReturnAtEnd) { TEST(TextReadersTest, ReadLinesWithReturnAtEnd) {
const char* text = "a\r\nb\r\nc\r"; const uint8_t text[] = "a\r\nb\r\nc\r";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); LineReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
LineReader reader(std::move(source));
std::string s; std::string s;
ASSERT_TRUE(reader.Next(&s)); ASSERT_TRUE(reader.Next(&s));
@ -147,30 +88,44 @@ TEST(TextReadersTest, ReadLinesWithReturnAtEnd) {
ASSERT_FALSE(reader.Next(&s)); ASSERT_FALSE(reader.Next(&s));
} }
TEST(TextReadersTest, ReadLinesWithMultiplePushes) {
const uint8_t text1[] = "a\nb";
const uint8_t text2[] = "c\nd";
LineReader reader;
reader.PushData(text1, sizeof(text1) - 1);
std::string s;
ASSERT_TRUE(reader.Next(&s));
ASSERT_EQ(s, "a");
ASSERT_FALSE(reader.Next(&s));
reader.PushData(text2, sizeof(text2) - 1);
reader.Flush();
ASSERT_TRUE(reader.Next(&s));
ASSERT_EQ(s, "bc");
ASSERT_TRUE(reader.Next(&s));
ASSERT_EQ(s, "d");
ASSERT_FALSE(reader.Next(&s));
}
TEST(TextReadersTest, ReadBlocksReadMultilineBlock) { TEST(TextReadersTest, ReadBlocksReadMultilineBlock) {
const char* text = const uint8_t text[] =
"block 1 - line 1\n" "block 1 - line 1\n"
"block 1 - line 2"; "block 1 - line 2";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); BlockReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
BlockReader reader(std::move(source));
std::vector<std::string> block; std::vector<std::string> block;
ASSERT_TRUE(reader.Next(&block)); ASSERT_TRUE(reader.Next(&block));
ASSERT_EQ(2u, block.size()); EXPECT_THAT(block, ElementsAre("block 1 - line 1", "block 1 - line 2"));
ASSERT_EQ("block 1 - line 1", block[0]);
ASSERT_EQ("block 1 - line 2", block[1]);
ASSERT_FALSE(reader.Next(&block)); ASSERT_FALSE(reader.Next(&block));
} }
TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) { TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) {
const char* text = const uint8_t text[] =
"\n" "\n"
"\n" "\n"
"block 1\n" "block 1\n"
@ -178,38 +133,50 @@ TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) {
"\n" "\n"
"block 2\n"; "block 2\n";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); BlockReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
BlockReader reader(std::move(source));
std::vector<std::string> block; std::vector<std::string> block;
ASSERT_TRUE(reader.Next(&block)); ASSERT_TRUE(reader.Next(&block));
ASSERT_EQ(1u, block.size()); EXPECT_THAT(block, ElementsAre("block 1"));
ASSERT_EQ("block 1", block[0]);
ASSERT_TRUE(reader.Next(&block)); ASSERT_TRUE(reader.Next(&block));
ASSERT_EQ(1u, block.size()); EXPECT_THAT(block, ElementsAre("block 2"));
ASSERT_EQ("block 2", block[0]);
ASSERT_FALSE(reader.Next(&block)); ASSERT_FALSE(reader.Next(&block));
} }
TEST(TextReadersTest, ReadBlocksWithOnlyBlankLines) { TEST(TextReadersTest, ReadBlocksWithOnlyBlankLines) {
const char* text = "\n\n\n\n"; const uint8_t text[] = "\n\n\n\n";
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); BlockReader reader;
reader.PushData(text, sizeof(text) - 1);
std::unique_ptr<FileReader> source; reader.Flush();
ASSERT_OK(FileReader::Open(kFilename, &source));
BlockReader reader(std::move(source));
std::vector<std::string> block; std::vector<std::string> block;
ASSERT_FALSE(reader.Next(&block)); ASSERT_FALSE(reader.Next(&block));
} }
TEST(TextReadersTest, ReadBlocksMultipleReads) {
const uint8_t text1[] = "block 1\n";
const uint8_t text2[] =
"block 2\n"
"\n"
"\n"
"end";
BlockReader reader;
reader.PushData(text1, sizeof(text1) - 1);
std::vector<std::string> block;
ASSERT_FALSE(reader.Next(&block));
reader.PushData(text2, sizeof(text2) - 1);
reader.Flush();
ASSERT_TRUE(reader.Next(&block));
EXPECT_THAT(block, ElementsAre("block 1", "block 2"));
}
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -12,6 +12,8 @@
#include "packager/base/logging.h" #include "packager/base/logging.h"
#include "packager/base/strings/string_split.h" #include "packager/base/strings/string_split.h"
#include "packager/base/strings/string_util.h" #include "packager/base/strings/string_util.h"
#include "packager/file/file.h"
#include "packager/file/file_closer.h"
#include "packager/media/base/text_stream_info.h" #include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_timestamp.h" #include "packager/media/formats/webvtt/webvtt_timestamp.h"
#include "packager/status_macros.h" #include "packager/status_macros.h"
@ -19,7 +21,9 @@
namespace shaka { namespace shaka {
namespace media { namespace media {
namespace { namespace {
const uint64_t kStreamIndex = 0; const uint64_t kStreamIndex = 0;
const uint64_t kBufferSize = 64 * 1024 * 1024;
std::string BlockToString(const std::string* block, size_t size) { std::string BlockToString(const std::string* block, size_t size) {
std::string out = " --- BLOCK START ---\n"; std::string out = " --- BLOCK START ---\n";
@ -99,9 +103,21 @@ bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
} }
Status WebVttParser::Run() { Status WebVttParser::Run() {
std::unique_ptr<FileReader> reader; BlockReader block_reader;
RETURN_IF_ERROR(FileReader::Open(input_path_, &reader)); std::unique_ptr<File, FileCloser> file(File::Open(input_path_.c_str(), "r"));
BlockReader block_reader(std::move(reader)); if (!file)
return Status(error::FILE_FAILURE, "Error reading from file");
while (true) {
std::vector<uint8_t> buffer(kBufferSize);
const auto size = file->Read(buffer.data(), buffer.size());
if (size < 0)
return Status(error::FILE_FAILURE, "Error reading from file");
if (size == 0)
break;
block_reader.PushData(buffer.data(), size);
}
block_reader.Flush();
return Parse(&block_reader) return Parse(&block_reader)
? FlushDownstream(kStreamIndex) ? FlushDownstream(kStreamIndex)