From 748e7e0056c9a72e38226db4f607983795c9b829 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Tue, 30 Jun 2020 14:31:07 -0700 Subject: [PATCH] Make the text readers use streams. Instead of having the text readers reading from the file directly, they now accept the data as a stream. Change-Id: Id1b32c867a8058a68ae7aab5c568f77672a4401d --- packager/media/formats/webvtt/text_readers.cc | 143 +++++++------- packager/media/formats/webvtt/text_readers.h | 65 ++----- .../formats/webvtt/text_readers_unittest.cc | 183 +++++++----------- .../media/formats/webvtt/webvtt_parser.cc | 22 ++- 4 files changed, 180 insertions(+), 233 deletions(-) diff --git a/packager/media/formats/webvtt/text_readers.cc b/packager/media/formats/webvtt/text_readers.cc index 02fa37dccc..0d6f0fd22f 100644 --- a/packager/media/formats/webvtt/text_readers.cc +++ b/packager/media/formats/webvtt/text_readers.cc @@ -6,117 +6,104 @@ #include "packager/media/formats/webvtt/text_readers.h" +#include + #include "packager/base/logging.h" -#include "packager/file/file.h" namespace shaka { namespace media { -Status FileReader::Open(const std::string& filename, - std::unique_ptr* out) { - const char* kReadOnly = "r"; +LineReader::LineReader() : should_flush_(false) {} - std::unique_ptr file( - File::Open(filename.c_str(), kReadOnly)); - - if (!file) { - return Status(error::INVALID_ARGUMENT, - "Could not open input file " + filename); - } - - *out = std::unique_ptr(new FileReader(std::move(file))); - - return Status::OK; +void LineReader::PushData(const uint8_t* data, size_t data_size) { + buffer_.Push(data, static_cast(data_size)); + should_flush_ = false; } -bool FileReader::Next(char* out) { - // TODO(vaage): If file reading performance is poor, change this to buffer - // data and read from the buffer. - return file_->Read(out, 1) == 1; -} - -FileReader::FileReader(std::unique_ptr file) - : file_(std::move(file)) { - DCHECK(file_); -} - -PeekingReader::PeekingReader(std::unique_ptr source) - : source_(std::move(source)) {} - -bool PeekingReader::Next(char* out) { - DCHECK(out); - if (Peek(out)) { - has_cached_next_ = false; - return true; - } - return false; -} - -bool PeekingReader::Peek(char* out) { - DCHECK(out); - if (!has_cached_next_ && source_->Next(&cached_next_)) { - has_cached_next_ = true; - } - if (has_cached_next_) { - *out = cached_next_; - return true; - } - return false; -} - -LineReader::LineReader(std::unique_ptr source) - : source_(std::move(source)) {} - // Split lines based on https://w3c.github.io/webvtt/#webvtt-line-terminator bool LineReader::Next(std::string* out) { DCHECK(out); - out->clear(); - bool read_something = false; - char now; - while (source_.Next(&now)) { - read_something = true; - // handle \n - if (now == '\n') { + + int i; + int skip = 0; + const uint8_t* data; + int data_size; + buffer_.Peek(&data, &data_size); + for (i = 0; i < data_size; i++) { + // Handle \n + if (data[i] == '\n') { + skip = 1; break; } - // handle \r and \r\n - if (now == '\r') { - char next; - if (source_.Peek(&next) && next == '\n') { - source_.Next(&next); // Read in the '\n' that was just seen via |Peek| + + // Handle \r and \r\n + if (data[i] == '\r') { + // Only read if we can see the next character; this ensures we don't get + // the '\n' in the next PushData. + if (i + 1 == data_size) { + if (!should_flush_) + return false; + skip = 1; + } else { + if (data[i + 1] == '\n') + skip = 2; + else + skip = 1; } break; } - out->push_back(now); } - return read_something; + + if (i == data_size && (!should_flush_ || i == 0)) { + return false; + } + + // TODO(modmaker): Handle character encodings? + out->assign(data, data + i); + buffer_.Pop(i + skip); + return true; } -BlockReader::BlockReader(std::unique_ptr source) - : source_(std::move(source)) {} +void LineReader::Flush() { + should_flush_ = true; +} + +BlockReader::BlockReader() : should_flush_(false) {} + +void BlockReader::PushData(const uint8_t* data, size_t data_size) { + source_.PushData(data, data_size); + should_flush_ = false; +} bool BlockReader::Next(std::vector* out) { DCHECK(out); - out->clear(); - - bool in_block = false; - + bool end_block = false; // Read through lines until a non-empty line is found. With a non-empty // line is found, start adding the lines to the output and once an empty // line if found again, stop adding lines and exit. std::string line; while (source_.Next(&line)) { - if (in_block && line.empty()) { + if (!temp_.empty() && line.empty()) { + end_block = true; break; } - if (in_block || !line.empty()) { - out->push_back(line); - in_block = true; + if (!line.empty()) { + temp_.emplace_back(std::move(line)); } } - return in_block; + if (!end_block && (!should_flush_ || temp_.empty())) + return false; + + *out = std::move(temp_); + return true; } + +void BlockReader::Flush() { + source_.Flush(); + should_flush_ = true; +} + } // namespace media } // namespace shaka diff --git a/packager/media/formats/webvtt/text_readers.h b/packager/media/formats/webvtt/text_readers.h index 4fe0772148..1bafceabfa 100644 --- a/packager/media/formats/webvtt/text_readers.h +++ b/packager/media/formats/webvtt/text_readers.h @@ -11,7 +11,7 @@ #include #include -#include "packager/file/file_closer.h" +#include "packager/media/base/byte_queue.h" #include "packager/status.h" namespace shaka { @@ -19,70 +19,47 @@ class File; namespace media { -/// Class to read character-by-character from a file. -class FileReader { - public: - /// Create a new file reader by opening a file. If the file fails to open (in - /// readonly mode) a non-ok status will be returned. If the file successfully - /// opens, |out| will be set to a new FileReader and an ok status will be - /// returned. - static Status Open(const std::string& filename, - std::unique_ptr* out); - - /// Read the next character from the file. If there is a next character, - /// |out| will be set and true will be returned. If there is no next - /// character false will be returned. - bool Next(char* out); - - private: - explicit FileReader(std::unique_ptr file); - - FileReader(const FileReader& reader) = delete; - FileReader operator=(const FileReader& reader) = delete; - - std::unique_ptr file_; -}; - -class PeekingReader { - public: - explicit PeekingReader(std::unique_ptr source); - - bool Peek(char* out); - bool Next(char* out); - - private: - PeekingReader(const PeekingReader&) = delete; - PeekingReader operator=(const PeekingReader&) = delete; - - std::unique_ptr source_; - char cached_next_ = 0; - bool has_cached_next_ = false; -}; - class LineReader { public: - explicit LineReader(std::unique_ptr source); + LineReader(); + /// Pushes data onto the end of the buffer. + void PushData(const uint8_t* data, size_t data_size); + /// Reads the next line from the buffer. + /// @return True if a line is read, false if there's no line in the buffer. bool Next(std::string* out); + /// Indicates that no more data is coming and that calls to Next should + /// return even possibly-incomplete data. + void Flush(); private: LineReader(const LineReader&) = delete; LineReader operator=(const LineReader&) = delete; - PeekingReader source_; + ByteQueue buffer_; + bool should_flush_; }; class BlockReader { public: - explicit BlockReader(std::unique_ptr source); + BlockReader(); + /// Pushes data onto the end of the buffer. + void PushData(const uint8_t* data, size_t data_size); + /// Reads the next block from the buffer. + /// @return True if a block is read, false if there is no block in the buffer. bool Next(std::vector* out); + /// Indicates that no more data is coming and that calls to Next should + /// return even possibly-incomplete data. + void Flush(); private: BlockReader(const BlockReader&) = delete; BlockReader operator=(const BlockReader&) = delete; LineReader source_; + std::vector temp_; + bool should_flush_; }; } // namespace media diff --git a/packager/media/formats/webvtt/text_readers_unittest.cc b/packager/media/formats/webvtt/text_readers_unittest.cc index 85dc1c5839..f60f5c6d7a 100644 --- a/packager/media/formats/webvtt/text_readers_unittest.cc +++ b/packager/media/formats/webvtt/text_readers_unittest.cc @@ -4,6 +4,7 @@ // license that can be found in the LICENSE file or at // https://developers.google.com/open-source/licenses/bsd +#include #include #include "packager/file/file.h" @@ -12,66 +13,15 @@ namespace shaka { namespace media { -namespace { -const char* kFilename = "memory://test-file"; -} // namespace -TEST(TextReadersTest, ReadWholeStream) { - const char* text = "abcd"; - - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - char c; - ASSERT_TRUE(source->Next(&c)); - ASSERT_EQ(c, 'a'); - ASSERT_TRUE(source->Next(&c)); - ASSERT_EQ(c, 'b'); - ASSERT_TRUE(source->Next(&c)); - ASSERT_EQ(c, 'c'); - ASSERT_TRUE(source->Next(&c)); - ASSERT_EQ(c, 'd'); - ASSERT_FALSE(source->Next(&c)); -} - -TEST(TextReadersTest, Peeking) { - const char* text = "abc"; - - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - PeekingReader reader(std::move(source)); - - char c; - ASSERT_TRUE(reader.Peek(&c)); - ASSERT_EQ(c, 'a'); - ASSERT_TRUE(reader.Next(&c)); - ASSERT_EQ(c, 'a'); - ASSERT_TRUE(reader.Peek(&c)); - ASSERT_EQ(c, 'b'); - ASSERT_TRUE(reader.Next(&c)); - ASSERT_EQ(c, 'b'); - ASSERT_TRUE(reader.Peek(&c)); - ASSERT_EQ(c, 'c'); - ASSERT_TRUE(reader.Next(&c)); - ASSERT_EQ(c, 'c'); - ASSERT_FALSE(reader.Peek(&c)); - ASSERT_FALSE(reader.Next(&c)); -} +using testing::ElementsAre; TEST(TextReadersTest, ReadLinesWithNewLine) { - const char* text = "a\nb\nc"; + const uint8_t text[] = "a\nb\nc"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - LineReader reader(std::move(source)); + LineReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::string s; ASSERT_TRUE(reader.Next(&s)); @@ -84,14 +34,11 @@ TEST(TextReadersTest, ReadLinesWithNewLine) { } TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) { - const char* text = "a\r\nb\r\nc"; + const uint8_t text[] = "a\r\nb\r\nc"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - LineReader reader(std::move(source)); + LineReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::string s; ASSERT_TRUE(reader.Next(&s)); @@ -104,14 +51,11 @@ TEST(TextReadersTest, ReadLinesWithReturnsAndNewLine) { } TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) { - const char* text = "a\n\rb\n\rc"; + const uint8_t text[] = "a\n\rb\n\rc"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - LineReader reader(std::move(source)); + LineReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::string s; ASSERT_TRUE(reader.Next(&s)); @@ -128,14 +72,11 @@ TEST(TextReadersTest, ReadLinesWithNewLineAndReturns) { } TEST(TextReadersTest, ReadLinesWithReturnAtEnd) { - const char* text = "a\r\nb\r\nc\r"; + const uint8_t text[] = "a\r\nb\r\nc\r"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - LineReader reader(std::move(source)); + LineReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::string s; ASSERT_TRUE(reader.Next(&s)); @@ -147,30 +88,44 @@ TEST(TextReadersTest, ReadLinesWithReturnAtEnd) { ASSERT_FALSE(reader.Next(&s)); } +TEST(TextReadersTest, ReadLinesWithMultiplePushes) { + const uint8_t text1[] = "a\nb"; + const uint8_t text2[] = "c\nd"; + + LineReader reader; + reader.PushData(text1, sizeof(text1) - 1); + + std::string s; + ASSERT_TRUE(reader.Next(&s)); + ASSERT_EQ(s, "a"); + ASSERT_FALSE(reader.Next(&s)); + + reader.PushData(text2, sizeof(text2) - 1); + reader.Flush(); + ASSERT_TRUE(reader.Next(&s)); + ASSERT_EQ(s, "bc"); + ASSERT_TRUE(reader.Next(&s)); + ASSERT_EQ(s, "d"); + ASSERT_FALSE(reader.Next(&s)); +} + TEST(TextReadersTest, ReadBlocksReadMultilineBlock) { - const char* text = + const uint8_t text[] = "block 1 - line 1\n" "block 1 - line 2"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - BlockReader reader(std::move(source)); + BlockReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::vector block; - ASSERT_TRUE(reader.Next(&block)); - ASSERT_EQ(2u, block.size()); - ASSERT_EQ("block 1 - line 1", block[0]); - ASSERT_EQ("block 1 - line 2", block[1]); - + EXPECT_THAT(block, ElementsAre("block 1 - line 1", "block 1 - line 2")); ASSERT_FALSE(reader.Next(&block)); } TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) { - const char* text = + const uint8_t text[] = "\n" "\n" "block 1\n" @@ -178,38 +133,50 @@ TEST(TextReadersTest, ReadBlocksSkipBlankLinesBeforeBlocks) { "\n" "block 2\n"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - BlockReader reader(std::move(source)); + BlockReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::vector block; ASSERT_TRUE(reader.Next(&block)); - ASSERT_EQ(1u, block.size()); - ASSERT_EQ("block 1", block[0]); + EXPECT_THAT(block, ElementsAre("block 1")); ASSERT_TRUE(reader.Next(&block)); - ASSERT_EQ(1u, block.size()); - ASSERT_EQ("block 2", block[0]); - + EXPECT_THAT(block, ElementsAre("block 2")); ASSERT_FALSE(reader.Next(&block)); } TEST(TextReadersTest, ReadBlocksWithOnlyBlankLines) { - const char* text = "\n\n\n\n"; + const uint8_t text[] = "\n\n\n\n"; - ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); - - std::unique_ptr source; - ASSERT_OK(FileReader::Open(kFilename, &source)); - - BlockReader reader(std::move(source)); + BlockReader reader; + reader.PushData(text, sizeof(text) - 1); + reader.Flush(); std::vector block; ASSERT_FALSE(reader.Next(&block)); } + +TEST(TextReadersTest, ReadBlocksMultipleReads) { + const uint8_t text1[] = "block 1\n"; + const uint8_t text2[] = + "block 2\n" + "\n" + "\n" + "end"; + + BlockReader reader; + reader.PushData(text1, sizeof(text1) - 1); + + std::vector block; + ASSERT_FALSE(reader.Next(&block)); + reader.PushData(text2, sizeof(text2) - 1); + reader.Flush(); + + ASSERT_TRUE(reader.Next(&block)); + EXPECT_THAT(block, ElementsAre("block 1", "block 2")); +} + } // namespace media } // namespace shaka diff --git a/packager/media/formats/webvtt/webvtt_parser.cc b/packager/media/formats/webvtt/webvtt_parser.cc index c3999527f7..311c5920df 100644 --- a/packager/media/formats/webvtt/webvtt_parser.cc +++ b/packager/media/formats/webvtt/webvtt_parser.cc @@ -12,6 +12,8 @@ #include "packager/base/logging.h" #include "packager/base/strings/string_split.h" #include "packager/base/strings/string_util.h" +#include "packager/file/file.h" +#include "packager/file/file_closer.h" #include "packager/media/base/text_stream_info.h" #include "packager/media/formats/webvtt/webvtt_timestamp.h" #include "packager/status_macros.h" @@ -19,7 +21,9 @@ namespace shaka { namespace media { namespace { + const uint64_t kStreamIndex = 0; +const uint64_t kBufferSize = 64 * 1024 * 1024; std::string BlockToString(const std::string* block, size_t size) { std::string out = " --- BLOCK START ---\n"; @@ -99,9 +103,21 @@ bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const { } Status WebVttParser::Run() { - std::unique_ptr reader; - RETURN_IF_ERROR(FileReader::Open(input_path_, &reader)); - BlockReader block_reader(std::move(reader)); + BlockReader block_reader; + std::unique_ptr file(File::Open(input_path_.c_str(), "r")); + if (!file) + return Status(error::FILE_FAILURE, "Error reading from file"); + while (true) { + std::vector buffer(kBufferSize); + const auto size = file->Read(buffer.data(), buffer.size()); + if (size < 0) + return Status(error::FILE_FAILURE, "Error reading from file"); + if (size == 0) + break; + + block_reader.PushData(buffer.data(), size); + } + block_reader.Flush(); return Parse(&block_reader) ? FlushDownstream(kStreamIndex)