From 2e521c8413c3273795552cf26cd968d6eccfad8f Mon Sep 17 00:00:00 2001
From: KongQun Yang
Date: Wed, 5 May 2021 10:58:17 -0700
Subject: [PATCH] Remove another use of regex library

It is not working correctly in gcc 4.8 or earlier, which is still
popular (bundled by default in CentOS 7).

Issue #865, #929.

Change-Id: I136446a70831bd0237cd29646dd349fe7558176b
---
 packager/media/formats/webvtt/webvtt_utils.cc | 31 +++++++++++++++++--
 .../formats/webvtt/webvtt_utils_unittest.cc | 15 +++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/packager/media/formats/webvtt/webvtt_utils.cc b/packager/media/formats/webvtt/webvtt_utils.cc
index bb0bea3481..3a7af3f6a7 100644
--- a/packager/media/formats/webvtt/webvtt_utils.cc
+++ b/packager/media/formats/webvtt/webvtt_utils.cc
@@ -9,11 +9,12 @@
 #include
 #include
 
-#include
+#include
 #include
 
 #include "packager/base/logging.h"
 #include "packager/base/strings/string_number_conversions.h"
+#include "packager/base/strings/string_util.h"
 #include "packager/base/strings/stringprintf.h"
 
 namespace shaka {
@@ -67,6 +68,31 @@ std::string GetCloseTag(StyleTagKind tag) {
   return "";  // Not reached, but Windows doesn't like NOTREACHED.
 }
 
+bool IsWhitespace(char c) {
+  return c == '\t' || c == '\r' || c == '\n' || c == ' ';
+}
+
+// Replace consecutive whitespaces with a single whitespace.
+std::string CollapseWhitespace(const std::string& data) {
+  std::string output;
+  output.resize(data.size());
+  size_t chars_written = 0;
+  bool in_whitespace = false;
+  for (char c : data) {
+    if (IsWhitespace(c)) {
+      if (!in_whitespace) {
+        in_whitespace = true;
+        output[chars_written++] = ' ';
+      }
+    } else {
+      in_whitespace = false;
+      output[chars_written++] = c;
+    }
+  }
+  output.resize(chars_written);
+  return output;
+}
+
 std::string WriteFragment(const TextFragment& fragment,
                           std::list* tags) {
   std::string ret;
@@ -115,8 +141,7 @@ std::string WriteFragment(const TextFragment& fragment,
     // Replace newlines and consecutive whitespace with a single space. If
     // the user wanted an explicit newline, they should use the "newline"
     // field.
-    std::regex whitespace("\\s+", std::regex_constants::ECMAScript);
-    ret += std::regex_replace(fragment.body, whitespace, std::string(" "));
+    ret += CollapseWhitespace(fragment.body);
   } else {
     for (const auto& frag : fragment.sub_fragments) {
       ret += WriteFragment(frag, tags);
diff --git a/packager/media/formats/webvtt/webvtt_utils_unittest.cc b/packager/media/formats/webvtt/webvtt_utils_unittest.cc
index eb4281ef96..a56d5d08c6 100644
--- a/packager/media/formats/webvtt/webvtt_utils_unittest.cc
+++ b/packager/media/formats/webvtt/webvtt_utils_unittest.cc
@@ -179,6 +179,21 @@ TEST(WebVttUtilsTest, FragmentToString_PreservesTags) {
   EXPECT_EQ(WebVttFragmentToString(frag), "Foobar");
 }
 
+TEST(WebVttUtilsTest, FragmentToString_ConsecutiveLeadingWhitespaces) {
+  TextFragment frag(kNoStyle, "\r\n\t \r\nFoobar");
+  EXPECT_EQ(WebVttFragmentToString(frag), " Foobar");
+}
+
+TEST(WebVttUtilsTest, FragmentToString_ConsecutiveTrailingWhitespaces) {
+  TextFragment frag(kNoStyle, "Foobar\r\n\t \r\n");
+  EXPECT_EQ(WebVttFragmentToString(frag), "Foobar ");
+}
+
+TEST(WebVttUtilsTest, FragmentToString_ConsecutiveInternalWhitespaces) {
+  TextFragment frag(kNoStyle, "Hello\r\n\t \r\nWorld");
+  EXPECT_EQ(WebVttFragmentToString(frag), "Hello World");
+}
+
 TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragments) {
   TextFragment frag;
   frag.sub_fragments.emplace_back(kNoStyle, "Hello ");