Remove another use of the regex library

The regex library does not work correctly in GCC 4.8 or earlier, which is
still popular (it is the default compiler bundled with CentOS 7).

Issue #865, #929.

Change-Id: I136446a70831bd0237cd29646dd349fe7558176b
This commit is contained in:
KongQun Yang 2021-05-05 10:58:17 -07:00
parent d9124d6aaa
commit 2e521c8413
2 changed files with 43 additions and 3 deletions

View File

@ -9,11 +9,12 @@
#include <ctype.h>
#include <inttypes.h>
#include <regex>
#include <algorithm>
#include <unordered_set>
#include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/base/strings/string_util.h"
#include "packager/base/strings/stringprintf.h"
namespace shaka {
@ -67,6 +68,31 @@ std::string GetCloseTag(StyleTagKind tag) {
return ""; // Not reached, but Windows doesn't like NOTREACHED.
}
// Returns true when |c| is a space, tab, line feed or carriage return; every
// other character (including form feed and vertical tab) yields false.
bool IsWhitespace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
// Replace consecutive whitespaces with a single whitespace.
std::string CollapseWhitespace(const std::string& data) {
std::string output;
output.resize(data.size());
size_t chars_written = 0;
bool in_whitespace = false;
for (char c : data) {
if (IsWhitespace(c)) {
if (!in_whitespace) {
in_whitespace = true;
output[chars_written++] = ' ';
}
} else {
in_whitespace = false;
output[chars_written++] = c;
}
}
output.resize(chars_written);
return output;
}
std::string WriteFragment(const TextFragment& fragment,
std::list<StyleTagKind>* tags) {
std::string ret;
@ -115,8 +141,7 @@ std::string WriteFragment(const TextFragment& fragment,
// Replace newlines and consecutive whitespace with a single space. If
// the user wanted an explicit newline, they should use the "newline"
// field.
std::regex whitespace("\\s+", std::regex_constants::ECMAScript);
ret += std::regex_replace(fragment.body, whitespace, std::string(" "));
ret += CollapseWhitespace(fragment.body);
} else {
for (const auto& frag : fragment.sub_fragments) {
ret += WriteFragment(frag, tags);

View File

@ -179,6 +179,21 @@ TEST(WebVttUtilsTest, FragmentToString_PreservesTags) {
EXPECT_EQ(WebVttFragmentToString(frag), "<i>Foobar</i>");
}
// A run of mixed CR/LF/tab/space characters in front of the text must come
// out as exactly one leading space.
TEST(WebVttUtilsTest, FragmentToString_ConsecutiveLeadingWhitespaces) {
  TextFragment fragment(kNoStyle, "\r\n\t \r\nFoobar");
  const auto serialized = WebVttFragmentToString(fragment);
  EXPECT_EQ(serialized, " Foobar");
}
// A run of mixed CR/LF/tab/space characters after the text must come out as
// exactly one trailing space.
TEST(WebVttUtilsTest, FragmentToString_ConsecutiveTrailingWhitespaces) {
  TextFragment fragment(kNoStyle, "Foobar\r\n\t \r\n");
  const auto serialized = WebVttFragmentToString(fragment);
  EXPECT_EQ(serialized, "Foobar ");
}
// A run of mixed CR/LF/tab/space characters between two words must come out
// as exactly one separating space.
TEST(WebVttUtilsTest, FragmentToString_ConsecutiveInternalWhitespaces) {
  TextFragment fragment(kNoStyle, "Hello\r\n\t \r\nWorld");
  const auto serialized = WebVttFragmentToString(fragment);
  EXPECT_EQ(serialized, "Hello World");
}
TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragments) {
TextFragment frag;
frag.sub_fragments.emplace_back(kNoStyle, "Hello ");