shaka-packager/base/i18n/string_search_unittest.cc

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <string>

#include "base/i18n/rtl.h"
#include "base/i18n/string_search.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/icu/source/i18n/unicode/usearch.h"

namespace base {
namespace i18n {

// Note on setting the default locale for testing: the default locale on the
// Mac trybot is en_US_POSIX, under which a string search at primary-level
// collation strength is case-sensitive, whereas normally it would be
// case-insensitive. In other locales (including en_US, which English speakers
// in the U.S. use), the search is case-insensitive as expected. Tests that
// rely on case-insensitivity therefore switch from en_US_POSIX to en_US while
// they run and restore the original default locale afterwards.

// Exercises StringSearchIgnoringCaseAndAccents() with ASCII-only input. The
// first argument is the text being searched for and the second is the text
// being searched in; on success the match position and match length are
// written through the two out-parameters.
TEST(StringSearchTest, ASCII) {
  // Work around en_US_POSIX (see the note on the default locale above): swap
  // it for en_US for the duration of the test. The locale name is copied into
  // a std::string so the original value can be restored at the end.
  std::string default_locale(uloc_getDefault());
  bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  size_t index = 0;
  size_t length = 0;

  // Match at the very start of the searched text.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(5U, length);

  // The two strings differ only in how many spaces follow the 'h'; that
  // difference must prevent a match.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
      &index, &length));

  // The searched text contains earlier partial candidates; the reported match
  // is the one starting at offset 4.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
  EXPECT_EQ(4U, index);
  EXPECT_EQ(6U, length);

  // Nothing can be found inside an empty string.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("searching within empty string"), string16(),
      &index, &length));

  // Searching for an empty string succeeds with a zero-length match at 0.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(0U, length);

  // Letter case is ignored.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(18U, length);

  // Restore the original default locale. c_str() is used rather than data()
  // because only c_str() is guaranteed to be NUL-terminated before C++11.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.c_str());
}

// Exercises StringSearchIgnoringCaseAndAccents() with accented Latin letters
// in both precomposed form (a single code point) and decomposed form (base
// letter followed by a combining mark). Every expected match starts at offset
// 0, and the reported length is compared against the size of the searched
// string.
TEST(StringSearchTest, UnicodeLocaleIndependent) {
  // Base characters
  const string16 e_base = WideToUTF16(L"e");
  const string16 E_base = WideToUTF16(L"E");

  // Composed characters
  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

  // Decomposed characters
  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

  // Work around en_US_POSIX (see the note on the default locale above): swap
  // it for en_US for the duration of the test. The locale name is copied into
  // a std::string so the original value can be restored at the end.
  std::string default_locale(uloc_getDefault());
  bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  size_t index = 0;
  size_t length = 0;

  // Unaccented letter vs. precomposed accented letter, in both directions.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_base, e_with_acute_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_accent, e_base, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_base.size(), length);

  // Unaccented letter vs. decomposed accented letter, in both directions.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_base, e_with_acute_combining_mark, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_base, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_base.size(), length);

  // Same accent, decomposed vs. precomposed, in both directions.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_with_acute_accent,
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_accent, e_with_acute_combining_mark,
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

  // Different accents (acute vs. grave), both decomposed, in both directions.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_with_grave_combining_mark,
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_grave_combining_mark.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_grave_combining_mark, e_with_acute_combining_mark,
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

  // Different accents, one decomposed and one precomposed, in both directions.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_grave_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_combining_mark.size(), length);

  // Upper-case accented letters are searched for in lower-case accented text:
  // case and accent differences are both ignored at the same time.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_acute_accent, e_with_acute_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_grave_accent, e_with_acute_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_grave_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_acute_accent.size(), length);

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      E_base, e_with_grave_accent, &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(e_with_grave_accent.size(), length);

  // Sharing an accent is not enough: different base letters must not match,
  // whether precomposed or decomposed.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_with_acute_accent, e_with_acute_accent, &index, &length));

  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_with_acute_combining_mark, e_with_acute_combining_mark,
      &index, &length));

  // Restore the original default locale. c_str() is used rather than data()
  // because only c_str() is guaranteed to be NUL-terminated before C++11.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.c_str());
}

// Shows that the search result depends on the ICU default locale: "a" is found
// in U+00E5 under the locale the test starts with, but is no longer found once
// the default locale has been switched to "da".
TEST(StringSearchTest, UnicodeLocaleDependent) {
  const string16 plain_a = WideToUTF16(L"a");            // Base character.
  const string16 a_ring_above = WideToUTF16(L"\u00e5");  // Composed character.

  // Only the verdict matters here, so no out-parameters are supplied.
  EXPECT_TRUE(
      StringSearchIgnoringCaseAndAccents(plain_a, a_ring_above, NULL, NULL));

  // Remember the current default so it can be reinstated before returning.
  const char* const locale_to_restore = uloc_getDefault();
  SetICUDefaultLocale("da");

  // With "da" as the default locale the same search must fail.
  EXPECT_FALSE(
      StringSearchIgnoringCaseAndAccents(plain_a, a_ring_above, NULL, NULL));

  SetICUDefaultLocale(locale_to_restore);
}

// Exercises FixedPatternStringSearchIgnoringCaseAndAccents: one query object
// is built for a fixed pattern and then reused to search several different
// texts, including one with no match in between two that do match.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  // Work around en_US_POSIX (see the note on the default locale above): swap
  // it for en_US for the duration of the test. The locale name is copied into
  // a std::string so the original value can be restored at the end.
  std::string default_locale(uloc_getDefault());
  bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  size_t index = 0;
  size_t length = 0;

  // Search "hello" over multiple texts.
  FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));

  // Match in the middle of the text.
  EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
  EXPECT_EQ(2U, index);
  EXPECT_EQ(5U, length);

  // A failed search must not stop the same query from matching afterwards.
  EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));

  // Match that differs from the pattern only in letter case.
  EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(5U, length);

  // Restore the original default locale. c_str() is used rather than data()
  // because only c_str() is guaranteed to be NUL-terminated before C++11.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.c_str());
}

}  // namespace i18n
}  // namespace base
Start with media/mp4, media/webm and base codes from Chromium. 2013-09-24 01:35:40 +00:00			`// Copyright (c) 2011 The Chromium Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file.`

			`#include <string>`

			`#include "base/i18n/rtl.h"`
			`#include "base/i18n/string_search.h"`
			`#include "base/strings/string16.h"`
			`#include "base/strings/utf_string_conversions.h"`
			`#include "testing/gtest/include/gtest/gtest.h"`
			`#include "third_party/icu/source/i18n/unicode/usearch.h"`

			`namespace base {`
			`namespace i18n {`

			`// Note on setting default locale for testing: The current default locale on`
			`// the Mac trybot is en_US_POSIX, with which primary-level collation strength`
			`// string search is case-sensitive, when normally it should be`
			`// case-insensitive. In other locales (including en_US which English speakers`
			`// in the U.S. use), this search would be case-insensitive as expected.`

			`TEST(StringSearchTest, ASCII) {`
			`std::string default_locale(uloc_getDefault());`
			`bool locale_is_posix = (default_locale == "en_US_POSIX");`
			`if (locale_is_posix)`
			`SetICUDefaultLocale("en_US");`

			`size_t index = 0;`
			`size_t length = 0;`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(5U, length);`

			`EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(`
			`ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"),`
			`&index, &length));`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));`
			`EXPECT_EQ(4U, index);`
			`EXPECT_EQ(6U, length);`

			`EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(`
			`ASCIIToUTF16("searching within empty string"), string16(),`
			`&index, &length));`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`string16(), ASCIIToUTF16("searching for empty string"), &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(0U, length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),`
			`&index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(18U, length);`

			`if (locale_is_posix)`
			`SetICUDefaultLocale(default_locale.data());`
			`}`

			`TEST(StringSearchTest, UnicodeLocaleIndependent) {`
			`// Base characters`
			`const string16 e_base = WideToUTF16(L"e");`
			`const string16 E_base = WideToUTF16(L"E");`
			`const string16 a_base = WideToUTF16(L"a");`

			`// Composed characters`
			`const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");`
			`const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");`
			`const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");`
			`const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");`
			`const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");`

			`// Decomposed characters`
			`const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");`
			`const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");`
			`const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");`
			`const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");`
			`const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");`

			`std::string default_locale(uloc_getDefault());`
			`bool locale_is_posix = (default_locale == "en_US_POSIX");`
			`if (locale_is_posix)`
			`SetICUDefaultLocale("en_US");`

			`size_t index = 0;`
			`size_t length = 0;`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_base, e_with_acute_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_acute_accent, e_base, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_base.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_base, e_with_acute_combining_mark, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_combining_mark.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_acute_combining_mark, e_base, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_base.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_acute_combining_mark, e_with_acute_accent,`
			`&index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_acute_accent, e_with_acute_combining_mark,`
			`&index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_combining_mark.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_acute_combining_mark, e_with_grave_combining_mark,`
			`&index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_grave_combining_mark.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_grave_combining_mark, e_with_acute_combining_mark,`
			`&index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_combining_mark.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_acute_combining_mark, e_with_grave_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_grave_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`e_with_grave_accent, e_with_acute_combining_mark, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_combining_mark.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`E_with_acute_accent, e_with_acute_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`E_with_grave_accent, e_with_acute_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`E_with_acute_combining_mark, e_with_grave_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_grave_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`E_with_grave_combining_mark, e_with_acute_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_acute_accent.size(), length);`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`E_base, e_with_grave_accent, &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(e_with_grave_accent.size(), length);`

			`EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(`
			`a_with_acute_accent, e_with_acute_accent, &index, &length));`

			`EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(`
			`a_with_acute_combining_mark, e_with_acute_combining_mark,`
			`&index, &length));`

			`if (locale_is_posix)`
			`SetICUDefaultLocale(default_locale.data());`
			`}`

			`TEST(StringSearchTest, UnicodeLocaleDependent) {`
			`// Base characters`
			`const string16 a_base = WideToUTF16(L"a");`

			`// Composed characters`
			`const string16 a_with_ring = WideToUTF16(L"\u00e5");`

			`EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(`
			`a_base, a_with_ring, NULL, NULL));`

			`const char* default_locale = uloc_getDefault();`
			`SetICUDefaultLocale("da");`

			`EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(`
			`a_base, a_with_ring, NULL, NULL));`

			`SetICUDefaultLocale(default_locale);`
			`}`

			`TEST(StringSearchTest, FixedPatternMultipleSearch) {`
			`std::string default_locale(uloc_getDefault());`
			`bool locale_is_posix = (default_locale == "en_US_POSIX");`
			`if (locale_is_posix)`
			`SetICUDefaultLocale("en_US");`

			`size_t index = 0;`
			`size_t length = 0;`

			`// Search "hello" over multiple texts.`
			`FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));`
			`EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));`
			`EXPECT_EQ(2U, index);`
			`EXPECT_EQ(5U, length);`
			`EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));`
			`EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));`
			`EXPECT_EQ(0U, index);`
			`EXPECT_EQ(5U, length);`

			`if (locale_is_posix)`
			`SetICUDefaultLocale(default_locale.data());`
			`}`

			`} // namespace i18n`
			`} // namespace base`