fix: Fix local files with UTF8 names (#1246)

This fixes our use of std::filesystem to interpret all path names as
UTF8. Before this, UTF8 paths did not work correctly in all cases.

This also adds a new unit test to cover this case.

On Windows, it is critical that a UTF8 locale be set at runtime.
Applications linking with Packager as a library should call
setlocale(LC_ALL, ".UTF8") themselves; the Packager frontends now do
this automatically after converting wide character arguments into
narrow strings.

Closes #652
This commit is contained in:
Joey Parrish 2023-07-18 11:59:21 -07:00 committed by GitHub
parent c29c03c6e3
commit 5a2571b9bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 92 additions and 20 deletions

View File

@ -5,6 +5,7 @@
// https://developers.google.com/open-source/licenses/bsd
#include <iostream>
#include <locale>
#include "packager/app/mpd_generator_flags.h"
#include "packager/app/vlog_flags.h"
@ -20,7 +21,6 @@
#if defined(OS_WIN)
#include <codecvt>
#include <functional>
#include <locale>
#endif // defined(OS_WIN)
DEFINE_bool(licenses, false, "Dump licenses.");
@ -142,12 +142,20 @@ int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
delete[] utf8_args;
});
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
for (int idx = 0; idx < argc; ++idx) {
std::string utf8_arg(converter.to_bytes(argv[idx]));
utf8_arg += '\0';
utf8_argv[idx] = new char[utf8_arg.size()];
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
}
// Because we just converted wide character args into UTF8, and because
// std::filesystem::u8path is used to interpret all std::string paths as
// UTF8, we should set the locale to UTF8 as well, for the transition point
// to C library functions like fopen to work correctly with non-ASCII paths.
std::setlocale(LC_ALL, ".UTF8");
return shaka::MpdMain(argc, utf8_argv.get());
}
#else

View File

@ -6,6 +6,7 @@
#include <gflags/gflags.h>
#include <iostream>
#include <locale>
#include "packager/app/ad_cue_generator_flags.h"
#include "packager/app/crypto_flags.h"
@ -34,7 +35,6 @@
#if defined(OS_WIN)
#include <codecvt>
#include <functional>
#include <locale>
#endif // defined(OS_WIN)
DEFINE_bool(dump_stream_info, false, "Dump demuxed stream info.");
@ -575,12 +575,20 @@ int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
delete[] utf8_args;
});
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
for (int idx = 0; idx < argc; ++idx) {
std::string utf8_arg(converter.to_bytes(argv[idx]));
utf8_arg += '\0';
utf8_argv[idx] = new char[utf8_arg.size()];
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
}
// Because we just converted wide character args into UTF8, and because
// std::filesystem::u8path is used to interpret all std::string paths as
// UTF8, we should set the locale to UTF8 as well, for the transition point
// to C library functions like fopen to work correctly with non-ASCII paths.
std::setlocale(LC_ALL, ".UTF8");
return shaka::PackagerMain(argc, utf8_argv.get());
}
#else

View File

@ -73,8 +73,8 @@ bool DeleteLocalFile(const char* file_name) {
bool WriteLocalFileAtomically(const char* file_name,
const std::string& contents) {
const std::filesystem::path file_path(file_name);
const std::filesystem::path dir_path = file_path.parent_path();
const auto file_path = std::filesystem::u8path(file_name);
const auto dir_path = file_path.parent_path();
std::string temp_file_name;
if (!TempFilePath(dir_path.string(), &temp_file_name))
@ -83,7 +83,8 @@ bool WriteLocalFileAtomically(const char* file_name,
return false;
std::error_code ec;
std::filesystem::rename(temp_file_name, file_name, ec);
auto temp_file_path = std::filesystem::u8path(temp_file_name);
std::filesystem::rename(temp_file_path, file_name, ec);
if (ec) {
LOG(ERROR) << "Failed to replace file '" << file_name << "' with '"
<< temp_file_name << "', error: " << ec;
@ -405,7 +406,8 @@ bool File::IsLocalRegularFile(const char* file_name) {
return false;
std::error_code ec;
return std::filesystem::is_regular_file(real_file_name, ec);
auto real_file_path = std::filesystem::u8path(real_file_name);
return std::filesystem::is_regular_file(real_file_path, ec);
}
std::string File::MakeCallbackFileName(

View File

@ -16,8 +16,8 @@ std::string generate_unique_temp_path() {
// Generate a unique name for a temporary file, using standard library
// routines, to avoid a circular dependency on any of our own code for
// generating temporary files. The template must end in 6 X's.
std::filesystem::path temp_path_template =
(std::filesystem::temp_directory_path() / "packager-test.XXXXXX");
auto temp_path_template =
std::filesystem::temp_directory_path() / "packager-test.XXXXXX";
std::string temp_path_template_string = temp_path_template.string();
#if defined(OS_WIN)
// _mktemp will modify the string passed to it to reflect the generated name
@ -36,7 +36,7 @@ std::string generate_unique_temp_path() {
void delete_file(const std::string& path) {
std::error_code ec;
std::filesystem::remove(path, ec);
std::filesystem::remove(std::filesystem::u8path(path), ec);
// Ignore errors.
}
@ -44,7 +44,7 @@ TempFile::TempFile() : path_(generate_unique_temp_path()) {}
TempFile::~TempFile() {
std::error_code ec;
std::filesystem::remove(path_, ec);
std::filesystem::remove(std::filesystem::u8path(path_), ec);
// Ignore errors.
}

View File

@ -10,6 +10,7 @@
#include <sys/stat.h>
#include <filesystem>
#include <locale>
#include "absl/flags/declare.h"
#include "packager/file/file.h"
@ -31,13 +32,14 @@ void WriteFile(const std::string& path, const std::string& data) {
void DeleteFile(const std::string& path) {
std::error_code ec;
std::filesystem::remove(path, ec);
std::filesystem::remove(std::filesystem::u8path(path), ec);
// Ignore errors.
}
int64_t FileSize(const std::string& path) {
std::error_code ec;
int64_t file_size = std::filesystem::file_size(path, ec);
int64_t file_size =
std::filesystem::file_size(std::filesystem::u8path(path), ec);
if (ec) {
return -1;
}
@ -65,6 +67,17 @@ namespace shaka {
class LocalFileTest : public testing::Test {
protected:
static std::string original_locale_;
static void SetUpTestSuite() {
original_locale_ = setlocale(LC_ALL, NULL);
setlocale(LC_ALL, ".UTF8");
}
static void TearDownTestSuite() {
setlocale(LC_ALL, original_locale_.c_str());
}
void SetUp() override {
data_.resize(kDataSize);
for (int i = 0; i < kDataSize; ++i)
@ -97,6 +110,9 @@ class LocalFileTest : public testing::Test {
std::string local_file_name_;
};
// static
std::string LocalFileTest::original_locale_;
TEST_F(LocalFileTest, ReadNotExist) {
// Remove test file if it exists.
DeleteFile(local_file_name_no_prefix_);
@ -233,6 +249,42 @@ TEST_F(LocalFileTest, IsLocalRegular) {
ASSERT_TRUE(File::IsLocalRegularFile(local_file_name_.c_str()));
}
TEST_F(LocalFileTest, UnicodePath) {
// Delete the temp file already created.
DeleteFile(local_file_name_no_prefix_);
// Modify the local file name for this test to include non-ASCII characters.
// This is used in TearDown() to clean up the file we create in the test.
const std::string unicode_suffix = "από.txt";
local_file_name_ += unicode_suffix;
local_file_name_no_prefix_ += unicode_suffix;
// Write file using File API.
File* file = File::Open(local_file_name_.c_str(), "w");
ASSERT_TRUE(file != NULL);
EXPECT_EQ(kDataSize, file->Write(&data_[0], kDataSize));
// Check the size.
EXPECT_EQ(kDataSize, file->Size());
ASSERT_TRUE(file->Close());
// Open file using File API.
file = File::Open(local_file_name_.c_str(), "r");
ASSERT_TRUE(file != NULL);
// Read the entire file.
std::string read_data(kDataSize, 0);
EXPECT_EQ(kDataSize, file->Read(&read_data[0], kDataSize));
// Verify EOF.
uint8_t single_byte;
EXPECT_EQ(0, file->Read(&single_byte, sizeof(single_byte)));
ASSERT_TRUE(file->Close());
// Compare data written and read.
EXPECT_EQ(data_, read_data);
}
class ParamLocalFileTest : public LocalFileTest,
public ::testing::WithParamInterface<uint8_t> {};

View File

@ -43,7 +43,7 @@ std::string TempFileName() {
} // namespace
bool TempFilePath(const std::string& temp_dir, std::string* temp_file_path) {
std::filesystem::path temp_dir_path(temp_dir);
auto temp_dir_path = std::filesystem::u8path(temp_dir);
*temp_file_path = (temp_dir_path / TempFileName()).string();
return true;
}

View File

@ -75,7 +75,8 @@ int64_t LocalFile::Size() {
}
std::error_code ec;
int64_t file_size = std::filesystem::file_size(file_name(), ec);
auto file_path = std::filesystem::u8path(file_name());
int64_t file_size = std::filesystem::file_size(file_path, ec);
if (ec) {
LOG(ERROR) << "Cannot get file size, error: " << ec;
return -1;
@ -112,7 +113,7 @@ bool LocalFile::Tell(uint64_t* position) {
LocalFile::~LocalFile() {}
bool LocalFile::Open() {
std::filesystem::path file_path(file_name());
auto file_path = std::filesystem::u8path(file_name());
// Create upper level directories for write mode.
if (file_mode_.find("w") != std::string::npos) {
@ -133,9 +134,10 @@ bool LocalFile::Open() {
}
bool LocalFile::Delete(const char* file_name) {
auto file_path = std::filesystem::u8path(file_name);
std::error_code ec;
// On error (ec truthy), remove() will return false anyway.
return std::filesystem::remove(file_name, ec);
return std::filesystem::remove(file_path, ec);
}
} // namespace shaka

View File

@ -144,7 +144,7 @@ TEST(ContainerNamesTest, CheckFixedStrings) {
// Determine the container type of a specified file.
void TestFile(MediaContainerName expected, const std::string& name) {
std::filesystem::path path = GetTestDataFilePath(name);
auto path = GetTestDataFilePath(name);
std::vector<uint8_t> data = ReadTestDataFile(name);
ASSERT_FALSE(data.empty());

View File

@ -11,13 +11,13 @@ namespace media {
// Returns a file path for a file in the media/test/data directory.
std::filesystem::path GetTestDataFilePath(const std::string& name) {
std::filesystem::path data_dir(TEST_DATA_DIR);
auto data_dir = std::filesystem::u8path(TEST_DATA_DIR);
return data_dir / name;
}
// Returns a file path for a file in the media/app/test/testdata directory.
std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
std::filesystem::path data_dir(TEST_DATA_DIR);
auto data_dir = std::filesystem::u8path(TEST_DATA_DIR);
auto app_data_dir =
data_dir.parent_path().parent_path() / "app" / "test" / "testdata";
return app_data_dir / name;
@ -25,7 +25,7 @@ std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
// Reads a test file from media/test/data directory and returns its content.
std::vector<uint8_t> ReadTestDataFile(const std::string& name) {
std::filesystem::path path = GetTestDataFilePath(name);
auto path = GetTestDataFilePath(name);
FILE* f = fopen(path.string().c_str(), "rb");
if (!f) {