fix: Fix local files with UTF8 names (#1246)
This fixes our use of std::filesystem to interpret all paths names as UTF8. Before this, UTF8 paths did not work correctly in all cases. This also adds a new unit test to cover this case. On Windows, it is critical that a UTF8 locale be set at runtime. Applications linking with Packager as a library should call setlocale(), and the Packager frontends now do this automatically after converting wide character arguments into narrow strings. Closes #652
This commit is contained in:
parent
c29c03c6e3
commit
5a2571b9bc
|
@ -5,6 +5,7 @@
|
|||
// https://developers.google.com/open-source/licenses/bsd
|
||||
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
|
||||
#include "packager/app/mpd_generator_flags.h"
|
||||
#include "packager/app/vlog_flags.h"
|
||||
|
@ -20,7 +21,6 @@
|
|||
#if defined(OS_WIN)
|
||||
#include <codecvt>
|
||||
#include <functional>
|
||||
#include <locale>
|
||||
#endif // defined(OS_WIN)
|
||||
|
||||
DEFINE_bool(licenses, false, "Dump licenses.");
|
||||
|
@ -142,12 +142,20 @@ int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
|
|||
delete[] utf8_args;
|
||||
});
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||
|
||||
for (int idx = 0; idx < argc; ++idx) {
|
||||
std::string utf8_arg(converter.to_bytes(argv[idx]));
|
||||
utf8_arg += '\0';
|
||||
utf8_argv[idx] = new char[utf8_arg.size()];
|
||||
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
|
||||
}
|
||||
|
||||
// Because we just converted wide character args into UTF8, and because
|
||||
// std::filesystem::u8path is used to interpret all std::string paths as
|
||||
// UTF8, we should set the locale to UTF8 as well, for the transition point
|
||||
// to C library functions like fopen to work correctly with non-ASCII paths.
|
||||
std::setlocale(LC_ALL, ".UTF8");
|
||||
|
||||
return shaka::MpdMain(argc, utf8_argv.get());
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <gflags/gflags.h>
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
|
||||
#include "packager/app/ad_cue_generator_flags.h"
|
||||
#include "packager/app/crypto_flags.h"
|
||||
|
@ -34,7 +35,6 @@
|
|||
#if defined(OS_WIN)
|
||||
#include <codecvt>
|
||||
#include <functional>
|
||||
#include <locale>
|
||||
#endif // defined(OS_WIN)
|
||||
|
||||
DEFINE_bool(dump_stream_info, false, "Dump demuxed stream info.");
|
||||
|
@ -575,12 +575,20 @@ int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
|
|||
delete[] utf8_args;
|
||||
});
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||
|
||||
for (int idx = 0; idx < argc; ++idx) {
|
||||
std::string utf8_arg(converter.to_bytes(argv[idx]));
|
||||
utf8_arg += '\0';
|
||||
utf8_argv[idx] = new char[utf8_arg.size()];
|
||||
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
|
||||
}
|
||||
|
||||
// Because we just converted wide character args into UTF8, and because
|
||||
// std::filesystem::u8path is used to interpret all std::string paths as
|
||||
// UTF8, we should set the locale to UTF8 as well, for the transition point
|
||||
// to C library functions like fopen to work correctly with non-ASCII paths.
|
||||
std::setlocale(LC_ALL, ".UTF8");
|
||||
|
||||
return shaka::PackagerMain(argc, utf8_argv.get());
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -73,8 +73,8 @@ bool DeleteLocalFile(const char* file_name) {
|
|||
|
||||
bool WriteLocalFileAtomically(const char* file_name,
|
||||
const std::string& contents) {
|
||||
const std::filesystem::path file_path(file_name);
|
||||
const std::filesystem::path dir_path = file_path.parent_path();
|
||||
const auto file_path = std::filesystem::u8path(file_name);
|
||||
const auto dir_path = file_path.parent_path();
|
||||
|
||||
std::string temp_file_name;
|
||||
if (!TempFilePath(dir_path.string(), &temp_file_name))
|
||||
|
@ -83,7 +83,8 @@ bool WriteLocalFileAtomically(const char* file_name,
|
|||
return false;
|
||||
|
||||
std::error_code ec;
|
||||
std::filesystem::rename(temp_file_name, file_name, ec);
|
||||
auto temp_file_path = std::filesystem::u8path(temp_file_name);
|
||||
std::filesystem::rename(temp_file_path, file_name, ec);
|
||||
if (ec) {
|
||||
LOG(ERROR) << "Failed to replace file '" << file_name << "' with '"
|
||||
<< temp_file_name << "', error: " << ec;
|
||||
|
@ -405,7 +406,8 @@ bool File::IsLocalRegularFile(const char* file_name) {
|
|||
return false;
|
||||
|
||||
std::error_code ec;
|
||||
return std::filesystem::is_regular_file(real_file_name, ec);
|
||||
auto real_file_path = std::filesystem::u8path(real_file_name);
|
||||
return std::filesystem::is_regular_file(real_file_path, ec);
|
||||
}
|
||||
|
||||
std::string File::MakeCallbackFileName(
|
||||
|
|
|
@ -16,8 +16,8 @@ std::string generate_unique_temp_path() {
|
|||
// Generate a unique name for a temporary file, using standard library
|
||||
// routines, to avoid a circular dependency on any of our own code for
|
||||
// generating temporary files. The template must end in 6 X's.
|
||||
std::filesystem::path temp_path_template =
|
||||
(std::filesystem::temp_directory_path() / "packager-test.XXXXXX");
|
||||
auto temp_path_template =
|
||||
std::filesystem::temp_directory_path() / "packager-test.XXXXXX";
|
||||
std::string temp_path_template_string = temp_path_template.string();
|
||||
#if defined(OS_WIN)
|
||||
// _mktemp will modify the string passed to it to reflect the generated name
|
||||
|
@ -36,7 +36,7 @@ std::string generate_unique_temp_path() {
|
|||
|
||||
void delete_file(const std::string& path) {
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(path, ec);
|
||||
std::filesystem::remove(std::filesystem::u8path(path), ec);
|
||||
// Ignore errors.
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ TempFile::TempFile() : path_(generate_unique_temp_path()) {}
|
|||
|
||||
TempFile::~TempFile() {
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(path_, ec);
|
||||
std::filesystem::remove(std::filesystem::u8path(path_), ec);
|
||||
// Ignore errors.
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <sys/stat.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <locale>
|
||||
|
||||
#include "absl/flags/declare.h"
|
||||
#include "packager/file/file.h"
|
||||
|
@ -31,13 +32,14 @@ void WriteFile(const std::string& path, const std::string& data) {
|
|||
|
||||
void DeleteFile(const std::string& path) {
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(path, ec);
|
||||
std::filesystem::remove(std::filesystem::u8path(path), ec);
|
||||
// Ignore errors.
|
||||
}
|
||||
|
||||
int64_t FileSize(const std::string& path) {
|
||||
std::error_code ec;
|
||||
int64_t file_size = std::filesystem::file_size(path, ec);
|
||||
int64_t file_size =
|
||||
std::filesystem::file_size(std::filesystem::u8path(path), ec);
|
||||
if (ec) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -65,6 +67,17 @@ namespace shaka {
|
|||
|
||||
class LocalFileTest : public testing::Test {
|
||||
protected:
|
||||
static std::string original_locale_;
|
||||
|
||||
static void SetUpTestSuite() {
|
||||
original_locale_ = setlocale(LC_ALL, NULL);
|
||||
setlocale(LC_ALL, ".UTF8");
|
||||
}
|
||||
|
||||
static void TearDownTestSuite() {
|
||||
setlocale(LC_ALL, original_locale_.c_str());
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
data_.resize(kDataSize);
|
||||
for (int i = 0; i < kDataSize; ++i)
|
||||
|
@ -97,6 +110,9 @@ class LocalFileTest : public testing::Test {
|
|||
std::string local_file_name_;
|
||||
};
|
||||
|
||||
// static
|
||||
std::string LocalFileTest::original_locale_;
|
||||
|
||||
TEST_F(LocalFileTest, ReadNotExist) {
|
||||
// Remove test file if it exists.
|
||||
DeleteFile(local_file_name_no_prefix_);
|
||||
|
@ -233,6 +249,42 @@ TEST_F(LocalFileTest, IsLocalRegular) {
|
|||
ASSERT_TRUE(File::IsLocalRegularFile(local_file_name_.c_str()));
|
||||
}
|
||||
|
||||
TEST_F(LocalFileTest, UnicodePath) {
|
||||
// Delete the temp file already created.
|
||||
DeleteFile(local_file_name_no_prefix_);
|
||||
|
||||
// Modify the local file name for this test to include non-ASCII characters.
|
||||
// This is used in TearDown() to clean up the file we create in the test.
|
||||
const std::string unicode_suffix = "από.txt";
|
||||
local_file_name_ += unicode_suffix;
|
||||
local_file_name_no_prefix_ += unicode_suffix;
|
||||
|
||||
// Write file using File API.
|
||||
File* file = File::Open(local_file_name_.c_str(), "w");
|
||||
ASSERT_TRUE(file != NULL);
|
||||
EXPECT_EQ(kDataSize, file->Write(&data_[0], kDataSize));
|
||||
|
||||
// Check the size.
|
||||
EXPECT_EQ(kDataSize, file->Size());
|
||||
ASSERT_TRUE(file->Close());
|
||||
|
||||
// Open file using File API.
|
||||
file = File::Open(local_file_name_.c_str(), "r");
|
||||
ASSERT_TRUE(file != NULL);
|
||||
|
||||
// Read the entire file.
|
||||
std::string read_data(kDataSize, 0);
|
||||
EXPECT_EQ(kDataSize, file->Read(&read_data[0], kDataSize));
|
||||
|
||||
// Verify EOF.
|
||||
uint8_t single_byte;
|
||||
EXPECT_EQ(0, file->Read(&single_byte, sizeof(single_byte)));
|
||||
ASSERT_TRUE(file->Close());
|
||||
|
||||
// Compare data written and read.
|
||||
EXPECT_EQ(data_, read_data);
|
||||
}
|
||||
|
||||
class ParamLocalFileTest : public LocalFileTest,
|
||||
public ::testing::WithParamInterface<uint8_t> {};
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ std::string TempFileName() {
|
|||
} // namespace
|
||||
|
||||
bool TempFilePath(const std::string& temp_dir, std::string* temp_file_path) {
|
||||
std::filesystem::path temp_dir_path(temp_dir);
|
||||
auto temp_dir_path = std::filesystem::u8path(temp_dir);
|
||||
*temp_file_path = (temp_dir_path / TempFileName()).string();
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -75,7 +75,8 @@ int64_t LocalFile::Size() {
|
|||
}
|
||||
|
||||
std::error_code ec;
|
||||
int64_t file_size = std::filesystem::file_size(file_name(), ec);
|
||||
auto file_path = std::filesystem::u8path(file_name());
|
||||
int64_t file_size = std::filesystem::file_size(file_path, ec);
|
||||
if (ec) {
|
||||
LOG(ERROR) << "Cannot get file size, error: " << ec;
|
||||
return -1;
|
||||
|
@ -112,7 +113,7 @@ bool LocalFile::Tell(uint64_t* position) {
|
|||
LocalFile::~LocalFile() {}
|
||||
|
||||
bool LocalFile::Open() {
|
||||
std::filesystem::path file_path(file_name());
|
||||
auto file_path = std::filesystem::u8path(file_name());
|
||||
|
||||
// Create upper level directories for write mode.
|
||||
if (file_mode_.find("w") != std::string::npos) {
|
||||
|
@ -133,9 +134,10 @@ bool LocalFile::Open() {
|
|||
}
|
||||
|
||||
bool LocalFile::Delete(const char* file_name) {
|
||||
auto file_path = std::filesystem::u8path(file_name);
|
||||
std::error_code ec;
|
||||
// On error (ec truthy), remove() will return false anyway.
|
||||
return std::filesystem::remove(file_name, ec);
|
||||
return std::filesystem::remove(file_path, ec);
|
||||
}
|
||||
|
||||
} // namespace shaka
|
||||
|
|
|
@ -144,7 +144,7 @@ TEST(ContainerNamesTest, CheckFixedStrings) {
|
|||
|
||||
// Determine the container type of a specified file.
|
||||
void TestFile(MediaContainerName expected, const std::string& name) {
|
||||
std::filesystem::path path = GetTestDataFilePath(name);
|
||||
auto path = GetTestDataFilePath(name);
|
||||
std::vector<uint8_t> data = ReadTestDataFile(name);
|
||||
ASSERT_FALSE(data.empty());
|
||||
|
||||
|
|
|
@ -11,13 +11,13 @@ namespace media {
|
|||
|
||||
// Returns a file path for a file in the media/test/data directory.
|
||||
std::filesystem::path GetTestDataFilePath(const std::string& name) {
|
||||
std::filesystem::path data_dir(TEST_DATA_DIR);
|
||||
auto data_dir = std::filesystem::u8path(TEST_DATA_DIR);
|
||||
return data_dir / name;
|
||||
}
|
||||
|
||||
// Returns a file path for a file in the media/app/test/testdata directory.
|
||||
std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
|
||||
std::filesystem::path data_dir(TEST_DATA_DIR);
|
||||
auto data_dir = std::filesystem::u8path(TEST_DATA_DIR);
|
||||
auto app_data_dir =
|
||||
data_dir.parent_path().parent_path() / "app" / "test" / "testdata";
|
||||
return app_data_dir / name;
|
||||
|
@ -25,7 +25,7 @@ std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
|
|||
|
||||
// Reads a test file from media/test/data directory and returns its content.
|
||||
std::vector<uint8_t> ReadTestDataFile(const std::string& name) {
|
||||
std::filesystem::path path = GetTestDataFilePath(name);
|
||||
auto path = GetTestDataFilePath(name);
|
||||
|
||||
FILE* f = fopen(path.string().c_str(), "rb");
|
||||
if (!f) {
|
||||
|
|
Loading…
Reference in New Issue