fix: Fix local files with UTF8 names (#1246)
This fixes our use of std::filesystem to interpret all paths names as UTF8. Before this, UTF8 paths did not work correctly in all cases. This also adds a new unit test to cover this case. On Windows, it is critical that a UTF8 locale be set at runtime. Applications linking with Packager as a library should call setlocale(), and the Packager frontends now do this automatically after converting wide character arguments into narrow strings. Closes #652
This commit is contained in:
parent
c29c03c6e3
commit
5a2571b9bc
|
@ -5,6 +5,7 @@
|
||||||
// https://developers.google.com/open-source/licenses/bsd
|
// https://developers.google.com/open-source/licenses/bsd
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <locale>
|
||||||
|
|
||||||
#include "packager/app/mpd_generator_flags.h"
|
#include "packager/app/mpd_generator_flags.h"
|
||||||
#include "packager/app/vlog_flags.h"
|
#include "packager/app/vlog_flags.h"
|
||||||
|
@ -20,7 +21,6 @@
|
||||||
#if defined(OS_WIN)
|
#if defined(OS_WIN)
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <locale>
|
|
||||||
#endif // defined(OS_WIN)
|
#endif // defined(OS_WIN)
|
||||||
|
|
||||||
DEFINE_bool(licenses, false, "Dump licenses.");
|
DEFINE_bool(licenses, false, "Dump licenses.");
|
||||||
|
@ -142,12 +142,20 @@ int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
|
||||||
delete[] utf8_args;
|
delete[] utf8_args;
|
||||||
});
|
});
|
||||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||||
|
|
||||||
for (int idx = 0; idx < argc; ++idx) {
|
for (int idx = 0; idx < argc; ++idx) {
|
||||||
std::string utf8_arg(converter.to_bytes(argv[idx]));
|
std::string utf8_arg(converter.to_bytes(argv[idx]));
|
||||||
utf8_arg += '\0';
|
utf8_arg += '\0';
|
||||||
utf8_argv[idx] = new char[utf8_arg.size()];
|
utf8_argv[idx] = new char[utf8_arg.size()];
|
||||||
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
|
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Because we just converted wide character args into UTF8, and because
|
||||||
|
// std::filesystem::u8path is used to interpret all std::string paths as
|
||||||
|
// UTF8, we should set the locale to UTF8 as well, for the transition point
|
||||||
|
// to C library functions like fopen to work correctly with non-ASCII paths.
|
||||||
|
std::setlocale(LC_ALL, ".UTF8");
|
||||||
|
|
||||||
return shaka::MpdMain(argc, utf8_argv.get());
|
return shaka::MpdMain(argc, utf8_argv.get());
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <locale>
|
||||||
|
|
||||||
#include "packager/app/ad_cue_generator_flags.h"
|
#include "packager/app/ad_cue_generator_flags.h"
|
||||||
#include "packager/app/crypto_flags.h"
|
#include "packager/app/crypto_flags.h"
|
||||||
|
@ -34,7 +35,6 @@
|
||||||
#if defined(OS_WIN)
|
#if defined(OS_WIN)
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <locale>
|
|
||||||
#endif // defined(OS_WIN)
|
#endif // defined(OS_WIN)
|
||||||
|
|
||||||
DEFINE_bool(dump_stream_info, false, "Dump demuxed stream info.");
|
DEFINE_bool(dump_stream_info, false, "Dump demuxed stream info.");
|
||||||
|
@ -575,12 +575,20 @@ int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) {
|
||||||
delete[] utf8_args;
|
delete[] utf8_args;
|
||||||
});
|
});
|
||||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
||||||
|
|
||||||
for (int idx = 0; idx < argc; ++idx) {
|
for (int idx = 0; idx < argc; ++idx) {
|
||||||
std::string utf8_arg(converter.to_bytes(argv[idx]));
|
std::string utf8_arg(converter.to_bytes(argv[idx]));
|
||||||
utf8_arg += '\0';
|
utf8_arg += '\0';
|
||||||
utf8_argv[idx] = new char[utf8_arg.size()];
|
utf8_argv[idx] = new char[utf8_arg.size()];
|
||||||
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
|
memcpy(utf8_argv[idx], &utf8_arg[0], utf8_arg.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Because we just converted wide character args into UTF8, and because
|
||||||
|
// std::filesystem::u8path is used to interpret all std::string paths as
|
||||||
|
// UTF8, we should set the locale to UTF8 as well, for the transition point
|
||||||
|
// to C library functions like fopen to work correctly with non-ASCII paths.
|
||||||
|
std::setlocale(LC_ALL, ".UTF8");
|
||||||
|
|
||||||
return shaka::PackagerMain(argc, utf8_argv.get());
|
return shaka::PackagerMain(argc, utf8_argv.get());
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -73,8 +73,8 @@ bool DeleteLocalFile(const char* file_name) {
|
||||||
|
|
||||||
bool WriteLocalFileAtomically(const char* file_name,
|
bool WriteLocalFileAtomically(const char* file_name,
|
||||||
const std::string& contents) {
|
const std::string& contents) {
|
||||||
const std::filesystem::path file_path(file_name);
|
const auto file_path = std::filesystem::u8path(file_name);
|
||||||
const std::filesystem::path dir_path = file_path.parent_path();
|
const auto dir_path = file_path.parent_path();
|
||||||
|
|
||||||
std::string temp_file_name;
|
std::string temp_file_name;
|
||||||
if (!TempFilePath(dir_path.string(), &temp_file_name))
|
if (!TempFilePath(dir_path.string(), &temp_file_name))
|
||||||
|
@ -83,7 +83,8 @@ bool WriteLocalFileAtomically(const char* file_name,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
std::filesystem::rename(temp_file_name, file_name, ec);
|
auto temp_file_path = std::filesystem::u8path(temp_file_name);
|
||||||
|
std::filesystem::rename(temp_file_path, file_name, ec);
|
||||||
if (ec) {
|
if (ec) {
|
||||||
LOG(ERROR) << "Failed to replace file '" << file_name << "' with '"
|
LOG(ERROR) << "Failed to replace file '" << file_name << "' with '"
|
||||||
<< temp_file_name << "', error: " << ec;
|
<< temp_file_name << "', error: " << ec;
|
||||||
|
@ -405,7 +406,8 @@ bool File::IsLocalRegularFile(const char* file_name) {
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
return std::filesystem::is_regular_file(real_file_name, ec);
|
auto real_file_path = std::filesystem::u8path(real_file_name);
|
||||||
|
return std::filesystem::is_regular_file(real_file_path, ec);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string File::MakeCallbackFileName(
|
std::string File::MakeCallbackFileName(
|
||||||
|
|
|
@ -16,8 +16,8 @@ std::string generate_unique_temp_path() {
|
||||||
// Generate a unique name for a temporary file, using standard library
|
// Generate a unique name for a temporary file, using standard library
|
||||||
// routines, to avoid a circular dependency on any of our own code for
|
// routines, to avoid a circular dependency on any of our own code for
|
||||||
// generating temporary files. The template must end in 6 X's.
|
// generating temporary files. The template must end in 6 X's.
|
||||||
std::filesystem::path temp_path_template =
|
auto temp_path_template =
|
||||||
(std::filesystem::temp_directory_path() / "packager-test.XXXXXX");
|
std::filesystem::temp_directory_path() / "packager-test.XXXXXX";
|
||||||
std::string temp_path_template_string = temp_path_template.string();
|
std::string temp_path_template_string = temp_path_template.string();
|
||||||
#if defined(OS_WIN)
|
#if defined(OS_WIN)
|
||||||
// _mktemp will modify the string passed to it to reflect the generated name
|
// _mktemp will modify the string passed to it to reflect the generated name
|
||||||
|
@ -36,7 +36,7 @@ std::string generate_unique_temp_path() {
|
||||||
|
|
||||||
void delete_file(const std::string& path) {
|
void delete_file(const std::string& path) {
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
std::filesystem::remove(path, ec);
|
std::filesystem::remove(std::filesystem::u8path(path), ec);
|
||||||
// Ignore errors.
|
// Ignore errors.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ TempFile::TempFile() : path_(generate_unique_temp_path()) {}
|
||||||
|
|
||||||
TempFile::~TempFile() {
|
TempFile::~TempFile() {
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
std::filesystem::remove(path_, ec);
|
std::filesystem::remove(std::filesystem::u8path(path_), ec);
|
||||||
// Ignore errors.
|
// Ignore errors.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <locale>
|
||||||
|
|
||||||
#include "absl/flags/declare.h"
|
#include "absl/flags/declare.h"
|
||||||
#include "packager/file/file.h"
|
#include "packager/file/file.h"
|
||||||
|
@ -31,13 +32,14 @@ void WriteFile(const std::string& path, const std::string& data) {
|
||||||
|
|
||||||
void DeleteFile(const std::string& path) {
|
void DeleteFile(const std::string& path) {
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
std::filesystem::remove(path, ec);
|
std::filesystem::remove(std::filesystem::u8path(path), ec);
|
||||||
// Ignore errors.
|
// Ignore errors.
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t FileSize(const std::string& path) {
|
int64_t FileSize(const std::string& path) {
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
int64_t file_size = std::filesystem::file_size(path, ec);
|
int64_t file_size =
|
||||||
|
std::filesystem::file_size(std::filesystem::u8path(path), ec);
|
||||||
if (ec) {
|
if (ec) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -65,6 +67,17 @@ namespace shaka {
|
||||||
|
|
||||||
class LocalFileTest : public testing::Test {
|
class LocalFileTest : public testing::Test {
|
||||||
protected:
|
protected:
|
||||||
|
static std::string original_locale_;
|
||||||
|
|
||||||
|
static void SetUpTestSuite() {
|
||||||
|
original_locale_ = setlocale(LC_ALL, NULL);
|
||||||
|
setlocale(LC_ALL, ".UTF8");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void TearDownTestSuite() {
|
||||||
|
setlocale(LC_ALL, original_locale_.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
data_.resize(kDataSize);
|
data_.resize(kDataSize);
|
||||||
for (int i = 0; i < kDataSize; ++i)
|
for (int i = 0; i < kDataSize; ++i)
|
||||||
|
@ -97,6 +110,9 @@ class LocalFileTest : public testing::Test {
|
||||||
std::string local_file_name_;
|
std::string local_file_name_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// static
|
||||||
|
std::string LocalFileTest::original_locale_;
|
||||||
|
|
||||||
TEST_F(LocalFileTest, ReadNotExist) {
|
TEST_F(LocalFileTest, ReadNotExist) {
|
||||||
// Remove test file if it exists.
|
// Remove test file if it exists.
|
||||||
DeleteFile(local_file_name_no_prefix_);
|
DeleteFile(local_file_name_no_prefix_);
|
||||||
|
@ -233,6 +249,42 @@ TEST_F(LocalFileTest, IsLocalRegular) {
|
||||||
ASSERT_TRUE(File::IsLocalRegularFile(local_file_name_.c_str()));
|
ASSERT_TRUE(File::IsLocalRegularFile(local_file_name_.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(LocalFileTest, UnicodePath) {
|
||||||
|
// Delete the temp file already created.
|
||||||
|
DeleteFile(local_file_name_no_prefix_);
|
||||||
|
|
||||||
|
// Modify the local file name for this test to include non-ASCII characters.
|
||||||
|
// This is used in TearDown() to clean up the file we create in the test.
|
||||||
|
const std::string unicode_suffix = "από.txt";
|
||||||
|
local_file_name_ += unicode_suffix;
|
||||||
|
local_file_name_no_prefix_ += unicode_suffix;
|
||||||
|
|
||||||
|
// Write file using File API.
|
||||||
|
File* file = File::Open(local_file_name_.c_str(), "w");
|
||||||
|
ASSERT_TRUE(file != NULL);
|
||||||
|
EXPECT_EQ(kDataSize, file->Write(&data_[0], kDataSize));
|
||||||
|
|
||||||
|
// Check the size.
|
||||||
|
EXPECT_EQ(kDataSize, file->Size());
|
||||||
|
ASSERT_TRUE(file->Close());
|
||||||
|
|
||||||
|
// Open file using File API.
|
||||||
|
file = File::Open(local_file_name_.c_str(), "r");
|
||||||
|
ASSERT_TRUE(file != NULL);
|
||||||
|
|
||||||
|
// Read the entire file.
|
||||||
|
std::string read_data(kDataSize, 0);
|
||||||
|
EXPECT_EQ(kDataSize, file->Read(&read_data[0], kDataSize));
|
||||||
|
|
||||||
|
// Verify EOF.
|
||||||
|
uint8_t single_byte;
|
||||||
|
EXPECT_EQ(0, file->Read(&single_byte, sizeof(single_byte)));
|
||||||
|
ASSERT_TRUE(file->Close());
|
||||||
|
|
||||||
|
// Compare data written and read.
|
||||||
|
EXPECT_EQ(data_, read_data);
|
||||||
|
}
|
||||||
|
|
||||||
class ParamLocalFileTest : public LocalFileTest,
|
class ParamLocalFileTest : public LocalFileTest,
|
||||||
public ::testing::WithParamInterface<uint8_t> {};
|
public ::testing::WithParamInterface<uint8_t> {};
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ std::string TempFileName() {
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
bool TempFilePath(const std::string& temp_dir, std::string* temp_file_path) {
|
bool TempFilePath(const std::string& temp_dir, std::string* temp_file_path) {
|
||||||
std::filesystem::path temp_dir_path(temp_dir);
|
auto temp_dir_path = std::filesystem::u8path(temp_dir);
|
||||||
*temp_file_path = (temp_dir_path / TempFileName()).string();
|
*temp_file_path = (temp_dir_path / TempFileName()).string();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,8 @@ int64_t LocalFile::Size() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
int64_t file_size = std::filesystem::file_size(file_name(), ec);
|
auto file_path = std::filesystem::u8path(file_name());
|
||||||
|
int64_t file_size = std::filesystem::file_size(file_path, ec);
|
||||||
if (ec) {
|
if (ec) {
|
||||||
LOG(ERROR) << "Cannot get file size, error: " << ec;
|
LOG(ERROR) << "Cannot get file size, error: " << ec;
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -112,7 +113,7 @@ bool LocalFile::Tell(uint64_t* position) {
|
||||||
LocalFile::~LocalFile() {}
|
LocalFile::~LocalFile() {}
|
||||||
|
|
||||||
bool LocalFile::Open() {
|
bool LocalFile::Open() {
|
||||||
std::filesystem::path file_path(file_name());
|
auto file_path = std::filesystem::u8path(file_name());
|
||||||
|
|
||||||
// Create upper level directories for write mode.
|
// Create upper level directories for write mode.
|
||||||
if (file_mode_.find("w") != std::string::npos) {
|
if (file_mode_.find("w") != std::string::npos) {
|
||||||
|
@ -133,9 +134,10 @@ bool LocalFile::Open() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LocalFile::Delete(const char* file_name) {
|
bool LocalFile::Delete(const char* file_name) {
|
||||||
|
auto file_path = std::filesystem::u8path(file_name);
|
||||||
std::error_code ec;
|
std::error_code ec;
|
||||||
// On error (ec truthy), remove() will return false anyway.
|
// On error (ec truthy), remove() will return false anyway.
|
||||||
return std::filesystem::remove(file_name, ec);
|
return std::filesystem::remove(file_path, ec);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace shaka
|
} // namespace shaka
|
||||||
|
|
|
@ -144,7 +144,7 @@ TEST(ContainerNamesTest, CheckFixedStrings) {
|
||||||
|
|
||||||
// Determine the container type of a specified file.
|
// Determine the container type of a specified file.
|
||||||
void TestFile(MediaContainerName expected, const std::string& name) {
|
void TestFile(MediaContainerName expected, const std::string& name) {
|
||||||
std::filesystem::path path = GetTestDataFilePath(name);
|
auto path = GetTestDataFilePath(name);
|
||||||
std::vector<uint8_t> data = ReadTestDataFile(name);
|
std::vector<uint8_t> data = ReadTestDataFile(name);
|
||||||
ASSERT_FALSE(data.empty());
|
ASSERT_FALSE(data.empty());
|
||||||
|
|
||||||
|
|
|
@ -11,13 +11,13 @@ namespace media {
|
||||||
|
|
||||||
// Returns a file path for a file in the media/test/data directory.
|
// Returns a file path for a file in the media/test/data directory.
|
||||||
std::filesystem::path GetTestDataFilePath(const std::string& name) {
|
std::filesystem::path GetTestDataFilePath(const std::string& name) {
|
||||||
std::filesystem::path data_dir(TEST_DATA_DIR);
|
auto data_dir = std::filesystem::u8path(TEST_DATA_DIR);
|
||||||
return data_dir / name;
|
return data_dir / name;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a file path for a file in the media/app/test/testdata directory.
|
// Returns a file path for a file in the media/app/test/testdata directory.
|
||||||
std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
|
std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
|
||||||
std::filesystem::path data_dir(TEST_DATA_DIR);
|
auto data_dir = std::filesystem::u8path(TEST_DATA_DIR);
|
||||||
auto app_data_dir =
|
auto app_data_dir =
|
||||||
data_dir.parent_path().parent_path() / "app" / "test" / "testdata";
|
data_dir.parent_path().parent_path() / "app" / "test" / "testdata";
|
||||||
return app_data_dir / name;
|
return app_data_dir / name;
|
||||||
|
@ -25,7 +25,7 @@ std::filesystem::path GetAppTestDataFilePath(const std::string& name) {
|
||||||
|
|
||||||
// Reads a test file from media/test/data directory and returns its content.
|
// Reads a test file from media/test/data directory and returns its content.
|
||||||
std::vector<uint8_t> ReadTestDataFile(const std::string& name) {
|
std::vector<uint8_t> ReadTestDataFile(const std::string& name) {
|
||||||
std::filesystem::path path = GetTestDataFilePath(name);
|
auto path = GetTestDataFilePath(name);
|
||||||
|
|
||||||
FILE* f = fopen(path.string().c_str(), "rb");
|
FILE* f = fopen(path.string().c_str(), "rb");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
|
|
Loading…
Reference in New Issue