Accept language subtags

When normalizing language tags to their shortest form, we need to
split off subtags and treat them as separate components that do not
get changed.

Furthermore, MP4 can only store language tags without subtags.  So
only store the main language in MP4 output.

Fixes b/27533973

Change-Id: I96049e274aae9841e321c53039ef6464a2e61574
This commit is contained in:
Joey Parrish 2016-03-08 11:15:12 -08:00
parent 591650e61d
commit b6718a37c5
4 changed files with 133 additions and 30 deletions

View File

@ -191,12 +191,19 @@ void MP4Muxer::InitializeTrak(const StreamInfo* info, Track* trak) {
trak->media.header.timescale = info->time_scale();
trak->media.header.duration = 0;
if (!info->language().empty()) {
// ISO-639-2/T language code should be 3 characters..
if (info->language().size() != 3) {
LOG(WARNING) << "'" << info->language() << "' is not a valid ISO-639-2 "
// Strip off the subtag, if any.
std::string main_language = info->language();
size_t dash = main_language.find('-');
if (dash != std::string::npos) {
main_language.erase(dash);
}
// ISO-639-2/T main language code should be 3 characters.
if (main_language.size() != 3) {
LOG(WARNING) << "'" << main_language << "' is not a valid ISO-639-2 "
<< "language code, ignoring.";
} else {
trak->media.header.language.code = info->language();
trak->media.header.language.code = main_language;
}
}
}

View File

@ -48,6 +48,7 @@ const bool kSingleSegment = true;
const bool kMultipleSegments = false;
const bool kEnableEncryption = true;
const bool kDisableEncryption = false;
const char kNoLanguageOverride[] = "";
// Encryption constants.
const char kKeyIdHex[] = "e5007e6e9dcd5ac095202ed3758382cd";
@ -107,7 +108,8 @@ class PackagerTestBasic : public ::testing::TestWithParam<const char*> {
const std::string& video_output,
const std::string& audio_output,
bool single_segment,
bool enable_encryption);
bool enable_encryption,
const std::string& override_language);
void Decrypt(const std::string& input,
const std::string& video_output,
@ -149,7 +151,8 @@ void PackagerTestBasic::Remux(const std::string& input,
const std::string& video_output,
const std::string& audio_output,
bool single_segment,
bool enable_encryption) {
bool enable_encryption,
const std::string& language_override) {
CHECK(!video_output.empty() || !audio_output.empty());
Demuxer demuxer(GetFullPath(input));
@ -166,7 +169,12 @@ void PackagerTestBasic::Remux(const std::string& input,
new mp4::MP4Muxer(SetupOptions(video_output, single_segment)));
muxer_video->set_clock(&fake_clock_);
muxer_video->AddStream(FindFirstVideoStream(demuxer.streams()));
MediaStream* stream = FindFirstVideoStream(demuxer.streams());
if (!language_override.empty()) {
stream->info()->set_language(language_override);
ASSERT_EQ(language_override, stream->info()->language());
}
muxer_video->AddStream(stream);
if (enable_encryption) {
muxer_video->SetKeySource(encryption_key_source.get(),
@ -182,7 +190,12 @@ void PackagerTestBasic::Remux(const std::string& input,
new mp4::MP4Muxer(SetupOptions(audio_output, single_segment)));
muxer_audio->set_clock(&fake_clock_);
muxer_audio->AddStream(FindFirstAudioStream(demuxer.streams()));
MediaStream* stream = FindFirstAudioStream(demuxer.streams());
if (!language_override.empty()) {
stream->info()->set_language(language_override);
ASSERT_EQ(language_override, stream->info()->language());
}
muxer_audio->AddStream(stream);
if (enable_encryption) {
muxer_audio->SetKeySource(encryption_key_source.get(),
@ -234,7 +247,8 @@ TEST_P(PackagerTestBasic, MP4MuxerSingleSegmentUnencryptedVideo) {
kOutputVideo,
kOutputNone,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
}
TEST_P(PackagerTestBasic, MP4MuxerSingleSegmentUnencryptedAudio) {
@ -242,7 +256,8 @@ TEST_P(PackagerTestBasic, MP4MuxerSingleSegmentUnencryptedAudio) {
kOutputNone,
kOutputAudio,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
}
TEST_P(PackagerTestBasic, MP4MuxerSingleSegmentEncryptedVideo) {
@ -250,7 +265,8 @@ TEST_P(PackagerTestBasic, MP4MuxerSingleSegmentEncryptedVideo) {
kOutputVideo,
kOutputNone,
kSingleSegment,
kEnableEncryption));
kEnableEncryption,
kNoLanguageOverride));
ASSERT_NO_FATAL_FAILURE(Decrypt(kOutputVideo,
kOutputVideo2,
@ -262,13 +278,44 @@ TEST_P(PackagerTestBasic, MP4MuxerSingleSegmentEncryptedAudio) {
kOutputNone,
kOutputAudio,
kSingleSegment,
kEnableEncryption));
kEnableEncryption,
kNoLanguageOverride));
ASSERT_NO_FATAL_FAILURE(Decrypt(kOutputAudio,
kOutputNone,
kOutputAudio2));
}
// Remuxes the audio stream with a plain three-letter language override and
// checks that the same code is read back when the output is demuxed again.
TEST_P(PackagerTestBasic, MP4MuxerLanguageWithoutSubtag) {
  ASSERT_NO_FATAL_FAILURE(Remux(GetParam(), kOutputNone, kOutputAudio,
                                kSingleSegment, kDisableEncryption, "por"));

  // Re-open the freshly written audio output and inspect its language.
  Demuxer output_demuxer(GetFullPath(kOutputAudio));
  ASSERT_OK(output_demuxer.Initialize());

  MediaStream* audio_stream = FindFirstAudioStream(output_demuxer.streams());
  ASSERT_EQ("por", audio_stream->info()->language());
}
// Remuxes the audio stream with a language override that carries a subtag
// ("por-BR") and checks that only the main language ("por") is read back
// when the output is demuxed again.
TEST_P(PackagerTestBasic, MP4MuxerLanguageWithSubtag) {
  ASSERT_NO_FATAL_FAILURE(Remux(GetParam(), kOutputNone, kOutputAudio,
                                kSingleSegment, kDisableEncryption,
                                "por-BR"));

  // Re-open the freshly written audio output and inspect its language.
  Demuxer output_demuxer(GetFullPath(kOutputAudio));
  ASSERT_OK(output_demuxer.Initialize());

  MediaStream* audio_stream = FindFirstAudioStream(output_demuxer.streams());
  ASSERT_EQ("por", audio_stream->info()->language());
}
class PackagerTest : public PackagerTestBasic {
public:
void SetUp() override {
@ -278,13 +325,15 @@ class PackagerTest : public PackagerTestBasic {
kOutputVideo,
kOutputNone,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
ASSERT_NO_FATAL_FAILURE(Remux(GetParam(),
kOutputNone,
kOutputAudio,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
}
};
@ -295,7 +344,8 @@ TEST_P(PackagerTest, MP4MuxerSingleSegmentUnencryptedVideoAgain) {
kOutputVideo2,
kOutputNone,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
EXPECT_TRUE(ContentsEqual(kOutputVideo, kOutputVideo2));
}
@ -306,7 +356,8 @@ TEST_P(PackagerTest, MP4MuxerSingleSegmentUnencryptedAudioAgain) {
kOutputNone,
kOutputAudio2,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
EXPECT_TRUE(ContentsEqual(kOutputAudio, kOutputAudio2));
}
@ -315,7 +366,8 @@ TEST_P(PackagerTest, MP4MuxerSingleSegmentUnencryptedSeparateAudioVideo) {
kOutputVideo2,
kOutputAudio2,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
// Compare the output with single muxer output. They should match.
EXPECT_TRUE(ContentsEqual(kOutputVideo, kOutputVideo2));
@ -327,7 +379,8 @@ TEST_P(PackagerTest, MP4MuxerMultiSegmentsUnencryptedVideo) {
kOutputVideo2,
kOutputNone,
kMultipleSegments,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
// Find and concatenates the segments.
const std::string kOutputVideoSegmentsCombined =
@ -361,7 +414,8 @@ TEST_P(PackagerTest, MP4MuxerMultiSegmentsUnencryptedVideo) {
kOutputVideo2,
kOutputNone,
kSingleSegment,
kDisableEncryption));
kDisableEncryption,
kNoLanguageOverride));
EXPECT_TRUE(ContentsEqual(kOutputVideo, kOutputVideo2));
}

View File

@ -70,42 +70,62 @@ const LanguageMapPairType kLanguageMap[] = {
{ "yor", "yo" }, { "zha", "za" }, { "zho", "zh" }, { "zul", "zu" },
};
// Splits |tag| at its first '-' into the main language and the remainder.
//
// On return, |main_language| holds everything before the first dash (or the
// whole tag when there is no dash) and |subtag| holds the rest of the tag,
// including the leading dash, or is empty when there is no dash.
// Concatenating the two outputs therefore reproduces |tag|.
void SplitLanguageTag(const std::string& tag,
                      std::string* main_language, std::string* subtag) {
  main_language->assign(tag);
  subtag->clear();

  const std::string::size_type separator = main_language->find('-');
  if (separator == std::string::npos)
    return;  // No subtag present; the outputs are already correct.

  // Copy the tail (dash included) before truncating the main language.
  subtag->assign(*main_language, separator, std::string::npos);
  main_language->resize(separator);
}
} // namespace
namespace edash_packager {
std::string LanguageToShortestForm(const std::string& language) {
if (language.size() == 2) {
std::string main_language;
std::string subtag;
SplitLanguageTag(language, &main_language, &subtag);
if (main_language.size() == 2) {
// Presumably already a valid ISO-639-1 code, and therefore conforms to
// BCP-47's requirement to use the shortest possible code.
return language;
return main_language + subtag;
}
for (size_t i = 0; i < arraysize(kLanguageMap); ++i) {
if (language == kLanguageMap[i].iso_639_2) {
return kLanguageMap[i].iso_639_1;
if (main_language == kLanguageMap[i].iso_639_2) {
return kLanguageMap[i].iso_639_1 + subtag;
}
}
// This could happen legitimately for languages which have no 2-letter code,
// but that would imply that the input language code is a 3-letter code.
DCHECK_EQ(3u, language.size());
return language;
DCHECK_EQ(3u, main_language.size());
return main_language + subtag;
}
std::string LanguageToISO_639_2(const std::string& language) {
if (language.size() == 3) {
std::string main_language;
std::string subtag;
SplitLanguageTag(language, &main_language, &subtag);
if (main_language.size() == 3) {
// Presumably already a valid ISO-639-2 code.
return language;
return main_language + subtag;
}
for (size_t i = 0; i < arraysize(kLanguageMap); ++i) {
if (language == kLanguageMap[i].iso_639_1) {
return kLanguageMap[i].iso_639_2;
if (main_language == kLanguageMap[i].iso_639_1) {
return kLanguageMap[i].iso_639_2 + subtag;
}
}
LOG(WARNING) << "No equivalent 3-letter language code for " << language;
LOG(WARNING) << "No equivalent 3-letter language code for " << main_language;
// This is probably a mistake on the part of the user and should be treated
// as invalid input.
return "und";

View File

@ -788,6 +788,28 @@ TEST_F(CommonMpdBuilderTest, CheckLanguageAttributeSet) {
ExpectAttributeEqString("lang", "en", node_xml.get()));
}
// A language tag that carries a subtag must still have its main language
// converted when it is written to the "lang" attribute.
TEST_F(CommonMpdBuilderTest, CheckConvertLanguageWithSubtag) {
  // Any valid media info will do; its contents are irrelevant to this test.
  const char kTextMediaInfo[] =
      "text_info {\n"
      "  format: 'ttml'\n"
      "}\n"
      "container_type: CONTAINER_TEXT\n";

  base::AtomicSequenceNumber sequence_counter;

  // "por-BR" is the long tag for Brazilian Portuguese.  Its short tag,
  // "pt-BR", is what the manifest is expected to contain.
  auto text_adaptation_set =
      CreateAdaptationSet(kAnyAdaptationSetId, "por-BR", MpdOptions(),
                          MpdBuilder::kStatic, &sequence_counter);
  text_adaptation_set->AddRepresentation(ConvertToMediaInfo(kTextMediaInfo));

  xml::scoped_xml_ptr<xmlNode> adaptation_set_xml(
      text_adaptation_set->GetXml());
  EXPECT_NO_FATAL_FAILURE(
      ExpectAttributeEqString("lang", "pt-BR", adaptation_set_xml.get()));
}
TEST_F(CommonMpdBuilderTest, CheckAdaptationSetId) {
base::AtomicSequenceNumber sequence_counter;
const uint32_t kAdaptationSetId = 42;