feat: HLS / DASH support forced subtitle (#1020)

Closes #988

---------

Co-authored-by: Cosmin Stejerean <cstejerean@meta.com>
This commit is contained in:
Vishal Shah 2024-02-14 21:27:57 -07:00 committed by GitHub
parent e19d73321d
commit f73ad0d961
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 236 additions and 12 deletions

View File

@ -12,5 +12,5 @@ DASH specific stream descriptor fields
Optional semicolon separated list of values for DASH Role element. The
value should be one of: **caption**, **subtitle**, **main**, **alternate**,
**supplementary**, **commentary**, **description** and **dub**. See
DASH (ISO/IEC 23009-1) specification for details.
**supplementary**, **commentary**, **description**, **dub** and **forced-subtitle**.
See DASH (ISO/IEC 23009-1) specification for details.

View File

@ -73,6 +73,15 @@ These are the available fields:
CEA allows specifying up to 4 streams within a single video stream. If not
specified, all subtitles will be merged together.
:forced_subtitle:
Optional boolean value (0|1). If set to 1 indicates that this stream is a
Forced Narrative subtitle that should be displayed when subtitles are otherwise
off, for example used to caption short portions of the audio that might be in
a foreign language. For DASH this will set role to **forced-subtitle**, for HLS
it will set FORCED=YES and AUTOSELECT=YES. Only valid for subtitles.
.. include:: /options/drm_stream_descriptors.rst
.. include:: /options/dash_stream_descriptors.rst
.. include:: /options/hls_stream_descriptors.rst

View File

@ -152,6 +152,9 @@ struct StreamDescriptor {
/// Set to true to indicate that the stream is for hls only.
bool hls_only = false;
/// Optional, indicates if this is a Forced Narrative subtitle stream.
bool forced_subtitle = false;
/// Optional for DASH output. It defines the Label element in Adaptation Set.
std::string dash_label;
};

View File

@ -120,8 +120,16 @@ const char kUsage[] =
" in the format: scheme_id_uri=value.\n"
" - dash_roles (roles): Optional semicolon separated list of values for\n"
" DASH Role elements. The value should be one of: caption, subtitle,\n"
" main, alternate, supplementary, commentary, description and dub. See\n"
" DASH (ISO/IEC 23009-1) specification for details.\n";
" forced-subtitle, main, alternate, supplementary, commentary, \n"
" description and dub. See DASH\n"
" (ISO/IEC 23009-1) specification for details.\n"
" - forced_subtitle: Optional boolean value (0|1). If set to 1 \n"
" indicates that this stream is a Forced Narrative subtitle that \n"
" should be displayed when subtitles are otherwise off, for example \n"
" used to caption short portions of the audio that might be in a \n"
" foreign language. For DASH this will set role to forced_subtitle, \n"
" for HLS it will set FORCED=YES and AUTOSELECT=YES. \n"
" Only valid for subtitles.";
// Labels for parameters in RawKey key info.
const char kDrmLabelLabel[] = "label";

View File

@ -40,6 +40,7 @@ enum FieldType {
kDashOnlyField,
kHlsOnlyField,
kDashLabelField,
kForcedSubtitleField,
};
struct FieldNameToTypeMapping {
@ -88,6 +89,7 @@ const FieldNameToTypeMapping kFieldNameTypeMappings[] = {
{"dash_only", kDashOnlyField},
{"hls_only", kHlsOnlyField},
{"dash_label", kDashLabelField},
{"forced_subtitle", kForcedSubtitleField},
};
FieldType GetFieldType(const std::string& field_name) {
@ -255,12 +257,35 @@ std::optional<StreamDescriptor> ParseStreamDescriptor(
case kDashLabelField:
descriptor.dash_label = pair.second;
break;
case kForcedSubtitleField:
unsigned forced_subtitle_value;
if (!absl::SimpleAtoi(pair.second, &forced_subtitle_value)) {
LOG(ERROR) << "Non-numeric option for forced field "
"specified ("
<< pair.second << ").";
return std::nullopt;
}
if (forced_subtitle_value > 1) {
LOG(ERROR) << "forced should be either 0 or 1.";
return std::nullopt;
}
descriptor.forced_subtitle = forced_subtitle_value > 0;
break;
default:
LOG(ERROR) << "Unknown field in stream descriptor (\"" << pair.first
<< "\").";
return std::nullopt;
}
}
if (descriptor.forced_subtitle) {
auto itr = std::find(descriptor.dash_roles.begin(),
descriptor.dash_roles.end(), "forced-subtitle");
if (itr == descriptor.dash_roles.end()) {
descriptor.dash_roles.push_back("forced-subtitle");
}
}
return descriptor;
}

View File

@ -310,7 +310,8 @@ class PackagerAppTest(unittest.TestCase):
skip_encryption=None,
bandwidth=None,
split_content_on_ad_cues=False,
test_file=None):
test_file=None,
forced_subtitle=None):
"""Get a stream descriptor as a string.
@ -347,8 +348,9 @@ class PackagerAppTest(unittest.TestCase):
into multiple files, with a total of NumAdCues + 1 files.
test_file: The input file to use. If the input file is not specified, a
default file will be used.
forced_subtitle: If set to true, it marks this as a Forced Narrative
subtitle, marked in DASH using forced-subtitle role and
in HLS using FORCED=YES.
Returns:
A string that makes up a single stream descriptor for input to the
packager.
@ -402,6 +404,9 @@ class PackagerAppTest(unittest.TestCase):
if dash_only:
stream.Append('dash_only', 1)
if forced_subtitle:
stream.Append('forced_subtitle', 1)
if dash_label:
stream.Append('dash_label', dash_label)
@ -799,6 +804,21 @@ class PackagerFunctionalTest(PackagerAppTest):
self.assertPackageSuccess(streams, self._GetFlags(output_dash=True))
self._CheckTestResults('dash-label')
def testForcedSubtitle(self):
  """Packages audio, video and one forced-subtitle text stream.

  The text stream comes from bear-english.vtt and is flagged with
  forced_subtitle; both DASH and HLS outputs are requested and compared
  against the 'forced-subtitle' expected results.
  """
  audio_stream = self._GetStream('audio', hls=True)
  video_stream = self._GetStream('video', hls=True)
  text_streams = self._GetStreams(
      ['text'], test_files=['bear-english.vtt'], forced_subtitle=True)

  all_streams = [audio_stream, video_stream]
  all_streams.extend(text_streams)

  flags = self._GetFlags(output_dash=True, output_hls=True)
  self.assertPackageSuccess(all_streams, flags)
  self._CheckTestResults('forced-subtitle')
def testAudioVideoWithLanguageOverride(self):
self.assertPackageSuccess(
self._GetStreams(['audio', 'video'], language='por', hls=True),

View File

@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-audio.mp4",BYTERANGE="804@0"
#EXTINF:1.022,
#EXT-X-BYTERANGE:17028@872
bear-640x360-audio.mp4
#EXTINF:0.998,
#EXT-X-BYTERANGE:16285
bear-640x360-audio.mp4
#EXTINF:0.720,
#EXT-X-BYTERANGE:9558
bear-640x360-audio.mp4
#EXT-X-ENDLIST

Binary file not shown.

View File

@ -0,0 +1,17 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-I-FRAMES-ONLY
#EXT-X-MAP:URI="bear-640x360-video.mp4",BYTERANGE="870@0"
#EXTINF:1.001,
#EXT-X-BYTERANGE:15581@938
bear-640x360-video.mp4
#EXTINF:1.001,
#EXT-X-BYTERANGE:18221@100251
bear-640x360-video.mp4
#EXTINF:0.734,
#EXT-X-BYTERANGE:19663@222058
bear-640x360-video.mp4
#EXT-X-ENDLIST

View File

@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-video.mp4",BYTERANGE="870@0"
#EXTINF:1.001,
#EXT-X-BYTERANGE:99313@938
bear-640x360-video.mp4
#EXTINF:1.001,
#EXT-X-BYTERANGE:121807
bear-640x360-video.mp4
#EXTINF:0.734,
#EXT-X-BYTERANGE:79662
bear-640x360-video.mp4
#EXT-X-ENDLIST

Binary file not shown.

View File

@ -0,0 +1,11 @@
WEBVTT
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800 align:center
Yup, that's a bear, eh.
00:00:01.000 --> 00:00:04.700 align:center
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,13 @@
#EXTM3U
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-MEDIA:TYPE=AUDIO,URI="bear-640x360-audio.m3u8",GROUP-ID="default-audio-group",NAME="stream_0",DEFAULT=NO,AUTOSELECT=YES,CHANNELS="2"
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_2.m3u8",GROUP-ID="default-text-group",NAME="stream_2",DEFAULT=NO,AUTOSELECT=YES,FORCED=YES
#EXT-X-STREAM-INF:BANDWIDTH=1106817,AVERAGE-BANDWIDTH=1004632,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=29.970,AUDIO="default-audio-group",SUBTITLES="default-text-group",CLOSED-CAPTIONS=NONE
bear-640x360-video.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=214292,AVERAGE-BANDWIDTH=156327,CODECS="avc1.64001e",RESOLUTION=640x360,CLOSED-CAPTIONS=NONE,URI="bear-640x360-video-iframe.m3u8"

View File

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.736067S">
<Period id="0">
<AdaptationSet id="0" contentType="audio" subsegmentAlignment="true">
<Representation id="0" bandwidth="133334" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>bear-640x360-audio.mp4</BaseURL>
<SegmentBase indexRange="804-871" timescale="44100">
<Initialization range="0-803"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="640" height="360" frameRate="30000/1001" subsegmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<BaseURL>bear-640x360-video.mp4</BaseURL>
<SegmentBase indexRange="870-937" timescale="30000">
<Initialization range="0-869"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="2" contentType="text" subsegmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="forced-subtitle"/>
<Representation id="2" bandwidth="317" mimeType="text/vtt">
<BaseURL>bear-english-text.vtt</BaseURL>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -0,0 +1,8 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:5
#EXT-X-PLAYLIST-TYPE:VOD
#EXTINF:4.700,
bear-english-text.vtt
#EXT-X-ENDLIST

View File

@ -318,11 +318,16 @@ void BuildMediaTag(const MediaPlaylist& playlist,
} else {
tag.AddString("DEFAULT", "NO");
}
if (is_autoselect) {
tag.AddString("AUTOSELECT", "YES");
}
if (playlist.stream_type() ==
MediaPlaylist::MediaPlaylistStreamType::kSubtitle &&
playlist.forced_subtitle()) {
tag.AddString("FORCED", "YES");
}
const std::vector<std::string>& characteristics = playlist.characteristics();
if (!characteristics.empty()) {
tag.AddQuotedString("CHARACTERISTICS", absl::StrJoin(characteristics, ","));
@ -401,6 +406,12 @@ void BuildMediaTags(
}
}
if (playlist->stream_type() ==
MediaPlaylist::MediaPlaylistStreamType::kSubtitle &&
playlist->forced_subtitle()) {
is_autoselect = true;
}
BuildMediaTag(*playlist, group_id, is_default, is_autoselect, base_url,
out);
}

View File

@ -373,6 +373,10 @@ void MediaPlaylist::SetCharacteristicsForTesting(
characteristics_ = characteristics;
}
void MediaPlaylist::SetForcedSubtitleForTesting(const bool forced_subtitle) {
forced_subtitle_ = forced_subtitle;
}
bool MediaPlaylist::SetMediaInfo(const MediaInfo& media_info) {
const int32_t time_scale = GetTimeScale(media_info);
if (time_scale == 0) {
@ -400,6 +404,8 @@ bool MediaPlaylist::SetMediaInfo(const MediaInfo& media_info) {
std::vector<std::string>(media_info_.hls_characteristics().begin(),
media_info_.hls_characteristics().end());
forced_subtitle_ = media_info_.forced_subtitle();
return true;
}

View File

@ -90,6 +90,9 @@ class MediaPlaylist {
/// For testing only.
void SetLanguageForTesting(const std::string& language);
/// For testing only.
void SetForcedSubtitleForTesting(const bool forced_subtitle);
/// For testing only.
void SetCharacteristicsForTesting(
const std::vector<std::string>& characteristics);
@ -223,6 +226,8 @@ class MediaPlaylist {
return characteristics_;
}
/// @return the forced-subtitle flag currently stored for this playlist.
bool forced_subtitle() const {
  return forced_subtitle_;
}
bool is_dvs() const {
// HLS Authoring Specification for Apple Devices
// https://developer.apple.com/documentation/http_live_streaming/hls_authoring_specification_for_apple_devices#overview
@ -262,6 +267,7 @@ class MediaPlaylist {
std::string codec_;
std::string language_;
std::vector<std::string> characteristics_;
bool forced_subtitle_ = false;
uint32_t media_sequence_number_ = 0;
bool inserted_discontinuity_tag_ = false;
int discontinuity_sequence_number_ = 0;

View File

@ -26,6 +26,7 @@ HlsNotifyMuxerListener::HlsNotifyMuxerListener(
const std::string& ext_x_media_name,
const std::string& ext_x_media_group_id,
const std::vector<std::string>& characteristics,
bool forced_subtitle,
hls::HlsNotifier* hls_notifier,
std::optional<uint32_t> index)
: playlist_name_(playlist_name),
@ -33,6 +34,7 @@ HlsNotifyMuxerListener::HlsNotifyMuxerListener(
ext_x_media_name_(ext_x_media_name),
ext_x_media_group_id_(ext_x_media_group_id),
characteristics_(characteristics),
forced_subtitle_(forced_subtitle),
hls_notifier_(hls_notifier),
index_(index) {
DCHECK(hls_notifier);
@ -103,6 +105,9 @@ void HlsNotifyMuxerListener::OnMediaStart(const MuxerOptions& muxer_options,
for (const std::string& characteristic : characteristics_)
media_info->add_hls_characteristics(characteristic);
}
if (forced_subtitle_) {
media_info->set_forced_subtitle(forced_subtitle_);
}
if (index_.has_value())
media_info->set_index(index_.value());

View File

@ -39,12 +39,16 @@ class HlsNotifyMuxerListener : public MuxerListener {
/// @param characteristics is the characteristics for this playlist. This is
/// the value of CHARACTERISTICS attribute for EXT-X-MEDIA. This may be
/// empty.
/// @param forced indicates whether this playlist is a Forced Narrative
///        subtitle playlist. When true, it is recorded in the MediaInfo so
///        that FORCED=YES can be set on the EXT-X-MEDIA tag.
/// @param hls_notifier used by this listener. Ownership does not transfer.
HlsNotifyMuxerListener(const std::string& playlist_name,
bool iframes_only,
const std::string& ext_x_media_name,
const std::string& ext_x_media_group_id,
const std::vector<std::string>& characteristics,
bool forced,
hls::HlsNotifier* hls_notifier,
std::optional<uint32_t> index);
~HlsNotifyMuxerListener() override;
@ -86,6 +90,7 @@ class HlsNotifyMuxerListener : public MuxerListener {
const std::string ext_x_media_name_;
const std::string ext_x_media_group_id_;
const std::vector<std::string> characteristics_;
const bool forced_subtitle_;
hls::HlsNotifier* const hls_notifier_;
std::optional<uint32_t> stream_id_;
std::optional<uint32_t> index_;

View File

@ -99,6 +99,7 @@ const char kDefaultName[] = "DEFAULTNAME";
const char kDefaultGroupId[] = "DEFAULTGROUPID";
const char kCharactersticA[] = "public.accessibility.transcribes-spoken-dialog";
const char kCharactersticB[] = "public.easy-to-read";
const bool kForced = false;
MATCHER_P(HasEncryptionScheme, expected_scheme, "") {
*result_listener << "it has_protected_content: "
@ -121,6 +122,7 @@ class HlsNotifyMuxerListenerTest : public ::testing::Test {
kDefaultName,
kDefaultGroupId,
std::vector<std::string>{kCharactersticA, kCharactersticB},
kForced,
&mock_notifier_,
0) {}
@ -459,6 +461,7 @@ class HlsNotifyMuxerListenerKeyFrameTest : public TestWithParam<bool> {
kDefaultName,
kDefaultGroupId,
std::vector<std::string>(), // no characteristics.
kForced,
&mock_notifier_,
0) {}

View File

@ -62,6 +62,7 @@ std::list<std::unique_ptr<MuxerListener>> CreateHlsListenersInternal(
const std::string& group_id = stream.hls_group_id;
const std::string& iframe_playlist_name = stream.hls_iframe_playlist_name;
const std::vector<std::string>& characteristics = stream.hls_characteristics;
const bool forced_subtitle = stream.forced_subtitle;
if (name.empty()) {
name = absl::StrFormat("stream_%d", stream_index);
@ -73,13 +74,13 @@ std::list<std::unique_ptr<MuxerListener>> CreateHlsListenersInternal(
const bool kIFramesOnly = true;
std::list<std::unique_ptr<MuxerListener>> listeners;
listeners.emplace_back(
new HlsNotifyMuxerListener(playlist_name, !kIFramesOnly, name, group_id,
characteristics, notifier, stream.index));
listeners.emplace_back(new HlsNotifyMuxerListener(
playlist_name, !kIFramesOnly, name, group_id, characteristics,
forced_subtitle, notifier, stream.index));
if (!iframe_playlist_name.empty()) {
listeners.emplace_back(new HlsNotifyMuxerListener(
iframe_playlist_name, kIFramesOnly, name, group_id,
std::vector<std::string>(), notifier, stream.index));
std::vector<std::string>(), forced_subtitle, notifier, stream.index));
}
return listeners;
}

View File

@ -47,6 +47,7 @@ class MuxerListenerFactory {
std::string hls_playlist_name;
std::string hls_iframe_playlist_name;
std::vector<std::string> hls_characteristics;
bool forced_subtitle = false;
bool hls_only = false;
// DASH specific values needed to write DASH mpd. Will only be used if an

View File

@ -59,6 +59,8 @@ std::string RoleToText(AdaptationSet::Role role) {
return "commentary";
case AdaptationSet::kRoleDub:
return "dub";
case AdaptationSet::kRoleForcedSubtitle:
return "forced-subtitle";
case AdaptationSet::kRoleDescription:
return "description";
default:

View File

@ -43,6 +43,7 @@ class AdaptationSet {
kRoleSupplementary,
kRoleCommentary,
kRoleDub,
kRoleForcedSubtitle,
kRoleDescription
};

View File

@ -213,6 +213,11 @@ message MediaInfo {
// Equal to the target segment duration times the reference time scale.
optional uint64 segment_duration = 25;
// Marks stream as a Forced Narrative subtitle stream, indicated using
// forced-subtitle role in DASH
// and FORCED=YES in HLS
optional bool forced_subtitle = 26 [default = false];
// stream index for consistent ordering of streams
optional uint32 index = 28;

View File

@ -59,6 +59,8 @@ AdaptationSet::Role RoleFromString(const std::string& role_str) {
return AdaptationSet::Role::kRoleCommentary;
if (role_str == "dub")
return AdaptationSet::Role::kRoleDub;
if (role_str == "forced-subtitle")
return AdaptationSet::Role::kRoleForcedSubtitle;
if (role_str == "description")
return AdaptationSet::Role::kRoleDescription;
return AdaptationSet::Role::kRoleUnknown;

View File

@ -69,6 +69,7 @@ MuxerListenerFactory::StreamData ToMuxerListenerData(
data.hls_playlist_name = stream.hls_playlist_name;
data.hls_iframe_playlist_name = stream.hls_iframe_playlist_name;
data.hls_characteristics = stream.hls_characteristics;
data.forced_subtitle = stream.forced_subtitle;
data.hls_only = stream.hls_only;
data.dash_accessiblities = stream.dash_accessiblities;