feat: Allow LIVE UDP WebVTT input (#1349)
An updated version of PR #1027. That previous PR was written against 2021 code, and the codebase has changed a lot since then, so a rebase was needed, along with some minor tweaks here and there. But it's the same code, just reimplemented on a newer codebase. If you want to see this in action, build shaka packager with this PR's code included and then run these commands in 3 simultaneous bash sessions: 1. Video UDP input: `ffmpeg -f lavfi -re -i "testsrc=s=320x240:r=30,format=yuv420p" -c:v h264 -sc_threshold 0 -g 30 -keyint_min 30 -r 30 -a53cc 1 -b:v 150k -preset ultrafast -r 30 -f mpegts "udp://127.0.0.1:10000?pkt_size=1316"` 2. WebVTT UDP input: `for sec in $(seq 0 9999) ; do printf "%02d:%02d.000 --> %02d:%02d.000\ntest second ${sec}\n\n" "$(( ${sec} / 60 ))" "$(( ${sec} % 60 ))" "$(( (${sec} + 1) / 60 ))" "$(( (${sec} + 1) % 60 ))" ; sleep 1 ; done > /dev/udp/127.0.0.1/12345` 3. shaka packager command line: `timeout 60 path/to/build/packager/packager 'in=udp://127.0.0.1:10000?timeout=8000000,stream_selector=0,init_segment=240_init.m4s,segment_template=240_$Number%09d$.m4s,bandwidth=150000' 'in=udp://127.0.0.1:12345?timeout=8000000,stream_selector=0,input_format=webvtt,format=webvtt+mp4,init_segment=text_init.m4s,segment_template=text_$Number%09d$.m4s,language=eng,dash_roles=subtitle' --mpd_output ./manifest.mpd --segment_duration 3.2 --suggested_presentation_delay 3.2 --min_buffer_time 3.2 --minimum_update_period 3.2 --time_shift_buffer_depth 60 --preserved_segments_outside_live_window 1 --default_language=eng --dump_stream_info 2>&1` Note the `input_format=webvtt` field added to the second selector of the shaka packager command. That field is new in this PR. Without it, shaka's format autodetection will not detect the WebVTT format from the input, as explained in https://github.com/shaka-project/shaka-packager/issues/685#issuecomment-1029407191. Try the command without it if you want to see the failure.
Fixes #685 Fixes #1017 --------- Co-authored-by: Daniel Cantarín <canta@canta.com.ar>
This commit is contained in:
parent
d23cce85b9
commit
89376d3c4d
|
@ -60,6 +60,20 @@ These are the available fields:
|
||||||
For subtitles in MP4, you can specify 'vtt+mp4' or 'ttml+mp4' to control
|
For subtitles in MP4, you can specify 'vtt+mp4' or 'ttml+mp4' to control
|
||||||
which text format is used.
|
which text format is used.
|
||||||
|
|
||||||
|
:input_format (format):
|
||||||
|
|
||||||
|
Optional value which specifies the format of the input files or
|
||||||
|
streams. If not specified, it will be autodetected, which in some
|
||||||
|
cases may fail.
|
||||||
|
|
||||||
|
For example, a live UDP WebVTT input stream may be up and streaming
|
||||||
|
long before a shaka packager instance consumes it, and therefore
|
||||||
|
shaka packager never gets the initial "WEBVTT" header string. In
|
||||||
|
such a case, shaka packager can't properly autodetect the stream
|
||||||
|
format as WebVTT, and thus doesn't process it. But stating
|
||||||
|
'input_format=webvtt' as selector parameter will tell shaka packager
|
||||||
|
to skip autodetection and treat that stream as WebVTT.
|
||||||
|
|
||||||
:trick_play_factor (tpf):
|
:trick_play_factor (tpf):
|
||||||
|
|
||||||
Optional value which specifies the trick play, a.k.a. trick mode, stream
|
Optional value which specifies the trick play, a.k.a. trick mode, stream
|
||||||
|
|
|
@ -152,6 +152,11 @@ struct StreamDescriptor {
|
||||||
/// Set to true to indicate that the stream is for hls only.
|
/// Set to true to indicate that the stream is for hls only.
|
||||||
bool hls_only = false;
|
bool hls_only = false;
|
||||||
|
|
||||||
|
/// Optional value which specifies input container format.
|
||||||
|
/// Useful for live streaming situations, such as WebVTT input that arrives
|
||||||
|
/// without its initial header and so cannot be auto-detected.
|
||||||
|
std::string input_format;
|
||||||
|
|
||||||
/// Optional, indicates if this is a Forced Narrative subtitle stream.
|
/// Optional, indicates if this is a Forced Narrative subtitle stream.
|
||||||
bool forced_subtitle = false;
|
bool forced_subtitle = false;
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,10 @@ const char kUsage[] =
|
||||||
" - output_format (format): Optional value which specifies the format\n"
|
" - output_format (format): Optional value which specifies the format\n"
|
||||||
" of the output files (MP4 or WebM). If not specified, it will be\n"
|
" of the output files (MP4 or WebM). If not specified, it will be\n"
|
||||||
" derived from the file extension of the output file.\n"
|
" derived from the file extension of the output file.\n"
|
||||||
|
" - input_format (format): Optional value which specifies the format\n"
|
||||||
|
" of the input files or streams. If not specified, it will be\n"
|
||||||
|
" autodetected, which in some cases (such as live UDP webvtt) may\n"
|
||||||
|
" fail.\n"
|
||||||
" - skip_encryption=0|1: Optional. Defaults to 0 if not specified. If\n"
|
" - skip_encryption=0|1: Optional. Defaults to 0 if not specified. If\n"
|
||||||
" it is set to 1, no encryption of the stream will be made.\n"
|
" it is set to 1, no encryption of the stream will be made.\n"
|
||||||
" - drm_label: Optional value for custom DRM label, which defines the\n"
|
" - drm_label: Optional value for custom DRM label, which defines the\n"
|
||||||
|
|
|
@ -41,6 +41,7 @@ enum FieldType {
|
||||||
kHlsOnlyField,
|
kHlsOnlyField,
|
||||||
kDashLabelField,
|
kDashLabelField,
|
||||||
kForcedSubtitleField,
|
kForcedSubtitleField,
|
||||||
|
kInputFormatField,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FieldNameToTypeMapping {
|
struct FieldNameToTypeMapping {
|
||||||
|
@ -90,6 +91,7 @@ const FieldNameToTypeMapping kFieldNameTypeMappings[] = {
|
||||||
{"hls_only", kHlsOnlyField},
|
{"hls_only", kHlsOnlyField},
|
||||||
{"dash_label", kDashLabelField},
|
{"dash_label", kDashLabelField},
|
||||||
{"forced_subtitle", kForcedSubtitleField},
|
{"forced_subtitle", kForcedSubtitleField},
|
||||||
|
{"input_format", kInputFormatField},
|
||||||
};
|
};
|
||||||
|
|
||||||
FieldType GetFieldType(const std::string& field_name) {
|
FieldType GetFieldType(const std::string& field_name) {
|
||||||
|
@ -271,6 +273,10 @@ std::optional<StreamDescriptor> ParseStreamDescriptor(
|
||||||
}
|
}
|
||||||
descriptor.forced_subtitle = forced_subtitle_value > 0;
|
descriptor.forced_subtitle = forced_subtitle_value > 0;
|
||||||
break;
|
break;
|
||||||
|
case kInputFormatField: {
|
||||||
|
descriptor.input_format = pair.second;
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
LOG(ERROR) << "Unknown field in stream descriptor (\"" << pair.first
|
LOG(ERROR) << "Unknown field in stream descriptor (\"" << pair.first
|
||||||
<< "\").";
|
<< "\").";
|
||||||
|
|
|
@ -165,21 +165,25 @@ Status Demuxer::InitializeParser() {
|
||||||
"Cannot open file for reading " + file_name_);
|
"Cannot open file for reading " + file_name_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read enough bytes before detecting the container.
|
|
||||||
int64_t bytes_read = 0;
|
int64_t bytes_read = 0;
|
||||||
bool eof = false;
|
bool eof = false;
|
||||||
while (static_cast<size_t>(bytes_read) < kInitBufSize) {
|
if (input_format_.empty()) {
|
||||||
int64_t read_result =
|
// Read enough bytes before detecting the container.
|
||||||
media_file_->Read(buffer_.get() + bytes_read, kInitBufSize);
|
while (static_cast<size_t>(bytes_read) < kInitBufSize) {
|
||||||
if (read_result < 0)
|
int64_t read_result =
|
||||||
return Status(error::FILE_FAILURE, "Cannot read file " + file_name_);
|
media_file_->Read(buffer_.get() + bytes_read, kInitBufSize);
|
||||||
if (read_result == 0) {
|
if (read_result < 0)
|
||||||
eof = true;
|
return Status(error::FILE_FAILURE, "Cannot read file " + file_name_);
|
||||||
break;
|
if (read_result == 0) {
|
||||||
|
eof = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bytes_read += read_result;
|
||||||
}
|
}
|
||||||
bytes_read += read_result;
|
container_name_ = DetermineContainer(buffer_.get(), bytes_read);
|
||||||
|
} else {
|
||||||
|
container_name_ = DetermineContainerFromFormatName(input_format_);
|
||||||
}
|
}
|
||||||
container_name_ = DetermineContainer(buffer_.get(), bytes_read);
|
|
||||||
|
|
||||||
// Initialize media parser.
|
// Initialize media parser.
|
||||||
switch (container_name_) {
|
switch (container_name_) {
|
||||||
|
|
|
@ -75,6 +75,10 @@ class Demuxer : public OriginHandler {
|
||||||
dump_stream_info_ = dump_stream_info;
|
dump_stream_info_ = dump_stream_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void set_input_format(std::string input_format) {
|
||||||
|
input_format_ = input_format;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// @name MediaHandler implementation overrides.
|
/// @name MediaHandler implementation overrides.
|
||||||
/// @{
|
/// @{
|
||||||
|
@ -148,6 +152,8 @@ class Demuxer : public OriginHandler {
|
||||||
// Whether to dump stream info when it is received.
|
// Whether to dump stream info when it is received.
|
||||||
bool dump_stream_info_ = false;
|
bool dump_stream_info_ = false;
|
||||||
Status init_event_status_;
|
Status init_event_status_;
|
||||||
|
// Explicitly defined input format, for avoiding autodetection.
|
||||||
|
std::string input_format_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace media
|
} // namespace media
|
||||||
|
|
|
@ -40,6 +40,10 @@ class MuxerListenerFactory {
|
||||||
// told to output media info.
|
// told to output media info.
|
||||||
std::string media_info_output;
|
std::string media_info_output;
|
||||||
|
|
||||||
|
// Explicit input format, for avoiding autodetection when needed.
|
||||||
|
// This is useful for cases such as live WebVTT through UDP.
|
||||||
|
std::string input_format;
|
||||||
|
|
||||||
// HLS specific values needed to write to HLS manifests. Will only be used
|
// HLS specific values needed to write to HLS manifests. Will only be used
|
||||||
// if an HlsNotifier is given to the factory.
|
// if an HlsNotifier is given to the factory.
|
||||||
std::string hls_group_id;
|
std::string hls_group_id;
|
||||||
|
|
|
@ -225,14 +225,12 @@ bool WebVttParser::Parse() {
|
||||||
// Check the header. It is possible for a 0xFEFF BOM to come before the
|
// Check the header. It is possible for a 0xFEFF BOM to come before the
|
||||||
// header text.
|
// header text.
|
||||||
if (block.size() != 1) {
|
if (block.size() != 1) {
|
||||||
LOG(ERROR) << "Failed to read WEBVTT header - "
|
LOG(WARNING) << "Failed to read WEBVTT header - "
|
||||||
<< "block size should be 1 but was " << block.size() << ".";
|
<< "block size should be 1 but was " << block.size() << ".";
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
|
if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
|
||||||
LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
|
LOG(WARNING) << "Failed to read WEBVTT header - should be WEBVTT but was "
|
||||||
<< block[0];
|
<< block[0];
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
initialized_ = true;
|
initialized_ = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -119,14 +119,17 @@ TEST_F(WebVttParserTest, ParseHeaderWithBOM) {
|
||||||
ASSERT_TRUE(samples_.empty());
|
ASSERT_TRUE(samples_.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(WebVttParserTest, FailToParseHeaderWrongWord) {
|
TEST_F(WebVttParserTest, ParseNoHeaderWithoutExiting) {
|
||||||
|
// A proper WebVTT file should have the "WEBVTT" string header.
|
||||||
|
// But UDP input (not file) may be ingested after the header has already
|
||||||
|
// passed, and the header will not be repeated later.
|
||||||
const uint8_t text[] =
|
const uint8_t text[] =
|
||||||
"NOT WEBVTT\n"
|
"00:00:01.000 --> 00:00:02.000\n"
|
||||||
"\n";
|
"\n";
|
||||||
|
|
||||||
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
|
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
|
||||||
|
|
||||||
ASSERT_FALSE(parser_->Parse(text, sizeof(text) - 1));
|
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
|
||||||
|
|
||||||
ASSERT_TRUE(streams_.empty());
|
ASSERT_TRUE(streams_.empty());
|
||||||
ASSERT_TRUE(samples_.empty());
|
ASSERT_TRUE(samples_.empty());
|
||||||
|
@ -140,7 +143,7 @@ TEST_F(WebVttParserTest, FailToParseHeaderNotOneLine) {
|
||||||
|
|
||||||
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
|
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
|
||||||
|
|
||||||
ASSERT_FALSE(parser_->Parse(text, sizeof(text) - 1));
|
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
|
||||||
|
|
||||||
ASSERT_TRUE(streams_.empty());
|
ASSERT_TRUE(streams_.empty());
|
||||||
ASSERT_TRUE(samples_.empty());
|
ASSERT_TRUE(samples_.empty());
|
||||||
|
|
|
@ -77,6 +77,7 @@ MuxerListenerFactory::StreamData ToMuxerListenerData(
|
||||||
data.dash_only = stream.dash_only;
|
data.dash_only = stream.dash_only;
|
||||||
data.index = stream.index;
|
data.index = stream.index;
|
||||||
data.dash_label = stream.dash_label;
|
data.dash_label = stream.dash_label;
|
||||||
|
data.input_format = stream.input_format;
|
||||||
return data;
|
return data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -447,6 +448,7 @@ Status CreateDemuxer(const StreamDescriptor& stream,
|
||||||
std::shared_ptr<Demuxer>* new_demuxer) {
|
std::shared_ptr<Demuxer>* new_demuxer) {
|
||||||
std::shared_ptr<Demuxer> demuxer = std::make_shared<Demuxer>(stream.input);
|
std::shared_ptr<Demuxer> demuxer = std::make_shared<Demuxer>(stream.input);
|
||||||
demuxer->set_dump_stream_info(packaging_params.test_params.dump_stream_info);
|
demuxer->set_dump_stream_info(packaging_params.test_params.dump_stream_info);
|
||||||
|
demuxer->set_input_format(stream.input_format);
|
||||||
|
|
||||||
if (packaging_params.decryption_params.key_provider != KeyProvider::kNone) {
|
if (packaging_params.decryption_params.key_provider != KeyProvider::kNone) {
|
||||||
std::unique_ptr<KeySource> decryption_key_source(
|
std::unique_ptr<KeySource> decryption_key_source(
|
||||||
|
|
Loading…
Reference in New Issue