Add TTML-in-MP4 output support.
This changes the default text-in-MP4 output to use TTML and adds a way to choose between TTML and WebVTT. This is done with 'format=ttml+mp4' or 'format=vtt+mp4'. This also fixes the boxes output for WebVTT in MP4. Change-Id: Ieaa7fc44fbf4dc020a5bb70cfa3578ec10e088ce
This commit is contained in:
parent
4766654b4d
commit
a93eeca5db
|
@ -243,7 +243,9 @@ def _UpdateMpdTimes(mpd_filepath):
|
||||||
|
|
||||||
|
|
||||||
def GetExtension(input_file_path, output_format):
|
def GetExtension(input_file_path, output_format):
|
||||||
if output_format:
|
if output_format in {'vtt+mp4', 'ttml+mp4'}:
|
||||||
|
return 'mp4'
|
||||||
|
elif output_format:
|
||||||
return output_format
|
return output_format
|
||||||
# Otherwise use the same extension as the input.
|
# Otherwise use the same extension as the input.
|
||||||
ext = os.path.splitext(input_file_path)[1]
|
ext = os.path.splitext(input_file_path)[1]
|
||||||
|
@ -857,6 +859,14 @@ class PackagerFunctionalTest(PackagerAppTest):
|
||||||
self.assertPackageSuccess(streams, flags)
|
self.assertPackageSuccess(streams, flags)
|
||||||
self._CheckTestResults('segmented-ttml-text')
|
self._CheckTestResults('segmented-ttml-text')
|
||||||
|
|
||||||
|
def testSegmentedTtmlMp4(self):
|
||||||
|
streams = self._GetStreams(['text'], test_files=['bear-english.vtt'],
|
||||||
|
output_format='ttml+mp4', segmented=True)
|
||||||
|
flags = self._GetFlags(output_hls=True, output_dash=True)
|
||||||
|
|
||||||
|
self.assertPackageSuccess(streams, flags)
|
||||||
|
self._CheckTestResults('segmented-ttml-mp4')
|
||||||
|
|
||||||
def testMp4TrailingMoov(self):
|
def testMp4TrailingMoov(self):
|
||||||
self.assertPackageSuccess(
|
self.assertPackageSuccess(
|
||||||
self._GetStreams(['audio', 'video'],
|
self._GetStreams(['audio', 'video'],
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,6 @@
|
||||||
|
#EXTM3U
|
||||||
|
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
|
||||||
|
|
||||||
|
#EXT-X-INDEPENDENT-SEGMENTS
|
||||||
|
|
||||||
|
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_0.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES
|
|
@ -0,0 +1,16 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
|
||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT2S" type="dynamic" publishTime="some_time" availabilityStartTime="some_time" minimumUpdatePeriod="PT5S" timeShiftBufferDepth="PT1800S">
|
||||||
|
<Period id="0" start="PT0S">
|
||||||
|
<AdaptationSet id="0" contentType="text" segmentAlignment="true">
|
||||||
|
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
|
||||||
|
<Representation id="0" bandwidth="4112" codecs="stpp" mimeType="application/mp4">
|
||||||
|
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
|
||||||
|
<SegmentTimeline>
|
||||||
|
<S t="0" d="1000" r="4"/>
|
||||||
|
</SegmentTimeline>
|
||||||
|
</SegmentTemplate>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>
|
|
@ -0,0 +1,17 @@
|
||||||
|
#EXTM3U
|
||||||
|
#EXT-X-VERSION:6
|
||||||
|
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
|
||||||
|
#EXT-X-TARGETDURATION:1
|
||||||
|
#EXT-X-PLAYLIST-TYPE:VOD
|
||||||
|
#EXT-X-MAP:URI="bear-english-text-init.mp4"
|
||||||
|
#EXTINF:1.000,
|
||||||
|
bear-english-text-1.m4s
|
||||||
|
#EXTINF:1.000,
|
||||||
|
bear-english-text-2.m4s
|
||||||
|
#EXTINF:1.000,
|
||||||
|
bear-english-text-3.m4s
|
||||||
|
#EXTINF:1.000,
|
||||||
|
bear-english-text-4.m4s
|
||||||
|
#EXTINF:1.000,
|
||||||
|
bear-english-text-5.m4s
|
||||||
|
#EXT-X-ENDLIST
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -62,6 +62,18 @@ bool BufferReader::ReadToString(std::string* str, size_t size) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool BufferReader::ReadCString(std::string* str) {
|
||||||
|
DCHECK(str);
|
||||||
|
for (size_t count = 0; pos_ + count < size_; count++) {
|
||||||
|
if (buf_[pos_ + count] == 0) {
|
||||||
|
str->assign(buf_ + pos_, buf_ + pos_ + count);
|
||||||
|
pos_ += count + 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false; // EOF
|
||||||
|
}
|
||||||
|
|
||||||
bool BufferReader::SkipBytes(size_t num_bytes) {
|
bool BufferReader::SkipBytes(size_t num_bytes) {
|
||||||
if (!HasBytes(num_bytes))
|
if (!HasBytes(num_bytes))
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -56,6 +56,9 @@ class BufferReader {
|
||||||
bool ReadToVector(std::vector<uint8_t>* t, size_t count) WARN_UNUSED_RESULT;
|
bool ReadToVector(std::vector<uint8_t>* t, size_t count) WARN_UNUSED_RESULT;
|
||||||
bool ReadToString(std::string* str, size_t size) WARN_UNUSED_RESULT;
|
bool ReadToString(std::string* str, size_t size) WARN_UNUSED_RESULT;
|
||||||
|
|
||||||
|
/// Reads a null-terminated string.
|
||||||
|
bool ReadCString(std::string* str) WARN_UNUSED_RESULT;
|
||||||
|
|
||||||
/// Advance the stream by this many bytes.
|
/// Advance the stream by this many bytes.
|
||||||
/// @return false if there are not enough bytes in the buffer, true otherwise.
|
/// @return false if there are not enough bytes in the buffer, true otherwise.
|
||||||
bool SkipBytes(size_t num_bytes) WARN_UNUSED_RESULT;
|
bool SkipBytes(size_t num_bytes) WARN_UNUSED_RESULT;
|
||||||
|
|
|
@ -1745,7 +1745,10 @@ MediaContainerName DetermineContainerFromFormatName(
|
||||||
base::EqualsCaseInsensitiveASCII(format_name, "m4s") ||
|
base::EqualsCaseInsensitiveASCII(format_name, "m4s") ||
|
||||||
base::EqualsCaseInsensitiveASCII(format_name, "m4v") ||
|
base::EqualsCaseInsensitiveASCII(format_name, "m4v") ||
|
||||||
base::EqualsCaseInsensitiveASCII(format_name, "mov") ||
|
base::EqualsCaseInsensitiveASCII(format_name, "mov") ||
|
||||||
base::EqualsCaseInsensitiveASCII(format_name, "mp4")) {
|
base::EqualsCaseInsensitiveASCII(format_name, "mp4") ||
|
||||||
|
base::EqualsCaseInsensitiveASCII(format_name, "ttml+mp4") ||
|
||||||
|
base::EqualsCaseInsensitiveASCII(format_name, "webvtt+mp4") ||
|
||||||
|
base::EqualsCaseInsensitiveASCII(format_name, "vtt+mp4")) {
|
||||||
return CONTAINER_MOV;
|
return CONTAINER_MOV;
|
||||||
} else if (base::EqualsCaseInsensitiveASCII(format_name, "ts") ||
|
} else if (base::EqualsCaseInsensitiveASCII(format_name, "ts") ||
|
||||||
base::EqualsCaseInsensitiveASCII(format_name, "mpeg2ts")) {
|
base::EqualsCaseInsensitiveASCII(format_name, "mpeg2ts")) {
|
||||||
|
|
|
@ -98,6 +98,7 @@ enum FourCC : uint32_t {
|
||||||
FOURCC_mp4v = 0x6d703476,
|
FOURCC_mp4v = 0x6d703476,
|
||||||
FOURCC_mvex = 0x6d766578,
|
FOURCC_mvex = 0x6d766578,
|
||||||
FOURCC_mvhd = 0x6d766864,
|
FOURCC_mvhd = 0x6d766864,
|
||||||
|
FOURCC_nmhd = 0x6e6d6864,
|
||||||
FOURCC_pasp = 0x70617370,
|
FOURCC_pasp = 0x70617370,
|
||||||
FOURCC_payl = 0x7061796c,
|
FOURCC_payl = 0x7061796c,
|
||||||
FOURCC_pdin = 0x7064696e,
|
FOURCC_pdin = 0x7064696e,
|
||||||
|
@ -122,6 +123,7 @@ enum FourCC : uint32_t {
|
||||||
FOURCC_stbl = 0x7374626c,
|
FOURCC_stbl = 0x7374626c,
|
||||||
FOURCC_stco = 0x7374636f,
|
FOURCC_stco = 0x7374636f,
|
||||||
FOURCC_sthd = 0x73746864,
|
FOURCC_sthd = 0x73746864,
|
||||||
|
FOURCC_stpp = 0x73747070,
|
||||||
FOURCC_stsc = 0x73747363,
|
FOURCC_stsc = 0x73747363,
|
||||||
FOURCC_stsd = 0x73747364,
|
FOURCC_stsd = 0x73747364,
|
||||||
FOURCC_stss = 0x73747373,
|
FOURCC_stss = 0x73747373,
|
||||||
|
|
|
@ -145,6 +145,16 @@ class BoxBuffer {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ReadWriteCString(std::string* str) {
|
||||||
|
if (reader_)
|
||||||
|
return reader_->ReadCString(str);
|
||||||
|
// Cannot contain embedded nulls.
|
||||||
|
DCHECK_EQ(str->find('\0'), std::string::npos);
|
||||||
|
writer_->AppendString(*str);
|
||||||
|
writer_->AppendInt(static_cast<uint8_t>('\0'));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool ReadWriteFourCC(FourCC* fourcc) {
|
bool ReadWriteFourCC(FourCC* fourcc) {
|
||||||
if (reader_)
|
if (reader_)
|
||||||
return reader_->ReadFourCC(fourcc);
|
return reader_->ReadFourCC(fourcc);
|
||||||
|
|
|
@ -33,6 +33,7 @@ const uint8_t kUnityMatrix[] = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
const char kVideoHandlerName[] = "VideoHandler";
|
const char kVideoHandlerName[] = "VideoHandler";
|
||||||
const char kAudioHandlerName[] = "SoundHandler";
|
const char kAudioHandlerName[] = "SoundHandler";
|
||||||
const char kTextHandlerName[] = "TextHandler";
|
const char kTextHandlerName[] = "TextHandler";
|
||||||
|
const char kSubtitleHandlerName[] = "SubtitleHandler";
|
||||||
|
|
||||||
// Default values for VideoSampleEntry box.
|
// Default values for VideoSampleEntry box.
|
||||||
const uint32_t kVideoResolution = 0x00480000; // 72 dpi.
|
const uint32_t kVideoResolution = 0x00480000; // 72 dpi.
|
||||||
|
@ -106,6 +107,8 @@ TrackType FourCCToTrackType(FourCC fourcc) {
|
||||||
return kAudio;
|
return kAudio;
|
||||||
case FOURCC_text:
|
case FOURCC_text:
|
||||||
return kText;
|
return kText;
|
||||||
|
case FOURCC_subt:
|
||||||
|
return kSubtitle;
|
||||||
default:
|
default:
|
||||||
return kInvalid;
|
return kInvalid;
|
||||||
}
|
}
|
||||||
|
@ -119,6 +122,8 @@ FourCC TrackTypeToFourCC(TrackType track_type) {
|
||||||
return FOURCC_soun;
|
return FOURCC_soun;
|
||||||
case kText:
|
case kText:
|
||||||
return FOURCC_text;
|
return FOURCC_text;
|
||||||
|
case kSubtitle:
|
||||||
|
return FOURCC_subt;
|
||||||
default:
|
default:
|
||||||
return FOURCC_NULL;
|
return FOURCC_NULL;
|
||||||
}
|
}
|
||||||
|
@ -628,6 +633,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
count = static_cast<uint32_t>(audio_entries.size());
|
count = static_cast<uint32_t>(audio_entries.size());
|
||||||
break;
|
break;
|
||||||
case kText:
|
case kText:
|
||||||
|
case kSubtitle:
|
||||||
count = static_cast<uint32_t>(text_entries.size());
|
count = static_cast<uint32_t>(text_entries.size());
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -649,7 +655,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
} else if (type == kAudio) {
|
} else if (type == kAudio) {
|
||||||
RCHECK(reader->ReadAllChildren(&audio_entries));
|
RCHECK(reader->ReadAllChildren(&audio_entries));
|
||||||
RCHECK(audio_entries.size() == count);
|
RCHECK(audio_entries.size() == count);
|
||||||
} else if (type == kText) {
|
} else if (type == kText || type == kSubtitle) {
|
||||||
RCHECK(reader->ReadAllChildren(&text_entries));
|
RCHECK(reader->ReadAllChildren(&text_entries));
|
||||||
RCHECK(text_entries.size() == count);
|
RCHECK(text_entries.size() == count);
|
||||||
}
|
}
|
||||||
|
@ -661,7 +667,7 @@ bool SampleDescription::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
} else if (type == kAudio) {
|
} else if (type == kAudio) {
|
||||||
for (uint32_t i = 0; i < count; ++i)
|
for (uint32_t i = 0; i < count; ++i)
|
||||||
RCHECK(buffer->ReadWriteChild(&audio_entries[i]));
|
RCHECK(buffer->ReadWriteChild(&audio_entries[i]));
|
||||||
} else if (type == kText) {
|
} else if (type == kText || type == kSubtitle) {
|
||||||
for (uint32_t i = 0; i < count; ++i)
|
for (uint32_t i = 0; i < count; ++i)
|
||||||
RCHECK(buffer->ReadWriteChild(&text_entries[i]));
|
RCHECK(buffer->ReadWriteChild(&text_entries[i]));
|
||||||
} else {
|
} else {
|
||||||
|
@ -679,7 +685,7 @@ size_t SampleDescription::ComputeSizeInternal() {
|
||||||
} else if (type == kAudio) {
|
} else if (type == kAudio) {
|
||||||
for (uint32_t i = 0; i < audio_entries.size(); ++i)
|
for (uint32_t i = 0; i < audio_entries.size(); ++i)
|
||||||
box_size += audio_entries[i].ComputeSize();
|
box_size += audio_entries[i].ComputeSize();
|
||||||
} else if (type == kText) {
|
} else if (type == kText || type == kSubtitle) {
|
||||||
for (uint32_t i = 0; i < text_entries.size(); ++i)
|
for (uint32_t i = 0; i < text_entries.size(); ++i)
|
||||||
box_size += text_entries[i].ComputeSize();
|
box_size += text_entries[i].ComputeSize();
|
||||||
}
|
}
|
||||||
|
@ -1293,6 +1299,11 @@ bool HandlerReference::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
handler_name.assign(kTextHandlerName,
|
handler_name.assign(kTextHandlerName,
|
||||||
kTextHandlerName + arraysize(kTextHandlerName));
|
kTextHandlerName + arraysize(kTextHandlerName));
|
||||||
break;
|
break;
|
||||||
|
case FOURCC_subt:
|
||||||
|
handler_name.assign(
|
||||||
|
kSubtitleHandlerName,
|
||||||
|
kSubtitleHandlerName + arraysize(kSubtitleHandlerName));
|
||||||
|
break;
|
||||||
case FOURCC_ID32:
|
case FOURCC_ID32:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -1322,6 +1333,9 @@ size_t HandlerReference::ComputeSizeInternal() {
|
||||||
case FOURCC_text:
|
case FOURCC_text:
|
||||||
box_size += sizeof(kTextHandlerName);
|
box_size += sizeof(kTextHandlerName);
|
||||||
break;
|
break;
|
||||||
|
case FOURCC_subt:
|
||||||
|
box_size += sizeof(kSubtitleHandlerName);
|
||||||
|
break;
|
||||||
case FOURCC_ID32:
|
case FOURCC_ID32:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -2000,14 +2014,25 @@ bool TextSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
// TODO(rkuroiwa): Handle the optional MPEG4BitRateBox.
|
// TODO(rkuroiwa): Handle the optional MPEG4BitRateBox.
|
||||||
RCHECK(buffer->PrepareChildren() && buffer->ReadWriteChild(&config) &&
|
RCHECK(buffer->PrepareChildren() && buffer->ReadWriteChild(&config) &&
|
||||||
buffer->ReadWriteChild(&label));
|
buffer->ReadWriteChild(&label));
|
||||||
|
} else if (format == FOURCC_stpp) {
|
||||||
|
// These are marked as "optional", but they should still have the
|
||||||
|
// null-terminator, so this should still work.
|
||||||
|
RCHECK(buffer->ReadWriteCString(&namespace_) &&
|
||||||
|
buffer->ReadWriteCString(&schema_location));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t TextSampleEntry::ComputeSizeInternal() {
|
size_t TextSampleEntry::ComputeSizeInternal() {
|
||||||
// 6 for the (anonymous) reserved bytes for SampleEntry class.
|
// 6 for the (anonymous) reserved bytes for SampleEntry class.
|
||||||
return HeaderSize() + 6 + sizeof(data_reference_index) +
|
size_t ret = HeaderSize() + 6 + sizeof(data_reference_index);
|
||||||
config.ComputeSize() + label.ComputeSize();
|
if (format == FOURCC_wvtt) {
|
||||||
|
ret += config.ComputeSize() + label.ComputeSize();
|
||||||
|
} else if (format == FOURCC_stpp) {
|
||||||
|
// +2 for the two null terminators for these strings.
|
||||||
|
ret += namespace_.size() + schema_location.size() + 2;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
MediaHeader::MediaHeader() = default;
|
MediaHeader::MediaHeader() = default;
|
||||||
|
@ -2079,6 +2104,21 @@ size_t SoundMediaHeader::ComputeSizeInternal() {
|
||||||
return HeaderSize() + sizeof(balance) + sizeof(uint16_t);
|
return HeaderSize() + sizeof(balance) + sizeof(uint16_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NullMediaHeader::NullMediaHeader() = default;
|
||||||
|
NullMediaHeader::~NullMediaHeader() = default;
|
||||||
|
|
||||||
|
FourCC NullMediaHeader::BoxType() const {
|
||||||
|
return FOURCC_nmhd;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool NullMediaHeader::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
|
return ReadWriteHeaderInternal(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t NullMediaHeader::ComputeSizeInternal() {
|
||||||
|
return HeaderSize();
|
||||||
|
}
|
||||||
|
|
||||||
SubtitleMediaHeader::SubtitleMediaHeader() = default;
|
SubtitleMediaHeader::SubtitleMediaHeader() = default;
|
||||||
SubtitleMediaHeader::~SubtitleMediaHeader() = default;
|
SubtitleMediaHeader::~SubtitleMediaHeader() = default;
|
||||||
|
|
||||||
|
@ -2178,6 +2218,9 @@ bool MediaInformation::ReadWriteInternal(BoxBuffer* buffer) {
|
||||||
RCHECK(buffer->ReadWriteChild(&smhd));
|
RCHECK(buffer->ReadWriteChild(&smhd));
|
||||||
break;
|
break;
|
||||||
case kText:
|
case kText:
|
||||||
|
RCHECK(buffer->TryReadWriteChild(&nmhd));
|
||||||
|
break;
|
||||||
|
case kSubtitle:
|
||||||
RCHECK(buffer->TryReadWriteChild(&sthd));
|
RCHECK(buffer->TryReadWriteChild(&sthd));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -2198,6 +2241,9 @@ size_t MediaInformation::ComputeSizeInternal() {
|
||||||
box_size += smhd.ComputeSize();
|
box_size += smhd.ComputeSize();
|
||||||
break;
|
break;
|
||||||
case kText:
|
case kText:
|
||||||
|
box_size += nmhd.ComputeSize();
|
||||||
|
break;
|
||||||
|
case kSubtitle:
|
||||||
box_size += sthd.ComputeSize();
|
box_size += sthd.ComputeSize();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -26,6 +26,7 @@ enum TrackType {
|
||||||
kAudio,
|
kAudio,
|
||||||
kHint,
|
kHint,
|
||||||
kText,
|
kText,
|
||||||
|
kSubtitle,
|
||||||
};
|
};
|
||||||
|
|
||||||
class BoxBuffer;
|
class BoxBuffer;
|
||||||
|
@ -407,6 +408,11 @@ struct TextSampleEntry : Box {
|
||||||
// always present.
|
// always present.
|
||||||
uint16_t data_reference_index = 1u;
|
uint16_t data_reference_index = 1u;
|
||||||
|
|
||||||
|
// Sub fields for ttml text sample entry.
|
||||||
|
std::string namespace_;
|
||||||
|
std::string schema_location;
|
||||||
|
// Optional MPEG4BitRateBox.
|
||||||
|
|
||||||
// Sub boxes for wvtt text sample entry.
|
// Sub boxes for wvtt text sample entry.
|
||||||
WebVTTConfigurationBox config;
|
WebVTTConfigurationBox config;
|
||||||
WebVTTSourceLabelBox label;
|
WebVTTSourceLabelBox label;
|
||||||
|
@ -597,6 +603,10 @@ struct SoundMediaHeader : FullBox {
|
||||||
uint16_t balance = 0u;
|
uint16_t balance = 0u;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct NullMediaHeader : FullBox {
|
||||||
|
DECLARE_BOX_METHODS(NullMediaHeader);
|
||||||
|
};
|
||||||
|
|
||||||
struct SubtitleMediaHeader : FullBox {
|
struct SubtitleMediaHeader : FullBox {
|
||||||
DECLARE_BOX_METHODS(SubtitleMediaHeader);
|
DECLARE_BOX_METHODS(SubtitleMediaHeader);
|
||||||
};
|
};
|
||||||
|
@ -628,6 +638,7 @@ struct MediaInformation : Box {
|
||||||
// Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
|
// Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
|
||||||
VideoMediaHeader vmhd;
|
VideoMediaHeader vmhd;
|
||||||
SoundMediaHeader smhd;
|
SoundMediaHeader smhd;
|
||||||
|
NullMediaHeader nmhd;
|
||||||
SubtitleMediaHeader sthd;
|
SubtitleMediaHeader sthd;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,7 @@
|
||||||
'../../base/media_base.gyp:media_base',
|
'../../base/media_base.gyp:media_base',
|
||||||
'../../codecs/codecs.gyp:codecs',
|
'../../codecs/codecs.gyp:codecs',
|
||||||
'../../event/media_event.gyp:media_event',
|
'../../event/media_event.gyp:media_event',
|
||||||
|
'../../formats/ttml/ttml.gyp:ttml',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "packager/media/formats/mp4/box_definitions.h"
|
#include "packager/media/formats/mp4/box_definitions.h"
|
||||||
#include "packager/media/formats/mp4/multi_segment_segmenter.h"
|
#include "packager/media/formats/mp4/multi_segment_segmenter.h"
|
||||||
#include "packager/media/formats/mp4/single_segment_segmenter.h"
|
#include "packager/media/formats/mp4/single_segment_segmenter.h"
|
||||||
|
#include "packager/media/formats/ttml/ttml_generator.h"
|
||||||
#include "packager/status_macros.h"
|
#include "packager/status_macros.h"
|
||||||
|
|
||||||
namespace shaka {
|
namespace shaka {
|
||||||
|
@ -593,6 +594,17 @@ bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
|
||||||
sample_description.type = kText;
|
sample_description.type = kText;
|
||||||
sample_description.text_entries.push_back(webvtt);
|
sample_description.text_entries.push_back(webvtt);
|
||||||
return true;
|
return true;
|
||||||
|
} else if (text_info->codec_string() == "ttml") {
|
||||||
|
// Handle TTML.
|
||||||
|
TextSampleEntry ttml;
|
||||||
|
ttml.format = FOURCC_stpp;
|
||||||
|
ttml.namespace_ = ttml::TtmlGenerator::kTtNamespace;
|
||||||
|
|
||||||
|
SampleDescription& sample_description =
|
||||||
|
trak->media.information.sample_table.description;
|
||||||
|
sample_description.type = kSubtitle;
|
||||||
|
sample_description.text_entries.push_back(ttml);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
NOTIMPLEMENTED() << text_info->codec_string()
|
NOTIMPLEMENTED() << text_info->codec_string()
|
||||||
<< " handling not implemented yet.";
|
<< " handling not implemented yet.";
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
'ttml_generator.h',
|
'ttml_generator.h',
|
||||||
'ttml_muxer.cc',
|
'ttml_muxer.cc',
|
||||||
'ttml_muxer.h',
|
'ttml_muxer.h',
|
||||||
|
'ttml_to_mp4_handler.cc',
|
||||||
|
'ttml_to_mp4_handler.h',
|
||||||
],
|
],
|
||||||
'dependencies': [
|
'dependencies': [
|
||||||
'../../base/media_base.gyp:media_base',
|
'../../base/media_base.gyp:media_base',
|
||||||
|
|
|
@ -38,6 +38,8 @@ std::string ToTtmlSize(const TextNumber& x, const TextNumber& y) {
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
const char* TtmlGenerator::kTtNamespace = "http://www.w3.org/ns/ttml";
|
||||||
|
|
||||||
TtmlGenerator::TtmlGenerator() {}
|
TtmlGenerator::TtmlGenerator() {}
|
||||||
|
|
||||||
TtmlGenerator::~TtmlGenerator() {}
|
TtmlGenerator::~TtmlGenerator() {}
|
||||||
|
@ -60,7 +62,7 @@ void TtmlGenerator::Reset() {
|
||||||
|
|
||||||
bool TtmlGenerator::Dump(std::string* result) const {
|
bool TtmlGenerator::Dump(std::string* result) const {
|
||||||
xml::XmlNode root("tt");
|
xml::XmlNode root("tt");
|
||||||
RCHECK(root.SetStringAttribute("xmlns", "http://www.w3.org/ns/ttml"));
|
RCHECK(root.SetStringAttribute("xmlns", kTtNamespace));
|
||||||
RCHECK(root.SetStringAttribute("xmlns:tts",
|
RCHECK(root.SetStringAttribute("xmlns:tts",
|
||||||
"http://www.w3.org/ns/ttml#styling"));
|
"http://www.w3.org/ns/ttml#styling"));
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,8 @@ class TtmlGenerator {
|
||||||
explicit TtmlGenerator();
|
explicit TtmlGenerator();
|
||||||
~TtmlGenerator();
|
~TtmlGenerator();
|
||||||
|
|
||||||
|
static const char* kTtNamespace;
|
||||||
|
|
||||||
void Initialize(const std::map<std::string, TextRegion>& regions,
|
void Initialize(const std::map<std::string, TextRegion>& regions,
|
||||||
const std::string& language,
|
const std::string& language,
|
||||||
uint32_t time_scale);
|
uint32_t time_scale);
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
// Copyright 2020 Google LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file or at
|
||||||
|
// https://developers.google.com/open-source/licenses/bsd
|
||||||
|
|
||||||
|
#include "packager/media/formats/ttml/ttml_to_mp4_handler.h"
|
||||||
|
|
||||||
|
#include "packager/status_macros.h"
|
||||||
|
|
||||||
|
namespace shaka {
|
||||||
|
namespace media {
|
||||||
|
namespace ttml {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr size_t kTrackId = 0;  // Read-only: only passed as the first argument to DispatchMediaSample().
|
||||||
|
|
||||||
|
std::shared_ptr<MediaSample> CreateMediaSample(const std::string& data,
|
||||||
|
int64_t start_time,
|
||||||
|
int64_t duration) {
|
||||||
|
DCHECK_GE(start_time, 0);
|
||||||
|
DCHECK_GT(duration, 0);
|
||||||
|
|
||||||
|
const bool kIsKeyFrame = true;
|
||||||
|
|
||||||
|
std::shared_ptr<MediaSample> sample = MediaSample::CopyFrom(
|
||||||
|
reinterpret_cast<const uint8_t*>(data.data()), data.size(), kIsKeyFrame);
|
||||||
|
sample->set_pts(start_time);
|
||||||
|
sample->set_dts(start_time);
|
||||||
|
sample->set_duration(duration);
|
||||||
|
|
||||||
|
return sample;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
Status TtmlToMp4Handler::InitializeInternal() {
|
||||||
|
return Status::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status TtmlToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
|
||||||
|
switch (stream_data->stream_data_type) {
|
||||||
|
case StreamDataType::kStreamInfo:
|
||||||
|
return OnStreamInfo(std::move(stream_data));
|
||||||
|
case StreamDataType::kCueEvent:
|
||||||
|
return OnCueEvent(std::move(stream_data));
|
||||||
|
case StreamDataType::kSegmentInfo:
|
||||||
|
return OnSegmentInfo(std::move(stream_data));
|
||||||
|
case StreamDataType::kTextSample:
|
||||||
|
return OnTextSample(std::move(stream_data));
|
||||||
|
default:
|
||||||
|
return Status(error::INTERNAL_ERROR,
|
||||||
|
"Invalid stream data type (" +
|
||||||
|
StreamDataTypeToString(stream_data->stream_data_type) +
|
||||||
|
") for this TtmlToMp4 handler");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Status TtmlToMp4Handler::OnStreamInfo(std::unique_ptr<StreamData> stream_data) {
|
||||||
|
DCHECK(stream_data);
|
||||||
|
DCHECK(stream_data->stream_info);
|
||||||
|
|
||||||
|
auto clone = stream_data->stream_info->Clone();
|
||||||
|
clone->set_codec(kCodecTtml);
|
||||||
|
clone->set_codec_string("ttml");
|
||||||
|
|
||||||
|
if (clone->stream_type() != kStreamText)
|
||||||
|
return Status(error::MUXER_FAILURE, "Incorrect stream type");
|
||||||
|
auto* text_stream = static_cast<const TextStreamInfo*>(clone.get());
|
||||||
|
generator_.Initialize(text_stream->regions(), text_stream->language(),
|
||||||
|
text_stream->time_scale());
|
||||||
|
|
||||||
|
return Dispatch(
|
||||||
|
StreamData::FromStreamInfo(stream_data->stream_index, std::move(clone)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Status TtmlToMp4Handler::OnCueEvent(std::unique_ptr<StreamData> stream_data) {
|
||||||
|
DCHECK(stream_data);
|
||||||
|
DCHECK(stream_data->cue_event);
|
||||||
|
return Dispatch(std::move(stream_data));
|
||||||
|
}
|
||||||
|
|
||||||
|
Status TtmlToMp4Handler::OnSegmentInfo(
|
||||||
|
std::unique_ptr<StreamData> stream_data) {
|
||||||
|
DCHECK(stream_data);
|
||||||
|
DCHECK(stream_data->segment_info);
|
||||||
|
|
||||||
|
const auto& segment = stream_data->segment_info;
|
||||||
|
|
||||||
|
std::string data;
|
||||||
|
if (!generator_.Dump(&data))
|
||||||
|
return Status(error::INTERNAL_ERROR, "Error generating XML");
|
||||||
|
generator_.Reset();
|
||||||
|
|
||||||
|
RETURN_IF_ERROR(DispatchMediaSample(
|
||||||
|
kTrackId,
|
||||||
|
CreateMediaSample(data, segment->start_timestamp, segment->duration)));
|
||||||
|
|
||||||
|
return Dispatch(std::move(stream_data));
|
||||||
|
}
|
||||||
|
|
||||||
|
Status TtmlToMp4Handler::OnTextSample(std::unique_ptr<StreamData> stream_data) {
|
||||||
|
DCHECK(stream_data);
|
||||||
|
DCHECK(stream_data->text_sample);
|
||||||
|
|
||||||
|
auto& sample = stream_data->text_sample;
|
||||||
|
|
||||||
|
// Ignore empty samples. This will create gaps, but we will handle that
|
||||||
|
// later.
|
||||||
|
if (sample->body().is_empty()) {
|
||||||
|
return Status::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the new text sample to the cache of samples that belong in the
|
||||||
|
// current segment.
|
||||||
|
generator_.AddSample(*sample);
|
||||||
|
return Status::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ttml
|
||||||
|
} // namespace media
|
||||||
|
} // namespace shaka
|
|
@ -0,0 +1,43 @@
|
||||||
|
// Copyright 2020 Google LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file or at
|
||||||
|
// https://developers.google.com/open-source/licenses/bsd
|
||||||
|
|
||||||
|
#ifndef PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_
|
||||||
|
#define PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "packager/media/base/media_handler.h"
|
||||||
|
#include "packager/media/formats/ttml/ttml_generator.h"
|
||||||
|
|
||||||
|
namespace shaka {
|
||||||
|
namespace media {
|
||||||
|
namespace ttml {
|
||||||
|
|
||||||
|
// A media handler that should come after the cue aligner and segmenter and
|
||||||
|
// should come before the muxer. This handler converts text samples
|
||||||
|
// to media samples so that they can be sent to an MP4 muxer.
|
||||||
|
class TtmlToMp4Handler : public MediaHandler {
|
||||||
|
public:
|
||||||
|
TtmlToMp4Handler() = default;
|
||||||
|
~TtmlToMp4Handler() override = default;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Status InitializeInternal() override;
|
||||||
|
Status Process(std::unique_ptr<StreamData> stream_data) override;
|
||||||
|
|
||||||
|
Status OnStreamInfo(std::unique_ptr<StreamData> stream_data);
|
||||||
|
Status OnCueEvent(std::unique_ptr<StreamData> stream_data);
|
||||||
|
Status OnSegmentInfo(std::unique_ptr<StreamData> stream_data);
|
||||||
|
Status OnTextSample(std::unique_ptr<StreamData> stream_data);
|
||||||
|
|
||||||
|
TtmlGenerator generator_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ttml
|
||||||
|
} // namespace media
|
||||||
|
} // namespace shaka
|
||||||
|
|
||||||
|
#endif // PACKAGER_MEDIA_FORMATS_TTML_TTML_TO_MP4_HANDLER_H_
|
|
@ -40,6 +40,7 @@
|
||||||
#include "packager/media/demuxer/demuxer.h"
|
#include "packager/media/demuxer/demuxer.h"
|
||||||
#include "packager/media/event/muxer_listener_factory.h"
|
#include "packager/media/event/muxer_listener_factory.h"
|
||||||
#include "packager/media/event/vod_media_info_dump_muxer_listener.h"
|
#include "packager/media/event/vod_media_info_dump_muxer_listener.h"
|
||||||
|
#include "packager/media/formats/ttml/ttml_to_mp4_handler.h"
|
||||||
#include "packager/media/formats/webvtt/text_padder.h"
|
#include "packager/media/formats/webvtt/text_padder.h"
|
||||||
#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
|
#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
|
||||||
#include "packager/media/replicator/replicator.h"
|
#include "packager/media/replicator/replicator.h"
|
||||||
|
@ -161,6 +162,27 @@ MediaContainerName GetOutputFormat(const StreamDescriptor& descriptor) {
|
||||||
return CONTAINER_UNKNOWN;
|
return CONTAINER_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MediaContainerName GetTextOutputCodec(const StreamDescriptor& descriptor) {
|
||||||
|
const auto output_container = GetOutputFormat(descriptor);
|
||||||
|
if (output_container != CONTAINER_MOV)
|
||||||
|
return output_container;
|
||||||
|
|
||||||
|
const auto input_container = DetermineContainerFromFileName(descriptor.input);
|
||||||
|
if (base::EqualsCaseInsensitiveASCII(descriptor.output_format, "vtt+mp4") ||
|
||||||
|
base::EqualsCaseInsensitiveASCII(descriptor.output_format,
|
||||||
|
"webvtt+mp4")) {
|
||||||
|
return CONTAINER_WEBVTT;
|
||||||
|
} else if (!base::EqualsCaseInsensitiveASCII(descriptor.output_format,
|
||||||
|
"ttml+mp4") &&
|
||||||
|
input_container == CONTAINER_WEBVTT) {
|
||||||
|
// With WebVTT input, default to WebVTT output.
|
||||||
|
return CONTAINER_WEBVTT;
|
||||||
|
} else {
|
||||||
|
// Otherwise default to TTML since it has more features.
|
||||||
|
return CONTAINER_TTML;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Status ValidateStreamDescriptor(bool dump_stream_info,
|
Status ValidateStreamDescriptor(bool dump_stream_info,
|
||||||
const StreamDescriptor& stream) {
|
const StreamDescriptor& stream) {
|
||||||
if (stream.input.empty()) {
|
if (stream.input.empty()) {
|
||||||
|
@ -640,27 +662,32 @@ Status CreateAudioVideoJobs(
|
||||||
muxer_listener_factory->CreateListener(ToMuxerListenerData(stream));
|
muxer_listener_factory->CreateListener(ToMuxerListenerData(stream));
|
||||||
muxer->SetMuxerListener(std::move(muxer_listener));
|
muxer->SetMuxerListener(std::move(muxer_listener));
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<MediaHandler>> handlers;
|
||||||
|
handlers.emplace_back(replicator);
|
||||||
|
|
||||||
// Trick play is optional.
|
// Trick play is optional.
|
||||||
std::shared_ptr<MediaHandler> trick_play =
|
if (stream.trick_play_factor) {
|
||||||
stream.trick_play_factor
|
handlers.emplace_back(
|
||||||
? std::make_shared<TrickPlayHandler>(stream.trick_play_factor)
|
std::make_shared<TrickPlayHandler>(stream.trick_play_factor));
|
||||||
: nullptr;
|
}
|
||||||
|
|
||||||
std::shared_ptr<MediaHandler> chunker =
|
if (is_text &&
|
||||||
is_text && (!stream.segment_template.empty() ||
|
(!stream.segment_template.empty() || output_format == CONTAINER_MOV)) {
|
||||||
output_format == CONTAINER_MOV)
|
handlers.emplace_back(
|
||||||
? CreateTextChunker(packaging_params.chunking_params)
|
CreateTextChunker(packaging_params.chunking_params));
|
||||||
: nullptr;
|
}
|
||||||
|
|
||||||
// TODO(modmaker): Move to MOV muxer?
|
if (is_text && output_format == CONTAINER_MOV) {
|
||||||
const auto input_container = DetermineContainerFromFileName(stream.input);
|
const auto output_codec = GetTextOutputCodec(stream);
|
||||||
auto text_to_mp4 =
|
if (output_codec == CONTAINER_WEBVTT) {
|
||||||
input_container == CONTAINER_WEBVTT && output_format == CONTAINER_MOV
|
handlers.emplace_back(std::make_shared<WebVttToMp4Handler>());
|
||||||
? std::make_shared<WebVttToMp4Handler>()
|
} else if (output_codec == CONTAINER_TTML) {
|
||||||
: nullptr;
|
handlers.emplace_back(std::make_shared<ttml::TtmlToMp4Handler>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RETURN_IF_ERROR(MediaHandler::Chain(
|
handlers.emplace_back(muxer);
|
||||||
{replicator, trick_play, chunker, text_to_mp4, muxer}));
|
RETURN_IF_ERROR(MediaHandler::Chain(handlers));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Status::OK;
|
return Status::OK;
|
||||||
|
|
Loading…
Reference in New Issue