feat: Parse MPEG-TS PMT ES language and maximum bitrate descriptors (#369) (#1311)

Part of https://github.com/shaka-project/shaka-packager/issues/369

This adds read support for some MPEG-TS PMT elementary stream
descriptors:
- ISO639 Language Descriptor providing language code and audio type
- Maximum Bitrate Descriptor providing peak stream bandwidth

Those metadata are propagated to StreamInfo structures:
- StreamInfo.language field
- AudioStreamMetadata.max_bitrate field for audio streams
- audio type is currently not propagated - corresponding field has to be
added to AudioStreamMetadata

Test vector file containing those descriptors is provided.
This commit is contained in:
modernletter 2024-02-08 20:58:26 +01:00 committed by GitHub
parent 2ba67bc24c
commit c09eb831b8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 179 additions and 12 deletions

View File

@ -60,6 +60,10 @@ class AudioStreamInfo : public StreamInfo {
sampling_frequency_ = sampling_frequency;
}
/// Sets the maximum (peak) bitrate of this audio stream.
/// @param max_bitrate is stored as-is in |max_bitrate_|. The MPEG-TS PMT
///        parser supplies this value in bits per second.
void set_max_bitrate(const uint32_t max_bitrate) {
max_bitrate_ = max_bitrate;
}
/// @param audio_object_type is only used by AAC Codec, ignored otherwise.
/// @return The codec string.
static std::string GetCodecString(Codec codec, uint8_t audio_object_type);

View File

@ -33,6 +33,7 @@ add_library(mp2t STATIC
pes_packet_generator.h
program_map_table_writer.cc
program_map_table_writer.h
ts_audio_type.h
ts_muxer.cc
ts_muxer.h
ts_packet.cc

View File

@ -19,6 +19,7 @@
#include <packager/media/formats/mp2t/es_parser_h264.h>
#include <packager/media/formats/mp2t/es_parser_h265.h>
#include <packager/media/formats/mp2t/mp2t_common.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_packet.h>
#include <packager/media/formats/mp2t/ts_section.h>
#include <packager/media/formats/mp2t/ts_section_pat.h>
@ -274,7 +275,8 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) {
DVLOG(1) << "Create a new PMT parser";
std::unique_ptr<TsSection> pmt_section_parser(new TsSectionPmt(std::bind(
&Mp2tMediaParser::RegisterPes, this, pmt_pid, std::placeholders::_1,
std::placeholders::_2, std::placeholders::_3, std::placeholders::_4)));
std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
std::placeholders::_5, std::placeholders::_6, std::placeholders::_7)));
std::unique_ptr<PidState> pmt_pid_state(
new PidState(pmt_pid, PidState::kPidPmt, std::move(pmt_section_parser)));
pmt_pid_state->Enable();
@ -284,13 +286,19 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) {
void Mp2tMediaParser::RegisterPes(int pmt_pid,
int pes_pid,
TsStreamType stream_type,
uint32_t max_bitrate,
const std::string& lang,
TsAudioType audio_type,
const uint8_t* descriptor,
size_t descriptor_length) {
if (pids_.count(pes_pid) != 0)
return;
DVLOG(1) << "RegisterPes:"
<< " pes_pid=" << pes_pid << " stream_type=" << std::hex
<< static_cast<int>(stream_type) << std::dec;
<< static_cast<int>(stream_type) << std::dec
<< "max_bitrate=" << max_bitrate << " lang=" << lang
<< "audio_type=" << std::hex << static_cast<int>(audio_type)
<< std::dec;
// Create a stream parser corresponding to the stream type.
PidState::PidType pid_type = PidState::kPidVideoPes;
@ -340,6 +348,10 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
new PidState(pes_pid, pid_type, std::move(pes_section_parser)));
pes_pid_state->Enable();
pids_.emplace(pes_pid, std::move(pes_pid_state));
// Store PES metadata.
pes_metadata_.insert(
std::make_pair(pes_pid, PesMetadata{max_bitrate, lang, audio_type}));
}
void Mp2tMediaParser::OnNewStreamInfo(
@ -358,6 +370,17 @@ void Mp2tMediaParser::OnNewStreamInfo(
if (new_stream_info) {
// Set the stream configuration information for the PID.
auto pes_metadata = pes_metadata_.find(pes_pid);
DCHECK(pes_metadata != pes_metadata_.end());
if (!pes_metadata->second.language.empty())
new_stream_info->set_language(pes_metadata->second.language);
if (new_stream_info->stream_type() == kStreamAudio) {
auto* audio_info = static_cast<AudioStreamInfo*>(new_stream_info.get());
audio_info->set_max_bitrate(pes_metadata->second.max_bitrate);
// TODO(modernletter) Add some field for audio type to AudioStreamInfo
// and set here from audio_type
}
pid_state->second->set_config(new_stream_info);
} else {
LOG(WARNING) << "Ignoring unsupported stream with pid=" << pes_pid;

View File

@ -9,11 +9,13 @@
#include <deque>
#include <map>
#include <memory>
#include <string>
#include <packager/macros/classes.h>
#include <packager/media/base/byte_queue.h>
#include <packager/media/base/media_parser.h>
#include <packager/media/base/stream_info.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_stream_type.h>
namespace shaka {
@ -27,6 +29,12 @@ class PidState;
class TsPacket;
class TsSection;
// Metadata collected from the PMT elementary stream descriptors of one PES
// PID. Every member has a default so that a default-constructed instance
// never exposes indeterminate values; aggregate initialization in member
// order (max_bitrate, language, audio_type) keeps working.
struct PesMetadata {
// Peak bitrate from the maximum bitrate descriptor; 0 when not signalled.
uint32_t max_bitrate = 0;
// Language code from the ISO 639 language descriptor; empty when not
// signalled.
std::string language;
// Audio type from the ISO 639 language descriptor. Value-initialized to
// zero, which corresponds to TsAudioType::kUndefined.
TsAudioType audio_type{};
};
class Mp2tMediaParser : public MediaParser {
public:
Mp2tMediaParser();
@ -50,10 +58,15 @@ class Mp2tMediaParser : public MediaParser {
// Callback invoked to register a PES pid.
// Possible values for |media_type| are defined in:
// ISO-13818.1 / ITU H.222 Table 2.34 "Media type assignments".
// Possible values for |audio_type| are defined in:
// ISO-13818.1 / ITU H.222 Table 2-60 "Audio type values".
// |pes_pid| is part of the Program Map Table referred to by |pmt_pid|.
void RegisterPes(int pmt_pid,
int pes_pid,
TsStreamType media_type,
uint32_t max_bitrate,
const std::string& lang,
TsAudioType audio_type,
const uint8_t* descriptor,
size_t descriptor_length);
@ -94,6 +107,9 @@ class Mp2tMediaParser : public MediaParser {
// has a deterministic order.
std::map<int, std::unique_ptr<PidState>> pids_;
// Map of PIDs and their metadata.
std::map<int, PesMetadata> pes_metadata_;
// Whether |init_cb_| has been invoked.
bool is_initialized_;

View File

@ -12,6 +12,7 @@
#include <gtest/gtest.h>
#include <packager/macros/logging.h>
#include <packager/media/base/audio_stream_info.h>
#include <packager/media/base/media_sample.h>
#include <packager/media/base/stream_info.h>
#include <packager/media/base/timestamp.h>
@ -190,6 +191,19 @@ TEST_F(Mp2tMediaParserTest, PtsZeroDtsWrapAround) {
EXPECT_GT(video_max_pts_, static_cast<int64_t>(1) << 33);
}
// Verifies that the PMT ES descriptors (ISO 639 language and maximum bitrate)
// are propagated to the resulting audio stream info.
TEST_F(Mp2tMediaParserTest, PmtEsDescriptors) {
// "bear-visualy-impaired-eng-audio.ts" consists of an audio stream marked as
// English audio with commentary for visually impaired viewers, with the max
// bitrate set to ~128kbps.
ParseMpeg2TsFile("bear-visualy-impaired-eng-audio.ts", 188);
EXPECT_TRUE(parser_->Flush());
// 257 (0x0101) is the elementary PID of the audio stream in the test vector.
EXPECT_STREQ("eng", stream_map_[257]->language().c_str());
auto* audio_info = static_cast<AudioStreamInfo*>(stream_map_[257].get());
EXPECT_EQ(131600, audio_info->max_bitrate());
}
} // namespace mp2t
} // namespace media
} // namespace shaka

View File

@ -0,0 +1,30 @@
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H
#define PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H
#include <stdint.h>
namespace shaka {
namespace media {
namespace mp2t {
// Audio type signalled for an MPEG-TS elementary stream. It is read from the
// byte that follows the language code in the ISO 639 language descriptor of
// the PMT.
// NOTE(review): the PMT parser appears to store the raw descriptor byte, so
// values without a named enumerator (user private / reserved) can presumably
// occur - confirm before switching on this type exhaustively.
enum class TsAudioType : uint8_t {
// ISO-13818.1 / ITU H.222 Table 2-60 "Audio type values"
kUndefined = 0x00,
kCleanEffects = 0x01,
kHearingImpaired = 0x02,
kVisualyImpairedCommentary = 0x03,
// 0x04-0x7F - user private
// 0x80-0xFF - reserved
};
} // namespace mp2t
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H

View File

@ -10,12 +10,21 @@
#include <packager/media/base/bit_reader.h>
#include <packager/media/formats/mp2t/mp2t_common.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_stream_type.h>
namespace shaka {
namespace media {
namespace mp2t {
namespace {

// descriptor_tag values recognized while walking the ES info loop of a PMT.
// Carries the stream language code and audio type.
constexpr int kISO639LanguageDescriptor = 0x0A;
// Carries the peak bitrate of the stream.
constexpr int kMaximumBitrateDescriptor = 0x0E;
// Marks a private-data PES as DVB subtitles (ETSI EN 300 468).
constexpr int kSubtitlingDescriptor = 0x59;

}  // namespace
TsSectionPmt::TsSectionPmt(const RegisterPesCb& register_pes_cb)
: register_pes_cb_(register_pes_cb) {
}
@ -82,6 +91,9 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
TsStreamType stream_type;
const uint8_t* descriptor;
size_t descriptor_length;
std::string lang;
uint32_t max_bitrate;
TsAudioType audio_type;
};
std::vector<Info> pid_info;
while (static_cast<int>(bit_reader->bits_available()) >
@ -99,22 +111,59 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
// Do not register the PID right away.
// Wait for the end of the section to be fully parsed
// to make sure there is no error.
pid_info.push_back({pid_es, stream_type, descriptor, es_info_length});
pid_info.push_back({pid_es, stream_type, descriptor, es_info_length, "", 0,
TsAudioType::kUndefined});
// Read the ES info descriptors.
// Defined in section 2.6 of ISO-13818.
if (es_info_length > 0) {
uint8_t descriptor_tag;
uint8_t descriptor_tag;
uint8_t descriptor_length;
while (es_info_length) {
RCHECK(bit_reader->ReadBits(8, &descriptor_tag));
es_info_length--;
RCHECK(bit_reader->ReadBits(8, &descriptor_length));
es_info_length -= 2;
// See ETSI EN 300 468 Section 6.1
if (stream_type == TsStreamType::kPesPrivateData &&
descriptor_tag == 0x59) { // subtitling_descriptor
descriptor_tag == kSubtitlingDescriptor) {
pid_info.back().stream_type = TsStreamType::kDvbSubtitles;
} else if (descriptor_tag == kISO639LanguageDescriptor &&
descriptor_length >= 4) {
// See section 2.6.19 of ISO-13818
// Descriptor can contain 0..N language definitions,
// we process only the first one
RCHECK(es_info_length >= 4);
char lang[3];
RCHECK(bit_reader->ReadBits(8, &lang[0])); // ISO_639_language_code
RCHECK(bit_reader->ReadBits(8, &lang[1]));
RCHECK(bit_reader->ReadBits(8, &lang[2]));
RCHECK(bit_reader->ReadBits(8, &pid_info.back().audio_type));
pid_info.back().lang = std::string(lang, 3);
es_info_length -= 4;
descriptor_length -= 4;
} else if (descriptor_tag == kMaximumBitrateDescriptor &&
descriptor_length >= 3) {
// See section 2.6.25 of ISO-13818
RCHECK(es_info_length >= 3);
uint32_t max_bitrate;
RCHECK(bit_reader->SkipBits(2)); // reserved
RCHECK(bit_reader->ReadBits(22, &max_bitrate));
// maximum bitrate is stored in units of 50 bytes per second
pid_info.back().max_bitrate = 50 * 8 * max_bitrate;
es_info_length -= 3;
descriptor_length -= 3;
}
RCHECK(bit_reader->SkipBits(8 * descriptor_length));
es_info_length -= descriptor_length;
}
RCHECK(bit_reader->SkipBits(8 * es_info_length));
RCHECK(bit_reader->SkipBytes(es_info_length));
}
// Read the CRC.
@ -123,8 +172,8 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
// Once the PMT has been proved to be correct, register the PIDs.
for (auto& info : pid_info) {
register_pes_cb_(info.pid_es, info.stream_type, info.descriptor,
info.descriptor_length);
register_pes_cb_(info.pid_es, info.stream_type, info.max_bitrate, info.lang,
info.audio_type, info.descriptor, info.descriptor_length);
}
return true;

View File

@ -6,8 +6,10 @@
#define PACKAGER_MEDIA_FORMATS_MP2T_TS_SECTION_PMT_H_
#include <functional>
#include <string>
#include <packager/macros/classes.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_section_psi.h>
#include <packager/media/formats/mp2t/ts_stream_type.h>
@ -17,10 +19,20 @@ namespace mp2t {
class TsSectionPmt : public TsSectionPsi {
public:
// RegisterPesCb::Run(int pes_pid, int stream_type);
// RegisterPesCb::Run(int pes_pid, int stream_type, uint32_t max_bitrate,
// const string& lang, TsAudioType audio_type, uint8_t* descriptor,
// size_t descriptor_size);
// Stream type is defined in
// "Table 2-34 Stream type assignments" in H.222
typedef std::function<void(int, TsStreamType, const uint8_t*, size_t)>
// Audio type is defined in
// "Table 2-60 - Audio type values" in H.222
typedef std::function<void(int,
TsStreamType,
uint32_t,
const std::string&,
TsAudioType,
const uint8_t*,
size_t)>
RegisterPesCb;
explicit TsSectionPmt(const RegisterPesCb& register_pes_cb);

View File

@ -29,6 +29,12 @@ bear-640x360.ts - AVC + AAC encode, multiplexed into an MPEG2-TS container.
bear-640x360_ptswraparound.ts - Same as bear-640x360.ts, with a timestamp wrap-around in the middle, created with the below command:
ffmpeg -itsoffset 95442 -i bear-640x360.ts -c:v copy -c:a copy -muxdelay 0 bear-640x360_ptswraparound.ts
bear-640x360-hevc.ts - HEVC + AAC encode, multiplexed into an MPEG2-TS container.
bear-visualy-impaired-eng-audio.ts - Audio stream from bear-640x360.ts marked as English with commentary for visually impaired viewers, created with the below commands:
tsp -I file bear-640x360.ts \
-P filter --video --negate \
-P inject --replace --pid 4096 --xml bear-visualy-impaired-eng-audio-pmt.xml \
-O file bear-visualy-impaired-eng-audio.ts
(the XML template can be obtained with the command "tsp -I file bear-640x360.ts -P tables --pid 4096 --tid 2 --max 1 --xml pmt.xml -O drop")
// ISO-BMFF streams.
bear-1280x720.mp4 - AVC + AAC encode, multiplexed into an ISOBMFF container.

View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<tsduck>
<PMT version="0" current="true" service_id="0x0001" PCR_PID="0x0100">
<metadata PID="4,096"/>
<component elementary_PID="0x0101" stream_type="0x0F">
<ISO_639_language_descriptor>
<language code="eng" audio_type="0x03"/>
</ISO_639_language_descriptor>
<maximum_bitrate_descriptor maximum_bitrate="131,600"/>
</component>
</PMT>
</tsduck>