From 78be14c092de53c671d0827191afb380a5a8e327 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Mon, 5 Oct 2020 15:39:59 -0700 Subject: [PATCH] Add DVB-sub parser Note that this only supports a single page within the DVB-sub stream. Multiple pages will be merged together. A follow-up will allow selecting a specific page. This only supports outputting using TTML or MP4+TTML; you cannot have DVB-sub output nor can you output it in WebVTT. Since DVB-sub uses images, it is hard to impossible to do this with WebVTT. This also only supports interlaced images, not progressive images nor text. Closes #832 Change-Id: Id6dbb6393c7b9a05722e61c6bd255bef5e69a7d8 --- packager/media/formats/dvb/dvb.gyp | 3 + packager/media/formats/dvb/dvb_sub_parser.cc | 475 ++++++++++++++++++ packager/media/formats/dvb/dvb_sub_parser.h | 86 ++++ .../formats/dvb/dvb_sub_parser_unittest.cc | 305 +++++++++++ packager/media/formats/mp2t/es_parser_dvb.cc | 94 ++++ packager/media/formats/mp2t/es_parser_dvb.h | 54 ++ packager/media/formats/mp2t/mp2t.gyp | 3 + .../media/formats/mp2t/mp2t_media_parser.cc | 18 +- packager/media/formats/mp2t/ts_section_pmt.cc | 11 + packager/media/formats/mp2t/ts_stream_type.h | 4 + 10 files changed, 1048 insertions(+), 5 deletions(-) create mode 100644 packager/media/formats/dvb/dvb_sub_parser.cc create mode 100644 packager/media/formats/dvb/dvb_sub_parser.h create mode 100644 packager/media/formats/dvb/dvb_sub_parser_unittest.cc create mode 100644 packager/media/formats/mp2t/es_parser_dvb.cc create mode 100644 packager/media/formats/mp2t/es_parser_dvb.h diff --git a/packager/media/formats/dvb/dvb.gyp b/packager/media/formats/dvb/dvb.gyp index 55519ef89b..b6814d554f 100644 --- a/packager/media/formats/dvb/dvb.gyp +++ b/packager/media/formats/dvb/dvb.gyp @@ -15,6 +15,8 @@ 'sources': [ 'dvb_image.cc', 'dvb_image.h', + 'dvb_sub_parser.cc', + 'dvb_sub_parser.h', 'subtitle_composer.cc', 'subtitle_composer.h', ], @@ -28,6 +30,7 @@ 'type': '<(gtest_target_type)', 'sources': [ 
'dvb_image_unittest.cc', + 'dvb_sub_parser_unittest.cc', 'subtitle_composer_unittest.cc', ], 'dependencies': [ diff --git a/packager/media/formats/dvb/dvb_sub_parser.cc b/packager/media/formats/dvb/dvb_sub_parser.cc new file mode 100644 index 0000000000..04c217851f --- /dev/null +++ b/packager/media/formats/dvb/dvb_sub_parser.cc @@ -0,0 +1,475 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/dvb/dvb_sub_parser.h" + +#include "packager/base/logging.h" +#include "packager/media/formats/mp2t/mp2t_common.h" + +namespace shaka { +namespace media { + +namespace { + +RgbaColor ConvertYuv(uint8_t Y, uint8_t Cr, uint8_t Cb, uint8_t T) { + // BT.601 studio range: luma spans 219 steps (16..235), chroma spans 224 (16..240). See https://en.wikipedia.org/wiki/YCbCr + RgbaColor color; + const double y_transform = 255.0 / 219 * (Y - 16); + const double cb_transform = 255.0 / 224 * 1.772 * (Cb - 128); + const double cr_transform = 255.0 / 224 * 1.402 * (Cr - 128); + const double f1 = 0.114 / 0.587; + const double f2 = 0.299 / 0.587; + color.r = static_cast(y_transform + cr_transform); + color.g = + static_cast(y_transform - cb_transform * f1 - cr_transform * f2); + color.b = static_cast(y_transform + cb_transform); + color.a = 255 - T; + return color; +} + +} // namespace + +DvbSubParser::DvbSubParser() : last_pts_(0), timeout_(0) {} + +DvbSubParser::~DvbSubParser() {} + +bool DvbSubParser::Parse(DvbSubSegmentType segment_type, + int64_t pts, + const uint8_t* payload, + size_t size, + std::vector>* samples) { + switch (segment_type) { + case DvbSubSegmentType::kPageComposition: + return ParsePageComposition(pts, payload, size, samples); + case DvbSubSegmentType::kRegionComposition: + return ParseRegionComposition(payload, size); + case DvbSubSegmentType::kClutDefinition: + return ParseClutDefinition(payload, size); + case 
DvbSubSegmentType::kObjectData: + return ParseObjectData(pts, payload, size); + case DvbSubSegmentType::kDisplayDefinition: + return ParseDisplayDefinition(payload, size); + case DvbSubSegmentType::kEndOfDisplay: + // This signals all the current objects are available. But we need to + // know the end time, so we do nothing for now. + return true; + default: + LOG(WARNING) << "Unknown DVB-sub segment_type=0x" << std::hex + << static_cast(segment_type); + return true; + } +} + +bool DvbSubParser::Flush(std::vector>* samples) { + RCHECK(composer_.GetSamples(last_pts_, last_pts_ + timeout_ * kMpeg2Timescale, + samples)); + composer_.ClearObjects(); + return true; +} + +const DvbImageColorSpace* DvbSubParser::GetColorSpace(uint8_t clut_id) { + return composer_.GetColorSpace(clut_id); +} + +const DvbImageBuilder* DvbSubParser::GetImageForObject(uint16_t object_id) { + return composer_.GetObjectImage(object_id); +} + +bool DvbSubParser::ParsePageComposition( + int64_t pts, + const uint8_t* data, + size_t size, + std::vector>* samples) { + // See ETSI EN 300 743 Section 7.2.2. + BitReader reader(data, size); + + uint8_t page_state; + RCHECK(reader.ReadBits(8, &timeout_)); + RCHECK(reader.SkipBits(4)); // page_version_number + RCHECK(reader.ReadBits(2, &page_state)); + RCHECK(reader.SkipBits(2)); // reserved + if (page_state == 0x1 || page_state == 0x2) { + // If this is a "acquisition point" or a "mode change", then this is a new + // page and we should clear the old data. 
+ RCHECK(composer_.GetSamples(last_pts_, pts, samples)); + composer_.ClearObjects(); + last_pts_ = pts; + } + + while (reader.bits_available() > 0u) { + uint8_t region_id; + uint16_t x, y; + RCHECK(reader.ReadBits(8, &region_id)); + RCHECK(reader.SkipBits(8)); // reserved + RCHECK(reader.ReadBits(16, &x)); + RCHECK(reader.ReadBits(16, &y)); + + RCHECK(composer_.SetRegionPosition(region_id, x, y)); + } + + return true; +} + +bool DvbSubParser::ParseRegionComposition(const uint8_t* data, size_t size) { + // See ETSI EN 300 743 Section 7.2.3. + BitReader reader(data, size); + + uint8_t region_id, clut_id; + uint16_t region_width, region_height; + bool region_fill_flag; + int background_pixel_code; + RCHECK(reader.ReadBits(8, &region_id)); + RCHECK(reader.SkipBits(4)); // region_version_number + RCHECK(reader.ReadBits(1, &region_fill_flag)); + RCHECK(reader.SkipBits(3)); // reserved + RCHECK(reader.ReadBits(16, &region_width)); + RCHECK(reader.ReadBits(16, &region_height)); + RCHECK(reader.SkipBits(3)); // region_level_of_compatibility + RCHECK(reader.SkipBits(3)); // region_depth + RCHECK(reader.SkipBits(2)); // reserved + RCHECK(reader.ReadBits(8, &clut_id)); + RCHECK(reader.ReadBits(8, &background_pixel_code)); + RCHECK(reader.SkipBits(4)); // region_4-bit_pixel_code + RCHECK(reader.SkipBits(2)); // region_2-bit_pixel_code + RCHECK(reader.SkipBits(2)); // reserved + RCHECK( + composer_.SetRegionInfo(region_id, clut_id, region_width, region_height)); + if (!region_fill_flag) + background_pixel_code = -1; + + while (reader.bits_available() > 0) { + uint16_t object_id, x, y; + uint8_t object_type; + RCHECK(reader.ReadBits(16, &object_id)); + RCHECK(reader.ReadBits(2, &object_type)); + RCHECK(reader.SkipBits(2)); // object_provider_flag + RCHECK(reader.ReadBits(12, &x)); + RCHECK(reader.SkipBits(4)); // reserved + RCHECK(reader.ReadBits(12, &y)); + + if (object_type == 0x01 || object_type == 0x02) { + RCHECK(reader.SkipBits(8)); // foreground_pixel_code + RCHECK(reader.SkipBits(8)); 
// background_pixel_code + } + RCHECK(composer_.SetObjectInfo(object_id, region_id, x, y, + background_pixel_code)); + } + + return true; +} + +bool DvbSubParser::ParseClutDefinition(const uint8_t* data, size_t size) { + // See ETSI EN 300 743 Section 7.2.4. + BitReader reader(data, size); + + uint8_t clut_id; + RCHECK(reader.ReadBits(8, &clut_id)); + auto* color_space = composer_.GetColorSpace(clut_id); + RCHECK(reader.SkipBits(4)); // CLUT_version_number + RCHECK(reader.SkipBits(4)); // reserved + while (reader.bits_available() > 0) { + uint8_t clut_entry_id; + uint8_t has_2_bit; + uint8_t has_4_bit; + uint8_t has_8_bit; + uint8_t full_range_flag; + RCHECK(reader.ReadBits(8, &clut_entry_id)); + RCHECK(reader.ReadBits(1, &has_2_bit)); + RCHECK(reader.ReadBits(1, &has_4_bit)); + RCHECK(reader.ReadBits(1, &has_8_bit)); + RCHECK(reader.SkipBits(4)); // reserved + RCHECK(reader.ReadBits(1, &full_range_flag)); + + if (has_2_bit + has_4_bit + has_8_bit != 1) { + LOG(ERROR) << "Must specify exactly one bit depth in CLUT definition"; + return false; + } + const BitDepth bit_depth = + has_2_bit ? BitDepth::k2Bit + : (has_4_bit ? BitDepth::k4Bit : BitDepth::k8Bit); + + uint8_t Y, Cr, Cb, T; + if (full_range_flag) { + RCHECK(reader.ReadBits(8, &Y)); + RCHECK(reader.ReadBits(8, &Cr)); + RCHECK(reader.ReadBits(8, &Cb)); + RCHECK(reader.ReadBits(8, &T)); + } else { + // These store the most-significant bits, so shift them up. + RCHECK(reader.ReadBits(6, &Y)); + Y <<= 2; + RCHECK(reader.ReadBits(4, &Cr)); + Cr <<= 4; + RCHECK(reader.ReadBits(4, &Cb)); + Cb <<= 4; + RCHECK(reader.ReadBits(2, &T)); + T <<= 6; + } + color_space->SetColor(bit_depth, clut_entry_id, ConvertYuv(Y, Cr, Cb, T)); + } + + return true; +} + +bool DvbSubParser::ParseObjectData(int64_t pts, + const uint8_t* data, + size_t size) { + // See ETSI EN 300 743 Section 7.2.5 Table 17. 
+ BitReader reader(data, size); + + uint16_t object_id; + uint8_t object_coding_method; + RCHECK(reader.ReadBits(16, &object_id)); + RCHECK(reader.SkipBits(4)); // object_version_number + RCHECK(reader.ReadBits(2, &object_coding_method)); + RCHECK(reader.SkipBits(1)); // non_modifying_colour_flag + RCHECK(reader.SkipBits(1)); // reserved + + auto* image = composer_.GetObjectImage(object_id); + auto* color_space = composer_.GetColorSpaceForObject(object_id); + if (!image || !color_space) + return false; + + if (object_coding_method == 0) { + uint16_t top_field_length; + uint16_t bottom_field_length; + RCHECK(reader.ReadBits(16, &top_field_length)); + RCHECK(reader.ReadBits(16, &bottom_field_length)); + + RCHECK(ParsePixelDataSubObject(top_field_length, true, &reader, color_space, + image)); + RCHECK(ParsePixelDataSubObject(bottom_field_length, false, &reader, + color_space, image)); + // Ignore 8_stuff_bits since we don't need to read to the end. + + if (bottom_field_length == 0) { + // If there are no bottom rows, then the top rows are used instead. See + // beginning of section 7.2.5.1. + image->MirrorToBottomRows(); + } + } else { + LOG(ERROR) << "Unsupported DVB-sub object coding method: " + << static_cast(object_coding_method); + return false; + } + return true; +} + +bool DvbSubParser::ParseDisplayDefinition(const uint8_t* data, size_t size) { + // See ETSI EN 300 743 Section 7.2.1. + BitReader reader(data, size); + + uint16_t width, height; + RCHECK(reader.SkipBits(4)); // dds_version_number + RCHECK(reader.SkipBits(1)); // display_window_flag + RCHECK(reader.SkipBits(3)); // reserved + RCHECK(reader.ReadBits(16, &width)); + RCHECK(reader.ReadBits(16, &height)); + // Size is stored as -1. 
+ composer_.SetDisplaySize(width + 1, height + 1); + + return true; +} + +bool DvbSubParser::ParsePixelDataSubObject(size_t sub_object_length, + bool is_top_fields, + BitReader* reader, + DvbImageColorSpace* color_space, + DvbImageBuilder* image) { + const size_t start = reader->bit_position() / 8; + while (reader->bit_position() / 8 < start + sub_object_length) { + // See ETSI EN 300 743 Section 7.2.5.1 Table 20 + uint8_t data_type; + RCHECK(reader->ReadBits(8, &data_type)); + uint8_t temp[16]; + switch (data_type) { + case 0x10: + RCHECK(Parse2BitPixelData(is_top_fields, reader, image)); + reader->SkipToNextByte(); + break; + case 0x11: + RCHECK(Parse4BitPixelData(is_top_fields, reader, image)); + reader->SkipToNextByte(); + break; + case 0x12: + RCHECK(Parse8BitPixelData(is_top_fields, reader, image)); + break; + case 0x20: + for (int i = 0; i < 4; i++) { + RCHECK(reader->ReadBits(4, &temp[i])); + } + color_space->Set2To4BitDepthMap(temp); + break; + case 0x21: + for (int i = 0; i < 4; i++) { + RCHECK(reader->ReadBits(8, &temp[i])); + } + color_space->Set2To8BitDepthMap(temp); + break; + case 0x22: + for (int i = 0; i < 16; i++) { + RCHECK(reader->ReadBits(8, &temp[i])); + } + color_space->Set4To8BitDepthMap(temp); + break; + case 0xf0: + image->NewRow(is_top_fields); + break; + default: + LOG(ERROR) << "Unsupported DVB-sub pixel data format: 0x" << std::hex + << static_cast(data_type); + return false; + } + } + return true; +} + +bool DvbSubParser::Parse2BitPixelData(bool is_top_fields, + BitReader* reader, + DvbImageBuilder* image) { + // 2-bit/pixel code string, Section 7.2.5.2.1, Table 22. 
+ while (true) { + uint8_t peek; + RCHECK(reader->ReadBits(2, &peek)); + if (peek != 0) { + RCHECK(image->AddPixel(BitDepth::k2Bit, peek, is_top_fields)); + } else { + uint8_t switch_1; + RCHECK(reader->ReadBits(1, &switch_1)); + if (switch_1 == 1) { + uint8_t count_minus_3; + RCHECK(reader->ReadBits(3, &count_minus_3)); // run_length_3-10 + RCHECK(reader->ReadBits(2, &peek)); + for (int i = 0; i < count_minus_3 + 3; i++) + RCHECK(image->AddPixel(BitDepth::k2Bit, peek, is_top_fields)); + } else { + uint8_t switch_2; + RCHECK(reader->ReadBits(1, &switch_2)); + if (switch_2 == 1) { + RCHECK(image->AddPixel(BitDepth::k2Bit, 0, is_top_fields)); + } else { + uint8_t switch_3; + RCHECK(reader->ReadBits(2, &switch_3)); + if (switch_3 == 0) { + break; + } else if (switch_3 == 1) { + RCHECK(image->AddPixel(BitDepth::k2Bit, 0, is_top_fields)); + RCHECK(image->AddPixel(BitDepth::k2Bit, 0, is_top_fields)); + } else if (switch_3 == 2) { + uint8_t count_minus_12; + RCHECK(reader->ReadBits(4, &count_minus_12)); // run_length_12-27 + RCHECK(reader->ReadBits(2, &peek)); + for (int i = 0; i < count_minus_12 + 12; i++) + RCHECK(image->AddPixel(BitDepth::k2Bit, peek, is_top_fields)); + } else if (switch_3 == 3) { + uint8_t count_minus_29; + RCHECK(reader->ReadBits(8, &count_minus_29)); // run_length_29-284: exceeds uint8_t, so count with int + RCHECK(reader->ReadBits(2, &peek)); + for (int i = 0; i < count_minus_29 + 29; i++) + RCHECK(image->AddPixel(BitDepth::k2Bit, peek, is_top_fields)); + } + } + } + } + } + + return true; +} + +bool DvbSubParser::Parse4BitPixelData(bool is_top_fields, + BitReader* reader, + DvbImageBuilder* image) { + // 4-bit/pixel code string, Section 7.2.5.2.2, Table 24. 
+ DCHECK(reader->bits_available() % 8 == 0); + while (true) { + uint8_t peek; + RCHECK(reader->ReadBits(4, &peek)); + if (peek != 0) { + RCHECK(image->AddPixel(BitDepth::k4Bit, peek, is_top_fields)); + } else { + uint8_t switch_1; + RCHECK(reader->ReadBits(1, &switch_1)); + if (switch_1 == 0) { + RCHECK(reader->ReadBits(3, &peek)); + if (peek != 0) { + for (int i = 0; i < peek + 2; i++) + RCHECK(image->AddPixel(BitDepth::k4Bit, 0, is_top_fields)); + } else { + break; + } + } else { + uint8_t switch_2; + RCHECK(reader->ReadBits(1, &switch_2)); + if (switch_2 == 0) { + RCHECK(reader->ReadBits(2, &peek)); // run_length_4-7 + uint8_t code; + RCHECK(reader->ReadBits(4, &code)); + for (int i = 0; i < peek + 4; i++) + RCHECK(image->AddPixel(BitDepth::k4Bit, code, is_top_fields)); + } else { + uint8_t switch_3; + RCHECK(reader->ReadBits(2, &switch_3)); + if (switch_3 == 0) { + RCHECK(image->AddPixel(BitDepth::k4Bit, 0, is_top_fields)); + } else if (switch_3 == 1) { + RCHECK(image->AddPixel(BitDepth::k4Bit, 0, is_top_fields)); + RCHECK(image->AddPixel(BitDepth::k4Bit, 0, is_top_fields)); + } else if (switch_3 == 2) { + RCHECK(reader->ReadBits(4, &peek)); // run_length_9-24 + uint8_t code; + RCHECK(reader->ReadBits(4, &code)); + for (int i = 0; i < peek + 9; i++) + RCHECK(image->AddPixel(BitDepth::k4Bit, code, is_top_fields)); + } else { + // switch_3 == 3 + RCHECK(reader->ReadBits(8, &peek)); // run_length_25-280 + uint8_t code; + RCHECK(reader->ReadBits(4, &code)); + for (int i = 0; i < peek + 25; i++) + RCHECK(image->AddPixel(BitDepth::k4Bit, code, is_top_fields)); + } + } + } + } + } + return true; +} + +bool DvbSubParser::Parse8BitPixelData(bool is_top_fields, + BitReader* reader, + DvbImageBuilder* image) { + // 8-bit/pixel code string, Section 7.2.5.2.3, Table 26. 
+ while (true) { + uint8_t peek; + RCHECK(reader->ReadBits(8, &peek)); + if (peek != 0) { + RCHECK(image->AddPixel(BitDepth::k8Bit, peek, is_top_fields)); + } else { + uint8_t switch_1; + RCHECK(reader->ReadBits(1, &switch_1)); + if (switch_1 == 0) { + RCHECK(reader->ReadBits(7, &peek)); + if (peek != 0) { + for (uint8_t i = 0; i < peek; i++) + RCHECK(image->AddPixel(BitDepth::k8Bit, 0, is_top_fields)); + } else { + break; + } + } else { + uint8_t count; + RCHECK(reader->ReadBits(7, &count)); + RCHECK(reader->ReadBits(8, &peek)); + for (uint8_t i = 0; i < count; i++) + RCHECK(image->AddPixel(BitDepth::k8Bit, peek, is_top_fields)); + } + } + } + + return true; +} + +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/dvb/dvb_sub_parser.h b/packager/media/formats/dvb/dvb_sub_parser.h new file mode 100644 index 0000000000..a4de793f4c --- /dev/null +++ b/packager/media/formats/dvb/dvb_sub_parser.h @@ -0,0 +1,86 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_DVB_DVB_SUB_PARSER_H_ +#define PACKAGER_MEDIA_DVB_DVB_SUB_PARSER_H_ + +#include +#include + +#include "packager/media/base/bit_reader.h" +#include "packager/media/base/text_sample.h" +#include "packager/media/formats/dvb/dvb_image.h" +#include "packager/media/formats/dvb/subtitle_composer.h" + +namespace shaka { +namespace media { + +// See ETSI EN 300 743 Section 7.2.0.1 and Table 7. 
+enum class DvbSubSegmentType : uint16_t { + kPageComposition = 0x10, + kRegionComposition = 0x11, + kClutDefinition = 0x12, + kObjectData = 0x13, + kDisplayDefinition = 0x14, + kDisparitySignalling = 0x15, + kAlternativeClut = 0x16, + kEndOfDisplay = 0x80, +}; + +class DvbSubParser { + public: + DvbSubParser(); + ~DvbSubParser(); + + DvbSubParser(const DvbSubParser&) = delete; + DvbSubParser& operator=(const DvbSubParser&) = delete; + + bool Parse(DvbSubSegmentType segment_type, + int64_t pts, + const uint8_t* payload, + size_t size, + std::vector>* samples); + bool Flush(std::vector>* samples); + + private: + friend class DvbSubParserTest; + + const DvbImageColorSpace* GetColorSpace(uint8_t clut_id); + const DvbImageBuilder* GetImageForObject(uint16_t object_id); + + bool ParsePageComposition(int64_t pts, + const uint8_t* data, + size_t size, + std::vector>* samples); + bool ParseRegionComposition(const uint8_t* data, size_t size); + bool ParseClutDefinition(const uint8_t* data, size_t size); + bool ParseObjectData(int64_t pts, const uint8_t* data, size_t size); + bool ParseDisplayDefinition(const uint8_t* data, size_t size); + + bool ParsePixelDataSubObject(size_t sub_object_length, + bool is_top_fields, + BitReader* reader, + DvbImageColorSpace* color_space, + DvbImageBuilder* image); + bool Parse2BitPixelData(bool is_top_fields, + BitReader* reader, + DvbImageBuilder* image); + bool Parse4BitPixelData(bool is_top_fields, + BitReader* reader, + DvbImageBuilder* image); + bool Parse8BitPixelData(bool is_top_fields, + BitReader* reader, + DvbImageBuilder* image); + + SubtitleComposer composer_; + int64_t last_pts_; + uint8_t timeout_; +}; + +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_DVB_DVB_SUB_PARSER_H_ diff --git a/packager/media/formats/dvb/dvb_sub_parser_unittest.cc b/packager/media/formats/dvb/dvb_sub_parser_unittest.cc new file mode 100644 index 0000000000..bf5b382859 --- /dev/null +++ 
b/packager/media/formats/dvb/dvb_sub_parser_unittest.cc @@ -0,0 +1,305 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/dvb/dvb_sub_parser.h" + +#include +#include + +#include +#include +#include + +namespace shaka { +namespace media { + +namespace { + +constexpr const uint8_t kRegionId = 7; +constexpr const uint8_t kClutId = 12; +constexpr const uint8_t kObjectId1 = 1; +constexpr const uint8_t kObjectId2 = 2; + +constexpr const int64_t kNoPts = 0; + +/// @param object_id The Object ID. +/// @param pairs A vector of data_type plus data body. Each pair should be a +/// single row. It should be image order (i.e. not interlaced). The +/// body is an array of strings containing binary codes (e.g. "01"). +std::vector GenerateObjectData( + uint8_t object_id, + const std::vector>>& pairs) { + std::vector ret; + ret.push_back(0); + ret.push_back(object_id); + ret.push_back(0); + ret.insert(ret.end(), 4, 0); // insert dummy bytes for size + + auto push_data = [&](size_t start_index) { + const auto start_size = ret.size(); + for (size_t i = start_index; i < pairs.size(); i += 2) { + ret.push_back(pairs[i].first); + + uint8_t temp = 0; + uint8_t count = 0; + for (const auto& str : pairs[i].second) { + for (const auto& ch : str) { + if (ch == ' ') + continue; + + CHECK(ch == '0' || ch == '1'); + temp = (temp << 1) | (ch - '0'); + if (++count == 8) { + ret.push_back(temp); + temp = count = 0; + } + } + } + if (count != 0) + ret.push_back(temp << (8 - count)); + ret.push_back(0xf0); // end-of-line + } + return ret.size() - start_size; + }; + + const size_t top_size = push_data(0); + const size_t bottom_size = push_data(1); + CHECK(top_size <= 0xffff); + CHECK(bottom_size <= 0xffff); + ret[3] = (top_size >> 8) & 0xff; + ret[4] = top_size & 0xff; + ret[5] = 
(bottom_size >> 8) & 0xff; + ret[6] = bottom_size & 0xff; + + return ret; +} + +} // namespace + +class DvbSubParserTest : public testing::Test { + protected: + const DvbImageColorSpace* GetColorSpace(DvbSubParser* parser, + uint8_t clut_id) { + return parser->GetColorSpace(clut_id); + } + const DvbImageBuilder* GetImage(DvbSubParser* parser, uint8_t object_id) { + return parser->GetImageForObject(object_id); + } +}; + +TEST_F(DvbSubParserTest, TestHelper) { + const uint8_t kResult[] = { + // clang-format off + 0x00, 0x12, 0x00, + + 0x00, 0x09, // top-rows size + 0x00, 0x04, // bottom-rows size + + // Top-rows + 0x30, 0x1b, 0xe9, 0x6b, + 0xf0, + 0x88, 0xc8, 0xe0, + 0xf0, + + // Bottom-rows + 0x11, 0xe6, 0xcd, + 0xf0, + // clang-format on + }; + std::vector expected(kResult, kResult + sizeof(kResult)); + + const auto actual = + GenerateObjectData(0x12, {{0x30, {"00011011", "11101001", "01101011"}}, + {0x11, {"11100110", "11001101"}}, + {0x88, {"11", "00", "10", "00", "11", "10"}}}); + EXPECT_EQ(actual, expected); +} + +TEST_F(DvbSubParserTest, BasicFlow) { + // Note up to segment_length is handled by caller. 
+ constexpr const uint8_t kDisplayDefinitionSegment[] = { + 0x00, // dds_version_number(4) | display_window_flag(1) | reserved(3) + 0x00, 99, // display_width + 0x00, 99, // display_height + }; + constexpr const uint8_t kPageCompositionSegment[] = { + 0x02, // page_time_out + 0x04, // page_version_number(4) | page_state(2) | reserved(2) + // First region + kRegionId, // region_id + 0x00, // reserved + 0x00, 0x11, // region_horizontal_address + 0x00, 0x12, // region_vertical_address + }; + constexpr const uint8_t kRegionCompositionSegment[] = { + kRegionId, // region_id + 0x08, // region_version_number(4) | region_fill_flag(1) | + // reserved(3) + 0x00, 50, // region_width + 0x00, 50, // region_height + 0x6c, // region_level_of_compatibility(3) | region_depth(3) | + // reserved(2) + kClutId, // CLUT_id + 0x02, // region_8-bit_pixel_code, + 0x28, // region_4-bit_pixel_code(4) | region_2-bit_pixel_code(2) | + // reserved(2) + + // First object + 0x00, kObjectId1, // object_id + 0x00, 0x07, // object_type(2) | object_provider_flag(2) | + // object_horizontal_position(12) + 0x00, 0x08, // reserved(4) | object_vertical_position(12) + + // Second object + 0x00, kObjectId2, // object_id + 0x00, 0x09, // object_type(2) | object_provider_flag(2) | + // object_horizontal_position(12) + 0x00, 0x0c, // reserved(4) | object_vertical_position(12) + }; + constexpr const uint8_t kClutDefinitionSegment[] = { + // clang-format off + kClutId, // CLUT_id + 0x00, // CLUT_version_number(4) | reserved(4) + + // First color + 0x00, // CLUT_entry_id + 0x81, // flags (2-bit,full-range) + 70, 141, 117, 0, + 0x00, // CLUT_entry_id + 0x41, // flags (4-bit,full-range) + 70, 141, 117, 0, + 0x00, // CLUT_entry_id + 0x21, // flags (8-bit,full-range) + 70, 141, 117, 0, + + // Second color + 0x01, // CLUT_entry_id + 0x81, // flags (2-bit,full-range) + 33, 134, 122, 0, + 0x01, // CLUT_entry_id + 0x41, // flags (4-bit,full-range) + 33, 134, 122, 0, + 0x01, // CLUT_entry_id + 0x21, // flags 
(8-bit,full-range) + 33, 134, 122, 0, + + // Third color + 0x02, // CLUT_entry_id + 0x81, // flags (2-bit,full-range) + 100, 128, 127, 0, + 0x02, // CLUT_entry_id + 0x41, // flags (4-bit,full-range) + 100, 128, 127, 0, + 0x02, // CLUT_entry_id + 0x21, // flags (8-bit,full-range) + 100, 128, 127, 0, + // clang-format on + }; + // 0 0 0 0 1 1 + // 0 1 1 1 0 0 + // 1 0 1 1 1 1 + // 0 0 0 1 + const auto kObjectData1 = GenerateObjectData( + kObjectId1, + { + {0x10, {"00100100", "01", "01", "000000"}}, + {0x10, {"0001", "00100001", "000001", "000000"}}, + {0x11, {"0001", "0000 1100", "0000 1000 0001", "0000 0000"}}, + {0x11, {"0000 0001", "0001", "0000 0000"}}, + }); + // 1 1 0 0 + // 0 0 1 0 + // 1 0 0 0 + const uint8_t kObjectData2[] = { + 0x00, kObjectId2, 0x00, + + 0x00, 0x0f, // top-rows length + 0x00, 0x09, // bottom-rows length + + 0x12, 0x01, 0x01, 0x00, 0x02, 0x00, 0x00, 0xf0, // row 0 + 0x12, 0x01, 0x00, 0x03, 0x00, 0x00, 0xf0, // row 2 + + 0x12, 0x00, 0x02, 0x01, 0x00, 0x01, 0x00, 0x00, 0xf0, // row 1 + }; + constexpr const uint8_t kEndOfDisplaySegment[] = {0x00}; + auto check_image_data = [&](DvbSubParser* parser, uint8_t object_id, + const std::vector& data) { + const RgbaColor* pixels; + uint16_t width, height; + auto* color_space = GetColorSpace(parser, kClutId); + auto* image = GetImage(parser, object_id); + ASSERT_TRUE(image); + ASSERT_TRUE(color_space); + ASSERT_TRUE(image->GetPixels(&pixels, &width, &height)); + ASSERT_EQ(static_cast(width * height), data.size()); + for (size_t y = 0; y < height; y++) { + for (size_t x = 0; x < width; x++) { + auto color = + color_space->GetColor(BitDepth::k8Bit, data[x + y * width]); + EXPECT_EQ(pixels[x + y * image->max_width()], color) + << "Object=" << static_cast(object_id) << ", X=" << x + << ", Y=" << y; + } + } + }; + + DvbSubParser parser; + std::vector> samples; + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kDisplayDefinition, kNoPts, + kDisplayDefinitionSegment, + sizeof(kDisplayDefinitionSegment), 
&samples)); + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kPageComposition, kNoPts, + kPageCompositionSegment, + sizeof(kPageCompositionSegment), &samples)); + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kRegionComposition, kNoPts, + kRegionCompositionSegment, + sizeof(kRegionCompositionSegment), &samples)); + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kClutDefinition, kNoPts, + kClutDefinitionSegment, + sizeof(kClutDefinitionSegment), &samples)); + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kObjectData, kNoPts, + kObjectData1.data(), kObjectData1.size(), &samples)); + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kObjectData, kNoPts, kObjectData2, + sizeof(kObjectData2), &samples)); + ASSERT_TRUE(parser.Parse(DvbSubSegmentType::kEndOfDisplay, kNoPts, + kEndOfDisplaySegment, sizeof(kEndOfDisplaySegment), + &samples)); + + check_image_data(&parser, kObjectId1, {0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, + 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 2, 2}); + check_image_data(&parser, kObjectId2, {1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}); + + ASSERT_TRUE(parser.Flush(&samples)); + ASSERT_EQ(samples.size(), 2u); + for (auto& sample : samples) { + ASSERT_TRUE(sample->settings().line); + ASSERT_EQ(sample->settings().line->type, TextUnitType::kPercent); + ASSERT_TRUE(sample->settings().position); + ASSERT_EQ(sample->settings().position->type, TextUnitType::kPercent); + ASSERT_TRUE(sample->settings().width); + ASSERT_EQ(sample->settings().width->type, TextUnitType::kPercent); + ASSERT_TRUE(sample->settings().height); + ASSERT_EQ(sample->settings().height->type, TextUnitType::kPercent); + + ASSERT_FALSE(sample->body().image.empty()); + } + // Allow in either order. 
+ if (samples[0]->settings().position->value == 0x1a) + std::swap(samples[0], samples[1]); + + EXPECT_EQ(samples[0]->settings().position->value, 0x18); + EXPECT_EQ(samples[0]->settings().line->value, 0x1a); + EXPECT_EQ(samples[0]->settings().width->value, 6); + EXPECT_EQ(samples[0]->settings().height->value, 4); + + EXPECT_EQ(samples[1]->settings().position->value, 0x1a); + EXPECT_EQ(samples[1]->settings().line->value, 0x1e); + EXPECT_EQ(samples[1]->settings().width->value, 4); + EXPECT_EQ(samples[1]->settings().height->value, 3); +} + +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/mp2t/es_parser_dvb.cc b/packager/media/formats/mp2t/es_parser_dvb.cc new file mode 100644 index 0000000000..a392b033c7 --- /dev/null +++ b/packager/media/formats/mp2t/es_parser_dvb.cc @@ -0,0 +1,94 @@ +// Copyright 2020 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/mp2t/es_parser_dvb.h" + +#include "packager/media/base/bit_reader.h" +#include "packager/media/base/text_stream_info.h" +#include "packager/media/base/timestamp.h" +#include "packager/media/formats/mp2t/mp2t_common.h" + +namespace shaka { +namespace media { +namespace mp2t { + +EsParserDvb::EsParserDvb(uint32_t pid, + const NewStreamInfoCB& new_stream_info_cb, + const EmitTextSampleCB& emit_sample_cb) + : EsParser(pid), + new_stream_info_cb_(new_stream_info_cb), + emit_sample_cb_(emit_sample_cb) {} + +EsParserDvb::~EsParserDvb() {} + +bool EsParserDvb::Parse(const uint8_t* buf, + int size, + int64_t pts, + int64_t dts) { + if (!sent_info_) { + sent_info_ = true; + std::shared_ptr info = std::make_shared( + pid(), kMpeg2Timescale, kInfiniteDuration, kCodecText, + /* codec_string= */ "", /* codec_config= */ "", /* width= */ 0, + /* height= */ 0, /* language= */ ""); + new_stream_info_cb_.Run(info); + } 
+ + // TODO: Handle buffering and multiple reads? All content so far has been + // a whole segment, so it may not be needed. + return ParseInternal(buf, size, pts); +} + +bool EsParserDvb::Flush() { + for (auto& pair : parsers_) { + std::vector> samples; + RCHECK(pair.second.Flush(&samples)); + + for (auto sample : samples) + emit_sample_cb_.Run(sample); + } + return true; +} + +void EsParserDvb::Reset() { + parsers_.clear(); +} + +bool EsParserDvb::ParseInternal(const uint8_t* data, size_t size, int64_t pts) { + // See EN 300 743 Table 3. + BitReader reader(data, size); + int data_identifier; + int subtitle_stream_id; + RCHECK(reader.ReadBits(8, &data_identifier)); + RCHECK(reader.ReadBits(8, &subtitle_stream_id)); + RCHECK(data_identifier == 0x20); + RCHECK(subtitle_stream_id == 0); + + int temp; + while (reader.ReadBits(8, &temp) && temp == 0xf) { + DvbSubSegmentType segment_type; + uint16_t page_id; + size_t segment_length; + RCHECK(reader.ReadBits(8, &segment_type)); + RCHECK(reader.ReadBits(16, &page_id)); + RCHECK(reader.ReadBits(16, &segment_length)); + RCHECK(reader.bits_available() > segment_length * 8); + + const uint8_t* payload = data + (size - reader.bits_available() / 8); + std::vector> samples; + RCHECK(parsers_[page_id].Parse(segment_type, pts, payload, segment_length, + &samples)); + for (auto sample : samples) + emit_sample_cb_.Run(sample); + + RCHECK(reader.SkipBytes(segment_length)); + } + return temp == 0xff; +} + +} // namespace mp2t +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/mp2t/es_parser_dvb.h b/packager/media/formats/mp2t/es_parser_dvb.h new file mode 100644 index 0000000000..ce56c0c17e --- /dev/null +++ b/packager/media/formats/mp2t/es_parser_dvb.h @@ -0,0 +1,54 @@ +// Copyright 2020 Google Inc. All rights reserved. 
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_FORMATS_MP2T_ES_PARSER_DVB_H_
+#define PACKAGER_MEDIA_FORMATS_MP2T_ES_PARSER_DVB_H_
+
+#include <unordered_map>
+
+#include "packager/base/callback.h"
+#include "packager/media/base/byte_queue.h"
+#include "packager/media/formats/dvb/dvb_sub_parser.h"
+#include "packager/media/formats/mp2t/es_parser.h"
+
+namespace shaka {
+namespace media {
+namespace mp2t {
+
+class EsParserDvb : public EsParser {
+ public:
+  EsParserDvb(uint32_t pid,
+              const NewStreamInfoCB& new_stream_info_cb,
+              const EmitTextSampleCB& emit_sample_cb);
+  ~EsParserDvb() override;
+
+  // EsParser implementation.
+  bool Parse(const uint8_t* buf, int size, int64_t pts, int64_t dts) override;
+  bool Flush() override;
+  void Reset() override;
+
+ private:
+  EsParserDvb(const EsParserDvb&) = delete;
+  EsParserDvb& operator=(const EsParserDvb&) = delete;
+
+  bool ParseInternal(const uint8_t* data, size_t size, int64_t pts);
+
+  // Callbacks:
+  // - to signal the text stream configuration (run once, on first Parse),
+  // - to emit each parsed text sample.
+  NewStreamInfoCB new_stream_info_cb_;
+  EmitTextSampleCB emit_sample_cb_;
+
+  // Maps page_id to the parser for that page (created on first use).
+  std::unordered_map<uint16_t, DvbSubParser> parsers_;
+  bool sent_info_ = false;
+};
+
+}  // namespace mp2t
+}  // namespace media
+}  // namespace shaka
+
+#endif  // PACKAGER_MEDIA_FORMATS_MP2T_ES_PARSER_DVB_H_
diff --git a/packager/media/formats/mp2t/mp2t.gyp b/packager/media/formats/mp2t/mp2t.gyp
index 267b5d8de6..f31ab5ad68 100644
--- a/packager/media/formats/mp2t/mp2t.gyp
+++ b/packager/media/formats/mp2t/mp2t.gyp
@@ -22,6 +22,8 @@
         'continuity_counter.h',
         'es_parser_audio.cc',
         'es_parser_audio.h',
+        'es_parser_dvb.cc',
+        'es_parser_dvb.h',
         'es_parser_h264.cc',
         'es_parser_h264.h',
         'es_parser_h265.cc',
@@ -63,6 +65,7 @@
         '../../base/media_base.gyp:media_base',
         '../../crypto/crypto.gyp:crypto',
         '../../codecs/codecs.gyp:codecs',
+        '../dvb/dvb.gyp:dvb',
       ],
     },
     {
diff --git a/packager/media/formats/mp2t/mp2t_media_parser.cc b/packager/media/formats/mp2t/mp2t_media_parser.cc
index 15ac98bbbf..7ce6505b2a 100644
--- a/packager/media/formats/mp2t/mp2t_media_parser.cc
+++ b/packager/media/formats/mp2t/mp2t_media_parser.cc
@@ -12,6 +12,7 @@
 #include "packager/media/base/text_sample.h"
 #include "packager/media/formats/mp2t/es_parser.h"
 #include "packager/media/formats/mp2t/es_parser_audio.h"
+#include "packager/media/formats/mp2t/es_parser_dvb.h"
 #include "packager/media/formats/mp2t/es_parser_h264.h"
 #include "packager/media/formats/mp2t/es_parser_h265.h"
 #include "packager/media/formats/mp2t/mp2t_common.h"
@@ -33,6 +34,7 @@ class PidState {
     kPidPmt,
     kPidAudioPes,
     kPidVideoPes,
+    kPidTextPes,
   };
 
   PidState(int pid,
@@ -281,12 +283,14 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
            << static_cast<int>(stream_type) << std::dec;
 
   // Create a stream parser corresponding to the stream type.
-  bool is_audio = false;
+  PidState::PidType pid_type = PidState::kPidVideoPes;
   std::unique_ptr<EsParser> es_parser;
   auto on_new_stream = base::Bind(&Mp2tMediaParser::OnNewStreamInfo,
                                   base::Unretained(this), pes_pid);
   auto on_emit_media = base::Bind(&Mp2tMediaParser::OnEmitMediaSample,
                                   base::Unretained(this), pes_pid);
+  auto on_emit_text = base::Bind(&Mp2tMediaParser::OnEmitTextSample,
+                                 base::Unretained(this), pes_pid);
   switch (stream_type) {
     case TsStreamType::kAvc:
       es_parser.reset(new EsParserH264(pes_pid, on_new_stream, on_emit_media));
@@ -300,10 +304,15 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
       es_parser.reset(
           new EsParserAudio(pes_pid, static_cast<TsStreamType>(stream_type),
                             on_new_stream, on_emit_media, sbr_in_mimetype_));
-      is_audio = true;
+      pid_type = PidState::kPidAudioPes;
+      break;
+    case TsStreamType::kDvbSubtitles:
+      es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text));
+      pid_type = PidState::kPidTextPes;
       break;
     default: {
       auto type = static_cast<int>(stream_type);
+      DCHECK(type <= 0xff);
       LOG_IF(ERROR, !stream_type_logged_once_[type])
           << "Ignore unsupported MPEG2TS stream type 0x" << std::hex << type
           << std::dec;
@@ -316,8 +325,6 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
   DVLOG(1) << "Create a new PES state";
   std::unique_ptr<TsSectionPes> pes_section_parser(
       new TsSectionPes(std::move(es_parser)));
-  PidState::PidType pid_type =
-      is_audio ? PidState::kPidAudioPes : PidState::kPidVideoPes;
   std::unique_ptr<PidState> pes_pid_state(
       new PidState(pes_pid, pid_type, std::move(pes_section_parser)));
   pes_pid_state->Enable();
@@ -363,7 +370,8 @@ bool Mp2tMediaParser::FinishInitializationIfNeeded() {
   uint32_t num_es(0);
   for (const auto& pair : pids_) {
     if ((pair.second->pid_type() == PidState::kPidAudioPes ||
-         pair.second->pid_type() == PidState::kPidVideoPes) &&
+         pair.second->pid_type() == PidState::kPidVideoPes ||
+         pair.second->pid_type() == PidState::kPidTextPes) &&
         pair.second->IsEnabled()) {
       ++num_es;
       if (pair.second->config())
diff --git a/packager/media/formats/mp2t/ts_section_pmt.cc b/packager/media/formats/mp2t/ts_section_pmt.cc
index a0c23c7d4a..5008156693 100644
--- a/packager/media/formats/mp2t/ts_section_pmt.cc
+++ b/packager/media/formats/mp2t/ts_section_pmt.cc
@@ -95,6 +95,26 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
 
     // Read the ES info descriptors.
     // Defined in section 2.6 of ISO-13818.
+    // Each descriptor is laid out as tag (8 bits), length (8 bits), payload.
+    // Walk all of them: the subtitling_descriptor need not be the first one.
+    while (es_info_length >= 2) {
+      uint8_t descriptor_tag;
+      uint8_t descriptor_length;
+      RCHECK(bit_reader->ReadBits(8, &descriptor_tag));
+      RCHECK(bit_reader->ReadBits(8, &descriptor_length));
+      es_info_length -= 2;
+
+      // See ETSI EN 300 468 Section 6.1
+      if (stream_type == TsStreamType::kPesPrivateData &&
+          descriptor_tag == 0x59) {  // subtitling_descriptor
+        pid_map[pid_es] = TsStreamType::kDvbSubtitles;
+      }
+
+      if (descriptor_length > es_info_length)
+        break;  // Malformed descriptor; the leftover bytes are skipped below.
+      RCHECK(bit_reader->SkipBits(8 * descriptor_length));
+      es_info_length -= descriptor_length;
+    }
     RCHECK(bit_reader->SkipBits(8 * es_info_length));
   }
 
diff --git a/packager/media/formats/mp2t/ts_stream_type.h b/packager/media/formats/mp2t/ts_stream_type.h
index 9258706c0e..01bc131c54 100644
--- a/packager/media/formats/mp2t/ts_stream_type.h
+++ b/packager/media/formats/mp2t/ts_stream_type.h
@@ -41,6 +41,10 @@ enum class TsStreamType {
   kEncryptedEac3 = 0xC2,
   kEncryptedAdtsAac = 0xCF,
   kEncryptedAvc = 0xDB,
+
+  // Below are internal values used to select other stream types based on other
+  // info in headers.
+  kDvbSubtitles = 0x100,
 };
 
 }  // namespace mp2t