diff --git a/docs/source/options/stream_descriptors.rst b/docs/source/options/stream_descriptors.rst index 71677ec650..992bffda77 100644 --- a/docs/source/options/stream_descriptors.rst +++ b/docs/source/options/stream_descriptors.rst @@ -64,4 +64,5 @@ These are the available fields: stream. .. include:: /options/drm_stream_descriptors.rst +.. include:: /options/dash_stream_descriptors.rst .. include:: /options/hls_stream_descriptors.rst diff --git a/packager/media/base/language_utils.cc b/packager/media/base/language_utils.cc index 5860d3b5df..7f8e443d6f 100644 --- a/packager/media/base/language_utils.cc +++ b/packager/media/base/language_utils.cc @@ -110,7 +110,7 @@ std::string LanguageToShortestForm(const std::string& language) { // This could happen legitimately for languages which have no 2-letter code, // but that would imply that the input language code is a 3-letter code. - DCHECK_EQ(3u, main_language.size()); + DCHECK_EQ(3u, main_language.size()) << main_language; return main_language + subtag; } diff --git a/packager/media/formats/mp2t/es_parser_h26x.cc b/packager/media/formats/mp2t/es_parser_h26x.cc index 9588fa7d67..14cbf69fd0 100644 --- a/packager/media/formats/mp2t/es_parser_h26x.cc +++ b/packager/media/formats/mp2t/es_parser_h26x.cc @@ -62,6 +62,14 @@ bool EsParserH26x::Parse(const uint8_t* buf, // Link the end of the byte queue with the incoming timing descriptor. timing_desc_list_.push_back( std::pair(es_queue_->tail(), timing_desc)); + + // Warn if there is a large number of cached timestamps, which should be 1 + // or 2 if everything works as expected. + const size_t kWarningSize = + 24; // An arbitrary number (it is 1 second for a fps of 24). + LOG_IF(WARNING, timing_desc_list_.size() >= kWarningSize) + << "Unusually large number of cached timestamps (" + << timing_desc_list_.size() << ")."; } // Add the incoming bytes to the ES queue. @@ -209,8 +217,10 @@ bool EsParserH26x::ParseInternal() { // AUD shall be the first NAL unit if present. 
There shall be at most one // AUD in any access unit. We can emit the current access unit which shall // not contain the AUD. - if (nalu.is_aud()) - return EmitCurrentAccessUnit(); + if (nalu.is_aud()) { + RCHECK(EmitCurrentAccessUnit()); + continue; + } // We can only determine if the current access unit ends after seeing // another VCL NAL unit. @@ -281,7 +291,9 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos, // Emit a frame. DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos - << " size=" << access_unit_size; + << " size=" << access_unit_size << " pts " + << current_timing_desc.pts << " timing_desc_list size " + << timing_desc_list_.size(); int es_size; const uint8_t* es; es_queue_->PeekAt(access_unit_pos, &es, &es_size); diff --git a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc index df07c1891e..cd2aebe9ef 100644 --- a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc +++ b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc @@ -163,6 +163,11 @@ class EsParserH26xTest : public testing::Test { const H26xNaluType* types, size_t types_count); + // Returns the vector of AnnexB samples data. + std::vector> BuildSamplesData(Nalu::CodecType codec_type, + const H26xNaluType* types, + size_t types_count); + void EmitSample(uint32_t pid, const std::shared_ptr& sample) { size_t sample_id = sample_count_; sample_count_++; @@ -186,27 +191,29 @@ class EsParserH26xTest : public testing::Test { bool has_stream_info_; }; -void EsParserH26xTest::RunTest(Nalu::CodecType codec_type, - const H26xNaluType* types, - size_t types_count) { - // Duration of one 25fps video frame in 90KHz clock units. - const uint32_t kMpegTicksPerFrame = 3600; +// Returns AnnexB samples data and stores NAL Unit samples data in |samples_|, +// which is what will be returned from |EsParser|. 
+std::vector> EsParserH26xTest::BuildSamplesData( + Nalu::CodecType codec_type, + const H26xNaluType* types, + size_t types_count) { + std::vector> samples_data; + const uint8_t kStartCode[] = {0x00, 0x00, 0x01}; - TestableEsParser es_parser( - codec_type, - base::Bind(&EsParserH26xTest::NewVideoConfig, base::Unretained(this)), - base::Bind(&EsParserH26xTest::EmitSample, base::Unretained(this))); - bool seen_key_frame = false; - std::vector cur_sample_data; - ASSERT_EQ(kSeparator, types[0]); + std::vector nal_unit_sample_data; + std::vector annex_b_sample_data; + CHECK_EQ(kSeparator, types[0]); for (size_t k = 1; k < types_count; k++) { if (types[k] == kSeparator) { // We should not be emitting samples until we see a key frame. if (seen_key_frame) - samples_.push_back(cur_sample_data); - cur_sample_data.clear(); + samples_.push_back(nal_unit_sample_data); + if (!annex_b_sample_data.empty()) + samples_data.push_back(annex_b_sample_data); + nal_unit_sample_data.clear(); + annex_b_sample_data.clear(); } else { if (codec_type == Nalu::kH264) { if (types[k] == kH264VclKeyFrame) @@ -218,34 +225,56 @@ void EsParserH26xTest::RunTest(Nalu::CodecType codec_type, std::vector es_data = CreateNalu(codec_type, types[k], static_cast(k)); - cur_sample_data.push_back(0); - cur_sample_data.push_back(0); - cur_sample_data.push_back(0); - cur_sample_data.push_back(static_cast(es_data.size())); - cur_sample_data.insert(cur_sample_data.end(), es_data.begin(), - es_data.end()); + + nal_unit_sample_data.push_back(0); + nal_unit_sample_data.push_back(0); + nal_unit_sample_data.push_back(0); + nal_unit_sample_data.push_back(static_cast(es_data.size())); + nal_unit_sample_data.insert(nal_unit_sample_data.end(), es_data.begin(), + es_data.end()); + es_data.insert(es_data.begin(), kStartCode, kStartCode + arraysize(kStartCode)); - - const int64_t pts = k * kMpegTicksPerFrame; - const int64_t dts = k * kMpegTicksPerFrame; - // This may process the previous sample; but since we don't know 
whether - // we are at the end yet, this will not process the current sample until - // later. - size_t offset = 0; - size_t size = 1; - while (offset < es_data.size()) { - // Insert the data in parts to test partial data searches. - size = std::min(size + 1, es_data.size() - offset); - ASSERT_TRUE(es_parser.Parse(&es_data[offset], static_cast(size), - pts, dts)); - offset += size; - } + annex_b_sample_data.insert(annex_b_sample_data.end(), es_data.begin(), + es_data.end()); } } if (seen_key_frame) - samples_.push_back(cur_sample_data); + samples_.push_back(nal_unit_sample_data); + if (!annex_b_sample_data.empty()) + samples_data.push_back(annex_b_sample_data); + return samples_data; +} + +void EsParserH26xTest::RunTest(Nalu::CodecType codec_type, + const H26xNaluType* types, + size_t types_count) { + // Duration of one 25fps video frame in 90KHz clock units. + const uint32_t kMpegTicksPerFrame = 3600; + + TestableEsParser es_parser( + codec_type, + base::Bind(&EsParserH26xTest::NewVideoConfig, base::Unretained(this)), + base::Bind(&EsParserH26xTest::EmitSample, base::Unretained(this))); + + int64_t timestamp = 0; + for (const auto& sample_data : + BuildSamplesData(codec_type, types, types_count)) { + // This may process the previous sample; but since we don't know whether + // we are at the end yet, this will not process the current sample until + // later. + size_t offset = 0; + size_t size = 1; + while (offset < sample_data.size()) { + // Insert the data in parts to test partial data searches. + size = std::min(size + 1, sample_data.size() - offset); + ASSERT_TRUE(es_parser.Parse(&sample_data[offset], static_cast(size), + timestamp, timestamp)); + offset += size; + } + timestamp += kMpegTicksPerFrame; + } es_parser.Flush(); } @@ -350,6 +379,55 @@ TEST_F(EsParserH26xTest, H264BasicSupport) { EXPECT_TRUE(has_stream_info_); } +// This is not compliant to H264 spec, but VLC generates streams like this. 
See +// https://github.com/google/shaka-packager/issues/526 for details. +TEST_F(EsParserH26xTest, H264AudInAccessUnit) { + // clang-format off + const H26xNaluType kData[] = { + kSeparator, kH264Aud, kH264Sps, kH264Aud, kH264VclKeyFrame, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Sps, kH264Aud, kH264VclKeyFrame, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Sps, kH264Aud, kH264VclKeyFrame, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Sps, kH264Aud, kH264VclKeyFrame, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Vcl, + }; + // clang-format on + + TestableEsParser es_parser( + Nalu::kH264, + base::Bind(&EsParserH26xTest::NewVideoConfig, base::Unretained(this)), + base::Bind(&EsParserH26xTest::EmitSample, base::Unretained(this))); + + size_t sample_index = 0; + for (const auto& sample_data : + BuildSamplesData(Nalu::kH264, kData, arraysize(kData))) { + // Duration of one 25fps video frame in 90KHz clock units. + const uint32_t kMpegTicksPerFrame = 3600; + const int64_t timestamp = kMpegTicksPerFrame * sample_index; + ASSERT_TRUE(es_parser.Parse(sample_data.data(), + static_cast(sample_data.size()), timestamp, + timestamp)); + sample_index++; + + // The number of emitted samples may be less than the number of samples that + // are pushed to the EsParser since samples could be cached internally + // before being emitted. + // The delay is at most 2 in our current implementation. + const size_t kExpectedMaxDelay = 2; + EXPECT_NEAR(sample_index, sample_count_, kExpectedMaxDelay); + } + + es_parser.Flush(); + EXPECT_EQ(sample_index, sample_count_); + EXPECT_TRUE(has_stream_info_); +} + TEST_F(EsParserH26xTest, H264DeterminesAccessUnitsWithoutAUD) { const H26xNaluType kData[] = { kSeparator, kH264Sps, kH264VclKeyFrame,