diff --git a/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mp4 b/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mp4 index d7b8add6ea..3e15d7b82c 100644 Binary files a/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mp4 and b/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mp4 differ diff --git a/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mpd b/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mpd index 6b89c5ccc3..865faf2362 100644 --- a/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mpd +++ b/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mpd @@ -3,14 +3,14 @@ - + AAAAMHBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAABAxMjM0NTY3ODkwMTIzNDU2 output_video.mp4 - - + + diff --git a/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mp4 b/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mp4 index 3423a75b26..976ff92f37 100644 Binary files a/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mp4 and b/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mp4 differ diff --git a/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mpd b/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mpd index 18372ecbfe..46bb26a841 100644 --- a/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mpd +++ b/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mpd @@ -3,14 +3,14 @@ - + AAAAMHBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAABAxMjM0NTY3ODkwMTIzNDU2 output_video.mp4 - - + + diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 7227aac03c..4de1f834c7 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -1186,16 +1186,27 @@ uint32_t Metadata::ComputeSizeInternal() { : HeaderSize() + handler.ComputeSize() + id3v2_size; } -CodecConfigurationRecord::CodecConfigurationRecord() : box_type(FOURCC_NULL) {} -CodecConfigurationRecord::~CodecConfigurationRecord() {} -FourCC CodecConfigurationRecord::BoxType() const { - // CodecConfigurationRecord should be parsed according to format recovered in +CodecConfiguration::CodecConfiguration() : box_type(FOURCC_NULL) {} +CodecConfiguration::~CodecConfiguration() {} + +FourCC CodecConfiguration::BoxType() const { + // CodecConfiguration box should be parsed according to format recovered in // VideoSampleEntry. |box_type| is determined dynamically there. return box_type; } -bool CodecConfigurationRecord::ReadWriteInternal(BoxBuffer* buffer) { +bool CodecConfiguration::ReadWriteInternal(BoxBuffer* buffer) { + DCHECK_NE(box_type, FOURCC_NULL); RCHECK(ReadWriteHeaderInternal(buffer)); + + // VPCodecConfiguration box inherits from FullBox instead of Box. The extra 4 + // bytes are handled here. + if (box_type == FOURCC_VPCC) { + uint32_t version_flags = 0; + RCHECK(buffer->ReadWriteUInt32(&version_flags)); + RCHECK(version_flags == 0); + } + if (buffer->Reading()) { RCHECK(buffer->ReadWriteVector(&data, buffer->BytesLeft())); } else { @@ -1204,10 +1215,11 @@ bool CodecConfigurationRecord::ReadWriteInternal(BoxBuffer* buffer) { return true; } -uint32_t CodecConfigurationRecord::ComputeSizeInternal() { +uint32_t CodecConfiguration::ComputeSizeInternal() { if (data.empty()) return 0; - return HeaderSize() + data.size(); + DCHECK_NE(box_type, FOURCC_NULL); + return HeaderSize() + (box_type == FOURCC_VPCC ? 4 : 0) + data.size(); } PixelAspectRatio::PixelAspectRatio() : h_spacing(0), v_spacing(0) {} @@ -1310,37 +1322,50 @@ bool VideoSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { } const FourCC actual_format = GetActualFormat(); - switch (actual_format) { - case FOURCC_AVC1: - codec_config_record.box_type = FOURCC_AVCC; - break; - case FOURCC_HEV1: - case FOURCC_HVC1: - codec_config_record.box_type = FOURCC_HVCC; - break; - case FOURCC_VP08: - case FOURCC_VP09: - case FOURCC_VP10: - codec_config_record.box_type = FOURCC_VPCC; - break; - default: - LOG(ERROR) << FourCCToString(actual_format) << " is not supported."; - return false; + if (buffer->Reading()) { + codec_configuration.box_type = GetCodecConfigurationBoxType(actual_format); + } else { + DCHECK_EQ(codec_configuration.box_type, + GetCodecConfigurationBoxType(actual_format)); } - RCHECK(buffer->ReadWriteChild(&codec_config_record)); + DCHECK_NE(codec_configuration.box_type, FOURCC_NULL); + + RCHECK(buffer->ReadWriteChild(&codec_configuration)); RCHECK(buffer->TryReadWriteChild(&pixel_aspect)); return true; } uint32_t VideoSampleEntry::ComputeSizeInternal() { + const FourCC actual_format = GetActualFormat(); + if (actual_format == FOURCC_NULL) + return 0; + codec_configuration.box_type = GetCodecConfigurationBoxType(actual_format); + DCHECK_NE(codec_configuration.box_type, FOURCC_NULL); return HeaderSize() + sizeof(data_reference_index) + sizeof(width) + sizeof(height) + sizeof(kVideoResolution) * 2 + sizeof(kVideoFrameCount) + sizeof(kVideoDepth) + pixel_aspect.ComputeSize() + sinf.ComputeSize() + - codec_config_record.ComputeSize() + kCompressorNameSize + 6 + 4 + 16 + + codec_configuration.ComputeSize() + kCompressorNameSize + 6 + 4 + 16 + 2; // 6 + 4 bytes reserved, 16 + 2 bytes predefined. } +FourCC VideoSampleEntry::GetCodecConfigurationBoxType(FourCC format) const { + switch (format) { + case FOURCC_AVC1: + return FOURCC_AVCC; + case FOURCC_HEV1: + case FOURCC_HVC1: + return FOURCC_HVCC; + case FOURCC_VP08: + case FOURCC_VP09: + case FOURCC_VP10: + return FOURCC_VPCC; + default: + LOG(ERROR) << FourCCToString(format) << " is not supported."; + return FOURCC_NULL; + } +} + ElementaryStreamDescriptor::ElementaryStreamDescriptor() {} ElementaryStreamDescriptor::~ElementaryStreamDescriptor() {} FourCC ElementaryStreamDescriptor::BoxType() const { return FOURCC_ESDS; } @@ -1502,6 +1527,8 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { } uint32_t AudioSampleEntry::ComputeSizeInternal() { + if (GetActualFormat() == FOURCC_NULL) + return 0; return HeaderSize() + sizeof(data_reference_index) + sizeof(channelcount) + sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index a1ce10dcba..f61491a2d4 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -257,8 +257,14 @@ struct Metadata : FullBox { ID3v2 id3v2; }; -struct CodecConfigurationRecord : Box { - DECLARE_BOX_METHODS(CodecConfigurationRecord); +// This defines a common structure for various CodecConfiguration boxes: +// AVCConfiguration, HEVCConfiguration and VPCodecConfiguration. +// Note that unlike the other two CodecConfiguration boxes, VPCodecConfiguration +// box inherits from FullBox instead of Box, according to VP Codec ISO Media +// File Format Binding specification. It will be handled properly in the +// implementation. +struct CodecConfiguration: Box { + DECLARE_BOX_METHODS(CodecConfiguration); FourCC box_type; // Contains full codec configuration record, including possible extension @@ -279,6 +285,8 @@ struct VideoSampleEntry : Box { FourCC GetActualFormat() const { return format == FOURCC_ENCV ? sinf.format.format : format; } + // Returns the box type of codec configuration box from video format. + FourCC GetCodecConfigurationBoxType(FourCC format) const; FourCC format; uint16_t data_reference_index; @@ -287,7 +295,7 @@ struct VideoSampleEntry : Box { PixelAspectRatio pixel_aspect; ProtectionSchemeInfo sinf; - CodecConfigurationRecord codec_config_record; + CodecConfiguration codec_configuration; }; struct ElementaryStreamDescriptor : FullBox { diff --git a/packager/media/formats/mp4/box_definitions_comparison.h b/packager/media/formats/mp4/box_definitions_comparison.h index 7afb92315f..25cae4dba7 100644 --- a/packager/media/formats/mp4/box_definitions_comparison.h +++ b/packager/media/formats/mp4/box_definitions_comparison.h @@ -200,9 +200,9 @@ inline bool operator==(const Metadata& lhs, const Metadata& rhs) { return lhs.handler == rhs.handler && lhs.id3v2 == rhs.id3v2; } -inline bool operator==(const CodecConfigurationRecord& lhs, - const CodecConfigurationRecord& rhs) { - return lhs.data == rhs.data; +inline bool operator==(const CodecConfiguration& lhs, + const CodecConfiguration& rhs) { + return lhs.box_type == rhs.box_type && lhs.data == rhs.data; } inline bool operator==(const PixelAspectRatio& lhs, @@ -216,7 +216,7 @@ inline bool operator==(const VideoSampleEntry& lhs, lhs.data_reference_index == rhs.data_reference_index && lhs.width == rhs.width && lhs.height == rhs.height && lhs.pixel_aspect == rhs.pixel_aspect && lhs.sinf == rhs.sinf && - lhs.codec_config_record == rhs.codec_config_record; + lhs.codec_configuration == rhs.codec_configuration; } inline bool operator==(const ESDescriptor& lhs, const ESDescriptor& rhs) { diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index 3610a1fd03..25071b647d 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -328,24 +328,26 @@ class BoxDefinitionsTestGeneral : public testing::Test { void Modify(PixelAspectRatio* pasp) { pasp->v_spacing *= 8; } - void Fill(CodecConfigurationRecord* codec_config_record) { - const uint8_t kAvccData[] = { + void Fill(CodecConfiguration* codec_configuration) { + const uint8_t kCodecConfigurationData[] = { 0x01, 0x64, 0x00, 0x1f, 0xff, 0xe1, 0x00, 0x18, 0x67, 0x64, 0x00, 0x1f, 0xac, 0xd9, 0x40, 0x50, 0x05, 0xbb, 0x01, 0x10, 0x00, 0x00, 0x3e, 0x90, 0x00, 0x0e, 0xa6, 0x00, 0xf1, 0x83, 0x19, 0x60, 0x01, 0x00, 0x06, 0x68, 0xeb, 0xe3, 0xcb, 0x22, 0xc0}; - codec_config_record->data.assign(kAvccData, - kAvccData + arraysize(kAvccData)); + codec_configuration->data.assign( + kCodecConfigurationData, + kCodecConfigurationData + arraysize(kCodecConfigurationData)); } - void Modify(CodecConfigurationRecord* codec_config_record) { - const uint8_t kAvccData[] = { + void Modify(CodecConfiguration* codec_configuration) { + const uint8_t kCodecConfigurationData[] = { 0x01, 0x64, 0x00, 0x1e, 0xff, 0xe1, 0x00, 0x19, 0x67, 0x64, 0x00, 0x1e, 0xac, 0xd9, 0x40, 0xa0, 0x2f, 0xf9, 0x70, 0x11, 0x00, 0x00, 0x03, 0x03, 0xe9, 0x00, 0x00, 0xea, 0x60, 0x0f, 0x16, 0x2d, 0x96, 0x01, 0x00, 0x05, 0x68, 0xeb, 0xec, 0xb2, 0x2c}; - codec_config_record->data.assign(kAvccData, - kAvccData + arraysize(kAvccData)); + codec_configuration->data.assign( + kCodecConfigurationData, + kCodecConfigurationData + arraysize(kCodecConfigurationData)); } void Fill(VideoSampleEntry* encv) { @@ -355,12 +357,12 @@ class BoxDefinitionsTestGeneral : public testing::Test { encv->height = 600; Fill(&encv->pixel_aspect); Fill(&encv->sinf); - Fill(&encv->codec_config_record); + Fill(&encv->codec_configuration); } void Modify(VideoSampleEntry* encv) { encv->height += 600; - Modify(&encv->codec_config_record); + Modify(&encv->codec_configuration); } void Fill(ElementaryStreamDescriptor* esds) { @@ -913,11 +915,13 @@ class BoxDefinitionsTestGeneral : public testing::Test { bool IsOptional(const ProtectionSchemeInfo* box) { return true; } bool IsOptional(const EditList* box) { return true; } bool IsOptional(const Edit* box) { return true; } - bool IsOptional(const CodecConfigurationRecord* box) { return true; } + bool IsOptional(const CodecConfiguration* box) { return true; } bool IsOptional(const PixelAspectRatio* box) { return true; } + bool IsOptional(const VideoSampleEntry* box) { return true; } bool IsOptional(const ElementaryStreamDescriptor* box) { return true; } bool IsOptional(const AC3Specific* box) { return true; } bool IsOptional(const EC3Specific* box) { return true; } + bool IsOptional(const AudioSampleEntry* box) { return true; } // Recommended, but optional. bool IsOptional(const ProtectionSystemSpecificHeader* box) { return true; } bool IsOptional(const WebVTTSourceLabelBox* box) { return true; } @@ -956,7 +960,6 @@ typedef testing::Typesbuffer_.get()); + // Should inherit from Box. + EXPECT_EQ( + 8u, codec_configuration.ComputeSize() - codec_configuration.data.size()); + + CodecConfiguration codec_configuration_readback; + // BoxType should be provided before parsing the box. + codec_configuration_readback.box_type = FOURCC_AVCC; + ASSERT_TRUE(ReadBack(&codec_configuration_readback)); + EXPECT_EQ(codec_configuration, codec_configuration_readback); +} + +TEST_F(BoxDefinitionsTest, VPCodecConfiguration) { + CodecConfiguration codec_configuration; + Fill(&codec_configuration); + codec_configuration.box_type = FOURCC_VPCC; + codec_configuration.Write(this->buffer_.get()); + // Should inherit from FullBox. + EXPECT_EQ( + 12u, codec_configuration.ComputeSize() - codec_configuration.data.size()); + + CodecConfiguration codec_configuration_readback; + // BoxType should be provided before parsing the box. + codec_configuration_readback.box_type = FOURCC_VPCC; + ASSERT_TRUE(ReadBack(&codec_configuration_readback)); + EXPECT_EQ(codec_configuration, codec_configuration_readback); +} + TEST_F(BoxDefinitionsTest, DTSSampleEntry) { AudioSampleEntry entry; entry.format = FOURCC_DTSE; diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index a84b3e57b6..0a8a25e3eb 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -471,7 +471,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { switch (actual_format) { case FOURCC_AVC1: { AVCDecoderConfiguration avc_config; - if (!avc_config.Parse(entry.codec_config_record.data)) { + if (!avc_config.Parse(entry.codec_configuration.data)) { LOG(ERROR) << "Failed to parse avcc."; return false; } @@ -508,7 +508,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { case FOURCC_HEV1: case FOURCC_HVC1: { HEVCDecoderConfiguration hevc_config; - if (!hevc_config.Parse(entry.codec_config_record.data)) { + if (!hevc_config.Parse(entry.codec_configuration.data)) { LOG(ERROR) << "Failed to parse hevc."; return false; } @@ -520,7 +520,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { case FOURCC_VP09: case FOURCC_VP10: { VPCodecConfiguration vp_config; - if (!vp_config.Parse(entry.codec_config_record.data)) { + if (!vp_config.Parse(entry.codec_configuration.data)) { LOG(ERROR) << "Failed to parse vpcc."; return false; } @@ -540,8 +540,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { codec_string, track->media.header.language.code, coded_width, coded_height, pixel_width, pixel_height, 0, // trick_play_rate - nalu_length_size, entry.codec_config_record.data.data(), - entry.codec_config_record.data.size(), is_encrypted)); + nalu_length_size, entry.codec_configuration.data.data(), + entry.codec_configuration.data.size(), is_encrypted)); } } diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 10c90efc03..4505a53353 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -232,7 +232,7 @@ void MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info, video.format = VideoCodecToFourCC(video_info->codec()); video.width = video_info->width(); video.height = video_info->height(); - video.codec_config_record.data = video_info->extra_data(); + video.codec_configuration.data = video_info->extra_data(); if (pixel_width != 1 || pixel_height != 1) { video.pixel_aspect.h_spacing = pixel_width; video.pixel_aspect.v_spacing = pixel_height;