diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h index 291ad1c3e8..15febbc25f 100644 --- a/packager/media/base/fourccs.h +++ b/packager/media/base/fourccs.h @@ -82,6 +82,7 @@ enum FourCC : uint32_t { FOURCC_pdin = 0x7064696e, FOURCC_prft = 0x70726674, FOURCC_pssh = 0x70737368, + FOURCC_roll = 0x726f6c6c, FOURCC_saio = 0x7361696f, FOURCC_saiz = 0x7361697a, FOURCC_sbgp = 0x73626770, diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 5d974fb93f..99255d1672 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -921,6 +921,187 @@ uint32_t SyncSample::ComputeSizeInternal() { sizeof(uint32_t) * sample_number.size(); } +CencSampleEncryptionInfoEntry::CencSampleEncryptionInfoEntry() + : is_protected(0), + per_sample_iv_size(0), + crypt_byte_block(0), + skip_byte_block(0) {} +CencSampleEncryptionInfoEntry::~CencSampleEncryptionInfoEntry() {}; + +bool CencSampleEncryptionInfoEntry::ReadWrite(BoxBuffer* buffer) { + if (!buffer->Reading()) { + if (key_id.size() != kCencKeyIdSize) { + LOG(WARNING) << "CENC defines key id length of " << kCencKeyIdSize + << " bytes; got " << key_id.size() + << ". Resized accordingly."; + key_id.resize(kCencKeyIdSize); + } + RCHECK(crypt_byte_block < 16 && skip_byte_block < 16); + } + + RCHECK(buffer->IgnoreBytes(1)); // reserved. + + uint8_t pattern = crypt_byte_block << 4 | skip_byte_block; + RCHECK(buffer->ReadWriteUInt8(&pattern)); + crypt_byte_block = pattern >> 4; + skip_byte_block = pattern & 0x0F; + + RCHECK(buffer->ReadWriteUInt8(&is_protected) && + buffer->ReadWriteUInt8(&per_sample_iv_size) && + buffer->ReadWriteVector(&key_id, kCencKeyIdSize)); + + if (is_protected == 1) { + if (per_sample_iv_size == 0) { // For constant iv. 
+ uint8_t constant_iv_size = constant_iv.size(); + RCHECK(buffer->ReadWriteUInt8(&constant_iv_size)); + RCHECK(constant_iv_size == 8 || constant_iv_size == 16); + RCHECK(buffer->ReadWriteVector(&constant_iv, constant_iv_size)); + } else { + RCHECK(per_sample_iv_size == 8 || per_sample_iv_size == 16); + DCHECK(constant_iv.empty()); + } + } else { + // Expect |is_protected| to be 0, i.e. not protected. Other values of + // |is_protected| are not supported. + RCHECK(is_protected == 0); + RCHECK(per_sample_iv_size == 0); + } + return true; +} + +uint32_t CencSampleEncryptionInfoEntry::ComputeSize() const { + return sizeof(uint32_t) + kCencKeyIdSize + + (constant_iv.empty() ? 0 : (sizeof(uint8_t) + constant_iv.size())); +} + +AudioRollRecoveryEntry::AudioRollRecoveryEntry(): roll_distance(0) {} +AudioRollRecoveryEntry::~AudioRollRecoveryEntry() {} + +bool AudioRollRecoveryEntry::ReadWrite(BoxBuffer* buffer) { + RCHECK(buffer->ReadWriteInt16(&roll_distance)); + return true; +} + +uint32_t AudioRollRecoveryEntry::ComputeSize() const { + return sizeof(roll_distance); +} + +SampleGroupDescription::SampleGroupDescription() : grouping_type(0) {} +SampleGroupDescription::~SampleGroupDescription() {} +FourCC SampleGroupDescription::BoxType() const { return FOURCC_sgpd; } + +bool SampleGroupDescription::ReadWriteInternal(BoxBuffer* buffer) { + RCHECK(ReadWriteHeaderInternal(buffer) && + buffer->ReadWriteUInt32(&grouping_type)); + + switch (grouping_type) { + case FOURCC_seig: + return ReadWriteEntries(buffer, &cenc_sample_encryption_info_entries); + case FOURCC_roll: + return ReadWriteEntries(buffer, &audio_roll_recovery_entries); + default: + DCHECK(buffer->Reading()); + DLOG(WARNING) << "Sample group '" << grouping_type + << "' is not supported."; + return true; + } +} + +template +bool SampleGroupDescription::ReadWriteEntries(BoxBuffer* buffer, + std::vector* entries) { + uint32_t default_length = 0; + if (!buffer->Reading()) { + DCHECK(!entries->empty()); + default_length = 
(*entries)[0].ComputeSize(); + DCHECK_NE(default_length, 0u); + } + if (version == 1) + RCHECK(buffer->ReadWriteUInt32(&default_length)); + if (version >= 2) { + NOTIMPLEMENTED() << "Unsupported SampleGroupDescriptionBox 'sgpd' version " + << static_cast(version); + return false; + } + + uint32_t count = entries->size(); + RCHECK(buffer->ReadWriteUInt32(&count)); + RCHECK(count != 0); + entries->resize(count); + + for (T& entry : *entries) { + if (version == 1) { + uint32_t description_length = default_length; + if (buffer->Reading() && default_length == 0) + RCHECK(buffer->ReadWriteUInt32(&description_length)); + RCHECK(entry.ReadWrite(buffer)); + RCHECK(entry.ComputeSize() == description_length); + } else { + RCHECK(entry.ReadWrite(buffer)); + } + } + return true; +} + +uint32_t SampleGroupDescription::ComputeSizeInternal() { + // Version 0 is obsoleted, so always generate version 1 box. + version = 1; + size_t entries_size = 0; + switch (grouping_type) { + case FOURCC_seig: + for (const auto& entry : cenc_sample_encryption_info_entries) + entries_size += entry.ComputeSize(); + break; + case FOURCC_roll: + for (const auto& entry : audio_roll_recovery_entries) + entries_size += entry.ComputeSize(); + break; + } + // This box is optional. Skip it if it is not used. + if (entries_size == 0) + return 0; + return HeaderSize() + sizeof(grouping_type) + + (version == 1 ? 
sizeof(uint32_t) : 0) + sizeof(uint32_t) + + entries_size; +} + +SampleToGroup::SampleToGroup() : grouping_type(0), grouping_type_parameter(0) {} +SampleToGroup::~SampleToGroup() {} +FourCC SampleToGroup::BoxType() const { return FOURCC_sbgp; } + +bool SampleToGroup::ReadWriteInternal(BoxBuffer* buffer) { + RCHECK(ReadWriteHeaderInternal(buffer) && + buffer->ReadWriteUInt32(&grouping_type)); + if (version == 1) + RCHECK(buffer->ReadWriteUInt32(&grouping_type_parameter)); + + if (grouping_type != FOURCC_seig && grouping_type != FOURCC_roll) { + DCHECK(buffer->Reading()); + DLOG(WARNING) << "Sample group " + << FourCCToString(static_cast(grouping_type)) + << " is not supported."; + return true; + } + + uint32_t count = entries.size(); + RCHECK(buffer->ReadWriteUInt32(&count)); + entries.resize(count); + for (uint32_t i = 0; i < count; ++i) { + RCHECK(buffer->ReadWriteUInt32(&entries[i].sample_count) && + buffer->ReadWriteUInt32(&entries[i].group_description_index)); + } + return true; +} + +uint32_t SampleToGroup::ComputeSizeInternal() { + // This box is optional. Skip it if it is not used. + if (entries.empty()) + return 0; + return HeaderSize() + sizeof(grouping_type) + + (version == 1 ? 
sizeof(grouping_type_parameter) : 0) + + sizeof(uint32_t) + entries.size() * sizeof(entries[0]); +} + SampleTable::SampleTable() {} SampleTable::~SampleTable() {} FourCC SampleTable::BoxType() const { return FOURCC_stbl; } @@ -961,15 +1142,30 @@ bool SampleTable::ReadWriteInternal(BoxBuffer* buffer) { buffer->ReadWriteChild(&chunk_large_offset)); } RCHECK(buffer->TryReadWriteChild(&sync_sample)); + if (buffer->Reading()) { + RCHECK(buffer->reader()->TryReadChildren(&sample_group_descriptions) && + buffer->reader()->TryReadChildren(&sample_to_groups)); + } else { + for (auto& sample_group_description : sample_group_descriptions) + RCHECK(buffer->ReadWriteChild(&sample_group_description)); + for (auto& sample_to_group : sample_to_groups) + RCHECK(buffer->ReadWriteChild(&sample_to_group)); + } return true; } uint32_t SampleTable::ComputeSizeInternal() { - return HeaderSize() + description.ComputeSize() + - decoding_time_to_sample.ComputeSize() + - composition_time_to_sample.ComputeSize() + - sample_to_chunk.ComputeSize() + sample_size.ComputeSize() + - chunk_large_offset.ComputeSize() + sync_sample.ComputeSize(); + uint32_t box_size = + HeaderSize() + description.ComputeSize() + + decoding_time_to_sample.ComputeSize() + + composition_time_to_sample.ComputeSize() + sample_to_chunk.ComputeSize() + + sample_size.ComputeSize() + chunk_large_offset.ComputeSize() + + sync_sample.ComputeSize(); + for (auto& sample_group_description : sample_group_descriptions) + box_size += sample_group_description.ComputeSize(); + for (auto& sample_to_group : sample_to_groups) + box_size += sample_to_group.ComputeSize(); + return box_size; } EditList::EditList() {} @@ -2199,152 +2395,6 @@ uint32_t TrackFragmentRun::ComputeSizeInternal() { return box_size; } -SampleToGroup::SampleToGroup() : grouping_type(0), grouping_type_parameter(0) {} -SampleToGroup::~SampleToGroup() {} -FourCC SampleToGroup::BoxType() const { return FOURCC_sbgp; } - -bool SampleToGroup::ReadWriteInternal(BoxBuffer* 
buffer) { - RCHECK(ReadWriteHeaderInternal(buffer) && - buffer->ReadWriteUInt32(&grouping_type)); - if (version == 1) - RCHECK(buffer->ReadWriteUInt32(&grouping_type_parameter)); - - if (grouping_type != FOURCC_seig) { - DCHECK(buffer->Reading()); - DLOG(WARNING) << "Sample group " - << FourCCToString(static_cast(grouping_type)) - << " is not supported."; - return true; - } - - uint32_t count = entries.size(); - RCHECK(buffer->ReadWriteUInt32(&count)); - entries.resize(count); - for (uint32_t i = 0; i < count; ++i) { - RCHECK(buffer->ReadWriteUInt32(&entries[i].sample_count) && - buffer->ReadWriteUInt32(&entries[i].group_description_index)); - } - return true; -} - -uint32_t SampleToGroup::ComputeSizeInternal() { - // This box is optional. Skip it if it is not used. - if (entries.empty()) - return 0; - return HeaderSize() + sizeof(grouping_type) + - (version == 1 ? sizeof(grouping_type_parameter) : 0) + - sizeof(uint32_t) + entries.size() * sizeof(entries[0]); -} - -CencSampleEncryptionInfoEntry::CencSampleEncryptionInfoEntry() - : is_protected(0), - per_sample_iv_size(0), - crypt_byte_block(0), - skip_byte_block(0) {} -CencSampleEncryptionInfoEntry::~CencSampleEncryptionInfoEntry() {}; - -SampleGroupDescription::SampleGroupDescription() : grouping_type(0) {} -SampleGroupDescription::~SampleGroupDescription() {} -FourCC SampleGroupDescription::BoxType() const { return FOURCC_sgpd; } - -bool SampleGroupDescription::ReadWriteInternal(BoxBuffer* buffer) { - RCHECK(ReadWriteHeaderInternal(buffer) && - buffer->ReadWriteUInt32(&grouping_type)); - - if (grouping_type != FOURCC_seig) { - DCHECK(buffer->Reading()); - DLOG(WARNING) << "Sample group '" << grouping_type << "' is not supported."; - return true; - } - - const size_t kEntrySize = sizeof(uint32_t) + kCencKeyIdSize; - uint32_t default_length = 0; - if (version == 1) { - if (buffer->Reading()) { - RCHECK(buffer->ReadWriteUInt32(&default_length)); - RCHECK(default_length == 0 || default_length >= kEntrySize); - } 
else { - default_length = kEntrySize; - RCHECK(buffer->ReadWriteUInt32(&default_length)); - } - } - - uint32_t count = entries.size(); - RCHECK(buffer->ReadWriteUInt32(&count)); - entries.resize(count); - for (uint32_t i = 0; i < count; ++i) { - if (version == 1) { - if (buffer->Reading() && default_length == 0) { - uint32_t description_length = 0; - RCHECK(buffer->ReadWriteUInt32(&description_length)); - RCHECK(description_length >= kEntrySize); - } - } - - if (!buffer->Reading()) { - if (entries[i].key_id.size() != kCencKeyIdSize) { - LOG(WARNING) << "CENC defines key id length of " << kCencKeyIdSize - << " bytes; got " << entries[i].key_id.size() - << ". Resized accordingly."; - entries[i].key_id.resize(kCencKeyIdSize); - } - RCHECK(entries[i].crypt_byte_block < 16 && - entries[i].skip_byte_block < 16); - } - - RCHECK(buffer->IgnoreBytes(1)); // reserved. - - uint8_t pattern = - entries[i].crypt_byte_block << 4 | entries[i].skip_byte_block; - RCHECK(buffer->ReadWriteUInt8(&pattern)); - entries[i].crypt_byte_block = pattern >> 4; - entries[i].skip_byte_block = pattern & 0x0F; - - RCHECK(buffer->ReadWriteUInt8(&entries[i].is_protected) && - buffer->ReadWriteUInt8(&entries[i].per_sample_iv_size) && - buffer->ReadWriteVector(&entries[i].key_id, kCencKeyIdSize)); - - if (entries[i].is_protected == 1) { - if (entries[i].per_sample_iv_size == 0) { // For constant iv. - uint8_t constant_iv_size = entries[i].constant_iv.size(); - RCHECK(buffer->ReadWriteUInt8(&constant_iv_size)); - RCHECK(constant_iv_size == 8 || constant_iv_size == 16); - RCHECK( - buffer->ReadWriteVector(&entries[i].constant_iv, constant_iv_size)); - } else { - RCHECK(entries[i].per_sample_iv_size == 8 || - entries[i].per_sample_iv_size == 16); - RCHECK(entries[i].constant_iv.empty()); - } - } else { - // Expect |is_protected| to be 0, i.e. not protected. Other values of - // |is_protected| is not supported. 
- RCHECK(entries[i].is_protected == 0); - RCHECK(entries[i].per_sample_iv_size == 0); - } - - } - return true; -} - -uint32_t SampleGroupDescription::ComputeSizeInternal() { - // Version 0 is obsoleted, so always generate version 1 box. - version = 1; - // This box is optional. Skip it if it is not used. - if (entries.empty()) - return 0; - size_t entries_size = 0; - for (const auto& entry : entries) { - entries_size += sizeof(uint32_t) + kCencKeyIdSize + - (entry.constant_iv.empty() - ? 0 - : (sizeof(uint8_t) + entry.constant_iv.size())); - } - return HeaderSize() + sizeof(grouping_type) + - (version == 1 ? sizeof(uint32_t) : 0) + sizeof(uint32_t) + - entries_size; -} - TrackFragment::TrackFragment() : decode_time_absent(false) {} TrackFragment::~TrackFragment() {} FourCC TrackFragment::BoxType() const { return FOURCC_traf; } @@ -2358,27 +2408,18 @@ bool TrackFragment::ReadWriteInternal(BoxBuffer* buffer) { decode_time_absent = !buffer->reader()->ChildExist(&decode_time); if (!decode_time_absent) RCHECK(buffer->ReadWriteChild(&decode_time)); - RCHECK(buffer->reader()->TryReadChildren(&runs)); - - // There could be multiple SampleGroupDescription and SampleToGroup boxes - // with different grouping types. For common encryption, the relevant - // grouping type is 'seig'. Continue reading until 'seig' is found, or - // until running out of child boxes. 
- while (sample_to_group.grouping_type != FOURCC_seig && - buffer->reader()->ChildExist(&sample_to_group)) { - RCHECK(buffer->reader()->ReadChild(&sample_to_group)); - } - while (sample_group_description.grouping_type != FOURCC_seig && - buffer->reader()->ChildExist(&sample_group_description)) { - RCHECK(buffer->reader()->ReadChild(&sample_group_description)); - } + RCHECK(buffer->reader()->TryReadChildren(&runs) && + buffer->reader()->TryReadChildren(&sample_group_descriptions) && + buffer->reader()->TryReadChildren(&sample_to_groups)); } else { if (!decode_time_absent) RCHECK(buffer->ReadWriteChild(&decode_time)); for (uint32_t i = 0; i < runs.size(); ++i) RCHECK(buffer->ReadWriteChild(&runs[i])); - RCHECK(buffer->TryReadWriteChild(&sample_to_group) && - buffer->TryReadWriteChild(&sample_group_description)); + for (uint32_t i = 0; i < sample_to_groups.size(); ++i) + RCHECK(buffer->ReadWriteChild(&sample_to_groups[i])); + for (uint32_t i = 0; i < sample_group_descriptions.size(); ++i) + RCHECK(buffer->ReadWriteChild(&sample_group_descriptions[i])); } return buffer->TryReadWriteChild(&auxiliary_size) && buffer->TryReadWriteChild(&auxiliary_offset) && @@ -2388,11 +2429,14 @@ bool TrackFragment::ReadWriteInternal(BoxBuffer* buffer) { uint32_t TrackFragment::ComputeSizeInternal() { uint32_t box_size = HeaderSize() + header.ComputeSize() + decode_time.ComputeSize() + - sample_to_group.ComputeSize() + sample_group_description.ComputeSize() + auxiliary_size.ComputeSize() + auxiliary_offset.ComputeSize() + sample_encryption.ComputeSize(); for (uint32_t i = 0; i < runs.size(); ++i) box_size += runs[i].ComputeSize(); + for (uint32_t i = 0; i < sample_group_descriptions.size(); ++i) + box_size += sample_group_descriptions[i].ComputeSize(); + for (uint32_t i = 0; i < sample_to_groups.size(); ++i) + box_size += sample_to_groups[i].ComputeSize(); return box_size; } diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index 
d9c835cec9..c1e3aef04d 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -468,6 +468,67 @@ struct SyncSample : FullBox { std::vector sample_number; }; +struct CencSampleEncryptionInfoEntry { + CencSampleEncryptionInfoEntry(); + ~CencSampleEncryptionInfoEntry(); + + bool ReadWrite(BoxBuffer* buffer); + uint32_t ComputeSize() const; + + uint8_t is_protected; + uint8_t per_sample_iv_size; + std::vector key_id; + + // For pattern-based encryption. + uint8_t crypt_byte_block; + uint8_t skip_byte_block; + + // Present only if |is_protected == 1 && per_sample_iv_size == 0|. + std::vector constant_iv; +}; + +struct AudioRollRecoveryEntry { + AudioRollRecoveryEntry(); + ~AudioRollRecoveryEntry(); + + bool ReadWrite(BoxBuffer* buffer); + uint32_t ComputeSize() const; + + int16_t roll_distance; +}; + +struct SampleGroupDescription : FullBox { + DECLARE_BOX_METHODS(SampleGroupDescription); + + template + bool ReadWriteEntries(BoxBuffer* buffer, std::vector* entries); + + uint32_t grouping_type; + // Only present if grouping_type == 'seig'. + std::vector + cenc_sample_encryption_info_entries; + // Only present if grouping_type == 'roll'. + std::vector audio_roll_recovery_entries; +}; + +struct SampleToGroupEntry { + enum GroupDescriptionIndexBase { + kTrackGroupDescriptionIndexBase = 0, + kTrackFragmentGroupDescriptionIndexBase = 0x10000, + }; + + uint32_t sample_count; + uint32_t group_description_index; +}; + +struct SampleToGroup : FullBox { + DECLARE_BOX_METHODS(SampleToGroup); + + uint32_t grouping_type; + uint32_t grouping_type_parameter; // Version 1 only. + std::vector entries; +}; + struct SampleTable : Box { DECLARE_BOX_METHODS(SampleTable); @@ -481,6 +542,8 @@ struct SampleTable : Box { // ChunkLargeOffset. 
ChunkLargeOffset chunk_large_offset; SyncSample sync_sample; + std::vector sample_group_descriptions; + std::vector sample_to_groups; }; struct MediaHeader : FullBox { @@ -654,47 +717,6 @@ struct TrackFragmentRun : FullBox { std::vector sample_composition_time_offsets; }; -struct SampleToGroupEntry { - enum GroupDescriptionIndexBase { - kTrackGroupDescriptionIndexBase = 0, - kTrackFragmentGroupDescriptionIndexBase = 0x10000, - }; - - uint32_t sample_count; - uint32_t group_description_index; -}; - -struct SampleToGroup : FullBox { - DECLARE_BOX_METHODS(SampleToGroup); - - uint32_t grouping_type; - uint32_t grouping_type_parameter; // Version 1 only. - std::vector entries; -}; - -struct CencSampleEncryptionInfoEntry { - CencSampleEncryptionInfoEntry(); - ~CencSampleEncryptionInfoEntry(); - - uint8_t is_protected; - uint8_t per_sample_iv_size; - std::vector key_id; - - // For pattern-based encryption. - uint8_t crypt_byte_block; - uint8_t skip_byte_block; - - // Present only if |is_protected == 1 && per_sample_iv_size == 0|. 
- std::vector constant_iv; -}; - -struct SampleGroupDescription : FullBox { - DECLARE_BOX_METHODS(SampleGroupDescription); - - uint32_t grouping_type; - std::vector entries; -}; - struct TrackFragment : Box { DECLARE_BOX_METHODS(TrackFragment); @@ -702,8 +724,8 @@ struct TrackFragment : Box { std::vector runs; bool decode_time_absent; TrackFragmentDecodeTime decode_time; - SampleToGroup sample_to_group; - SampleGroupDescription sample_group_description; + std::vector sample_group_descriptions; + std::vector sample_to_groups; SampleAuxiliaryInformationSize auxiliary_size; SampleAuxiliaryInformationOffset auxiliary_offset; SampleEncryption sample_encryption; diff --git a/packager/media/formats/mp4/box_definitions_comparison.h b/packager/media/formats/mp4/box_definitions_comparison.h index ba5c006186..dba15341e1 100644 --- a/packager/media/formats/mp4/box_definitions_comparison.h +++ b/packager/media/formats/mp4/box_definitions_comparison.h @@ -156,6 +156,42 @@ inline bool operator==(const SyncSample& lhs, const SyncSample& rhs) { return lhs.sample_number == rhs.sample_number; } +inline bool operator==(const CencSampleEncryptionInfoEntry& lhs, + const CencSampleEncryptionInfoEntry& rhs) { + return lhs.is_protected == rhs.is_protected && + lhs.per_sample_iv_size == rhs.per_sample_iv_size && + lhs.key_id == rhs.key_id && + lhs.crypt_byte_block == rhs.crypt_byte_block && + lhs.skip_byte_block == rhs.skip_byte_block && + lhs.constant_iv == rhs.constant_iv; +} + +inline bool operator==(const AudioRollRecoveryEntry& lhs, + const AudioRollRecoveryEntry& rhs) { + return lhs.roll_distance == rhs.roll_distance; +} + +inline bool operator==(const SampleGroupDescription& lhs, + const SampleGroupDescription& rhs) { + return lhs.grouping_type == rhs.grouping_type && + lhs.cenc_sample_encryption_info_entries == + rhs.cenc_sample_encryption_info_entries && + lhs.audio_roll_recovery_entries == rhs.audio_roll_recovery_entries; +} + +inline bool operator==(const SampleToGroupEntry& lhs, 
+ const SampleToGroupEntry& rhs) { + return lhs.sample_count == rhs.sample_count && + lhs.group_description_index == rhs.group_description_index; +} + +inline bool operator==(const SampleToGroup& lhs, + const SampleToGroup& rhs) { + return lhs.grouping_type == rhs.grouping_type && + lhs.grouping_type_parameter == rhs.grouping_type_parameter && + lhs.entries == rhs.entries; +} + inline bool operator==(const SampleTable& lhs, const SampleTable& rhs) { return lhs.description == rhs.description && lhs.decoding_time_to_sample == rhs.decoding_time_to_sample && @@ -163,7 +199,9 @@ inline bool operator==(const SampleTable& lhs, const SampleTable& rhs) { lhs.sample_to_chunk == rhs.sample_to_chunk && lhs.sample_size == rhs.sample_size && lhs.chunk_large_offset == rhs.chunk_large_offset && - lhs.sync_sample == rhs.sync_sample; + lhs.sync_sample == rhs.sync_sample && + lhs.sample_group_descriptions == rhs.sample_group_descriptions && + lhs.sample_to_groups == rhs.sample_to_groups; } inline bool operator==(const EditListEntry& lhs, const EditListEntry& rhs) { @@ -385,35 +423,6 @@ inline bool operator==(const TrackFragmentRun& lhs, rhs.sample_composition_time_offsets; } -inline bool operator==(const SampleToGroupEntry& lhs, - const SampleToGroupEntry& rhs) { - return lhs.sample_count == rhs.sample_count && - lhs.group_description_index == rhs.group_description_index; -} - -inline bool operator==(const SampleToGroup& lhs, - const SampleToGroup& rhs) { - return lhs.grouping_type == rhs.grouping_type && - lhs.grouping_type_parameter == rhs.grouping_type_parameter && - lhs.entries == rhs.entries; -} - -inline bool operator==(const CencSampleEncryptionInfoEntry& lhs, - const CencSampleEncryptionInfoEntry& rhs) { - return lhs.is_protected == rhs.is_protected && - lhs.per_sample_iv_size == rhs.per_sample_iv_size && - lhs.key_id == rhs.key_id && - lhs.crypt_byte_block == rhs.crypt_byte_block && - lhs.skip_byte_block == rhs.skip_byte_block && - lhs.constant_iv == rhs.constant_iv; -} - 
-inline bool operator==(const SampleGroupDescription& lhs, - const SampleGroupDescription& rhs) { - return lhs.grouping_type == rhs.grouping_type && - lhs.entries == rhs.entries; -} - inline bool operator==(const TrackFragment& lhs, const TrackFragment& rhs) { return lhs.header == rhs.header && lhs.runs == rhs.runs && lhs.decode_time == rhs.decode_time && diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index e5169aad46..610369a66d 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -567,6 +567,45 @@ class BoxDefinitionsTestGeneral : public testing::Test { void Modify(SyncSample* stss) { stss->sample_number.pop_back(); } + void Fill(SampleGroupDescription* sgpd) { + sgpd->grouping_type = FOURCC_seig; + sgpd->cenc_sample_encryption_info_entries.resize(2); + sgpd->cenc_sample_encryption_info_entries[0].is_protected = 1; + sgpd->cenc_sample_encryption_info_entries[0].per_sample_iv_size = 8; + sgpd->cenc_sample_encryption_info_entries[0].key_id.assign( + kData16Bytes, kData16Bytes + arraysize(kData16Bytes)); + sgpd->cenc_sample_encryption_info_entries[0].crypt_byte_block = 3; + sgpd->cenc_sample_encryption_info_entries[0].skip_byte_block = 7; + sgpd->cenc_sample_encryption_info_entries[1].is_protected = 0; + sgpd->cenc_sample_encryption_info_entries[1].per_sample_iv_size = 0; + sgpd->cenc_sample_encryption_info_entries[1].key_id.resize(16); + sgpd->version = 1; + } + + void Modify(SampleGroupDescription* sgpd) { + sgpd->cenc_sample_encryption_info_entries.resize(1); + sgpd->cenc_sample_encryption_info_entries[0].is_protected = 1; + sgpd->cenc_sample_encryption_info_entries[0].per_sample_iv_size = 0; + sgpd->cenc_sample_encryption_info_entries[0].constant_iv.assign( + kData16Bytes, kData16Bytes + arraysize(kData16Bytes)); + sgpd->cenc_sample_encryption_info_entries[0].key_id.resize(16); + } + + void 
Fill(SampleToGroup* sbgp) { + sbgp->grouping_type = FOURCC_seig; + sbgp->entries.resize(2); + sbgp->entries[0].sample_count = 3; + sbgp->entries[0].group_description_index = 0x10002; + sbgp->entries[1].sample_count = 1212; + sbgp->entries[1].group_description_index = 0x10001; + } + + void Modify(SampleToGroup* sbgp) { + sbgp->entries.resize(1); + sbgp->entries[0].sample_count = 5; + sbgp->entries[0].group_description_index = 0x10001; + } + void Fill(SampleTable* stbl) { Fill(&stbl->description); Fill(&stbl->decoding_time_to_sample); @@ -575,11 +614,17 @@ class BoxDefinitionsTestGeneral : public testing::Test { Fill(&stbl->sample_size); Fill(&stbl->chunk_large_offset); Fill(&stbl->sync_sample); + stbl->sample_group_descriptions.resize(1); + Fill(&stbl->sample_group_descriptions[0]); + stbl->sample_to_groups.resize(1); + Fill(&stbl->sample_to_groups[0]); } void Modify(SampleTable* stbl) { Modify(&stbl->chunk_large_offset); Modify(&stbl->sync_sample); + stbl->sample_group_descriptions.clear(); + stbl->sample_to_groups.clear(); } void Fill(MediaHeader* mdhd) { @@ -768,47 +813,6 @@ class BoxDefinitionsTestGeneral : public testing::Test { trun->version = 0; } - void Fill(SampleToGroup* sbgp) { - sbgp->grouping_type = FOURCC_seig; - sbgp->entries.resize(2); - sbgp->entries[0].sample_count = 3; - sbgp->entries[0].group_description_index = 0x10002; - sbgp->entries[1].sample_count = 1212; - sbgp->entries[1].group_description_index = 0x10001; - } - - void Modify(SampleToGroup* sbgp) { - sbgp->entries.resize(1); - sbgp->entries[0].sample_count = 5; - sbgp->entries[0].group_description_index = 0x10001; - } - - void Fill(SampleGroupDescription* sgpd) { - sgpd->grouping_type = FOURCC_seig; - sgpd->entries.resize(3); - sgpd->entries[0].is_protected = 1; - sgpd->entries[0].per_sample_iv_size = 8; - sgpd->entries[0].key_id.assign(kData16Bytes, - kData16Bytes + arraysize(kData16Bytes)); - sgpd->entries[0].crypt_byte_block = 3; - sgpd->entries[0].skip_byte_block = 7; - 
sgpd->entries[1].is_protected = 0; - sgpd->entries[1].per_sample_iv_size = 0; - sgpd->entries[1].key_id.resize(16); - sgpd->entries[2].is_protected = 1; - sgpd->entries[2].per_sample_iv_size = 0; - sgpd->entries[2].constant_iv.assign(kData16Bytes, - kData16Bytes + arraysize(kData16Bytes)); - sgpd->entries[2].key_id.resize(16); - sgpd->version = 1; - } - - void Modify(SampleGroupDescription* sgpd) { - sgpd->entries.resize(1); - sgpd->entries[0].key_id[4] = 88; - sgpd->version = 1; - } - void Fill(TrackFragment* traf) { Fill(&traf->header); traf->runs.resize(1); @@ -821,8 +825,19 @@ class BoxDefinitionsTestGeneral : public testing::Test { void Modify(TrackFragment* traf) { Modify(&traf->header); Modify(&traf->decode_time); - Fill(&traf->sample_to_group); - Fill(&traf->sample_group_description); + + traf->sample_group_descriptions.resize(2); + Fill(&traf->sample_group_descriptions[0]); + traf->sample_group_descriptions[1].grouping_type = FOURCC_roll; + traf->sample_group_descriptions[1].audio_roll_recovery_entries.resize(1); + traf->sample_group_descriptions[1] + .audio_roll_recovery_entries[0] + .roll_distance = -10; + + traf->sample_to_groups.resize(2); + Fill(&traf->sample_to_groups[0]); + Modify(&traf->sample_to_groups[1]); + traf->sample_to_groups[1].grouping_type = FOURCC_roll; } void Fill(MovieFragment* moof) { @@ -951,10 +966,10 @@ class BoxDefinitionsTestGeneral : public testing::Test { bool IsOptional(const WebVTTSourceLabelBox* box) { return true; } bool IsOptional(const CompositionTimeToSample* box) { return true; } bool IsOptional(const SyncSample* box) { return true; } + bool IsOptional(const SampleGroupDescription* box) { return true; } + bool IsOptional(const SampleToGroup* box) { return true; } bool IsOptional(const MovieExtendsHeader* box) { return true; } bool IsOptional(const MovieExtends* box) { return true; } - bool IsOptional(const SampleToGroup* box) { return true; } - bool IsOptional(const SampleGroupDescription* box) { return true; } bool 
IsOptional(const CueSourceIDBox* box) { return true; } bool IsOptional(const CueIDBox* box) { return true; } bool IsOptional(const CueTimeBox* box) { return true; } @@ -1006,6 +1021,8 @@ typedef testing::Types Boxes; typedef testing::Types #include "packager/media/base/buffer_writer.h" +#include "packager/media/base/audio_stream_info.h" #include "packager/media/base/media_sample.h" #include "packager/media/formats/mp4/box_definitions.h" @@ -18,10 +19,18 @@ namespace mp4 { namespace { const int64_t kInvalidTime = std::numeric_limits::max(); + +uint64_t GetSeekPreroll(const StreamInfo& stream_info) { + if (stream_info.stream_type() != kStreamAudio) return 0; + const AudioStreamInfo& audio_stream_info = + static_cast(stream_info); + return audio_stream_info.seek_preroll_ns(); +} } // namespace -Fragmenter::Fragmenter(TrackFragment* traf) +Fragmenter::Fragmenter(scoped_refptr info, TrackFragment* traf) : traf_(traf), + seek_preroll_(GetSeekPreroll(*info)), fragment_initialized_(false), fragment_finalized_(false), fragment_duration_(0), @@ -80,6 +89,8 @@ Status Fragmenter::InitializeFragment(int64_t first_sample_dts) { traf_->runs.clear(); traf_->runs.resize(1); traf_->runs[0].flags = TrackFragmentRun::kDataOffsetPresentMask; + traf_->sample_group_descriptions.clear(); + traf_->sample_to_groups.clear(); traf_->header.sample_description_index = 1; // 1-based. traf_->header.flags = TrackFragmentHeader::kDefaultBaseIsMoofMask | TrackFragmentHeader::kSampleDescriptionIndexPresentMask; @@ -113,6 +124,35 @@ void Fragmenter::FinalizeFragment() { traf_->runs[0].flags |= TrackFragmentRun::kSampleFlagsPresentMask; } + // Add SampleToGroup boxes. A SampleToGroup box with grouping type of 'roll' + // needs to be added if there is seek preroll, referencing sample group + // description in track level; Also need to add SampleToGroup boxes + // corresponding to every SampleGroupDescription box, referencing sample + // group description in fragment level. 
+ DCHECK_EQ(traf_->sample_to_groups.size(), 0u); + if (seek_preroll_ > 0) { + traf_->sample_to_groups.resize(traf_->sample_to_groups.size() + 1); + SampleToGroup& sample_to_group = traf_->sample_to_groups.back(); + sample_to_group.grouping_type = FOURCC_roll; + + sample_to_group.entries.resize(1); + SampleToGroupEntry& sample_to_group_entry = sample_to_group.entries.back(); + sample_to_group_entry.sample_count = traf_->runs[0].sample_count; + sample_to_group_entry.group_description_index = + SampleToGroupEntry::kTrackGroupDescriptionIndexBase + 1; + } + for (const auto& sample_group_description : traf_->sample_group_descriptions) { + traf_->sample_to_groups.resize(traf_->sample_to_groups.size() + 1); + SampleToGroup& sample_to_group = traf_->sample_to_groups.back(); + sample_to_group.grouping_type = sample_group_description.grouping_type; + + sample_to_group.entries.resize(1); + SampleToGroupEntry& sample_to_group_entry = sample_to_group.entries.back(); + sample_to_group_entry.sample_count = traf_->runs[0].sample_count; + sample_to_group_entry.group_description_index = + SampleToGroupEntry::kTrackFragmentGroupDescriptionIndexBase + 1; + } + fragment_finalized_ = true; fragment_initialized_ = false; } diff --git a/packager/media/formats/mp4/fragmenter.h b/packager/media/formats/mp4/fragmenter.h index 8da737fcd3..10e0fdec46 100644 --- a/packager/media/formats/mp4/fragmenter.h +++ b/packager/media/formats/mp4/fragmenter.h @@ -19,6 +19,7 @@ namespace media { class BufferWriter; class MediaSample; +class StreamInfo; namespace mp4 { @@ -29,8 +30,9 @@ struct TrackFragment; /// box and corresponding 'mdat' box. class Fragmenter { public: + /// @param info contains stream information. /// @param traf points to a TrackFragment box. 
- Fragmenter(TrackFragment* traf); + Fragmenter(scoped_refptr<StreamInfo> info, TrackFragment* traf); virtual ~Fragmenter(); @@ -74,6 +76,7 @@ class Fragmenter { bool StartsWithSAP(); TrackFragment* traf_; + uint64_t seek_preroll_; bool fragment_initialized_; bool fragment_finalized_; uint64_t fragment_duration_; diff --git a/packager/media/formats/mp4/key_rotation_fragmenter.cc b/packager/media/formats/mp4/key_rotation_fragmenter.cc index a080304507..6dbb677b24 100644 --- a/packager/media/formats/mp4/key_rotation_fragmenter.cc +++ b/packager/media/formats/mp4/key_rotation_fragmenter.cc @@ -107,9 +107,15 @@ Status KeyRotationFragmenter::PrepareFragmentForEncryption( // i.e. there is at most one key for the fragment. So there should be only // one entry in SampleGroupDescription box and one entry in SampleToGroup box. // Fill in SampleGroupDescription box information. - traf()->sample_group_description.grouping_type = FOURCC_seig; - traf()->sample_group_description.entries.resize(1); - auto& sample_group_entry = traf()->sample_group_description.entries[0]; + traf()->sample_group_descriptions.resize( + traf()->sample_group_descriptions.size() + 1); + SampleGroupDescription& sample_group_description = + traf()->sample_group_descriptions.back(); + sample_group_description.grouping_type = FOURCC_seig; + + sample_group_description.cenc_sample_encryption_info_entries.resize(1); + CencSampleEncryptionInfoEntry& sample_group_entry = + sample_group_description.cenc_sample_encryption_info_entries.back(); sample_group_entry.is_protected = 1; if (protection_scheme() == FOURCC_cbcs) { // For 'cbcs' scheme, Constant IVs SHALL be used. @@ -122,23 +128,9 @@ Status KeyRotationFragmenter::PrepareFragmentForEncryption( sample_group_entry.skip_byte_block = skip_byte_block(); sample_group_entry.key_id = encryption_key()->key_id; - // Fill in SampleToGroup box information.
- traf()->sample_to_group.grouping_type = FOURCC_seig; - traf()->sample_to_group.entries.resize(1); - // sample_count is adjusted in |FinalizeFragment| later. - traf()->sample_to_group.entries[0].group_description_index = - SampleToGroupEntry::kTrackFragmentGroupDescriptionIndexBase + 1; - return Status::OK; } -void KeyRotationFragmenter::FinalizeFragmentForEncryption() { - EncryptingFragmenter::FinalizeFragmentForEncryption(); - DCHECK_EQ(1u, traf()->sample_to_group.entries.size()); - traf()->sample_to_group.entries[0].sample_count = - traf()->auxiliary_size.sample_count; -} - } // namespace mp4 } // namespace media } // namespace edash_packager diff --git a/packager/media/formats/mp4/key_rotation_fragmenter.h b/packager/media/formats/mp4/key_rotation_fragmenter.h index ee138bb7f5..88f5162417 100644 --- a/packager/media/formats/mp4/key_rotation_fragmenter.h +++ b/packager/media/formats/mp4/key_rotation_fragmenter.h @@ -23,6 +23,7 @@ struct MovieFragment; class KeyRotationFragmenter : public EncryptingFragmenter { public: /// @param moof points to a MovieFragment box. + /// @param info contains stream information. /// @param traf points to a TrackFragment box. /// @param encryption_key_source points to the source which generates /// encryption keys. @@ -57,7 +58,6 @@ class KeyRotationFragmenter : public EncryptingFragmenter { /// @name Fragmenter implementation overrides. /// @{ Status PrepareFragmentForEncryption(bool enable_encryption) override; - void FinalizeFragmentForEncryption() override; /// @} private: diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index c30c317cec..a3a9f9ef88 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -440,6 +440,34 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { return false; } + // Extract possible seek preroll. 
+ uint64_t seek_preroll_ns = 0; + for (const auto& sample_group_description : + track->media.information.sample_table.sample_group_descriptions) { + if (sample_group_description.grouping_type != FOURCC_roll) + continue; + const auto& audio_roll_recovery_entries = + sample_group_description.audio_roll_recovery_entries; + if (audio_roll_recovery_entries.size() != 1) { + LOG(WARNING) << "Unexpected number of entries in " + "SampleGroupDescription table with grouping type " + "'roll'."; + break; + } + const int16_t roll_distance_in_samples = + audio_roll_recovery_entries[0].roll_distance; + if (roll_distance_in_samples < 0) { + RCHECK(sampling_frequency != 0); + seek_preroll_ns = kNanosecondsPerSecond * + (-roll_distance_in_samples) / sampling_frequency; + } else { + LOG(WARNING) + << "Roll distance is supposed to be negative, but seeing " + << roll_distance_in_samples; + } + break; + } + const bool is_encrypted = entry.sinf.info.track_encryption.default_is_protected == 1; DVLOG(1) << "is_audio_track_encrypted_: " << is_encrypted; @@ -453,7 +481,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { entry.samplesize, num_channels, sampling_frequency, - 0 /* seek preroll */, + seek_preroll_ns, codec_delay_ns, max_bitrate, avg_bitrate, diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index fed1e2d568..172ac663b4 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -287,10 +287,41 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, audio.channelcount = audio_info->num_channels(); audio.samplesize = audio_info->sample_bits(); audio.samplerate = audio_info->sampling_frequency(); - SampleDescription& sample_description = - trak->media.information.sample_table.description; + SampleTable& sample_table = trak->media.information.sample_table; + SampleDescription& sample_description = sample_table.description; sample_description.type = kAudio; 
sample_description.audio_entries.push_back(audio); + + // Opus requires at least one sample group description box and at least one + // sample to group box with grouping type 'roll' within sample table box. + if (audio_info->codec() == kCodecOpus) { + sample_table.sample_group_descriptions.resize(1); + SampleGroupDescription& sample_group_description = + sample_table.sample_group_descriptions.back(); + sample_group_description.grouping_type = FOURCC_roll; + sample_group_description.audio_roll_recovery_entries.resize(1); + // The roll distance is in sample units and is always negative. Round in + // unsigned arithmetic, then negate only after casting to a signed type. + const uint64_t kNanosecondsPerSecond = 1000000000ull; + sample_group_description.audio_roll_recovery_entries[0].roll_distance = + -static_cast<int16_t>((audio_info->seek_preroll_ns() * audio.samplerate + + kNanosecondsPerSecond / 2) / + kNanosecondsPerSecond); + + sample_table.sample_to_groups.resize(1); + SampleToGroup& sample_to_group = sample_table.sample_to_groups.back(); + sample_to_group.grouping_type = FOURCC_roll; + + sample_to_group.entries.resize(1); + SampleToGroupEntry& sample_to_group_entry = sample_to_group.entries.back(); + // All samples are in track fragments.
+ sample_to_group_entry.sample_count = 0; + sample_to_group_entry.group_description_index = + SampleToGroupEntry::kTrackGroupDescriptionIndexBase + 1; + } else if (audio_info->seek_preroll_ns() != 0) { + LOG(WARNING) << "Unexpected seek preroll for codec " << audio_info->codec(); + return; + } } bool MP4Muxer::GetInitRangeStartAndEnd(uint32_t* start, uint32_t* end) { diff --git a/packager/media/formats/mp4/segmenter.cc b/packager/media/formats/mp4/segmenter.cc index 25a6222805..fcd90ab9c6 100644 --- a/packager/media/formats/mp4/segmenter.cc +++ b/packager/media/formats/mp4/segmenter.cc @@ -170,7 +170,7 @@ Status Segmenter::Initialize(const std::vector& streams, sidx_->reference_id = i + 1; } if (!encryption_key_source) { - fragmenters_[i] = new Fragmenter(&moof_->tracks[i]); + fragmenters_[i] = new Fragmenter(streams[i]->info(), &moof_->tracks[i]); continue; }