Added support for multiple audio & video configurations in WVM parser.

BUG=24614172 Change-Id: I197a0ebdfa6df90abc27358762cae041e4f8fd45
2015-10-09 15:09:09 -07:00 · 2015-10-09 15:09:09 -07:00 · cb7173d280
parent 93e6c4fc92
commit cb7173d280
4 changed files with 163 additions and 66 deletions
--- a/packager/media/formats/wvm/wvm.gyp
+++ b/packager/media/formats/wvm/wvm.gyp
@ -20,6 +20,7 @@
        '../../base/media_base.gyp:base',
        '../../filters/filters.gyp:filters',
        '../../formats/mp2t/mp2t.gyp:mp2t',
+        '../../formats/mp4/mp4.gyp:mp4',
        '../mpeg/mpeg.gyp:mpeg',
      ],
    },
--- a/packager/media/formats/wvm/wvm_media_parser.cc
+++ b/packager/media/formats/wvm/wvm_media_parser.cc
@ -8,6 +8,7 @@
 #include <sstream>
 #include <vector>

+#include "packager/base/stl_util.h"
 #include "packager/base/strings/string_number_conversions.h"
 #include "packager/media/base/aes_encryptor.h"
 #include "packager/media/base/audio_stream_info.h"
@ -17,6 +18,8 @@
 #include "packager/media/base/video_stream_info.h"
 #include "packager/media/filters/h264_parser.h"
 #include "packager/media/formats/mp2t/adts_header.h"
+#include "packager/media/formats/mp4/aac_audio_specific_config.h"
+#include "packager/media/formats/mp4/es_descriptor.h"

 #define HAS_HEADER_EXTENSION(x) ((x != 0xBC) && (x != 0xBE) && (x != 0xBF) \
         && (x != 0xF0) && (x != 0xF2) && (x != 0xF8) \
@ -543,22 +546,22 @@ bool WvmMediaParser::ParseIndexEntry() {
    return false;
  }

-  const uint8_t* read_ptr_index = &index_data_[0];
-  if (ntohlFromBuffer(read_ptr_index) != kIndexMagic) {
+  const uint8_t* read_ptr = vector_as_array(&index_data_);
+  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
    index_data_.clear();
    return false;
  }
-  read_ptr_index += 4;
+  read_ptr += 4;

-  uint32_t version = ntohlFromBuffer(read_ptr_index);
-  read_ptr_index += 4;
+  uint32_t version = ntohlFromBuffer(read_ptr);
+  read_ptr += 4;
  if (version == kVersion4) {
-    index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr_index);
+    index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
    if (index_data_.size() < index_size) {
      // We do not yet have the full index. Keep accumulating index data.
      return true;
    }
-    read_ptr_index += sizeof(uint32_t);
+    read_ptr += sizeof(uint32_t);

    // Index metadata
    uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
@ -581,90 +584,89 @@ bool WvmMediaParser::ParseIndexEntry() {
    int video_pes_stream_id = 0;
    bool has_video = false;
    bool has_audio = false;
-    std::vector<uint8_t> decoder_config_record;
-    std::string video_codec_string;
-    std::string audio_codec_string;
-    uint8_t num_index_entries = *read_ptr_index;
-    ++read_ptr_index;
+    std::vector<uint8_t> audio_codec_config;
+    std::vector<uint8_t> video_codec_config;
+    uint8_t num_index_entries = *read_ptr;
+    ++read_ptr;
    --index_metadata_max_size;

    for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
      if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
        return false;
      }
-      uint8_t tag = *read_ptr_index;
-      ++read_ptr_index;
-      uint8_t type = *read_ptr_index;
-      ++read_ptr_index;
-      uint32_t length = ntohlFromBuffer(read_ptr_index);
-      read_ptr_index += sizeof(uint32_t);
+      uint8_t tag = *read_ptr;
+      ++read_ptr;
+      uint8_t type = *read_ptr;
+      ++read_ptr;
+      uint32_t length = ntohlFromBuffer(read_ptr);
+      read_ptr += sizeof(uint32_t);
      index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
      if (index_metadata_max_size < length) {
        return false;
      }
      int64_t value = 0;
      Tag tagtype = Unset;
-      std::vector<uint8_t> binary_data(length);
+      std::vector<uint8_t> binary_data;
      switch (Type(type)) {
        case Type_uint8:
          if (length == sizeof(uint8_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_int8:
          if (length == sizeof(int8_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_uint16:
          if (length == sizeof(uint16_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_int16:
          if (length == sizeof(int16_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_uint32:
          if (length == sizeof(uint32_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_int32:
          if (length == sizeof(int32_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_uint64:
          if (length == sizeof(uint64_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_int64:
          if (length == sizeof(int64_t)) {
-            tagtype = GetTag(tag, length, read_ptr_index, &value);
+            tagtype = GetTag(tag, length, read_ptr, &value);
          } else {
            return false;
          }
          break;
        case Type_string:
        case Type_BinaryData:
-          memcpy(&binary_data[0], read_ptr_index, length);
+          binary_data.assign(read_ptr, read_ptr + length);
          tagtype = Tag(tag);
          break;
        default:
@ -705,24 +707,41 @@ bool WvmMediaParser::ParseIndexEntry() {
        case VideoPixelHeight:
          pixel_height = static_cast<uint32_t>(value);
          break;
+        case Audio_EsDescriptor: {
+          mp4::ESDescriptor descriptor;
+          if (!descriptor.Parse(binary_data)) {
+            LOG(ERROR) <<
+                "Could not extract AudioSpecificConfig from ES_Descriptor";
+            return false;
+          }
+          audio_codec_config = descriptor.decoder_specific_info();
+          break;
+        }
+        case Audio_EC3SpecificData:
+        case Audio_DtsSpecificData:
+        case Audio_AC3SpecificData:
+          LOG(ERROR) << "Audio type not supported.";
+          return false;
+        case AVCDecoderConfigurationRecord:
+          video_codec_config = binary_data;
+          break;
        default:
          break;
      }

-      read_ptr_index += length;
+      read_ptr += length;
      index_metadata_max_size -= length;
    }
    // End Index metadata
-    index_size = read_ptr_index - &index_data_[0];
+    index_size = read_ptr - vector_as_array(&index_data_);

-    // Extra data for both audio and video streams not set here, but in
-    // Output().
    if (has_video) {
      VideoCodec video_codec = kCodecH264;
      stream_infos_.push_back(new VideoStreamInfo(
          stream_id_count_, time_scale, track_duration, video_codec,
-          video_codec_string, std::string(), video_width, video_height,
-          pixel_width, pixel_height, trick_play_rate, nalu_length_size, NULL, 0,
+          std::string(), std::string(), video_width, video_height,
+          pixel_width, pixel_height, trick_play_rate, nalu_length_size,
+          vector_as_array(&video_codec_config), video_codec_config.size(),
          true));
      program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
                                base::UintToString(video_pes_stream_id ?
@ -734,8 +753,9 @@ bool WvmMediaParser::ParseIndexEntry() {
      AudioCodec audio_codec = kCodecAAC;
      stream_infos_.push_back(new AudioStreamInfo(
          stream_id_count_, time_scale, track_duration, audio_codec,
-          audio_codec_string, std::string(), kAacSampleSizeBits, num_channels,
-          sampling_frequency, NULL, 0, true));
+          std::string(), std::string(), kAacSampleSizeBits, num_channels,
+          sampling_frequency, vector_as_array(&audio_codec_config),
+          audio_codec_config.size(), true));
      program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
                                base::UintToString(audio_pes_stream_id ?
                                                   audio_pes_stream_id :
@ -788,14 +808,16 @@ void WvmMediaParser::StartMediaSampleDemux() {

 bool WvmMediaParser::Output(bool output_encrypted_sample) {
  if (output_encrypted_sample) {
-    media_sample_->set_data(&sample_data_[0], sample_data_.size());
+    media_sample_->set_data(vector_as_array(&sample_data_),
+                            sample_data_.size());
    media_sample_->set_is_encrypted(true);
  } else {
    if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
-      // Set data on the video stream from the NalUnitStream.
+      // Convert video stream to unit stream and get config.
      std::vector<uint8_t> nal_unit_stream;
      if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
-              &sample_data_[0], sample_data_.size(), &nal_unit_stream)) {
+              vector_as_array(&sample_data_), sample_data_.size(),
+              &nal_unit_stream)) {
        LOG(ERROR) << "Could not convert h.264 byte stream sample";
        return false;
      }
@ -808,11 +830,21 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
            &decoder_config_record);
        for (uint32_t i = 0; i < stream_infos_.size(); i++) {
          if (stream_infos_[i]->stream_type() == media::kStreamVideo &&
-              stream_infos_[i]->extra_data().empty()) {
+              stream_infos_[i]->codec_string().empty()) {
+            const std::vector<uint8_t>* stream_config;
+            if (stream_infos_[i]->extra_data().empty()) {
+              // Decoder config record not available for stream. Use the one
+              // computed from the first video stream.
              stream_infos_[i]->set_extra_data(decoder_config_record);
+              stream_config = &decoder_config_record;
+            } else {
+              // Use stream-specific config record.
+              stream_config = &stream_infos_[i]->extra_data();
+            }
+            DCHECK(stream_config);
            stream_infos_[i]->set_codec_string(VideoStreamInfo::GetCodecString(
-                kCodecH264, decoder_config_record[1], decoder_config_record[2],
-                decoder_config_record[3]));
+                kCodecH264, (*stream_config)[1], (*stream_config)[2],
+                (*stream_config)[3]));

            VideoStreamInfo* video_stream_info =
                reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
@ -821,7 +853,7 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
            uint32_t pixel_width = 0;
            uint32_t pixel_height = 0;
            if (!ExtractResolutionFromDecoderConfig(
-                    &decoder_config_record[0], decoder_config_record.size(),
+                    vector_as_array(stream_config), stream_config->size(),
                    &coded_width, &coded_height, &pixel_width, &pixel_height)) {
              LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
              return false;
@ -857,14 +889,12 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
      }
    } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
        kPesStreamIdAudio) {
-      // Set data on the audio stream from AdtsHeader.
+      // Set data on the audio stream.
      int frame_size = media::mp2t::AdtsHeader::GetAdtsFrameSize(
-          &sample_data_[0], kAdtsHeaderMinSize);
+          vector_as_array(&sample_data_), kAdtsHeaderMinSize);
      media::mp2t::AdtsHeader adts_header;
-      const uint8_t* frame_ptr = &sample_data_[0];
-      std::vector<uint8_t> extra_data;
-      if (!adts_header.Parse(frame_ptr, frame_size) ||
-          !adts_header.GetAudioSpecificConfig(&extra_data)) {
+      const uint8_t* frame_ptr = vector_as_array(&sample_data_);
+      if (!adts_header.Parse(frame_ptr, frame_size)) {
        LOG(ERROR) << "Could not parse ADTS header";
        return false;
      }
@ -875,18 +905,36 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
      if (!is_initialized_) {
        for (uint32_t i = 0; i < stream_infos_.size(); i++) {
          if (stream_infos_[i]->stream_type() == media::kStreamAudio &&
-              stream_infos_[i]->extra_data().empty()) {
+              stream_infos_[i]->codec_string().empty()) {
+            AudioStreamInfo* audio_stream_info =
+                reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
+            if (audio_stream_info->extra_data().empty()) {
              // Set AudioStreamInfo fields using information from the ADTS
              // header.
-            AudioStreamInfo* audio_stream_info =
-                reinterpret_cast<AudioStreamInfo*>(
-                    stream_infos_[i].get());
              audio_stream_info->set_sampling_frequency(
                  adts_header.GetSamplingFrequency());
-            audio_stream_info->set_extra_data(extra_data);
+              std::vector<uint8_t> audio_specific_config;
+              if (!adts_header.GetAudioSpecificConfig(&audio_specific_config)) {
+                LOG(ERROR) << "Could not compute AACaudiospecificconfig";
+                return false;
+              }
+              audio_stream_info->set_extra_data(audio_specific_config);
              audio_stream_info->set_codec_string(
                  AudioStreamInfo::GetCodecString(
                      kCodecAAC, adts_header.GetObjectType()));
+            } else {
+              // Set AudioStreamInfo fields using information from the
+              // AACAudioSpecificConfig record.
+              mp4::AACAudioSpecificConfig aac_config;
+              if (!aac_config.Parse(stream_infos_[i]->extra_data())) {
+                LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
+                return false;
+              }
+              audio_stream_info->set_sampling_frequency(aac_config.frequency());
+              audio_stream_info->set_codec_string(
+                  AudioStreamInfo::GetCodecString(
+                      kCodecAAC, aac_config.audio_object_type()));
+            }
          }
        }
      }
@ -894,15 +942,15 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
  }

  if (!is_initialized_) {
-    bool is_extra_data_in_stream_infos = true;
+    bool all_streams_have_config = true;
    // Check if all collected stream infos have extra_data set.
    for (uint32_t i = 0; i < stream_infos_.size(); i++) {
-      if (stream_infos_[i]->extra_data().empty()) {
-        is_extra_data_in_stream_infos = false;
+      if (stream_infos_[i]->codec_string().empty()) {
+        all_streams_have_config = false;
        break;
      }
    }
-    if (is_extra_data_in_stream_infos) {
+    if (all_streams_have_config) {
      init_cb_.Run(stream_infos_);
      is_initialized_ = true;
    }
@ -1074,7 +1122,7 @@ bool WvmMediaParser::ProcessEcm() {
      kEcmPaddingSizeBytes;  // flags + contentKey + padding.
  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
  asset_decryptor.Decrypt(
-      ecm_data, content_key_buffer_size, &content_key_buffer[0]);
+      ecm_data, content_key_buffer_size, vector_as_array(&content_key_buffer));

  std::vector<uint8_t> decrypted_content_key_vec(
      content_key_buffer.begin() + 4,
--- a/packager/media/formats/wvm/wvm_media_parser_unittest.cc
+++ b/packager/media/formats/wvm/wvm_media_parser_unittest.cc
@ -12,6 +12,7 @@
 #include "packager/base/bind_helpers.h"
 #include "packager/base/logging.h"
 #include "packager/base/memory/ref_counted.h"
+#include "packager/media/base/audio_stream_info.h"
 #include "packager/media/base/media_sample.h"
 #include "packager/media/base/request_signer.h"
 #include "packager/media/base/stream_info.h"
@ -37,6 +38,7 @@ const uint8_t k64ByteAssetKey[] =
    "\x92\x48\xd2\x45\x39\x0e\x0a\x49\xd4\x83\xba\x9b\x43\xfc\x69\xc3"
    "\x92\x48\xd2\x45\x39\x0e\x0a\x49\xd4\x83\xba\x9b\x43\xfc\x69\xc3";
 const size_t kInitDataSize = 0x4000;
+const char kMultiConfigWvmFile[] = "bear-multi-configs.wvm";
 }  // namespace

 using ::testing::_;
@ -209,6 +211,52 @@ TEST_F(WvmMediaParserTest, ParseWvmWith64ByteAssetKey) {
  EXPECT_EQ(kExpectedAudioFrameCount, audio_frame_count_);
 }

+TEST_F(WvmMediaParserTest, ParseMultiConfigWvm) {
+  EXPECT_CALL(*key_source_, FetchKeys(_)).WillOnce(Return(Status::OK));
+  EXPECT_CALL(*key_source_, GetKey(_, _))
+      .WillOnce(DoAll(SetArgPointee<1>(encryption_key_), Return(Status::OK)));
+  Parse(kMultiConfigWvmFile);
+  EXPECT_EQ(6u, stream_map_.size());
+
+  ASSERT_EQ(kStreamVideo, stream_map_[0]->stream_type());
+  VideoStreamInfo* video_info = reinterpret_cast<VideoStreamInfo*>(
+      stream_map_[0].get());
+  EXPECT_EQ("avc1.64000d", video_info->codec_string());
+  EXPECT_EQ(320u, video_info->width());
+  EXPECT_EQ(180u, video_info->height());
+
+  ASSERT_EQ(kStreamAudio, stream_map_[1]->stream_type());
+  AudioStreamInfo* audio_info = reinterpret_cast<AudioStreamInfo*>(
+      stream_map_[1].get());
+  EXPECT_EQ("mp4a.40.2", audio_info->codec_string());
+  EXPECT_EQ(2u, audio_info->num_channels());
+  EXPECT_EQ(44100u, audio_info->sampling_frequency());
+
+  ASSERT_EQ(kStreamVideo, stream_map_[2]->stream_type());
+  video_info = reinterpret_cast<VideoStreamInfo*>(stream_map_[2].get());
+  EXPECT_EQ("avc1.64001e", video_info->codec_string());
+  EXPECT_EQ(640u, video_info->width());
+  EXPECT_EQ(360u, video_info->height());
+
+  ASSERT_EQ(kStreamAudio, stream_map_[3]->stream_type());
+  audio_info = reinterpret_cast<AudioStreamInfo*>(stream_map_[3].get());
+  EXPECT_EQ("mp4a.40.2", audio_info->codec_string());
+  EXPECT_EQ(2u, audio_info->num_channels());
+  EXPECT_EQ(44100u, audio_info->sampling_frequency());
+
+  ASSERT_EQ(kStreamVideo, stream_map_[4]->stream_type());
+  video_info = reinterpret_cast<VideoStreamInfo*>(stream_map_[4].get());
+  EXPECT_EQ("avc1.64001f", video_info->codec_string());
+  EXPECT_EQ(1280u, video_info->width());
+  EXPECT_EQ(720u, video_info->height());
+
+  ASSERT_EQ(kStreamAudio, stream_map_[5]->stream_type());
+  audio_info = reinterpret_cast<AudioStreamInfo*>(stream_map_[5].get());
+  EXPECT_EQ("mp4a.40.2", audio_info->codec_string());
+  EXPECT_EQ(2u, audio_info->num_channels());
+  EXPECT_EQ(48000u, audio_info->sampling_frequency());
+}
+
 }  // namespace wvm
 }  // namespace media
 }  // namespace edash_packager
--- a/packager/media/test/data/bear-multi-configs.wvm
+++ b/packager/media/test/data/bear-multi-configs.wvm