From a99af5a0156450fc7d36d5e744f513d7e927c824 Mon Sep 17 00:00:00 2001
From: Ramji Chandramouli <ramjic@google.com>
Date: Mon, 12 Jan 2015 13:17:10 -0800
Subject: [PATCH] Modify WVM media parser to support encrypted media sample.

Change-Id: I8e696527a09fcec22b6c9713e0d1d3096720ce9c
---
 packager/media/base/media_sample.cc           |  12 +-
 packager/media/base/media_sample.h            |  10 ++
 packager/media/base/muxer.cc                  |   3 +
 .../media/formats/wvm/wvm_media_parser.cc     | 140 +++++++++---------
 packager/media/formats/wvm/wvm_media_parser.h |   8 +-
 .../formats/wvm/wvm_media_parser_unittest.cc  |  13 +-
 6 files changed, 111 insertions(+), 75 deletions(-)
diff --git a/packager/media/base/media_sample.cc b/packager/media/base/media_sample.cc
index dcd1873c74..c16cef8f4c 100644
--- a/packager/media/base/media_sample.cc
+++ b/packager/media/base/media_sample.cc
@@ -19,7 +19,11 @@ MediaSample::MediaSample(const uint8_t* data,
                          const uint8_t* side_data,
                          size_t side_data_size,
                          bool is_key_frame)
-    : dts_(0), pts_(0), duration_(0), is_key_frame_(is_key_frame) {
+    : dts_(0),
+      pts_(0),
+      duration_(0),
+      is_key_frame_(is_key_frame),
+      is_encrypted_(false) {
   if (!data) {
     CHECK_EQ(size, 0u);
     CHECK(!side_data);
@@ -31,9 +35,11 @@ MediaSample::MediaSample(const uint8_t* data,
     side_data_.assign(side_data, side_data + side_data_size);
 }
 
-MediaSample::MediaSample() : dts_(0), pts_(0),
+MediaSample::MediaSample() : dts_(0),
+                             pts_(0),
                              duration_(0),
-                             is_key_frame_(false) {}
+                             is_key_frame_(false),
+                             is_encrypted_(false) {}
 
 MediaSample::~MediaSample() {}
 
diff --git a/packager/media/base/media_sample.h b/packager/media/base/media_sample.h
index f90fa6e7d7..f3983e9e4d 100644
--- a/packager/media/base/media_sample.h
+++ b/packager/media/base/media_sample.h
@@ -83,6 +83,10 @@ class MediaSample : public base::RefCountedThreadSafe<MediaSample> {
     return is_key_frame_;
   }
 
+  bool is_encrypted() const {
+    DCHECK(!end_of_stream());
+    return is_encrypted_;
+  }
   const uint8_t* data() const {
     DCHECK(!end_of_stream());
     return &data_[0];
@@ -116,6 +120,10 @@ class MediaSample : public base::RefCountedThreadSafe<MediaSample> {
     is_key_frame_ = value;
   }
 
+  void set_is_encrypted(bool value) {
+    is_encrypted_ = value;
+  }
+
   // If there's no data in this buffer, it represents end of stream.
   bool end_of_stream() const { return data_.size() == 0; }
 
@@ -142,6 +150,8 @@ class MediaSample : public base::RefCountedThreadSafe<MediaSample> {
   int64_t pts_;
   int64_t duration_;
   bool is_key_frame_;
+  // Whether the sample is encrypted.
+  bool is_encrypted_;
 
   // Main buffer data.
   std::vector<uint8_t> data_;
diff --git a/packager/media/base/muxer.cc b/packager/media/base/muxer.cc
index 74096ac2cc..e975c7c05c 100644
--- a/packager/media/base/muxer.cc
+++ b/packager/media/base/muxer.cc
@@ -91,6 +91,9 @@ Status Muxer::AddSample(const MediaStream* stream,
     // to Muxer. In this case, there should be only one stream in Muxer.
     DCHECK_EQ(1u, streams_.size());
     return Finalize();
+  } else if (sample->is_encrypted()) {
+    LOG(ERROR) << "Unable to multiplex encrypted media sample";
+    return Status(error::INTERNAL_ERROR, "Encrypted media sample.");
   }
   return DoAddSample(stream, sample);
 }
diff --git a/packager/media/formats/wvm/wvm_media_parser.cc b/packager/media/formats/wvm/wvm_media_parser.cc
index 7c59c054dc..f1d4bdf423 100644
--- a/packager/media/formats/wvm/wvm_media_parser.cc
+++ b/packager/media/formats/wvm/wvm_media_parser.cc
@@ -373,7 +373,7 @@ bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
             parse_state_ = IndexPayload;
             continue;
           default:
-            if (!DemuxNextPes(read_ptr, false)) {
+            if (!DemuxNextPes(false)) {
               return false;
             }
             parse_state_ = EsPayload;
@@ -460,7 +460,7 @@ bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
       case ProgramEnd:
         parse_state_ = StartCode1;
         metadata_is_complete_ = true;
-        if (!DemuxNextPes(read_ptr, true)) {
+        if (!DemuxNextPes(true)) {
           return false;
         }
         Flush();
@@ -728,33 +728,34 @@ bool WvmMediaParser::ParseIndexEntry() {
   return true;
 }
 
-bool WvmMediaParser::DemuxNextPes(uint8_t* read_ptr, bool is_program_end) {
+bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
+  bool output_encrypted_sample = false;
   if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
     // Decrypt crypto unit.
     if (!content_decryptor_) {
-      LOG(ERROR) << "Source content is encrypted, but decryption not enabled";
-      return false;
+      output_encrypted_sample = true;
+    } else {
+      content_decryptor_->Decrypt(&sample_data_[crypto_unit_start_pos_],
+                                  sample_data_.size() - crypto_unit_start_pos_,
+                                  &sample_data_[crypto_unit_start_pos_]);
     }
-    content_decryptor_->Decrypt(&sample_data_[crypto_unit_start_pos_],
-                                sample_data_.size() - crypto_unit_start_pos_,
-                                &sample_data_[crypto_unit_start_pos_]);
   }
   // Demux media sample if we are at program end or if we are not at a
   // continuation PES.
   if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
     if (!sample_data_.empty()) {
-      if (!Output()) {
+      if (!Output(output_encrypted_sample)) {
         return false;
       }
     }
-    StartMediaSampleDemux(read_ptr);
+    StartMediaSampleDemux();
   }
 
   crypto_unit_start_pos_ = sample_data_.size();
   return true;
 }
 
-void WvmMediaParser::StartMediaSampleDemux(uint8_t* read_ptr) {
+void WvmMediaParser::StartMediaSampleDemux() {
   bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
   media_sample_ = MediaSample::CreateEmptyMediaSample();
   media_sample_->set_dts(dts_);
@@ -764,64 +765,69 @@ void WvmMediaParser::StartMediaSampleDemux(uint8_t* read_ptr) {
   sample_data_.clear();
 }
 
-bool WvmMediaParser::Output() {
-  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
-    // Set data on the video stream from the NalUnitStream.
-    std::vector<uint8_t> nal_unit_stream;
-    if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
-            &sample_data_[0], sample_data_.size(), &nal_unit_stream)) {
-      LOG(ERROR) << "Could not convert h.264 byte stream sample";
-      return false;
-    }
-    media_sample_->set_data(nal_unit_stream.data(), nal_unit_stream.size());
-    if (!is_initialized_) {
-      // Set extra data for video stream from AVC Decoder Config Record.
-      // Also, set codec string from the AVC Decoder Config Record.
-      std::vector<uint8_t> decoder_config_record;
-      byte_to_unit_stream_converter_.GetAVCDecoderConfigurationRecord(
-          &decoder_config_record);
-      for (uint32_t i = 0; i < stream_infos_.size(); i++) {
-        if (stream_infos_[i]->stream_type() == media::kStreamVideo &&
-            stream_infos_[i]->extra_data().empty()) {
-          stream_infos_[i]->set_extra_data(decoder_config_record);
-          stream_infos_[i]->set_codec_string(VideoStreamInfo::GetCodecString(
-              kCodecH264, decoder_config_record[1], decoder_config_record[2],
-              decoder_config_record[3]));
+bool WvmMediaParser::Output(bool output_encrypted_sample) {
+  if (output_encrypted_sample) {
+    media_sample_->set_data(&sample_data_[0], sample_data_.size());
+    media_sample_->set_is_encrypted(true);
+  } else {
+    if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
+      // Set data on the video stream from the NalUnitStream.
+      std::vector<uint8_t> nal_unit_stream;
+      if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
+              &sample_data_[0], sample_data_.size(), &nal_unit_stream)) {
+        LOG(ERROR) << "Could not convert h.264 byte stream sample";
+        return false;
+      }
+      media_sample_->set_data(nal_unit_stream.data(), nal_unit_stream.size());
+      if (!is_initialized_) {
+        // Set extra data for video stream from AVC Decoder Config Record.
+        // Also, set codec string from the AVC Decoder Config Record.
+        std::vector<uint8_t> decoder_config_record;
+        byte_to_unit_stream_converter_.GetAVCDecoderConfigurationRecord(
+            &decoder_config_record);
+        for (uint32_t i = 0; i < stream_infos_.size(); i++) {
+          if (stream_infos_[i]->stream_type() == media::kStreamVideo &&
+              stream_infos_[i]->extra_data().empty()) {
+            stream_infos_[i]->set_extra_data(decoder_config_record);
+            stream_infos_[i]->set_codec_string(VideoStreamInfo::GetCodecString(
+                kCodecH264, decoder_config_record[1], decoder_config_record[2],
+                decoder_config_record[3]));
+          }
         }
       }
-    }
-  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
-      kPesStreamIdAudio) {
-    // Set data on the audio stream from AdtsHeader.
-    int frame_size = media::mp2t::AdtsHeader::GetAdtsFrameSize(
-        &sample_data_[0], kAdtsHeaderMinSize);
-    media::mp2t::AdtsHeader adts_header;
-    const uint8_t* frame_ptr = &sample_data_[0];
-    std::vector<uint8_t> extra_data;
-    if (!adts_header.Parse(frame_ptr, frame_size) ||
-        !adts_header.GetAudioSpecificConfig(&extra_data)) {
-      LOG(ERROR) << "Could not parse ADTS header";
-      return false;
-    }
-    size_t header_size = adts_header.GetAdtsHeaderSize(frame_ptr,
-                                                       frame_size);
-    media_sample_->set_data(frame_ptr + header_size,
-                            frame_size - header_size);
-    if (!is_initialized_) {
-      for (uint32_t i = 0; i < stream_infos_.size(); i++) {
-        if (stream_infos_[i]->stream_type() == media::kStreamAudio &&
-            stream_infos_[i]->extra_data().empty()) {
-          // Set AudioStreamInfo fields using information from the ADTS
-          // header.
-          AudioStreamInfo* audio_stream_info =
-              reinterpret_cast<AudioStreamInfo*>(
-                  stream_infos_[i].get());
-          audio_stream_info->set_sampling_frequency(
-              adts_header.GetSamplingFrequency());
-          audio_stream_info->set_extra_data(extra_data);
-          audio_stream_info->set_codec_string(
-              AudioStreamInfo::GetCodecString(
-                  kCodecAAC, adts_header.GetObjectType()));
+    } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
+        kPesStreamIdAudio) {
+      // Set data on the audio stream from AdtsHeader.
+      int frame_size = media::mp2t::AdtsHeader::GetAdtsFrameSize(
+          &sample_data_[0], kAdtsHeaderMinSize);
+      media::mp2t::AdtsHeader adts_header;
+      const uint8_t* frame_ptr = &sample_data_[0];
+      std::vector<uint8_t> extra_data;
+      if (!adts_header.Parse(frame_ptr, frame_size) ||
+          !adts_header.GetAudioSpecificConfig(&extra_data)) {
+        LOG(ERROR) << "Could not parse ADTS header";
+        return false;
+      }
+      size_t header_size = adts_header.GetAdtsHeaderSize(frame_ptr,
+                                                         frame_size);
+      media_sample_->set_data(frame_ptr + header_size,
+                              frame_size - header_size);
+      if (!is_initialized_) {
+        for (uint32_t i = 0; i < stream_infos_.size(); i++) {
+          if (stream_infos_[i]->stream_type() == media::kStreamAudio &&
+              stream_infos_[i]->extra_data().empty()) {
+            // Set AudioStreamInfo fields using information from the ADTS
+            // header.
+            AudioStreamInfo* audio_stream_info =
+                reinterpret_cast<AudioStreamInfo*>(
+                    stream_infos_[i].get());
+            audio_stream_info->set_sampling_frequency(
+                adts_header.GetSamplingFrequency());
+            audio_stream_info->set_extra_data(extra_data);
+            audio_stream_info->set_codec_string(
+                AudioStreamInfo::GetCodecString(
+                    kCodecAAC, adts_header.GetObjectType()));
+          }
         }
       }
     }
diff --git a/packager/media/formats/wvm/wvm_media_parser.h b/packager/media/formats/wvm/wvm_media_parser.h
index b39ebaaeeb..92174efdbf 100644
--- a/packager/media/formats/wvm/wvm_media_parser.h
+++ b/packager/media/formats/wvm/wvm_media_parser.h
@@ -164,9 +164,9 @@ class WvmMediaParser : public MediaParser {
   // Index denotes 'search index' in the WVM content.
   bool ParseIndexEntry();
 
-  bool DemuxNextPes(uint8_t* start, bool is_program_end);
+  bool DemuxNextPes(bool is_program_end);
 
-  void StartMediaSampleDemux(uint8_t* start);
+  void StartMediaSampleDemux();
 
   template <typename T>
   Tag GetTag(const uint8_t& tag,
@@ -195,7 +195,9 @@ class WvmMediaParser : public MediaParser {
     return Tag(tag);
   }
 
-  bool Output();
+  // |must_process_encrypted| setting determines if Output() should attempt
+  // to output the media sample as encrypted.
+  bool Output(bool must_process_encrypted);
 
   bool GetAssetKey(const uint32_t asset_id, EncryptionKey* encryption_key);
 
diff --git a/packager/media/formats/wvm/wvm_media_parser_unittest.cc b/packager/media/formats/wvm/wvm_media_parser_unittest.cc
index a86552c805..63ce76096b 100644
--- a/packager/media/formats/wvm/wvm_media_parser_unittest.cc
+++ b/packager/media/formats/wvm/wvm_media_parser_unittest.cc
@@ -28,6 +28,7 @@ const char kWvmFile[] = "hb2_4stream_encrypted.wvm";
 const uint32_t kExpectedStreams = 4;
 const int kExpectedVideoFrameCount = 6665;
 const int kExpectedAudioFrameCount = 11964;
+const int kExpectedEncryptedSampleCount = 17287;
 const uint8_t kExpectedAssetKey[] =
     "\x06\x81\x7f\x48\x6b\xf2\x7f\x3e\xc7\x39\xa8\x3f\x12\x0a\xd2\xfc";
 const uint8_t k64ByteAssetKey[] =
@@ -66,6 +67,7 @@ class WvmMediaParserTest : public testing::Test {
   WvmMediaParserTest()
       : audio_frame_count_(0),
         video_frame_count_(0),
+        encrypted_sample_count_(0),
         video_max_dts_(kNoTimestamp),
         current_track_id_(-1) {
     parser_.reset(new WvmMediaParser());
@@ -82,6 +84,7 @@ class WvmMediaParserTest : public testing::Test {
   StreamMap stream_map_;
   int audio_frame_count_;
   int video_frame_count_;
+  int encrypted_sample_count_;
   int64_t video_max_dts_;
   uint32_t current_track_id_;
   EncryptionKey encryption_key_;
@@ -128,6 +131,9 @@ class WvmMediaParserTest : public testing::Test {
       }
     }
 
+    if (sample->is_encrypted()) {
+      ++encrypted_sample_count_;
+    }
     return true;
   }
 
@@ -149,12 +155,14 @@ class WvmMediaParserTest : public testing::Test {
 };
 
 TEST_F(WvmMediaParserTest, ParseWvmWithoutKeySource) {
-  // Parsing should fail but it will get the streams successfully.
   key_source_.reset();
   InitializeParser();
   std::vector<uint8_t> buffer = ReadTestDataFile(kWvmFile);
-  EXPECT_FALSE(parser_->Parse(buffer.data(), buffer.size()));
+  EXPECT_TRUE(parser_->Parse(buffer.data(), buffer.size()));
   EXPECT_EQ(kExpectedStreams, stream_map_.size());
+  EXPECT_EQ(kExpectedVideoFrameCount, video_frame_count_);
+  EXPECT_EQ(kExpectedAudioFrameCount, audio_frame_count_);
+  EXPECT_EQ(kExpectedEncryptedSampleCount, encrypted_sample_count_);
 }
 
 TEST_F(WvmMediaParserTest, ParseWvmInitWithoutKeySource) {
@@ -173,6 +181,7 @@ TEST_F(WvmMediaParserTest, ParseWvm) {
   EXPECT_EQ(kExpectedStreams, stream_map_.size());
   EXPECT_EQ(kExpectedVideoFrameCount, video_frame_count_);
   EXPECT_EQ(kExpectedAudioFrameCount, audio_frame_count_);
+  EXPECT_EQ(0, encrypted_sample_count_);
 }
 
 TEST_F(WvmMediaParserTest, ParseWvmWith64ByteAssetKey) {