diff --git a/packager/app/packager_main.cc b/packager/app/packager_main.cc index cc2732202e..a7edc35fbf 100644 --- a/packager/app/packager_main.cc +++ b/packager/app/packager_main.cc @@ -43,6 +43,7 @@ #include "packager/media/formats/mp2t/ts_muxer.h" #include "packager/media/formats/mp4/mp4_muxer.h" #include "packager/media/formats/webm/webm_muxer.h" +#include "packager/media/trick_play/trick_play_handler.h" #include "packager/mpd/base/dash_iop_mpd_notifier.h" #include "packager/mpd/base/media_info.pb.h" #include "packager/mpd/base/mpd_builder.h" @@ -234,7 +235,10 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors, DCHECK(!(mpd_notifier && hls_notifier)); DCHECK(remux_jobs); + std::shared_ptr trick_play_handler; + std::string previous_input; + std::string previous_stream_selector; int stream_number = 0; for (StreamDescriptorList::const_iterator stream_iter = stream_descriptors.begin(); @@ -294,6 +298,7 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors, demuxer->SetKeySource(std::move(decryption_key_source)); } remux_jobs->emplace_back(new RemuxJob(std::move(demuxer))); + trick_play_handler.reset(); previous_input = stream_iter->input; // Skip setting up muxers if output is not needed. if (stream_iter->output.empty() && stream_iter->segment_template.empty()) @@ -301,6 +306,15 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors, } DCHECK(!remux_jobs->empty()); + // Each stream selector requires an individual trick play handler. + // E.g., an input with two video streams needs two trick play handlers. + // TODO(hmchen): add a test case in packager_test.py for two video streams + // input. 
+ if (stream_iter->stream_selector != previous_stream_selector) { + previous_stream_selector = stream_iter->stream_selector; + trick_play_handler.reset(); + } + std::shared_ptr muxer( CreateOutputMuxer(stream_muxer_options, stream_iter->output_format)); if (FLAGS_use_fake_clock_for_muxer) muxer->set_clock(fake_clock); @@ -341,8 +355,29 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors, if (muxer_listener) muxer->SetMuxerListener(std::move(muxer_listener)); - Status status; + // Create a new trick_play_handler. Note that the stream_descriptors + // are sorted so that for the same input and stream_selector, the main + // stream is always the last one following the trick play streams. + if (stream_iter->trick_play_rate > 0) { + if (!trick_play_handler) { + trick_play_handler.reset(new TrickPlayHandler()); + } + trick_play_handler->SetHandlerForTrickPlay(stream_iter->trick_play_rate, + std::move(muxer)); + if (trick_play_handler->IsConnected()) + continue; + } else if (trick_play_handler) { + trick_play_handler->SetHandlerForMainStream(std::move(muxer)); + DCHECK(trick_play_handler->IsConnected()); + continue; + } + + std::vector> handlers; + auto chunking_handler = std::make_shared(chunking_options); + handlers.push_back(chunking_handler); + + Status status; if (encryption_key_source) { auto new_encryption_options = encryption_options; // Use Sample AES in MPEG2TS. 
@@ -353,18 +388,22 @@ bool CreateRemuxJobs(const StreamDescriptorList& stream_descriptors, new_encryption_options.protection_scheme = kAppleSampleAesProtectionScheme; } - auto encryption_handler = std::make_shared( - new_encryption_options, encryption_key_source); - status.Update(encryption_handler->SetHandler(0, std::move(muxer))); - status.Update( - chunking_handler->SetHandler(0, std::move(encryption_handler))); + handlers.emplace_back( + new EncryptionHandler(new_encryption_options, encryption_key_source)); + } + + // If trick_play_handler is available, muxer should already be connected to + // trick_play_handler. + if (trick_play_handler) { + handlers.push_back(trick_play_handler); } else { - status.Update(chunking_handler->SetHandler(0, std::move(muxer))); + handlers.push_back(std::move(muxer)); } auto* demuxer = remux_jobs->back()->demuxer(); const std::string& stream_selector = stream_iter->stream_selector; status.Update(demuxer->SetHandler(stream_selector, chunking_handler)); + status.Update(ConnectHandlers(handlers)); if (!status.ok()) { LOG(ERROR) << "Failed to setup graph: " << status; diff --git a/packager/app/packager_util.cc b/packager/app/packager_util.cc index 0c538c417d..323615801d 100644 --- a/packager/app/packager_util.cc +++ b/packager/app/packager_util.cc @@ -11,16 +11,18 @@ #include "packager/app/crypto_flags.h" #include "packager/app/fixed_key_encryption_flags.h" -#include "packager/app/playready_key_encryption_flags.h" #include "packager/app/mpd_flags.h" #include "packager/app/muxer_flags.h" +#include "packager/app/playready_key_encryption_flags.h" #include "packager/app/widevine_encryption_flags.h" #include "packager/base/logging.h" #include "packager/base/strings/string_number_conversions.h" #include "packager/media/base/fixed_key_source.h" +#include "packager/media/base/media_handler.h" #include "packager/media/base/muxer_options.h" #include "packager/media/base/playready_key_source.h" #include "packager/media/base/request_signer.h" 
+#include "packager/media/base/status.h" #include "packager/media/base/widevine_key_source.h" #include "packager/media/chunking/chunking_handler.h" #include "packager/media/crypto/encryption_handler.h" @@ -222,5 +224,14 @@ MpdOptions GetMpdOptions(bool on_demand_profile) { return mpd_options; } +Status ConnectHandlers(std::vector>& handlers) { + size_t num_handlers = handlers.size(); + Status status; + for (size_t i = 1; i < num_handlers; ++i) { + status.Update(handlers[i - 1]->AddHandler(handlers[i])); + } + return status; +} + } // namespace media } // namespace shaka diff --git a/packager/app/packager_util.h b/packager/app/packager_util.h index a6c17a4d6e..604550dc18 100644 --- a/packager/app/packager_util.h +++ b/packager/app/packager_util.h @@ -21,7 +21,9 @@ struct MpdOptions; namespace media { +class MediaHandler; class KeySource; +class Status; struct ChunkingOptions; struct EncryptionOptions; struct MuxerOptions; @@ -50,6 +52,12 @@ MuxerOptions GetMuxerOptions(); /// @return MpdOptions from provided command line options. MpdOptions GetMpdOptions(bool on_demand_profile); +/// Connect handlers in the vector. +/// @param handlers A vector of media handlers to be connected. The handlers +/// are chained from front() to back(). +/// @return OK on success. 
+Status ConnectHandlers(std::vector>& handlers); + } // namespace media } // namespace shaka diff --git a/packager/app/stream_descriptor.cc b/packager/app/stream_descriptor.cc index c08ed654ce..a8bbea6de2 100644 --- a/packager/app/stream_descriptor.cc +++ b/packager/app/stream_descriptor.cc @@ -71,8 +71,7 @@ FieldType GetFieldType(const std::string& field_name) { } // anonymous namespace -StreamDescriptor::StreamDescriptor() - : bandwidth(0), output_format(CONTAINER_UNKNOWN) {} +StreamDescriptor::StreamDescriptor() {} StreamDescriptor::~StreamDescriptor() {} @@ -144,6 +143,20 @@ bool InsertStreamDescriptor(const std::string& descriptor_string, descriptor.hls_playlist_name = iter->second; break; } + case kTrickPlayRateField: { + unsigned rate; + if (!base::StringToUint(iter->second, &rate)) { + LOG(ERROR) << "Non-numeric trick play rate " << iter->second + << " specified."; + return false; + } + if (rate == 0) { + LOG(ERROR) << "Stream trick_play_rate should be > 0."; + return false; + } + descriptor.trick_play_rate = rate; + break; + } default: LOG(ERROR) << "Unknown field in stream descriptor (\"" << iter->first << "\")."; diff --git a/packager/app/stream_descriptor.h b/packager/app/stream_descriptor.h index ab6ef368ee..5cb6c80489 100644 --- a/packager/app/stream_descriptor.h +++ b/packager/app/stream_descriptor.h @@ -27,18 +27,26 @@ struct StreamDescriptor { std::string input; std::string output; std::string segment_template; - uint32_t bandwidth; + uint32_t bandwidth = 0; std::string language; - MediaContainerName output_format; + MediaContainerName output_format = CONTAINER_UNKNOWN; std::string hls_name; std::string hls_group_id; std::string hls_playlist_name; - int16_t trick_play_rate; + uint32_t trick_play_rate = 0; }; class StreamDescriptorCompareFn { public: bool operator()(const StreamDescriptor& a, const StreamDescriptor& b) { + if (a.input == b.input) { + if (a.stream_selector == b.stream_selector) + // Stream with high trick_play_rate is at the 
beginning. + return a.trick_play_rate > b.trick_play_rate; + else + return a.stream_selector < b.stream_selector; + } + return a.input < b.input; } }; diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py index 6ebce76ce8..4cb94db2a9 100755 --- a/packager/app/test/packager_test.py +++ b/packager/app/test/packager_test.py @@ -101,6 +101,41 @@ class PackagerAppTest(unittest.TestCase): self._DiffGold(self.output[1], 'bear-640x360-v-golden.mp4') self._DiffGold(self.mpd_output, 'bear-640x360-av-golden.mpd') + def testPackageAudioVideoWithTrickPlay(self): + self.packager.Package( + self._GetStreams(['audio', 'video', 'video,trick_play_rate=1']), + self._GetFlags()) + self._DiffGold(self.output[0], 'bear-640x360-a-golden.mp4') + self._DiffGold(self.output[1], 'bear-640x360-v-golden.mp4') + self._DiffGold(self.output[2], 'bear-640x360-v-trick-1-golden.mp4') + self._DiffGold(self.mpd_output, 'bear-640x360-av-trick-1-golden.mpd') + + def testPackageAudioVideoWithTwoTrickPlay(self): + self.packager.Package( + self._GetStreams(['audio', 'video', 'video,trick_play_rate=1', + 'video,trick_play_rate=2']), + self._GetFlags()) + self._DiffGold(self.output[0], 'bear-640x360-a-golden.mp4') + self._DiffGold(self.output[1], 'bear-640x360-v-golden.mp4') + self._DiffGold(self.output[2], 'bear-640x360-v-trick-1-golden.mp4') + self._DiffGold(self.output[3], 'bear-640x360-v-trick-2-golden.mp4') + self._DiffGold(self.mpd_output, + 'bear-640x360-av-trick-1-trick-2-golden.mpd') + + def testPackageAudioVideoWithTwoTrickPlayDecreasingRate(self): + self.packager.Package( + self._GetStreams(['audio', 'video', 'video,trick_play_rate=2', + 'video,trick_play_rate=1']), + self._GetFlags()) + self._DiffGold(self.output[0], 'bear-640x360-a-golden.mp4') + self._DiffGold(self.output[1], 'bear-640x360-v-golden.mp4') + self._DiffGold(self.output[2], 'bear-640x360-v-trick-2-golden.mp4') + self._DiffGold(self.output[3], 'bear-640x360-v-trick-1-golden.mp4') + # Since the stream 
descriptors are sorted in packager app, a different + # order of trick play rates gets the same mpd. + self._DiffGold(self.mpd_output, + 'bear-640x360-av-trick-1-trick-2-golden.mpd') + def testPackageAudioVideoWithLanguageOverride(self): self.packager.Package( self._GetStreams(['audio', 'video'], language_override='por-BR'), @@ -205,6 +240,36 @@ class PackagerAppTest(unittest.TestCase): self._VerifyDecryption(self.output[0], 'bear-640x360-a-golden.mp4') self._VerifyDecryption(self.output[1], 'bear-640x360-v-golden.mp4') + def testPackageWithEncryptionAndTrickPlay(self): + self.packager.Package( + self._GetStreams(['audio', 'video', 'video,trick_play_rate=1']), + self._GetFlags(encryption=True)) + self._DiffGold(self.output[0], 'bear-640x360-a-cenc-golden.mp4') + self._DiffGold(self.output[1], 'bear-640x360-v-cenc-golden.mp4') + self._DiffGold(self.output[2], 'bear-640x360-v-trick-1-cenc-golden.mp4') + self._DiffGold(self.mpd_output, 'bear-640x360-av-trick-1-cenc-golden.mpd') + self._VerifyDecryption(self.output[0], 'bear-640x360-a-golden.mp4') + self._VerifyDecryption(self.output[1], 'bear-640x360-v-golden.mp4') + self._VerifyDecryption(self.output[2], 'bear-640x360-v-trick-1-golden.mp4') + + # TODO(hmchen): Add a test case that SD and HD AdaptationSet share one trick + # play stream. 
+ def testPackageWithEncryptionAndTwoTrickPlays(self): + self.packager.Package( + self._GetStreams(['audio', 'video', 'video,trick_play_rate=1', + 'video,trick_play_rate=2']), + self._GetFlags(encryption=True)) + self._DiffGold(self.output[0], 'bear-640x360-a-cenc-golden.mp4') + self._DiffGold(self.output[1], 'bear-640x360-v-cenc-golden.mp4') + self._DiffGold(self.output[2], 'bear-640x360-v-trick-1-cenc-golden.mp4') + self._DiffGold(self.output[3], 'bear-640x360-v-trick-2-cenc-golden.mp4') + self._DiffGold(self.mpd_output, + 'bear-640x360-av-trick-1-trick-2-cenc-golden.mpd') + self._VerifyDecryption(self.output[0], 'bear-640x360-a-golden.mp4') + self._VerifyDecryption(self.output[1], 'bear-640x360-v-golden.mp4') + self._VerifyDecryption(self.output[2], 'bear-640x360-v-trick-1-golden.mp4') + self._VerifyDecryption(self.output[3], 'bear-640x360-v-trick-2-golden.mp4') + def testPackageWithEncryptionAndNoPsshInStream(self): self.packager.Package( self._GetStreams(['audio', 'video']), @@ -574,6 +639,11 @@ class PackagerAppTest(unittest.TestCase): else: output_prefix = '%s_%d_%s' % (self.output_prefix, test_file_index, stream_descriptor) + # Replace ',', '=' with '_' to make it more like a filename, also + # avoid potential illegal characters for a filename. 
+ for ch in [',', '=']: + output_prefix = output_prefix.replace(ch, '_') + if live: if output_format == 'ts': stream = ('input=%s,stream=%s,segment_template=%s-$Number$.ts,' diff --git a/packager/app/test/testdata/bear-640x360-av-trick-1-cenc-golden.mpd b/packager/app/test/testdata/bear-640x360-av-trick-1-cenc-golden.mpd new file mode 100644 index 0000000000..8262361526 --- /dev/null +++ b/packager/app/test/testdata/bear-640x360-av-trick-1-cenc-golden.mpd @@ -0,0 +1,44 @@ + + + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== + + + output_video.mp4 + + + + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== + + + + output_video_trick_play_rate_1.mp4 + + + + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== + + + + output_audio.mp4 + + + + + + + diff --git a/packager/app/test/testdata/bear-640x360-av-trick-1-golden.mpd b/packager/app/test/testdata/bear-640x360-av-trick-1-golden.mpd new file mode 100644 index 0000000000..c4f46e2ca7 --- /dev/null +++ b/packager/app/test/testdata/bear-640x360-av-trick-1-golden.mpd @@ -0,0 +1,32 @@ + + + + + + + output_video.mp4 + + + + + + + + + output_video_trick_play_rate_1.mp4 + + + + + + + + + output_audio.mp4 + + + + + + + diff --git a/packager/app/test/testdata/bear-640x360-av-trick-1-trick-2-cenc-golden.mpd b/packager/app/test/testdata/bear-640x360-av-trick-1-trick-2-cenc-golden.mpd new file mode 100644 index 0000000000..f021e58096 --- /dev/null +++ b/packager/app/test/testdata/bear-640x360-av-trick-1-trick-2-cenc-golden.mpd @@ -0,0 +1,50 @@ + + + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== + + + output_video.mp4 + + + + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== + + + + output_video_trick_play_rate_2.mp4 + + + + + + output_video_trick_play_rate_1.mp4 + + + + + + + + + AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== + + + + 
output_audio.mp4 + + + + + + + diff --git a/packager/app/test/testdata/bear-640x360-av-trick-1-trick-2-golden.mpd b/packager/app/test/testdata/bear-640x360-av-trick-1-trick-2-golden.mpd new file mode 100644 index 0000000000..59d55b7be2 --- /dev/null +++ b/packager/app/test/testdata/bear-640x360-av-trick-1-trick-2-golden.mpd @@ -0,0 +1,38 @@ + + + + + + + output_video.mp4 + + + + + + + + + output_video_trick_play_rate_2.mp4 + + + + + + output_video_trick_play_rate_1.mp4 + + + + + + + + + output_audio.mp4 + + + + + + + diff --git a/packager/app/test/testdata/bear-640x360-v-trick-1-cenc-golden.mp4 b/packager/app/test/testdata/bear-640x360-v-trick-1-cenc-golden.mp4 new file mode 100644 index 0000000000..2a96070111 Binary files /dev/null and b/packager/app/test/testdata/bear-640x360-v-trick-1-cenc-golden.mp4 differ diff --git a/packager/app/test/testdata/bear-640x360-v-trick-1-golden.mp4 b/packager/app/test/testdata/bear-640x360-v-trick-1-golden.mp4 new file mode 100644 index 0000000000..60407efb5f Binary files /dev/null and b/packager/app/test/testdata/bear-640x360-v-trick-1-golden.mp4 differ diff --git a/packager/app/test/testdata/bear-640x360-v-trick-2-cenc-golden.mp4 b/packager/app/test/testdata/bear-640x360-v-trick-2-cenc-golden.mp4 new file mode 100644 index 0000000000..e8daf0b08e Binary files /dev/null and b/packager/app/test/testdata/bear-640x360-v-trick-2-cenc-golden.mp4 differ diff --git a/packager/app/test/testdata/bear-640x360-v-trick-2-golden.mp4 b/packager/app/test/testdata/bear-640x360-v-trick-2-golden.mp4 new file mode 100644 index 0000000000..078bad9cc8 Binary files /dev/null and b/packager/app/test/testdata/bear-640x360-v-trick-2-golden.mp4 differ diff --git a/packager/media/base/media_handler.cc b/packager/media/base/media_handler.cc index a806761df5..7d16d87877 100644 --- a/packager/media/base/media_handler.cc +++ b/packager/media/base/media_handler.cc @@ -69,5 +69,15 @@ Status MediaHandler::FlushDownstream(size_t output_stream_index) { return 
handler_it->second.first->OnFlushRequest(handler_it->second.second); } +Status MediaHandler::FlushAllDownstreams() { + for (const auto& pair : output_handlers_) { + Status status = pair.second.first->OnFlushRequest(pair.second.second); + if (!status.ok()) { + return status; + } + } + return Status::OK; +} + } // namespace media } // namespace shaka diff --git a/packager/media/base/media_handler.h b/packager/media/base/media_handler.h index 7a8bdd481d..f8f21f66f8 100644 --- a/packager/media/base/media_handler.h +++ b/packager/media/base/media_handler.h @@ -86,6 +86,9 @@ class MediaHandler { /// called after setting up the graph before running the graph. Status Initialize(); + /// Validate if the handler is connected to its upstream handler. + bool IsConnected() { return num_input_streams_ > 0; } + protected: /// Internal implementation of initialize. Note that it should only initialize /// the MediaHandler itself. Downstream handlers are handled in Initialize(). @@ -160,6 +163,9 @@ class MediaHandler { /// Flush the downstream connected at the specified output stream index. Status FlushDownstream(size_t output_stream_index); + /// Flush all connected downstreams. + Status FlushAllDownstreams(); + bool initialized() { return initialized_; } size_t num_input_streams() const { return num_input_streams_; } size_t next_output_stream_index() const { return next_output_stream_index_; } diff --git a/packager/media/base/media_handler_test_base.cc b/packager/media/base/media_handler_test_base.cc index 32ba2cfd29..fe25ab6024 100644 --- a/packager/media/base/media_handler_test_base.cc +++ b/packager/media/base/media_handler_test_base.cc @@ -59,27 +59,22 @@ const uint8_t kData[]{ namespace shaka { namespace media { -// A fake media handler definition used for testing. 
-class FakeMediaHandler : public MediaHandler { - public: - const std::vector>& stream_data_vector() const { - return stream_data_vector_; - } - void clear_stream_data_vector() { stream_data_vector_.clear(); } +Status FakeMediaHandler::InitializeInternal() { + return Status::OK; +} - protected: - Status InitializeInternal() override { return Status::OK; } - Status Process(std::unique_ptr stream_data) override { - stream_data_vector_.push_back(std::move(stream_data)); - return Status::OK; - } - Status OnFlushRequest(size_t input_stream_index) override { return Status::OK; } - bool ValidateOutputStreamIndex(size_t stream_index) const override { - return true; - } +Status FakeMediaHandler::Process(std::unique_ptr stream_data) { + stream_data_vector_.push_back(std::move(stream_data)); + return Status::OK; +} - std::vector> stream_data_vector_; -}; +Status FakeMediaHandler::OnFlushRequest(size_t input_stream_index) { + return Status::OK; +} + +bool FakeMediaHandler::ValidateOutputStreamIndex(size_t stream_index) const { + return true; +} MediaHandlerTestBase::MediaHandlerTestBase() : next_handler_(new FakeMediaHandler), diff --git a/packager/media/base/media_handler_test_base.h b/packager/media/base/media_handler_test_base.h index bdf9274f18..1dd3096818 100644 --- a/packager/media/base/media_handler_test_base.h +++ b/packager/media/base/media_handler_test_base.h @@ -13,8 +13,6 @@ namespace shaka { namespace media { -class FakeMediaHandler; - MATCHER_P3(IsStreamInfo, stream_index, time_scale, encrypted, "") { *result_listener << "which is (" << stream_index << "," << time_scale << "," << (encrypted ? "encrypted" : "not encrypted") << ")"; @@ -76,6 +74,23 @@ MATCHER_P4(IsMediaSample, stream_index, timestamp, duration, encrypted, "") { arg->media_sample->is_encrypted() == encrypted; } +// A fake media handler definition used for testing. 
+class FakeMediaHandler : public MediaHandler { + public: + const std::vector>& stream_data_vector() const { + return stream_data_vector_; + } + void clear_stream_data_vector() { stream_data_vector_.clear(); } + + protected: + Status InitializeInternal() override; + Status Process(std::unique_ptr stream_data) override; + Status OnFlushRequest(size_t input_stream_index) override; + bool ValidateOutputStreamIndex(size_t stream_index) const override; + + std::vector> stream_data_vector_; +}; + class MediaHandlerTestBase : public ::testing::Test { public: MediaHandlerTestBase(); @@ -126,6 +141,9 @@ class MediaHandlerTestBase : public ::testing::Test { /// @return some random handler that can be used for testing. std::shared_ptr some_handler() { return some_handler_; } + /// @return a downstream handler that can be used for connecting. + std::shared_ptr next_handler() { return next_handler_; } + private: MediaHandlerTestBase(const MediaHandlerTestBase&) = delete; MediaHandlerTestBase& operator=(const MediaHandlerTestBase&) = delete; diff --git a/packager/media/base/video_stream_info.cc b/packager/media/base/video_stream_info.cc index 2e3ee259a5..704970ad74 100644 --- a/packager/media/base/video_stream_info.cc +++ b/packager/media/base/video_stream_info.cc @@ -48,7 +48,7 @@ VideoStreamInfo::VideoStreamInfo(int track_id, uint16_t height, uint32_t pixel_width, uint32_t pixel_height, - int16_t trick_play_rate, + uint32_t trick_play_rate, uint8_t nalu_length_size, const std::string& language, bool is_encrypted) diff --git a/packager/media/base/video_stream_info.h b/packager/media/base/video_stream_info.h index c462a5019b..a837990c16 100644 --- a/packager/media/base/video_stream_info.h +++ b/packager/media/base/video_stream_info.h @@ -39,7 +39,7 @@ class VideoStreamInfo : public StreamInfo { uint16_t height, uint32_t pixel_width, uint32_t pixel_height, - int16_t trick_play_rate, + uint32_t trick_play_rate, uint8_t nalu_length_size, const std::string& language, bool 
is_encrypted); @@ -62,16 +62,20 @@ class VideoStreamInfo : public StreamInfo { /// @return 0 if unknown. uint32_t pixel_height() const { return pixel_height_; } uint8_t nalu_length_size() const { return nalu_length_size_; } - int16_t trick_play_rate() const { return trick_play_rate_; } + uint32_t trick_play_rate() const { return trick_play_rate_; } + uint32_t playback_rate() const { return playback_rate_; } const std::vector& eme_init_data() const { return eme_init_data_; } void set_width(uint32_t width) { width_ = width; } void set_height(uint32_t height) { height_ = height; } void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; } void set_pixel_height(uint32_t pixel_height) { pixel_height_ = pixel_height; } - void set_trick_play_rate(int16_t trick_play_rate) { + void set_trick_play_rate(uint32_t trick_play_rate) { trick_play_rate_ = trick_play_rate; } + void set_playback_rate(uint32_t playback_rate) { + playback_rate_ = playback_rate; + } void set_eme_init_data(const uint8_t* eme_init_data, size_t eme_init_data_size) { eme_init_data_.assign(eme_init_data, eme_init_data + eme_init_data_size); @@ -86,7 +90,19 @@ class VideoStreamInfo : public StreamInfo { // 0 means unknown. uint32_t pixel_width_; uint32_t pixel_height_; - int16_t trick_play_rate_; // Non-zero for trick-play streams. + uint32_t trick_play_rate_ = 0; // Non-zero for trick-play streams. + + // Playback rate is the attribute for trick play stream, which signals the + // playout capabilities + // (http://dashif.org/wp-content/uploads/2016/12/DASH-IF-IOP-v4.0-clean.pdf, + // page 18, line 1). It is the ratio of main frame rate to the trick play + // frame rate. If the time scale and frame duration are not modified after + // trick play handler processing, the playback_rate equals to the number of + // frames between consecutive key frames selected for trick play stream. 
For + // example, if the video stream has GOP size of 10 and the trick play rate is + // 3, the key frames in this trick play stream are [frame_0, frame_30, + // frame_60, ...]. Then the playback_rate is 30. + uint32_t playback_rate_; // Specifies the size of the NAL unit length field. Can be 1, 2 or 4 bytes, or // 0 if the stream is not a NAL structured video stream or if it is an AnnexB diff --git a/packager/media/event/muxer_listener_internal.cc b/packager/media/event/muxer_listener_internal.cc index 65faf21cae..3a2349bcba 100644 --- a/packager/media/event/muxer_listener_internal.cc +++ b/packager/media/event/muxer_listener_internal.cc @@ -101,6 +101,13 @@ void AddVideoInfo(const VideoStreamInfo* video_stream_info, if (!codec_config.empty()) { video_info->set_decoder_config(&codec_config[0], codec_config.size()); } + + if (video_stream_info->trick_play_rate() > 0) { + video_info->set_trick_play_rate(video_stream_info->trick_play_rate()); + CHECK_GT(video_stream_info->playback_rate(), 0u) + << "Max playout rate should be > 0 for trick play streams."; + video_info->set_playback_rate(video_stream_info->playback_rate()); + } } void AddAudioInfo(const AudioStreamInfo* audio_stream_info, diff --git a/packager/media/formats/wvm/wvm_media_parser.cc b/packager/media/formats/wvm/wvm_media_parser.cc index f4214fd0ce..252f76bb4c 100644 --- a/packager/media/formats/wvm/wvm_media_parser.cc +++ b/packager/media/formats/wvm/wvm_media_parser.cc @@ -574,7 +574,7 @@ bool WvmMediaParser::ParseIndexEntry() { } uint64_t track_duration = 0; - int16_t trick_play_rate = 0; + uint32_t trick_play_rate = 0; uint32_t sampling_frequency = kDefaultSamplingFrequency; uint32_t time_scale = kMpeg2ClockRate; uint16_t video_width = 0; diff --git a/packager/media/trick_play/trick_play_handler.cc b/packager/media/trick_play/trick_play_handler.cc index b73af1795b..677e779933 100644 --- a/packager/media/trick_play/trick_play_handler.cc +++ b/packager/media/trick_play/trick_play_handler.cc @@ 
-12,17 +12,40 @@ namespace shaka { namespace media { -TrickPlayHandler::TrickPlayHandler(const TrickPlayOptions& trick_play_option) - : trick_play_options_(trick_play_option), - cached_stream_data_(trick_play_option.trick_play_rates.size()) { - for (auto rate : trick_play_option.trick_play_rates) { - CHECK_GT(rate, 0); - } +namespace { +const size_t kMainStreamIndex = 0; } +TrickPlayHandler::TrickPlayHandler() {} + TrickPlayHandler::~TrickPlayHandler() {} +void TrickPlayHandler::SetHandlerForMainStream( + std::shared_ptr handler) { + SetHandler(kMainStreamIndex, std::move(handler)); +} + +void TrickPlayHandler::SetHandlerForTrickPlay( + uint32_t trick_play_rate, + std::shared_ptr handler) { + trick_play_rates_.push_back(trick_play_rate); + // Trick play streams start from index 1. + SetHandler(trick_play_rates_.size(), std::move(handler)); +} + Status TrickPlayHandler::InitializeInternal() { + if (!HasMainStream()) { + return Status(error::TRICK_PLAY_ERROR, + "Trick play does not have main stream"); + } + if (trick_play_rates_.empty()) { + return Status(error::TRICK_PLAY_ERROR, + "Trick play rates are not specified."); + } + size_t num_trick_play_rates = trick_play_rates_.size(); + cached_stream_data_.resize(num_trick_play_rates); + playback_rates_.resize(num_trick_play_rates, 0); + return Status::OK; } @@ -54,6 +77,18 @@ Status TrickPlayHandler::Process( } } + if (stream_data->stream_data_type == StreamDataType::kSegmentInfo) { + for (auto& cached_data : cached_stream_data_) { + // It is possible that trick play stream has large frame duration that + // some segments in the main stream are skipped. To avoid empty segments, + // only cache SegmentInfo with MediaSample before it. 
+ if (!cached_data.empty() && + cached_data.back()->stream_data_type == StreamDataType::kMediaSample) + cached_data.push_back(stream_data); + } + return Status::OK; + } + if (stream_data->stream_data_type != StreamDataType::kMediaSample) { // Non media sample stream data needs to be dispatched to every output // stream. It is just cached in every queue until a new key frame comes or @@ -66,12 +101,18 @@ Status TrickPlayHandler::Process( if (stream_data->media_sample->is_key_frame()) { // For a new key frame, some of the trick play streams may include it. // The cached data in those trick play streams will be processed. - DCHECK_EQ(trick_play_options_.trick_play_rates.size(), - cached_stream_data_.size()); + DCHECK_EQ(trick_play_rates_.size(), cached_stream_data_.size()); for (size_t i = 0; i < cached_stream_data_.size(); ++i) { - int16_t rate = trick_play_options_.trick_play_rates[i]; + uint32_t rate = trick_play_rates_[i]; if (total_key_frames_ % rate == 0) { - if (!cached_stream_data_[i].empty()) { + // Delay processing cached stream data until receiving the second key + // frame so that the GOP size could be derived. + if (!cached_stream_data_[i].empty() && total_key_frames_ > 0) { + // Num of frames between first two key frames in the trick play + // streams. Use this as the playback_rate. + if (playback_rates_[i] == 0) + playback_rates_[i] = total_frames_; + Status status = ProcessCachedStreamData(i + 1, &cached_stream_data_[i]); if (!status.ok()) @@ -84,24 +125,40 @@ Status TrickPlayHandler::Process( total_key_frames_++; } + total_frames_++; prev_sample_end_timestamp_ = stream_data->media_sample->dts() + stream_data->media_sample->duration(); + return Status::OK; } bool TrickPlayHandler::ValidateOutputStreamIndex(size_t stream_index) const { // Output stream index should be less than the number of trick play // streams + one original stream. 
- return stream_index <= trick_play_options_.trick_play_rates.size(); + return stream_index <= trick_play_rates_.size(); }; Status TrickPlayHandler::OnFlushRequest(size_t input_stream_index) { DCHECK_EQ(input_stream_index, 0u) << "Trick Play Handler should only have single input."; for (size_t i = 0; i < cached_stream_data_.size(); ++i) { + LOG_IF(WARNING, playback_rates_[i] == 0) + << "Max playout rate for trick play rate " << trick_play_rates_[i] + << " is not determined. " + << "Specify it as total number of frames: " << total_frames_ << "."; + playback_rates_[i] = total_frames_; ProcessCachedStreamData(i + 1, &cached_stream_data_[i]); } - return MediaHandler::FlushDownstream(input_stream_index); + return MediaHandler::FlushAllDownstreams(); +} + +bool TrickPlayHandler::HasMainStream() { + const auto& handlers = output_handlers(); + const auto& main_stream_handler = handlers.find(kMainStreamIndex); + if (main_stream_handler == handlers.end()) { + return false; + } + return main_stream_handler->second.first != nullptr; } Status TrickPlayHandler::ProcessCachedStreamData( @@ -121,8 +178,8 @@ Status TrickPlayHandler::ProcessCachedStreamData( Status TrickPlayHandler::ProcessOneStreamData( size_t output_stream_index, const std::shared_ptr& stream_data) { - uint32_t trick_play_rate = - trick_play_options_.trick_play_rates[output_stream_index - 1]; + size_t trick_play_index = output_stream_index - 1; + uint32_t trick_play_rate = trick_play_rates_[trick_play_index]; Status status; switch (stream_data->stream_data_type) { // trick_play_rate in StreamInfo should be modified. 
@@ -132,6 +189,9 @@ Status TrickPlayHandler::ProcessOneStreamData( std::shared_ptr trick_play_video_stream_info( new VideoStreamInfo(video_stream_info)); trick_play_video_stream_info->set_trick_play_rate(trick_play_rate); + DCHECK_GT(playback_rates_[trick_play_index], 0u); + trick_play_video_stream_info->set_playback_rate( + playback_rates_[trick_play_index]); status = DispatchStreamInfo(output_stream_index, trick_play_video_stream_info); break; @@ -142,7 +202,6 @@ Status TrickPlayHandler::ProcessOneStreamData( MediaSample::CopyFrom(*(stream_data->media_sample)); trick_play_media_sample->set_duration(prev_sample_end_timestamp_ - stream_data->media_sample->dts()); - status = DispatchMediaSample(output_stream_index, trick_play_media_sample); } diff --git a/packager/media/trick_play/trick_play_handler.h b/packager/media/trick_play/trick_play_handler.h index 55999f0e99..5af28b0b57 100644 --- a/packager/media/trick_play/trick_play_handler.h +++ b/packager/media/trick_play/trick_play_handler.h @@ -12,17 +12,6 @@ namespace shaka { namespace media { -struct TrickPlayOptions { - /// Trick play rates. Note that there can be multiple trick play rates, - /// e.g., 2, 4 and 8. That means, one input video stream will generate 3 - /// output trick play streams and original stream. Three trick play streams - /// are: - /// [key_frame_0, key_frame_2, key_frame_4, ...] - /// [key_frame_0, key_frame_4, key_frame_8,...] - /// [key_frame_0, key_frame_8, key_frame_16, ...]. - std::vector trick_play_rates; -}; - /// TrickPlayHandler is a single-input-multiple-output media handler. It creates /// trick play streams from the input. // The stream data in trick play stream is not a simple duplicate. Some @@ -33,9 +22,13 @@ struct TrickPlayOptions { // input stream data before the next key frame. 
class TrickPlayHandler : public MediaHandler { public: - explicit TrickPlayHandler(const TrickPlayOptions& trick_play_options); + TrickPlayHandler(); ~TrickPlayHandler() override; + void SetHandlerForMainStream(std::shared_ptr handler); + void SetHandlerForTrickPlay(uint32_t trick_play_rate, + std::shared_ptr handler); + protected: /// @name MediaHandler implementation overrides. /// @{ @@ -48,6 +41,10 @@ class TrickPlayHandler : public MediaHandler { private: friend class TrickPlayHandlerTest; + // Returns true if the trick play handler has main stream output handler + // connected, otherwise returns false. + bool HasMainStream(); + // Process the cached stream data for one trick play stream. // The cached data is dispatched to the |output_stream_index|. Status ProcessCachedStreamData( @@ -62,7 +59,14 @@ class TrickPlayHandler : public MediaHandler { Status ProcessOneStreamData(size_t output_stream_index, const std::shared_ptr& stream_data); - const TrickPlayOptions trick_play_options_; + // Trick play rates. Note that there can be multiple trick play rates, + // e.g., 2, 4 and 8. That means, one input video stream will generate 3 + // output trick play streams and original stream. Three trick play streams + // are: + // [key_frame_0, key_frame_2, key_frame_4, ...] + // [key_frame_0, key_frame_4, key_frame_8,...] + // [key_frame_0, key_frame_8, key_frame_16, ...]. + std::vector trick_play_rates_; TrickPlayHandler(const TrickPlayHandler&) = delete; TrickPlayHandler& operator=(const TrickPlayHandler&) = delete; @@ -70,11 +74,17 @@ class TrickPlayHandler : public MediaHandler { /// Num of key frames received. uint32_t total_key_frames_ = 0; + // Num of frames received. + uint32_t total_frames_ = 0; + // End timestamp of the previous processed media_sample, which is |dts| + // |duration|. The duration of key frame in trick play stream is updated based // on this timestamp. int64_t prev_sample_end_timestamp_ = 0; + // Record playback_rate for each trick play stream. 
+ std::vector playback_rates_; + // The data in output streams should be in the same order as in the input // stream. Cache the stream data before next key frame so that we can // determine the duration for the current key frame. Since one key frame may diff --git a/packager/media/trick_play/trick_play_handler_unittest.cc b/packager/media/trick_play/trick_play_handler_unittest.cc index 0cf9795992..871b1eed02 100644 --- a/packager/media/trick_play/trick_play_handler_unittest.cc +++ b/packager/media/trick_play/trick_play_handler_unittest.cc @@ -22,18 +22,19 @@ namespace { const size_t kStreamIndex0 = 0; const size_t kStreamIndex1 = 1; const size_t kStreamIndex2 = 2; -const size_t kStreamIndex3 = 3; const uint32_t kTimeScale = 800; const uint32_t kDuration = 200; -const int16_t kTrickPlayRates[]{1, 2, 4}; +const uint32_t kTrickPlayRates[]{1, 2}; +const uint32_t kTrickPlayRatesDecreasing[]{2, 1}; const bool kEncrypted = true; } // namespace -MATCHER_P4(IsTrickPlayVideoStreamInfo, +MATCHER_P5(IsTrickPlayVideoStreamInfo, stream_index, time_scale, encrypted, trick_play_rate, + playback_rate, "") { return arg->stream_index == stream_index && arg->stream_data_type == StreamDataType::kStreamInfo && @@ -41,7 +42,9 @@ MATCHER_P4(IsTrickPlayVideoStreamInfo, arg->stream_info->is_encrypted() == encrypted && arg->stream_info->stream_type() == kStreamVideo && static_cast(arg->stream_info.get()) - ->trick_play_rate() == trick_play_rate; + ->trick_play_rate() == trick_play_rate && + static_cast(arg->stream_info.get()) + ->playback_rate() == playback_rate; } MATCHER_P3(IsKeyFrameMediaSample, stream_index, timestamp, duration, "") { @@ -54,12 +57,16 @@ MATCHER_P3(IsKeyFrameMediaSample, stream_index, timestamp, duration, "") { class TrickPlayHandlerTest : public MediaHandlerTestBase { public: - void SetUpTrickPlayHandler(const TrickPlayOptions& trick_play_options) { - trick_play_handler_.reset(new TrickPlayHandler(trick_play_options)); - // The output stream size is number of trick play 
stream + one - // non-trick-play stream. - SetUpGraph(1, trick_play_options.trick_play_rates.size() + 1, - trick_play_handler_); + void SetUpTrickPlayHandler(const std::vector& trick_play_rates) { + trick_play_handler_.reset(new TrickPlayHandler()); + // Use SetUpGraph to set only input handler, and use + // SetHandlerForMainStream and SetHandlerForTrickPlay for the output + // handlers. + SetUpGraph(1, 0, trick_play_handler_); + trick_play_handler_->SetHandlerForMainStream(next_handler()); + for (uint32_t rate : trick_play_rates) { + trick_play_handler_->SetHandlerForTrickPlay(rate, next_handler()); + } ASSERT_OK(trick_play_handler_->Initialize()); } @@ -77,10 +84,9 @@ class TrickPlayHandlerTest : public MediaHandlerTestBase { // This test makes sure the audio stream is rejected by the trick play handler. TEST_F(TrickPlayHandlerTest, AudioStream) { - TrickPlayOptions trick_play_options; - trick_play_options.trick_play_rates.assign(std::begin(kTrickPlayRates), - std::end(kTrickPlayRates)); - SetUpTrickPlayHandler(trick_play_options); + const std::vector trick_play_rates(std::begin(kTrickPlayRates), + std::end(kTrickPlayRates)); + SetUpTrickPlayHandler(trick_play_rates); Status status = Process(GetAudioStreamInfoStreamData(kStreamIndex0, kTimeScale)); @@ -90,11 +96,10 @@ TEST_F(TrickPlayHandlerTest, AudioStream) { // This test makes sure the trick play handler can process stream data // correctly. -TEST_F(TrickPlayHandlerTest, VideoStream) { - TrickPlayOptions trick_play_options; - trick_play_options.trick_play_rates.assign(std::begin(kTrickPlayRates), - std::end(kTrickPlayRates)); - SetUpTrickPlayHandler(trick_play_options); +TEST_F(TrickPlayHandlerTest, VideoStreamWithTrickPlay) { + const std::vector trick_play_rates(std::begin(kTrickPlayRates), + std::end(kTrickPlayRates)); + SetUpTrickPlayHandler(trick_play_rates); ASSERT_OK(Process(GetVideoStreamInfoStreamData(kStreamIndex0, kTimeScale))); // The stream info is cached, so the output is empty. 
@@ -119,12 +124,6 @@ TEST_F(TrickPlayHandlerTest, VideoStream) { // Frame 0, key frame. IsMediaSample(kStreamIndex0, kVideoStartTimestamp, kDuration, !kEncrypted), - IsTrickPlayVideoStreamInfo(kStreamIndex1, kTimeScale, !kEncrypted, - kTrickPlayRates[0]), - IsTrickPlayVideoStreamInfo(kStreamIndex2, kTimeScale, !kEncrypted, - kTrickPlayRates[1]), - IsTrickPlayVideoStreamInfo(kStreamIndex3, kTimeScale, !kEncrypted, - kTrickPlayRates[2]), // Frame 1. IsMediaSample(kStreamIndex0, kVideoStartTimestamp + kDuration, kDuration, !kEncrypted), @@ -148,6 +147,10 @@ TEST_F(TrickPlayHandlerTest, VideoStream) { // Frame 3, key frame. IsKeyFrameMediaSample( kStreamIndex0, kVideoStartTimestamp + kDuration * 3, kDuration), + // Stream info, TrickPlayRate = 1. + IsTrickPlayVideoStreamInfo( + kStreamIndex1, kTimeScale, !kEncrypted, kTrickPlayRates[0], + static_cast(kGOPSize) * kTrickPlayRates[0]), // Frame 0, TrickPlayRate = 1. IsKeyFrameMediaSample(kStreamIndex1, kVideoStartTimestamp, kDuration * 3), @@ -178,6 +181,10 @@ TEST_F(TrickPlayHandlerTest, VideoStream) { IsKeyFrameMediaSample(kStreamIndex1, kVideoStartTimestamp + kDuration * 3, kDuration * 3), + // Stream info, TrickPlayRate = 2. + IsTrickPlayVideoStreamInfo( + kStreamIndex2, kTimeScale, !kEncrypted, kTrickPlayRates[1], + static_cast(kGOPSize) * kTrickPlayRates[1]), // Frame 0, TrickPlayRate = 2. IsKeyFrameMediaSample(kStreamIndex2, kVideoStartTimestamp, kDuration * 6), @@ -195,11 +202,128 @@ TEST_F(TrickPlayHandlerTest, VideoStream) { kDuration * 2), // Frame 6, TrickPlayRate = 2. IsKeyFrameMediaSample(kStreamIndex2, + kVideoStartTimestamp + kDuration * 6, + kDuration * 2))); + ClearOutputStreamDataVector(); + + // Flush again, get nothing. + ASSERT_OK(FlushStream(0)); + EXPECT_THAT(GetOutputStreamDataVector(), IsEmpty()); +} + +// This test makes sure the trick play handler can process stream data +// correctly with a decreasing order of trick play rates. 
+TEST_F(TrickPlayHandlerTest, VideoStreamWithDecreasingTrickPlayRates) { + const std::vector trick_play_rates( + std::begin(kTrickPlayRatesDecreasing), + std::end(kTrickPlayRatesDecreasing)); + SetUpTrickPlayHandler(trick_play_rates); + + ASSERT_OK(Process(GetVideoStreamInfoStreamData(kStreamIndex0, kTimeScale))); + // The stream info is cached, so the output is empty. + EXPECT_THAT( + GetOutputStreamDataVector(), + ElementsAre(IsStreamInfo(kStreamIndex0, kTimeScale, !kEncrypted))); + ClearOutputStreamDataVector(); + + const int kVideoStartTimestamp = 12345; + // Group of Picture size, the frequency of key frames. + const int kGOPSize = 3; + for (int i = 0; i < 3; ++i) { + const bool is_key_frame = (i % kGOPSize == 0); + ASSERT_OK(Process(GetMediaSampleStreamData( + kStreamIndex0, kVideoStartTimestamp + kDuration * i, kDuration, + is_key_frame))); + } + + EXPECT_THAT( + GetOutputStreamDataVector(), + ElementsAre( + // Frame 0, key frame. + IsMediaSample(kStreamIndex0, kVideoStartTimestamp, kDuration, + !kEncrypted), + // Frame 1. + IsMediaSample(kStreamIndex0, kVideoStartTimestamp + kDuration, + kDuration, !kEncrypted), + // Frame 2. + IsMediaSample(kStreamIndex0, kVideoStartTimestamp + kDuration * 2, + kDuration, !kEncrypted))); + ClearOutputStreamDataVector(); + + // This expectation are separated from the expectation above because + // ElementsAre supports at most 10 elements. + for (int i = 3; i < 6; ++i) { + const bool is_key_frame = (i % kGOPSize == 0); + ASSERT_OK(Process(GetMediaSampleStreamData( + kStreamIndex0, kVideoStartTimestamp + kDuration * i, kDuration, + is_key_frame))); + } + + EXPECT_THAT( + GetOutputStreamDataVector(), + ElementsAre( + // Frame 3, key frame. + IsKeyFrameMediaSample( + kStreamIndex0, kVideoStartTimestamp + kDuration * 3, kDuration), + // Stream info, TrickPlayRate = 1. 
+ IsTrickPlayVideoStreamInfo( + kStreamIndex2, kTimeScale, !kEncrypted, + kTrickPlayRatesDecreasing[1], + static_cast(kGOPSize) * kTrickPlayRatesDecreasing[1]), + // Frame 0, TrickPlayRate = 1. + IsKeyFrameMediaSample(kStreamIndex2, kVideoStartTimestamp, + kDuration * 3), + // Frame 4. + IsMediaSample(kStreamIndex0, kVideoStartTimestamp + kDuration * 4, + kDuration, !kEncrypted), + // Frame 5. + IsMediaSample(kStreamIndex0, kVideoStartTimestamp + kDuration * 5, + kDuration, !kEncrypted))); + ClearOutputStreamDataVector(); + + // This expectation are separated from the expectation above because + // ElementsAre supports at most 10 elements. + for (int i = 6; i < 8; ++i) { + const bool is_key_frame = (i % kGOPSize == 0); + ASSERT_OK(Process(GetMediaSampleStreamData( + kStreamIndex0, kVideoStartTimestamp + kDuration * i, kDuration, + is_key_frame))); + } + + EXPECT_THAT( + GetOutputStreamDataVector(), + ElementsAre( + // Frame 6, key frame. + IsKeyFrameMediaSample( + kStreamIndex0, kVideoStartTimestamp + kDuration * 6, kDuration), + // Stream info, TrickPlayRate = 2. + IsTrickPlayVideoStreamInfo( + kStreamIndex1, kTimeScale, !kEncrypted, + kTrickPlayRatesDecreasing[0], + static_cast(kGOPSize) * kTrickPlayRatesDecreasing[0]), + // Frame 0, TrickPlayRate = 2. + IsKeyFrameMediaSample(kStreamIndex1, kVideoStartTimestamp, + kDuration * 6), + // Frame 3, TrickPlayRate = 1. + IsKeyFrameMediaSample(kStreamIndex2, + kVideoStartTimestamp + kDuration * 3, + kDuration * 3), + // Frame 7. + IsMediaSample(kStreamIndex0, kVideoStartTimestamp + kDuration * 7, + kDuration, !kEncrypted))); + ClearOutputStreamDataVector(); + + ASSERT_OK(FlushStream(0)); + EXPECT_THAT(GetOutputStreamDataVector(), + ElementsAre( + // Frame 6, TrickPlayRate = 2. + IsKeyFrameMediaSample(kStreamIndex1, kVideoStartTimestamp + kDuration * 6, kDuration * 2), - // Frame 0, TrickPlayRate = 4. - IsKeyFrameMediaSample(kStreamIndex3, kVideoStartTimestamp, - kDuration * 8))); + // Frame 6, TrickPlayRate = 1. 
+ IsKeyFrameMediaSample(kStreamIndex2, + kVideoStartTimestamp + kDuration * 6, + kDuration * 2))); ClearOutputStreamDataVector(); // Flush again, get nothing. diff --git a/packager/mpd/base/dash_iop_mpd_notifier.cc b/packager/mpd/base/dash_iop_mpd_notifier.cc index f48dc854a2..48fb53f702 100644 --- a/packager/mpd/base/dash_iop_mpd_notifier.cc +++ b/packager/mpd/base/dash_iop_mpd_notifier.cc @@ -151,6 +151,34 @@ bool DashIopMpdNotifier::Flush() { return WriteMpdToFile(output_path_, mpd_builder_.get()); } +AdaptationSet* DashIopMpdNotifier::ReuseAdaptationSet( + const std::list& adaptation_sets, + const MediaInfo& media_info) { + const bool has_protected_content = media_info.has_protected_content(); + for (AdaptationSet* adaptation_set : adaptation_sets) { + ProtectedContentMap::const_iterator protected_content_it = + protected_content_map_.find(adaptation_set->id()); + + // If the AdaptationSet ID is not registered in the map, then it is clear + // content. + if (protected_content_it == protected_content_map_.end()) { + // Can reuse the AdaptationSet without content protection. + if (!has_protected_content) { + return adaptation_set; + } + continue; + } + + if (ProtectedContentEq(protected_content_it->second, + media_info.protected_content())) { + // Content protection info matches. Reuse the AdaptationSet. 
+ return adaptation_set; + } + } + + return nullptr; +} + AdaptationSet* DashIopMpdNotifier::GetAdaptationSetForMediaInfo( const std::string& key, const MediaInfo& media_info) { @@ -158,30 +186,10 @@ AdaptationSet* DashIopMpdNotifier::GetAdaptationSetForMediaInfo( if (adaptation_sets.empty()) return NewAdaptationSet(media_info, &adaptation_sets); - const bool has_protected_content = media_info.has_protected_content(); - - for (std::list::const_iterator adaptation_set_it = - adaptation_sets.begin(); - adaptation_set_it != adaptation_sets.end(); ++adaptation_set_it) { - ProtectedContentMap::const_iterator protected_content_it = - protected_content_map_.find((*adaptation_set_it)->id()); - - // If the AdaptationSet ID is not registered in the map, then it is clear - // content (or encrypted but doesn't need element - // possibly because the player knows how to handle it). - if (protected_content_it == protected_content_map_.end()) { - // Can reuse the AdaptationSet without content protection. - if (!has_protected_content) - return *adaptation_set_it; - continue; - } - - if (ProtectedContentEq(protected_content_it->second, - media_info.protected_content())) { - // Content protection info matches. Reuse the AdaptationSet. - return *adaptation_set_it; - } - } + AdaptationSet* reuse_adaptation_set = + ReuseAdaptationSet(adaptation_sets, media_info); + if (reuse_adaptation_set) + return reuse_adaptation_set; // None of the adaptation sets match with the new content protection. // Need a new one. 
@@ -267,8 +275,42 @@ AdaptationSet* DashIopMpdNotifier::NewAdaptationSet( (*adaptation_sets->begin())->AddRole(AdaptationSet::kRoleMain); new_adaptation_set->AddRole(AdaptationSet::kRoleMain); } + + if (media_info.video_info().trick_play_rate() > 0) { + uint32_t trick_play_reference_id = 0; + if (!FindOriginalAdaptationSetForTrickPlay(media_info, + &trick_play_reference_id)) { + LOG(ERROR) << "Failed to find main adaptation set for trick play."; + return nullptr; + } + DCHECK_NE(new_adaptation_set->id(), trick_play_reference_id); + new_adaptation_set->AddTrickPlayReferenceId(trick_play_reference_id); + } } return new_adaptation_set; } +bool DashIopMpdNotifier::FindOriginalAdaptationSetForTrickPlay( + const MediaInfo& media_info, + uint32_t* main_adaptation_set_id) { + MediaInfo media_info_no_trickplay = media_info; + media_info_no_trickplay.mutable_video_info()->clear_trick_play_rate(); + std::string key = GetAdaptationSetKey(media_info_no_trickplay); + const std::list& adaptation_sets = + adaptation_set_list_map_[key]; + if (adaptation_sets.empty()) { + return false; + } + + AdaptationSet* reuse_adaptation_set = + ReuseAdaptationSet(adaptation_sets, media_info); + if (!reuse_adaptation_set) { + return false; + } + + *main_adaptation_set_id = reuse_adaptation_set->id(); + + return true; +} + } // namespace shaka diff --git a/packager/mpd/base/dash_iop_mpd_notifier.h b/packager/mpd/base/dash_iop_mpd_notifier.h index a9cc1ebe01..75d7347fd0 100644 --- a/packager/mpd/base/dash_iop_mpd_notifier.h +++ b/packager/mpd/base/dash_iop_mpd_notifier.h @@ -64,6 +64,19 @@ class DashIopMpdNotifier : public MpdNotifier { // Maps AdaptationSet ID to ProtectedContent. typedef std::map ProtectedContentMap; + // Find reusable AdaptationSet, instead of creating a new AdaptationSet for + // the |media_info|. 
There are two cases that an |existing_adaptation_set| + // can be used: + // 1) The media info does not have protected content and there is an existing + // unprotected content AdaptationSet. + // 2) The media info has protected content and there is an existing + // AdaptationSet, which has same MediaInfo::ProtectedContent protobuf. + // Returns the reusable AdaptationSet pointer if found, otherwise returns + // nullptr. + AdaptationSet* ReuseAdaptationSet( + const std::list& adaptation_sets, + const MediaInfo& media_info); + // Checks the protected_content field of media_info and returns a non-null // AdaptationSet* for a new Representation. // This does not necessarily return a new AdaptationSet. If @@ -83,6 +96,15 @@ class DashIopMpdNotifier : public MpdNotifier { AdaptationSet* NewAdaptationSet(const MediaInfo& media_info, std::list* adaptation_sets); + // Gets the original AdaptationSet which the trick play video belongs + // to and returns the id of the original adaptation set. + // It is assumed that the corresponding AdaptationSet has been created before + // the trick play AdaptationSet. + // Returns true if |original_adaptation_set_id| is found, otherwise false. + bool FindOriginalAdaptationSetForTrickPlay( + const MediaInfo& media_info, + uint32_t* original_adaptation_set_id); + // Testing only method. Returns a pointer to MpdBuilder. MpdBuilder* MpdBuilderForTesting() const { return mpd_builder_.get(); diff --git a/packager/mpd/base/dash_iop_mpd_notifier_unittest.cc b/packager/mpd/base/dash_iop_mpd_notifier_unittest.cc index 8a08437846..d802f91937 100644 --- a/packager/mpd/base/dash_iop_mpd_notifier_unittest.cc +++ b/packager/mpd/base/dash_iop_mpd_notifier_unittest.cc @@ -154,6 +154,72 @@ TEST_F(DashIopMpdNotifierTest, NotifyNewContainer) { EXPECT_TRUE(notifier.Flush()); } +// Verify that basic VOD NotifyNewContainer() operation works on trick play +// streams. +// No encrypted contents. 
+TEST_F(DashIopMpdNotifierTest, NotifyNewContainerForTrickPlay) { + const char kTrickPlayMediaInfo[] = + "video_info {\n" + " codec: 'avc1'\n" + " width: 1280\n" + " height: 720\n" + " time_scale: 10\n" + " frame_duration: 100\n" + " pixel_width: 1\n" + " pixel_height: 1\n" + " trick_play_rate: 2\n" + " playback_rate: 10\n" + "}\n" + "container_type: 1\n"; + DashIopMpdNotifier notifier(empty_mpd_option_, empty_base_urls_, + output_path_); + + std::unique_ptr mock_mpd_builder(new MockMpdBuilder()); + + // Not using default mocks in this test so that we can keep track of + // mocks by named mocks. + const uint32_t kAdaptationSetId = 2u; + const uint32_t kTrickPlayAdaptationSetId = 3u; + std::unique_ptr mock_adaptation_set( + new MockAdaptationSet(kAdaptationSetId)); + std::unique_ptr mock_tp_adaptation_set( + new MockAdaptationSet(kTrickPlayAdaptationSetId)); + + const uint32_t kRepresentationId = 4u; + const uint32_t kTrickPlayRepresentationId = 5u; + std::unique_ptr mock_representation( + new MockRepresentation(kRepresentationId)); + std::unique_ptr mock_tp_representation( + new MockRepresentation(kTrickPlayRepresentationId)); + + InSequence in_sequence; + EXPECT_CALL(*mock_mpd_builder, AddAdaptationSet(_)) + .WillOnce(Return(mock_adaptation_set.get())); + EXPECT_CALL(*mock_adaptation_set, AddRole(_)).Times(0); + EXPECT_CALL(*mock_adaptation_set, AddRepresentation(_)) + .WillOnce(Return(mock_representation.get())); + + // Calls for trick-play stream. 
+ EXPECT_CALL(*mock_mpd_builder, AddAdaptationSet(_)) + .WillOnce(Return(mock_tp_adaptation_set.get())); + EXPECT_CALL(*mock_tp_adaptation_set, AddRole(_)).Times(0); + EXPECT_CALL(*mock_tp_adaptation_set, + AddTrickPlayReferenceId(kAdaptationSetId)) + .Times(1); + EXPECT_CALL(*mock_tp_adaptation_set, AddRepresentation(_)) + .WillOnce(Return(mock_tp_representation.get())); + + EXPECT_CALL(*mock_mpd_builder, ToString(_)).WillOnce(Return(true)); + + uint32_t unused_container_id; + SetMpdBuilder(&notifier, std::move(mock_mpd_builder)); + EXPECT_TRUE(notifier.NotifyNewContainer(ConvertToMediaInfo(kValidMediaInfo), + &unused_container_id)); + EXPECT_TRUE(notifier.NotifyNewContainer( + ConvertToMediaInfo(kTrickPlayMediaInfo), &unused_container_id)); + EXPECT_TRUE(notifier.Flush()); +} + // Verify that if the MediaInfo contains text information, then // MpdBuilder::ForceSetSegmentAlignment() is called. TEST_F(DashIopMpdNotifierTest, NotifyNewTextContainer) { diff --git a/packager/mpd/base/media_info.proto b/packager/mpd/base/media_info.proto index 493395802d..5a629e9b17 100644 --- a/packager/mpd/base/media_info.proto +++ b/packager/mpd/base/media_info.proto @@ -40,6 +40,13 @@ message MediaInfo { // aspect ratio, or the @par attribute set on AdaptationSet element. optional uint32 pixel_width = 7; optional uint32 pixel_height = 8; + + // trick_play_rate: sample rate of the key frame from the original stream. + // e.g., 1 means every key frame, 2 means every two key frames. + optional uint32 trick_play_rate = 9; + // playback_rate: the playout capability (e.g., 4x, 8x, 16x fast forward) of + // the trick play stream. 
+ optional uint32 playback_rate = 10; } message AudioInfo { diff --git a/packager/mpd/base/mock_mpd_builder.h b/packager/mpd/base/mock_mpd_builder.h index e74e6e94ec..d1446dcf52 100644 --- a/packager/mpd/base/mock_mpd_builder.h +++ b/packager/mpd/base/mock_mpd_builder.h @@ -38,6 +38,7 @@ class MockAdaptationSet : public AdaptationSet { void(const std::string& drm_uuid, const std::string& pssh)); MOCK_METHOD1(AddRole, void(AdaptationSet::Role role)); MOCK_METHOD1(ForceSetSegmentAlignment, void(bool segment_alignment)); + MOCK_METHOD1(AddTrickPlayReferenceId, void(uint32_t id)); private: // Only for constructing the super class. Not used for testing. diff --git a/packager/mpd/base/mpd_builder.cc b/packager/mpd/base/mpd_builder.cc index d92406e614..739565f009 100644 --- a/packager/mpd/base/mpd_builder.cc +++ b/packager/mpd/base/mpd_builder.cc @@ -824,6 +824,17 @@ xml::scoped_xml_ptr AdaptationSet::GetXml() { return xml::scoped_xml_ptr(); } + if (!trick_play_reference_ids_.empty()) { + std::string id_string; + for (uint32_t id : trick_play_reference_ids_) { + id_string += std::to_string(id) + ","; + } + DCHECK(!id_string.empty()); + id_string.resize(id_string.size() - 1); + adaptation_set.AddEssentialProperty( + "http://dashif.org/guidelines/trickmode", id_string); + } + std::string switching_ids; for (uint32_t id : adaptation_set_switching_ids_) { if (!switching_ids.empty()) @@ -889,6 +900,10 @@ void AdaptationSet::OnSetFrameRateForRepresentation( RecordFrameRate(frame_duration, timescale); } +void AdaptationSet::AddTrickPlayReferenceId(uint32_t id) { + trick_play_reference_ids_.insert(id); +} + bool AdaptationSet::GetEarliestTimestamp(double* timestamp_seconds) { DCHECK(timestamp_seconds); @@ -1279,7 +1294,7 @@ bool Representation::IsContiguous(uint64_t start_time, LOG(ERROR) << "Segments should not be out of order segment. 
Adding segment " "with start_time == " << start_time << " but the previous segment starts at " - << previous.start_time << "."; + << previous_segment_start_time << "."; return false; } diff --git a/packager/mpd/base/mpd_builder.h b/packager/mpd/base/mpd_builder.h index 965f1a1924..e3a8a6431d 100644 --- a/packager/mpd/base/mpd_builder.h +++ b/packager/mpd/base/mpd_builder.h @@ -267,6 +267,11 @@ class AdaptationSet { uint32_t frame_duration, uint32_t timescale); + /// Add the id of the adaptation set this trick play adaptation set belongs + /// to. + /// @param id the id of the reference (or main) adaptation set. + virtual void AddTrickPlayReferenceId(uint32_t id); + protected: /// @param adaptation_set_id is an ID number for this AdaptationSet. /// @param lang is the language of this AdaptationSet. Mainly relevant for @@ -384,6 +389,12 @@ class AdaptationSet { // reasonable and may cause an out-of-memory problem. RepresentationTimeline representation_segment_start_times_; + // Record the reference id for the original adaptation sets the trick play + // stream belongs to. This is a set because the trick play streams may be for + // multiple AdaptationSets (e.g. SD and HD videos in different AdaptationSets + // can share the same trick play stream.) + std::set trick_play_reference_ids_; + DISALLOW_COPY_AND_ASSIGN(AdaptationSet); }; diff --git a/packager/mpd/base/mpd_utils.cc b/packager/mpd/base/mpd_utils.cc index 6fa388ed1f..7baf484402 100644 --- a/packager/mpd/base/mpd_utils.cc +++ b/packager/mpd/base/mpd_utils.cc @@ -136,6 +136,13 @@ std::string GetAdaptationSetKey(const MediaInfo& media_info) { key.append(":"); key.append(GetLanguage(media_info)); + // Trick play streams of the same original stream, but possibly with + // different trick_play_rates, belong to the same trick play AdaptationSet. 
+ if (media_info.has_video_info() && + media_info.video_info().trick_play_rate() > 0) { + key.append(":trick_play"); + } + return key; } diff --git a/packager/mpd/base/xml/xml_node.cc b/packager/mpd/base/xml/xml_node.cc index 3dd571b6d8..8309e33b96 100644 --- a/packager/mpd/base/xml/xml_node.cc +++ b/packager/mpd/base/xml/xml_node.cc @@ -175,6 +175,15 @@ void RepresentationBaseXmlNode::AddSupplementalProperty( AddChild(supplemental_property.PassScopedPtr()); } +void RepresentationBaseXmlNode::AddEssentialProperty( + const std::string& scheme_id_uri, + const std::string& value) { + XmlNode essential_property("EssentialProperty"); + essential_property.SetStringAttribute("schemeIdUri", scheme_id_uri); + essential_property.SetStringAttribute("value", value); + AddChild(essential_property.PassScopedPtr()); +} + bool RepresentationBaseXmlNode::AddContentProtectionElement( const ContentProtectionElement& content_protection_element) { XmlNode content_protection_node("ContentProtection"); @@ -245,6 +254,16 @@ bool RepresentationXmlNode::AddVideoInfo(const VideoInfo& video_info, base::IntToString(video_info.time_scale()) + "/" + base::IntToString(video_info.frame_duration())); } + + if (video_info.has_playback_rate()) { + SetStringAttribute("maxPlayoutRate", + base::IntToString(video_info.playback_rate())); + // Since the trick play stream contains only key frames, there is no coding + // dependency on the main stream. Simply set the codingDependency to false. + // TODO(hmchen): propagate this attribute up to the AdaptationSet, since + // all are set to false. 
+ SetStringAttribute("codingDependency", "false"); + } return true; } diff --git a/packager/mpd/base/xml/xml_node.h b/packager/mpd/base/xml/xml_node.h index 2d5c88ee1b..c841b60768 100644 --- a/packager/mpd/base/xml/xml_node.h +++ b/packager/mpd/base/xml/xml_node.h @@ -105,6 +105,11 @@ class RepresentationBaseXmlNode : public XmlNode { void AddSupplementalProperty(const std::string& scheme_id_uri, const std::string& value); + /// @param scheme_id_uri is content of the schemeIdUri attribute. + /// @param value is the content of value attribute. + void AddEssentialProperty(const std::string& scheme_id_uri, + const std::string& value); + protected: explicit RepresentationBaseXmlNode(const char* name); diff --git a/packager/packager.gyp b/packager/packager.gyp index c91cdc8d28..6d0e9c48aa 100644 --- a/packager/packager.gyp +++ b/packager/packager.gyp @@ -54,6 +54,7 @@ 'media/formats/webm/webm.gyp:webm', 'media/formats/webvtt/webvtt.gyp:webvtt', 'media/formats/wvm/wvm.gyp:wvm', + 'media/trick_play/trick_play.gyp:trick_play', 'mpd/mpd.gyp:mpd_builder', 'third_party/boringssl/boringssl.gyp:boringssl', 'third_party/gflags/gflags.gyp:gflags',