// Copyright 2014 Google LLC. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file or at // https://developers.google.com/open-source/licenses/bsd #include "packager/media/demuxer/demuxer.h" #include #include "packager/base/bind.h" #include "packager/base/logging.h" #include "packager/base/strings/string_number_conversions.h" #include "packager/file/file.h" #include "packager/media/base/decryptor_source.h" #include "packager/media/base/key_source.h" #include "packager/media/base/macros.h" #include "packager/media/base/media_sample.h" #include "packager/media/base/stream_info.h" #include "packager/media/formats/mp2t/mp2t_media_parser.h" #include "packager/media/formats/mp4/mp4_media_parser.h" #include "packager/media/formats/webm/webm_media_parser.h" #include "packager/media/formats/webvtt/webvtt_parser.h" #include "packager/media/formats/wvm/wvm_media_parser.h" namespace { // 65KB, sufficient to determine the container and likely all init data. const size_t kInitBufSize = 0x10000; const size_t kBufSize = 0x200000; // 2MB // Maximum number of allowed queued samples. If we are receiving a lot of // samples before seeing init_event, something is not right. The number // set here is arbitrary though. const size_t kQueuedSamplesLimit = 10000; const size_t kInvalidStreamIndex = static_cast(-1); const size_t kBaseVideoOutputStreamIndex = 0x100; const size_t kBaseAudioOutputStreamIndex = 0x200; const size_t kBaseTextOutputStreamIndex = 0x300; std::string GetStreamLabel(size_t stream_index) { switch (stream_index) { case kBaseVideoOutputStreamIndex: return "video"; case kBaseAudioOutputStreamIndex: return "audio"; case kBaseTextOutputStreamIndex: return "text"; default: return base::SizeTToString(stream_index); } } bool GetStreamIndex(const std::string& stream_label, size_t* stream_index) { DCHECK(stream_index); if (stream_label == "video") { *stream_index = kBaseVideoOutputStreamIndex; } else if (stream_label == "audio") { *stream_index = kBaseAudioOutputStreamIndex; } else if (stream_label == "text") { *stream_index = kBaseTextOutputStreamIndex; } else { // Expect stream_label to be a zero based stream id. if (!base::StringToSizeT(stream_label, stream_index)) { LOG(ERROR) << "Invalid argument --stream=" << stream_label << "; " << "should be 'audio', 'video', 'text', or a number"; return false; } } return true; } } namespace shaka { namespace media { Demuxer::Demuxer(const std::string& file_name) : file_name_(file_name), buffer_(new uint8_t[kBufSize]) {} Demuxer::~Demuxer() { if (media_file_) media_file_->Close(); } void Demuxer::SetKeySource(std::unique_ptr key_source) { key_source_ = std::move(key_source); } Status Demuxer::Run() { LOG(INFO) << "Demuxer::Run() on file '" << file_name_ << "'."; Status status = InitializeParser(); // ParserInitEvent callback is called after a few calls to Parse(), which sets // up the streams. Only after that, we can verify the outputs below. while (!all_streams_ready_ && status.ok()) status.Update(Parse()); // If no output is defined, then return success after receiving all stream // info. if (all_streams_ready_ && output_handlers().empty()) return Status::OK; if (!init_event_status_.ok()) return init_event_status_; if (!status.ok()) return status; // Check if all specified outputs exists. for (const auto& pair : output_handlers()) { if (std::find(stream_indexes_.begin(), stream_indexes_.end(), pair.first) == stream_indexes_.end()) { LOG(ERROR) << "Invalid argument, stream=" << GetStreamLabel(pair.first) << " not available."; return Status(error::INVALID_ARGUMENT, "Stream not available"); } } while (!cancelled_ && status.ok()) status.Update(Parse()); if (cancelled_ && status.ok()) return Status(error::CANCELLED, "Demuxer run cancelled"); if (status.error_code() == error::END_OF_STREAM) { for (size_t stream_index : stream_indexes_) { status = FlushDownstream(stream_index); if (!status.ok()) return status; } return Status::OK; } return status; } void Demuxer::Cancel() { cancelled_ = true; } Status Demuxer::SetHandler(const std::string& stream_label, std::shared_ptr handler) { size_t stream_index = kInvalidStreamIndex; if (!GetStreamIndex(stream_label, &stream_index)) { return Status(error::INVALID_ARGUMENT, "Invalid stream: " + stream_label); } return MediaHandler::SetHandler(stream_index, std::move(handler)); } void Demuxer::SetLanguageOverride(const std::string& stream_label, const std::string& language_override) { size_t stream_index = kInvalidStreamIndex; if (!GetStreamIndex(stream_label, &stream_index)) LOG(WARNING) << "Invalid stream for language override " << stream_label; language_overrides_[stream_index] = language_override; } Status Demuxer::InitializeParser() { DCHECK(!media_file_); DCHECK(!all_streams_ready_); LOG(INFO) << "Initialize Demuxer for file '" << file_name_ << "'."; media_file_ = File::Open(file_name_.c_str(), "r"); if (!media_file_) { return Status(error::FILE_FAILURE, "Cannot open file for reading " + file_name_); } // Read enough bytes before detecting the container. int64_t bytes_read = 0; while (static_cast(bytes_read) < kInitBufSize) { int64_t read_result = media_file_->Read(buffer_.get() + bytes_read, kInitBufSize); if (read_result < 0) return Status(error::FILE_FAILURE, "Cannot read file " + file_name_); if (read_result == 0) break; bytes_read += read_result; } container_name_ = DetermineContainer(buffer_.get(), bytes_read); // Initialize media parser. switch (container_name_) { case CONTAINER_MOV: parser_.reset(new mp4::MP4MediaParser()); break; case CONTAINER_MPEG2TS: parser_.reset(new mp2t::Mp2tMediaParser()); break; // Widevine classic (WVM) is derived from MPEG2PS. We do not support // non-WVM MPEG2PS file, thus we do not differentiate between the two. // Every MPEG2PS file is assumed to be WVM file. If it turns out not the // case, an error will be reported when trying to parse the file as WVM // file. case CONTAINER_MPEG2PS: FALLTHROUGH_INTENDED; case CONTAINER_WVM: parser_.reset(new wvm::WvmMediaParser()); break; case CONTAINER_WEBM: parser_.reset(new WebMMediaParser()); break; case CONTAINER_WEBVTT: parser_.reset(new WebVttParser()); break; case CONTAINER_UNKNOWN: { const int64_t kDumpSizeLimit = 512; LOG(ERROR) << "Failed to detect the container type from the buffer: " << base::HexEncode(buffer_.get(), std::min(bytes_read, kDumpSizeLimit)); return Status(error::INVALID_ARGUMENT, "Failed to detect the container type."); } default: NOTIMPLEMENTED() << "Container " << container_name_ << " is not supported."; return Status(error::UNIMPLEMENTED, "Container not supported."); } parser_->Init( base::Bind(&Demuxer::ParserInitEvent, base::Unretained(this)), base::Bind(&Demuxer::NewMediaSampleEvent, base::Unretained(this)), base::Bind(&Demuxer::NewTextSampleEvent, base::Unretained(this)), key_source_.get()); // Handle trailing 'moov'. if (container_name_ == CONTAINER_MOV && File::IsLocalRegularFile(file_name_.c_str())) { // TODO(kqyang): Investigate whether we can reuse the existing file // descriptor |media_file_| instead of opening the same file again. static_cast(parser_.get())->LoadMoov(file_name_); } if (!parser_->Parse(buffer_.get(), bytes_read)) { return Status(error::PARSER_FAILURE, "Cannot parse media file " + file_name_); } return Status::OK; } void Demuxer::ParserInitEvent( const std::vector>& stream_infos) { if (dump_stream_info_) { printf("\nFile \"%s\":\n", file_name_.c_str()); printf("Found %zu stream(s).\n", stream_infos.size()); for (size_t i = 0; i < stream_infos.size(); ++i) printf("Stream [%zu] %s\n", i, stream_infos[i]->ToString().c_str()); } int base_stream_index = 0; bool video_handler_set = output_handlers().find(kBaseVideoOutputStreamIndex) != output_handlers().end(); bool audio_handler_set = output_handlers().find(kBaseAudioOutputStreamIndex) != output_handlers().end(); bool text_handler_set = output_handlers().find(kBaseTextOutputStreamIndex) != output_handlers().end(); for (const std::shared_ptr& stream_info : stream_infos) { size_t stream_index = base_stream_index; if (video_handler_set && stream_info->stream_type() == kStreamVideo) { stream_index = kBaseVideoOutputStreamIndex; // Only for the first video stream. video_handler_set = false; } if (audio_handler_set && stream_info->stream_type() == kStreamAudio) { stream_index = kBaseAudioOutputStreamIndex; // Only for the first audio stream. audio_handler_set = false; } if (text_handler_set && stream_info->stream_type() == kStreamText) { stream_index = kBaseTextOutputStreamIndex; text_handler_set = false; } const bool handler_set = output_handlers().find(stream_index) != output_handlers().end(); if (handler_set) { track_id_to_stream_index_map_[stream_info->track_id()] = stream_index; stream_indexes_.push_back(stream_index); auto iter = language_overrides_.find(stream_index); if (iter != language_overrides_.end() && stream_info->stream_type() != kStreamVideo) { stream_info->set_language(iter->second); } if (stream_info->is_encrypted()) { init_event_status_.Update(Status(error::INVALID_ARGUMENT, "A decryption key source is not " "provided for an encrypted stream.")); } else { init_event_status_.Update( DispatchStreamInfo(stream_index, stream_info)); } } else { track_id_to_stream_index_map_[stream_info->track_id()] = kInvalidStreamIndex; } ++base_stream_index; } all_streams_ready_ = true; } bool Demuxer::NewMediaSampleEvent(uint32_t track_id, std::shared_ptr sample) { if (!all_streams_ready_) { if (queued_media_samples_.size() >= kQueuedSamplesLimit) { LOG(ERROR) << "Queued samples limit reached: " << kQueuedSamplesLimit; return false; } queued_media_samples_.emplace_back(track_id, sample); return true; } if (!init_event_status_.ok()) { return false; } while (!queued_media_samples_.empty()) { if (!PushMediaSample(queued_media_samples_.front().track_id, queued_media_samples_.front().sample)) { return false; } queued_media_samples_.pop_front(); } return PushMediaSample(track_id, sample); } bool Demuxer::NewTextSampleEvent(uint32_t track_id, std::shared_ptr sample) { if (!all_streams_ready_) { if (queued_text_samples_.size() >= kQueuedSamplesLimit) { LOG(ERROR) << "Queued samples limit reached: " << kQueuedSamplesLimit; return false; } queued_text_samples_.emplace_back(track_id, sample); return true; } if (!init_event_status_.ok()) { return false; } while (!queued_text_samples_.empty()) { if (!PushTextSample(queued_text_samples_.front().track_id, queued_text_samples_.front().sample)) { return false; } queued_text_samples_.pop_front(); } return PushTextSample(track_id, sample); } bool Demuxer::PushMediaSample(uint32_t track_id, std::shared_ptr sample) { auto stream_index_iter = track_id_to_stream_index_map_.find(track_id); if (stream_index_iter == track_id_to_stream_index_map_.end()) { LOG(ERROR) << "Track " << track_id << " not found."; return false; } if (stream_index_iter->second == kInvalidStreamIndex) return true; Status status = DispatchMediaSample(stream_index_iter->second, sample); if (!status.ok()) { LOG(ERROR) << "Failed to process sample " << stream_index_iter->second << " " << status; return false; } return true; } bool Demuxer::PushTextSample(uint32_t track_id, std::shared_ptr sample) { auto stream_index_iter = track_id_to_stream_index_map_.find(track_id); if (stream_index_iter == track_id_to_stream_index_map_.end()) { LOG(ERROR) << "Track " << track_id << " not found."; return false; } if (stream_index_iter->second == kInvalidStreamIndex) return true; Status status = DispatchTextSample(stream_index_iter->second, sample); if (!status.ok()) { LOG(ERROR) << "Failed to process sample " << stream_index_iter->second << " " << status; return false; } return true; } Status Demuxer::Parse() { DCHECK(media_file_); DCHECK(parser_); DCHECK(buffer_); int64_t bytes_read = media_file_->Read(buffer_.get(), kBufSize); if (bytes_read == 0) { if (!parser_->Flush()) return Status(error::PARSER_FAILURE, "Failed to flush."); return Status(error::END_OF_STREAM, ""); } else if (bytes_read < 0) { return Status(error::FILE_FAILURE, "Cannot read file " + file_name_); } return parser_->Parse(buffer_.get(), bytes_read) ? Status::OK : Status(error::PARSER_FAILURE, "Cannot parse media file " + file_name_); } } // namespace media } // namespace shaka