// Copyright 2014 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_ #define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_ #include #include #include #include #include "packager/base/memory/scoped_ptr.h" #include "packager/media/base/media_parser.h" #include "packager/media/base/media_sample.h" #include "packager/media/formats/webm/webm_parser.h" #include "packager/media/formats/webm/webm_tracks_parser.h" namespace edash_packager { namespace media { class WebMClusterParser : public WebMParserClient { public: // Numbers chosen to estimate the duration of a buffer if none is set and // there is not enough information to get a better estimate. enum { // Common 1k samples @44.1kHz kDefaultAudioBufferDurationInMs = 23, // Chosen to represent 16fps duration, which will prevent MSE stalls in // videos with frame-rates as low as 8fps. kDefaultVideoBufferDurationInMs = 63 }; // Opus packets encode the duration and other parameters in the 5 most // significant bits of the first byte. The index in this array corresponds // to the duration of each frame of the packet in microseconds. See // https://tools.ietf.org/html/rfc6716#page-14 static const uint16_t kOpusFrameDurationsMu[]; private: // Helper class that manages per-track state. class Track { public: Track(int track_num, bool is_video, int64_t default_duration, const MediaParser::NewSampleCB& new_sample_cb); ~Track(); int track_num() const { return track_num_; } // If |last_added_buffer_missing_duration_| is set, updates its duration // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or // otherwise adds |buffer| to |buffers_|. bool AddBuffer(const scoped_refptr& buffer); // If |last_added_buffer_missing_duration_| is set, updates its duration to // be non-kNoTimestamp() value of |estimated_next_frame_duration_| or a // hard-coded default, then adds it to |buffers_| and unsets // |last_added_buffer_missing_duration_|. (This method helps stream parser // emit all buffers in a media segment before signaling end of segment.) void ApplyDurationEstimateIfNeeded(); // Clears all buffer state, including any possibly held-aside buffer that // was missing duration, and all contents of |buffers_|. void Reset(); // Helper function used to inspect block data to determine if the // block is a keyframe. // |data| contains the bytes in the block. // |size| indicates the number of bytes in |data|. bool IsKeyframe(const uint8_t* data, int size) const; int64_t default_duration() const { return default_duration_; } private: // Helper that sanity-checks |buffer| duration, updates // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|. // Returns false if |buffer| failed sanity check and therefore was not added // to |buffers_|. Returns true otherwise. bool QueueBuffer(const scoped_refptr& buffer); // Helper that calculates the buffer duration to use in // ApplyDurationEstimateIfNeeded(). int64_t GetDurationEstimate(); // Counts the number of estimated durations used in this track. Used to // prevent log spam for LOG()s about estimated duration. int num_duration_estimates_ = 0; int track_num_; bool is_video_; // Parsed track buffers, each with duration and in (decode) timestamp order, // that have not yet been extracted into |ready_buffers_|. Note that up to // one additional buffer missing duration may be tracked by // |last_added_buffer_missing_duration_|. scoped_refptr last_added_buffer_missing_duration_; // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used. int64_t default_duration_; // If kNoTimestamp(), then a default value will be used. This estimate is // the maximum (for video), or minimum (for audio) duration seen so far for // this track, and is used only if |default_duration_| is kNoTimestamp(). // TODO(chcunningham): Use maximum for audio too, adding checks to disable // splicing when these estimates are observed in SourceBufferStream. int64_t estimated_next_frame_duration_; MediaParser::NewSampleCB new_sample_cb_; }; typedef std::map TextTrackMap; public: WebMClusterParser(int64_t timecode_scale, int audio_track_num, int64_t audio_default_duration, int video_track_num, int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, const AudioCodec audio_codec, const MediaParser::NewSampleCB& new_sample_cb); ~WebMClusterParser() override; // Resets the parser state so it can accept a new cluster. void Reset(); // Parses a WebM cluster element in |buf|. // // Returns -1 if the parse fails. // Returns 0 if more data is needed. // Returns the number of bytes parsed on success. int Parse(const uint8_t* buf, int size); int64_t cluster_start_time() const { return cluster_start_time_; } // Returns true if the last Parse() call stopped at the end of a cluster. bool cluster_ended() const { return cluster_ended_; } private: // WebMParserClient methods. WebMParserClient* OnListStart(int id) override; bool OnListEnd(int id) override; bool OnUInt(int id, int64_t val) override; bool OnBinary(int id, const uint8_t* data, int size) override; bool ParseBlock(bool is_simple_block, const uint8_t* buf, int size, const uint8_t* additional, int additional_size, int duration, int64_t discard_padding); bool OnBlock(bool is_simple_block, int track_num, int timecode, int duration, int flags, const uint8_t* data, int size, const uint8_t* additional, int additional_size, int64_t discard_padding); // Resets the Track objects associated with each text track. void ResetTextTracks(); // Search for the indicated track_num among the text tracks. Returns NULL // if that track num is not a text track. Track* FindTextTrack(int track_num); // Attempts to read the duration from the encoded audio data, returning as // TimeDelta or kNoTimestamp() if duration cannot be retrieved. This obviously // violates layering rules, but is useful for MSE to know duration in cases // where it isn't explicitly given and cannot be calculated for Blocks at the // end of a Cluster (the next Cluster in playback-order may not be the next // Cluster we parse, so we can't simply use the delta of the first Block in // the next Cluster). Avoid calling if encrypted; may produce unexpected // output. See implementation for supported codecs. int64_t TryGetEncodedAudioDuration(const uint8_t* data, int size); // Reads Opus packet header to determine packet duration. Duration returned // as TimeDelta or kNoTimestamp() upon failure to read duration from packet. int64_t ReadOpusDuration(const uint8_t* data, int size); // Tracks the number of LOGs made in process of reading encoded // duration. Useful to prevent log spam. int num_duration_errors_ = 0; double timecode_multiplier_; // Multiplier used to convert timecodes into // microseconds. std::set ignored_tracks_; std::string audio_encryption_key_id_; std::string video_encryption_key_id_; const AudioCodec audio_codec_; WebMListParser parser_; int64_t last_block_timecode_ = -1; scoped_ptr block_data_; int block_data_size_ = -1; int64_t block_duration_ = -1; int64_t block_add_id_ = -1; scoped_ptr block_additional_data_; // Must be 0 if |block_additional_data_| is null. Must be > 0 if // |block_additional_data_| is NOT null. int block_additional_data_size_ = 0; int64_t discard_padding_ = -1; bool discard_padding_set_ = false; int64_t cluster_timecode_ = -1; int64_t cluster_start_time_; bool cluster_ended_ = false; Track audio_; Track video_; TextTrackMap text_track_map_; DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser); }; } // namespace media } // namespace edash_packager #endif // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_