DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs
webm_cluster_parser.h
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
6 #define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
7 
8 #include <deque>
9 #include <map>
10 #include <set>
11 #include <string>
12 
13 #include "packager/base/memory/scoped_ptr.h"
14 #include "packager/media/base/media_parser.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/formats/webm/webm_parser.h"
17 #include "packager/media/formats/webm/webm_tracks_parser.h"
18 
19 namespace edash_packager {
20 namespace media {
21 
23  public:
24  // Fallback buffer durations, in milliseconds, used to estimate the duration
25  // of a buffer when none is set and no better estimate can be derived.
26  enum {
27  // A common audio frame of about 1k samples at 44.1kHz lasts roughly 23 ms.
28  kDefaultAudioBufferDurationInMs = 23,
29 
30  // Approximates one frame at 16fps (62.5 ms, rounded up). Chosen to prevent
31  // MSE stalls in videos with frame rates as low as 8fps.
32  kDefaultVideoBufferDurationInMs = 63
33  };
34 
35  // Opus packets encode the frame duration and other parameters in the 5 most
36  // significant bits of the first byte. This table is indexed by that 5-bit
37  // value; each entry is the duration, in microseconds, of one frame of the
38  // packet. See https://tools.ietf.org/html/rfc6716#page-14
39  static const uint16_t kOpusFrameDurationsMu[];
40 
41  private:
42  // Helper class that manages per-track state and buffer-duration bookkeeping.
43  class Track {
44  public:
45  Track(int track_num,
46  bool is_video,
47  int64_t default_duration,
48  const MediaParser::NewSampleCB& new_sample_cb);
49  ~Track();
50 
51  int track_num() const { return track_num_; }  // Accessor for |track_num_|.
52 
53  // If |last_added_buffer_missing_duration_| is set, updates its duration
54  // relative to |buffer|'s timestamp, adds it to |buffers_| and unsets it.
55  // Then, if |buffer| is missing duration, saves |buffer| into
56  // |last_added_buffer_missing_duration_|; otherwise adds |buffer| to
57  // |buffers_|. NOTE(review): presumably returns false on failure -- confirm.
58  bool AddBuffer(const scoped_refptr<MediaSample>& buffer);
59 
60  // If |last_added_buffer_missing_duration_| is set, sets its duration to
61  // |estimated_next_frame_duration_| when that is not kNoTimestamp(), or to a
62  // hard-coded default otherwise, then adds it to |buffers_| and unsets
63  // |last_added_buffer_missing_duration_|. (This lets the stream parser emit
64  // all buffers in a media segment before signaling end of segment.)
65  void ApplyDurationEstimateIfNeeded();
66 
67  // Clears all buffer state, including any held-aside buffer that was missing
68  // a duration, and all contents of |buffers_|.
69  void Reset();
70 
71  // Inspects block data and returns true if the block is judged to be a
72  // keyframe.
73  // |data| contains the bytes in the block.
74  // |size| indicates the number of bytes in |data|.
75  bool IsKeyframe(const uint8_t* data, int size) const;
76 
77  int64_t default_duration() const { return default_duration_; }  // Accessor.
78 
79  private:
80  // Helper that sanity-checks |buffer|'s duration, updates
81  // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
82  // Returns false if |buffer| failed the sanity check and therefore was not
83  // added to |buffers_|. Returns true otherwise.
84  bool QueueBuffer(const scoped_refptr<MediaSample>& buffer);
85 
86  // Helper that calculates the buffer duration to use in
87  // ApplyDurationEstimateIfNeeded().
88  int64_t GetDurationEstimate();
89 
90  // Counts the number of estimated durations used in this track. Used to
91  // prevent log spam from LOG()s about estimated durations.
92  int num_duration_estimates_ = 0;
93 
94  int track_num_;
95  bool is_video_;
96 
97  // Holds at most one buffer that was added without a duration (see
98  // AddBuffer() and ApplyDurationEstimateIfNeeded()). NOTE(review): other
99  // comments in this class mention |buffers_|, which is not declared here;
100  // confirm where completed buffers go (presumably |new_sample_cb_|).
101  scoped_refptr<MediaSample> last_added_buffer_missing_duration_;
102 
103  // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
104  int64_t default_duration_;
105 
106  // If kNoTimestamp(), then a default value will be used. This estimate is
107  // the maximum (for video), or minimum (for audio) duration seen so far for
108  // this track, and is used only if |default_duration_| is kNoTimestamp().
109  // TODO(chcunningham): Use maximum for audio too, adding checks to disable
110  // splicing when these estimates are observed in SourceBufferStream.
111  int64_t estimated_next_frame_duration_;
112 
113  MediaParser::NewSampleCB new_sample_cb_;
114  };
115 
116  typedef std::map<int, Track> TextTrackMap;
117 
118  public:
119  WebMClusterParser(int64_t timecode_scale,
120  int audio_track_num,
121  int64_t audio_default_duration,
122  int video_track_num,
123  int64_t video_default_duration,
124  const WebMTracksParser::TextTracks& text_tracks,
125  const std::set<int64_t>& ignored_tracks,
126  const std::string& audio_encryption_key_id,
127  const std::string& video_encryption_key_id,
128  const AudioCodec audio_codec,
129  const MediaParser::NewSampleCB& new_sample_cb);
130  ~WebMClusterParser() override;
131 
132  // Resets the parser state so it can accept a new cluster.
133  void Reset();
134 
135  // Parses a WebM cluster element in |buf|.
136  //
137  // Returns -1 if the parse fails.
138  // Returns 0 if more data is needed.
139  // Returns the number of bytes parsed on success.
140  int Parse(const uint8_t* buf, int size);
141 
142  int64_t cluster_start_time() const { return cluster_start_time_; }
143 
144  // Returns true if the last Parse() call stopped at the end of a cluster.
145  bool cluster_ended() const { return cluster_ended_; }
146 
147  private:
148  // WebMParserClient methods.
149  WebMParserClient* OnListStart(int id) override;
150  bool OnListEnd(int id) override;
151  bool OnUInt(int id, int64_t val) override;
152  bool OnBinary(int id, const uint8_t* data, int size) override;
153 
154  bool ParseBlock(bool is_simple_block,
155  const uint8_t* buf,
156  int size,
157  const uint8_t* additional,
158  int additional_size,
159  int duration,
160  int64_t discard_padding);
161  bool OnBlock(bool is_simple_block,
162  int track_num,
163  int timecode,
164  int duration,
165  int flags,
166  const uint8_t* data,
167  int size,
168  const uint8_t* additional,
169  int additional_size,
170  int64_t discard_padding);
171 
172  // Resets the Track objects associated with each text track.
173  void ResetTextTracks();
174 
175  // Search for the indicated track_num among the text tracks. Returns NULL
176  // if that track num is not a text track.
177  Track* FindTextTrack(int track_num);
178 
179  // Attempts to read the duration from the encoded audio data. Returns it as
180  // an int64_t (presumably microseconds -- confirm), or kNoTimestamp() if the
181  // duration cannot be retrieved. This violates layering rules, but lets MSE
182  // know the duration when it isn't explicitly given and cannot be calculated
183  // for Blocks at the end of a Cluster (the next Cluster in playback order may
184  // not be the next Cluster we parse, so we can't simply use the delta to the
185  // first Block in the next Cluster). Avoid calling if encrypted; may produce
186  // unexpected output. See implementation for supported codecs.
187  int64_t TryGetEncodedAudioDuration(const uint8_t* data, int size);
188 
189  // Reads the Opus packet header to determine the packet duration. Returns the
190  // duration as an int64_t, or kNoTimestamp() if it cannot be read from |data|.
191  int64_t ReadOpusDuration(const uint8_t* data, int size);
192 
193  // Tracks the number of LOGs made in process of reading encoded
194  // duration. Useful to prevent log spam.
195  int num_duration_errors_ = 0;
196 
197  double timecode_multiplier_; // Multiplier used to convert timecodes into
198  // microseconds.
199  std::set<int64_t> ignored_tracks_;
200  std::string audio_encryption_key_id_;
201  std::string video_encryption_key_id_;
202  const AudioCodec audio_codec_;
203 
204  WebMListParser parser_;
205 
206  int64_t last_block_timecode_ = -1;
207  scoped_ptr<uint8_t[]> block_data_;
208  int block_data_size_ = -1;
209  int64_t block_duration_ = -1;
210  int64_t block_add_id_ = -1;
211 
212  scoped_ptr<uint8_t[]> block_additional_data_;
213  // Must be 0 if |block_additional_data_| is null. Must be > 0 if
214  // |block_additional_data_| is NOT null.
215  int block_additional_data_size_ = 0;
216 
217  int64_t discard_padding_ = -1;
218  bool discard_padding_set_ = false;
219 
220  int64_t cluster_timecode_ = -1;
221  int64_t cluster_start_time_;
222  bool cluster_ended_ = false;
223 
224  Track audio_;
225  Track video_;
226  TextTrackMap text_track_map_;
227 
228  DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser);
229 };
230 
231 } // namespace media
232 } // namespace edash_packager
233 
234 #endif // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
base::Callback< bool(uint32_t track_id, const scoped_refptr< MediaSample > &media_sample)> NewSampleCB
Definition: media_parser.h:43