Shaka Packager SDK
webm_tracks_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/webm/webm_tracks_parser.h"
6 
7 #include "packager/base/logging.h"
8 #include "packager/base/strings/string_number_conversions.h"
9 #include "packager/base/strings/string_util.h"
10 #include "packager/media/base/timestamp.h"
11 #include "packager/media/formats/webm/webm_constants.h"
12 #include "packager/media/formats/webm/webm_content_encodings.h"
13 
14 namespace shaka {
15 namespace media {
16 
17 static TextKind CodecIdToTextKind(const std::string& codec_id) {
18  if (codec_id == kWebMCodecSubtitles)
19  return kTextSubtitles;
20 
21  if (codec_id == kWebMCodecCaptions)
22  return kTextCaptions;
23 
24  if (codec_id == kWebMCodecDescriptions)
25  return kTextDescriptions;
26 
27  if (codec_id == kWebMCodecMetadata)
28  return kTextMetadata;
29 
30  return kTextNone;
31 }
32 
33 static int64_t PrecisionCappedDefaultDuration(
34  const double timecode_scale_in_us,
35  const int64_t duration_in_ns) {
36  if (duration_in_ns <= 0)
37  return kNoTimestamp;
38 
39  int64_t mult = duration_in_ns / 1000;
40  mult /= timecode_scale_in_us;
41  if (mult == 0)
42  return kNoTimestamp;
43 
44  mult = static_cast<double>(mult) * timecode_scale_in_us;
45  return mult;
46 }
47 
48 WebMTracksParser::WebMTracksParser(bool ignore_text_tracks)
49  : track_type_(-1),
50  track_num_(-1),
51  seek_preroll_(-1),
52  codec_delay_(-1),
53  default_duration_(-1),
54  audio_track_num_(-1),
55  audio_default_duration_(-1),
56  video_track_num_(-1),
57  video_default_duration_(-1),
58  ignore_text_tracks_(ignore_text_tracks),
59  audio_client_(),
60  video_client_() {
61 }
62 
63 WebMTracksParser::~WebMTracksParser() {}
64 
65 int WebMTracksParser::Parse(const uint8_t* buf, int size) {
66  track_type_ =-1;
67  track_num_ = -1;
68  default_duration_ = -1;
69  track_name_.clear();
70  track_language_.clear();
71  audio_track_num_ = -1;
72  audio_default_duration_ = -1;
73  audio_stream_info_ = nullptr;
74  video_track_num_ = -1;
75  video_default_duration_ = -1;
76  video_stream_info_ = nullptr;
77  text_tracks_.clear();
78  ignored_tracks_.clear();
79 
80  WebMListParser parser(kWebMIdTracks, this);
81  int result = parser.Parse(buf, size);
82 
83  if (result <= 0)
84  return result;
85 
86  // For now we do all or nothing parsing.
87  return parser.IsParsingComplete() ? result : 0;
88 }
89 
91  const double timecode_scale_in_us) const {
92  return PrecisionCappedDefaultDuration(timecode_scale_in_us,
93  audio_default_duration_);
94 }
95 
96 int64_t WebMTracksParser::GetVideoDefaultDuration(
97  const double timecode_scale_in_us) const {
98  return PrecisionCappedDefaultDuration(timecode_scale_in_us,
99  video_default_duration_);
100 }
101 
102 WebMParserClient* WebMTracksParser::OnListStart(int id) {
103  if (id == kWebMIdContentEncodings) {
104  DCHECK(!track_content_encodings_client_.get());
105  track_content_encodings_client_.reset(new WebMContentEncodingsClient());
106  return track_content_encodings_client_->OnListStart(id);
107  }
108 
109  if (id == kWebMIdTrackEntry) {
110  track_type_ = -1;
111  track_num_ = -1;
112  default_duration_ = -1;
113  track_name_.clear();
114  track_language_.clear();
115  codec_id_ = "";
116  codec_private_.clear();
117  audio_client_.Reset();
118  video_client_.Reset();
119  return this;
120  }
121 
122  if (id == kWebMIdAudio)
123  return &audio_client_;
124 
125  if (id == kWebMIdVideo)
126  return &video_client_;
127 
128  return this;
129 }
130 
131 bool WebMTracksParser::OnListEnd(int id) {
132  if (id == kWebMIdContentEncodings) {
133  DCHECK(track_content_encodings_client_.get());
134  return track_content_encodings_client_->OnListEnd(id);
135  }
136 
137  if (id == kWebMIdTrackEntry) {
138  if (track_type_ == -1 || track_num_ == -1) {
139  LOG(ERROR) << "Missing TrackEntry data for "
140  << " TrackType " << track_type_ << " TrackNum " << track_num_;
141  return false;
142  }
143 
144  if (track_type_ != kWebMTrackTypeAudio &&
145  track_type_ != kWebMTrackTypeVideo &&
146  track_type_ != kWebMTrackTypeSubtitlesOrCaptions &&
147  track_type_ != kWebMTrackTypeDescriptionsOrMetadata) {
148  LOG(ERROR) << "Unexpected TrackType " << track_type_;
149  return false;
150  }
151 
152  TextKind text_track_kind = kTextNone;
153  if (track_type_ == kWebMTrackTypeSubtitlesOrCaptions) {
154  text_track_kind = CodecIdToTextKind(codec_id_);
155  if (text_track_kind == kTextNone) {
156  LOG(ERROR) << "Missing TrackEntry CodecID"
157  << " TrackNum " << track_num_;
158  return false;
159  }
160 
161  if (text_track_kind != kTextSubtitles &&
162  text_track_kind != kTextCaptions) {
163  LOG(ERROR) << "Wrong TrackEntry CodecID"
164  << " TrackNum " << track_num_;
165  return false;
166  }
167  } else if (track_type_ == kWebMTrackTypeDescriptionsOrMetadata) {
168  text_track_kind = CodecIdToTextKind(codec_id_);
169  if (text_track_kind == kTextNone) {
170  LOG(ERROR) << "Missing TrackEntry CodecID"
171  << " TrackNum " << track_num_;
172  return false;
173  }
174 
175  if (text_track_kind != kTextDescriptions &&
176  text_track_kind != kTextMetadata) {
177  LOG(ERROR) << "Wrong TrackEntry CodecID"
178  << " TrackNum " << track_num_;
179  return false;
180  }
181  }
182 
183  std::string encryption_key_id;
184  if (track_content_encodings_client_) {
185  DCHECK(!track_content_encodings_client_->content_encodings().empty());
186  // If we have multiple ContentEncoding in one track. Always choose the
187  // key id in the first ContentEncoding as the key id of the track.
188  encryption_key_id = track_content_encodings_client_->
189  content_encodings()[0]->encryption_key_id();
190  }
191 
192  if (track_type_ == kWebMTrackTypeAudio) {
193  if (audio_track_num_ == -1) {
194  audio_track_num_ = track_num_;
195  audio_encryption_key_id_ = encryption_key_id;
196 
197  if (default_duration_ == 0) {
198  LOG(ERROR) << "Illegal 0ns audio TrackEntry "
199  "DefaultDuration";
200  return false;
201  }
202  audio_default_duration_ = default_duration_;
203 
204  DCHECK(!audio_stream_info_);
205  audio_stream_info_ = audio_client_.GetAudioStreamInfo(
206  audio_track_num_, codec_id_, codec_private_, seek_preroll_,
207  codec_delay_, track_language_, !audio_encryption_key_id_.empty());
208  if (!audio_stream_info_)
209  return false;
210  } else {
211  DLOG(INFO) << "Ignoring audio track " << track_num_;
212  ignored_tracks_.insert(track_num_);
213  }
214  } else if (track_type_ == kWebMTrackTypeVideo) {
215  if (video_track_num_ == -1) {
216  video_track_num_ = track_num_;
217  video_encryption_key_id_ = encryption_key_id;
218 
219  if (default_duration_ == 0) {
220  LOG(ERROR) << "Illegal 0ns video TrackEntry "
221  "DefaultDuration";
222  return false;
223  }
224  video_default_duration_ = default_duration_;
225 
226  DCHECK(!video_stream_info_);
227  video_stream_info_ = video_client_.GetVideoStreamInfo(
228  video_track_num_, codec_id_, codec_private_,
229  !video_encryption_key_id_.empty());
230  if (!video_stream_info_)
231  return false;
232 
233  if (codec_id_ == "V_VP8" || codec_id_ == "V_VP9") {
234  vp_config_ = video_client_.GetVpCodecConfig(codec_private_);
235  const double kNanosecondsPerSecond = 1000000000.0;
236  if (codec_id_ == "V_VP9" &&
237  (!vp_config_.is_level_set() || vp_config_.level() == 0)) {
238  vp_config_.SetVP9Level(
239  video_stream_info_->width(), video_stream_info_->height(),
240  video_default_duration_ / kNanosecondsPerSecond);
241  }
242  }
243 
244  } else {
245  DLOG(INFO) << "Ignoring video track " << track_num_;
246  ignored_tracks_.insert(track_num_);
247  }
248  } else if (track_type_ == kWebMTrackTypeSubtitlesOrCaptions ||
249  track_type_ == kWebMTrackTypeDescriptionsOrMetadata) {
250  if (ignore_text_tracks_) {
251  DLOG(INFO) << "Ignoring text track " << track_num_;
252  ignored_tracks_.insert(track_num_);
253  } else {
254  std::string track_num = base::Int64ToString(track_num_);
255  text_tracks_[track_num_] = TextTrackConfig(
256  text_track_kind, track_name_, track_language_, track_num);
257  }
258  } else {
259  LOG(ERROR) << "Unexpected TrackType " << track_type_;
260  return false;
261  }
262 
263  track_type_ = -1;
264  track_num_ = -1;
265  default_duration_ = -1;
266  track_name_.clear();
267  track_language_.clear();
268  codec_id_ = "";
269  codec_private_.clear();
270  track_content_encodings_client_.reset();
271 
272  audio_client_.Reset();
273  video_client_.Reset();
274  return true;
275  }
276 
277  return true;
278 }
279 
280 bool WebMTracksParser::OnUInt(int id, int64_t val) {
281  int64_t* dst = NULL;
282 
283  switch (id) {
284  case kWebMIdTrackNumber:
285  dst = &track_num_;
286  break;
287  case kWebMIdTrackType:
288  dst = &track_type_;
289  break;
290  case kWebMIdSeekPreRoll:
291  dst = &seek_preroll_;
292  break;
293  case kWebMIdCodecDelay:
294  dst = &codec_delay_;
295  break;
296  case kWebMIdDefaultDuration:
297  dst = &default_duration_;
298  break;
299  default:
300  return true;
301  }
302 
303  if (*dst != -1) {
304  LOG(ERROR) << "Multiple values for id " << std::hex << id << " specified";
305  return false;
306  }
307 
308  *dst = val;
309  return true;
310 }
311 
312 bool WebMTracksParser::OnFloat(int id, double val) {
313  return true;
314 }
315 
316 bool WebMTracksParser::OnBinary(int id, const uint8_t* data, int size) {
317  if (id == kWebMIdCodecPrivate) {
318  if (!codec_private_.empty()) {
319  LOG(ERROR) << "Multiple CodecPrivate fields in a track.";
320  return false;
321  }
322  codec_private_.assign(data, data + size);
323  return true;
324  }
325  return true;
326 }
327 
328 bool WebMTracksParser::OnString(int id, const std::string& str) {
329  if (id == kWebMIdCodecID) {
330  if (!codec_id_.empty()) {
331  LOG(ERROR) << "Multiple CodecID fields in a track";
332  return false;
333  }
334 
335  codec_id_ = str;
336  return true;
337  }
338 
339  if (id == kWebMIdName) {
340  track_name_ = str;
341  return true;
342  }
343 
344  if (id == kWebMIdLanguage) {
345  track_language_ = str;
346  return true;
347  }
348 
349  return true;
350 }
351 
352 } // namespace media
353 } // namespace shaka
shaka::media::WebMVideoClient::Reset
void Reset()
Reset this object's state so it can process a new video track element.
Definition: webm_video_client.cc:27
shaka::media::WebMListParser::IsParsingComplete
bool IsParsingComplete() const
Definition: webm_parser.cc:828
shaka
All the methods that are virtual are virtual for mocking.
Definition: gflags_hex_bytes.cc:11
shaka::media::VPCodecConfigurationRecord::SetVP9Level
void SetVP9Level(uint16_t width, uint16_t height, double sample_duration_seconds)
Compute and set VP9 Level based on the input attributes.
Definition: vp_codec_configuration_record.cc:228
shaka::media::WebMVideoClient::GetVpCodecConfig
VPCodecConfigurationRecord GetVpCodecConfig(const std::vector< uint8_t > &codec_private)
Definition: webm_video_client.cc:130
shaka::media::WebMListParser
Definition: webm_parser.h:54
shaka::media::WebMTracksParser::Parse
int Parse(const uint8_t *buf, int size)
Definition: webm_tracks_parser.cc:65
shaka::media::WebMTracksParser::GetAudioDefaultDuration
int64_t GetAudioDefaultDuration(const double timecode_scale_in_us) const
Definition: webm_tracks_parser.cc:90
shaka::media::WebMVideoClient::GetVideoStreamInfo
std::shared_ptr< VideoStreamInfo > GetVideoStreamInfo(int64_t track_num, const std::string &codec_id, const std::vector< uint8_t > &codec_private, bool is_encrypted)
Definition: webm_video_client.cc:50
shaka::media::WebMListParser::Parse
int Parse(const uint8_t *buf, int size)
Definition: webm_parser.cc:744
shaka::media::WebMAudioClient::Reset
void Reset()
Reset this object's state so it can process a new audio track element.
Definition: webm_audio_client.cc:25
shaka::media::WebMAudioClient::GetAudioStreamInfo
std::shared_ptr< AudioStreamInfo > GetAudioStreamInfo(int64_t track_num, const std::string &codec_id, const std::vector< uint8_t > &codec_private, int64_t seek_preroll, int64_t codec_delay, const std::string &language, bool is_encrypted)
Definition: webm_audio_client.cc:31