Shaka Packager SDK
representation.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/mpd/base/representation.h"
8 
9 #include <gflags/gflags.h>
10 
11 #include <algorithm>
12 
13 #include "packager/base/logging.h"
14 #include "packager/base/strings/stringprintf.h"
15 #include "packager/file/file.h"
16 #include "packager/media/base/muxer_util.h"
17 #include "packager/mpd/base/mpd_options.h"
18 #include "packager/mpd/base/mpd_utils.h"
19 #include "packager/mpd/base/xml/xml_node.h"
20 
21 namespace shaka {
22 namespace {
23 
24 std::string GetMimeType(const std::string& prefix,
25  MediaInfo::ContainerType container_type) {
26  switch (container_type) {
27  case MediaInfo::CONTAINER_MP4:
28  return prefix + "/mp4";
29  case MediaInfo::CONTAINER_MPEG2_TS:
30  // NOTE: DASH MPD spec uses lowercase but RFC3555 says uppercase.
31  return prefix + "/MP2T";
32  case MediaInfo::CONTAINER_WEBM:
33  return prefix + "/webm";
34  default:
35  break;
36  }
37 
38  // Unsupported container types should be rejected/handled by the caller.
39  LOG(ERROR) << "Unrecognized container type: " << container_type;
40  return std::string();
41 }
42 
43 // Check whether the video info has width and height.
44 // DASH IOP also requires several other fields for video representations, namely
45 // width, height, framerate, and sar.
46 bool HasRequiredVideoFields(const MediaInfo_VideoInfo& video_info) {
47  if (!video_info.has_height() || !video_info.has_width()) {
48  LOG(ERROR)
49  << "Width and height are required fields for generating a valid MPD.";
50  return false;
51  }
52  // These fields are not required for a valid MPD, but required for DASH IOP
53  // compliant MPD. MpdBuilder can keep generating MPDs without these fields.
54  LOG_IF(WARNING, !video_info.has_time_scale())
55  << "Video info does not contain timescale required for "
56  "calculating framerate. @frameRate is required for DASH IOP.";
57  LOG_IF(WARNING, !video_info.has_pixel_width())
58  << "Video info does not contain pixel_width to calculate the sample "
59  "aspect ratio required for DASH IOP.";
60  LOG_IF(WARNING, !video_info.has_pixel_height())
61  << "Video info does not contain pixel_height to calculate the sample "
62  "aspect ratio required for DASH IOP.";
63  return true;
64 }
65 
66 uint32_t GetTimeScale(const MediaInfo& media_info) {
67  if (media_info.has_reference_time_scale()) {
68  return media_info.reference_time_scale();
69  }
70 
71  if (media_info.has_video_info()) {
72  return media_info.video_info().time_scale();
73  }
74 
75  if (media_info.has_audio_info()) {
76  return media_info.audio_info().time_scale();
77  }
78 
79  LOG(WARNING) << "No timescale specified, using 1 as timescale.";
80  return 1;
81 }
82 
83 } // namespace
84 
86  const MediaInfo& media_info,
87  const MpdOptions& mpd_options,
88  uint32_t id,
89  std::unique_ptr<RepresentationStateChangeListener> state_change_listener)
90  : media_info_(media_info),
91  id_(id),
92  mpd_options_(mpd_options),
93  state_change_listener_(std::move(state_change_listener)),
94  allow_approximate_segment_timeline_(
95  // TODO(kqyang): Need a better check. $Time is legitimate but not a
96  // template.
97  media_info.segment_template().find("$Time") == std::string::npos &&
98  mpd_options_.mpd_params.allow_approximate_segment_timeline) {}
99 
101  const Representation& representation,
102  std::unique_ptr<RepresentationStateChangeListener> state_change_listener)
103  : Representation(representation.media_info_,
104  representation.mpd_options_,
105  representation.id_,
106  std::move(state_change_listener)) {
107  mime_type_ = representation.mime_type_;
108  codecs_ = representation.codecs_;
109 
110  start_number_ = representation.start_number_;
111  for (const SegmentInfo& segment_info : representation.segment_infos_)
112  start_number_ += segment_info.repeat + 1;
113 }
114 
115 Representation::~Representation() {}
116 
118  if (!AtLeastOneTrue(media_info_.has_video_info(),
119  media_info_.has_audio_info(),
120  media_info_.has_text_info())) {
121  // This is an error. Segment information can be in AdaptationSet, Period, or
122  // MPD but the interface does not provide a way to set them.
123  // See 5.3.9.1 ISO 23009-1:2012 for segment info.
124  LOG(ERROR) << "Representation needs one of video, audio, or text.";
125  return false;
126  }
127 
128  if (MoreThanOneTrue(media_info_.has_video_info(),
129  media_info_.has_audio_info(),
130  media_info_.has_text_info())) {
131  LOG(ERROR) << "Only one of VideoInfo, AudioInfo, or TextInfo can be set.";
132  return false;
133  }
134 
135  if (media_info_.container_type() == MediaInfo::CONTAINER_UNKNOWN) {
136  LOG(ERROR) << "'container_type' in MediaInfo cannot be CONTAINER_UNKNOWN.";
137  return false;
138  }
139 
140  if (media_info_.has_video_info()) {
141  mime_type_ = GetVideoMimeType();
142  if (!HasRequiredVideoFields(media_info_.video_info())) {
143  LOG(ERROR) << "Missing required fields to create a video Representation.";
144  return false;
145  }
146  } else if (media_info_.has_audio_info()) {
147  mime_type_ = GetAudioMimeType();
148  } else if (media_info_.has_text_info()) {
149  mime_type_ = GetTextMimeType();
150  }
151 
152  if (mime_type_.empty())
153  return false;
154 
155  codecs_ = GetCodecs(media_info_);
156  return true;
157 }
158 
160  const ContentProtectionElement& content_protection_element) {
161  content_protection_elements_.push_back(content_protection_element);
162  RemoveDuplicateAttributes(&content_protection_elements_.back());
163 }
164 
165 void Representation::UpdateContentProtectionPssh(const std::string& drm_uuid,
166  const std::string& pssh) {
167  UpdateContentProtectionPsshHelper(drm_uuid, pssh,
168  &content_protection_elements_);
169 }
170 
171 void Representation::AddNewSegment(int64_t start_time,
172  int64_t duration,
173  uint64_t size) {
174  if (start_time == 0 && duration == 0) {
175  LOG(WARNING) << "Got segment with start_time and duration == 0. Ignoring.";
176  return;
177  }
178 
179  // In order for the oldest segment to be accessible for at least
180  // |time_shift_buffer_depth| seconds, the latest segment should not be in the
181  // sliding window since the player could be playing any part of the latest
182  // segment. So the current segment duration is added to the sum of segment
183  // durations (in the manifest/playlist) after sliding the window.
184  SlideWindow();
185 
186  if (state_change_listener_)
187  state_change_listener_->OnNewSegmentForRepresentation(start_time, duration);
188 
189  AddSegmentInfo(start_time, duration);
190  current_buffer_depth_ += segment_infos_.back().duration;
191 
192  bandwidth_estimator_.AddBlock(
193  size, static_cast<double>(duration) / media_info_.reference_time_scale());
194 }
195 
196 void Representation::SetSampleDuration(uint32_t frame_duration) {
197  // Sample duration is used to generate approximate SegmentTimeline.
198  // Text is required to have exactly the same segment duration.
199  if (media_info_.has_audio_info() || media_info_.has_video_info())
200  frame_duration_ = frame_duration;
201 
202  if (media_info_.has_video_info()) {
203  media_info_.mutable_video_info()->set_frame_duration(frame_duration);
204  if (state_change_listener_) {
205  state_change_listener_->OnSetFrameRateForRepresentation(
206  frame_duration, media_info_.video_info().time_scale());
207  }
208  }
209 }
210 
211 const MediaInfo& Representation::GetMediaInfo() const {
212  return media_info_;
213 }
214 
215 // Uses info in |media_info_| and |content_protection_elements_| to create a
216 // "Representation" node.
217 // MPD schema has strict ordering. The following must be done in order.
218 // AddVideoInfo() (possibly adds FramePacking elements), AddAudioInfo() (Adds
219 // AudioChannelConfig elements), AddContentProtectionElements*(), and
220 // AddVODOnlyInfo() (Adds segment info).
221 base::Optional<xml::XmlNode> Representation::GetXml() {
222  if (!HasRequiredMediaInfoFields()) {
223  LOG(ERROR) << "MediaInfo missing required fields.";
224  return base::nullopt;
225  }
226 
227  const uint64_t bandwidth = media_info_.has_bandwidth()
228  ? media_info_.bandwidth()
229  : bandwidth_estimator_.Max();
230 
231  DCHECK(!(HasVODOnlyFields(media_info_) && HasLiveOnlyFields(media_info_)));
232 
233  xml::RepresentationXmlNode representation;
234  // Mandatory fields for Representation.
235  if (!representation.SetId(id_) ||
236  !representation.SetIntegerAttribute("bandwidth", bandwidth) ||
237  !(codecs_.empty() ||
238  representation.SetStringAttribute("codecs", codecs_)) ||
239  !representation.SetStringAttribute("mimeType", mime_type_)) {
240  return base::nullopt;
241  }
242 
243  const bool has_video_info = media_info_.has_video_info();
244  const bool has_audio_info = media_info_.has_audio_info();
245 
246  if (has_video_info &&
247  !representation.AddVideoInfo(
248  media_info_.video_info(),
249  !(output_suppression_flags_ & kSuppressWidth),
250  !(output_suppression_flags_ & kSuppressHeight),
251  !(output_suppression_flags_ & kSuppressFrameRate))) {
252  LOG(ERROR) << "Failed to add video info to Representation XML.";
253  return base::nullopt;
254  }
255 
256  if (has_audio_info &&
257  !representation.AddAudioInfo(media_info_.audio_info())) {
258  LOG(ERROR) << "Failed to add audio info to Representation XML.";
259  return base::nullopt;
260  }
261 
262  if (!representation.AddContentProtectionElements(
263  content_protection_elements_)) {
264  return base::nullopt;
265  }
266 
267  if (HasVODOnlyFields(media_info_) &&
268  !representation.AddVODOnlyInfo(
269  media_info_, mpd_options_.mpd_params.use_segment_list,
270  mpd_options_.mpd_params.target_segment_duration)) {
271  LOG(ERROR) << "Failed to add VOD info.";
272  return base::nullopt;
273  }
274 
275  if (HasLiveOnlyFields(media_info_) &&
276  !representation.AddLiveOnlyInfo(media_info_, segment_infos_,
277  start_number_)) {
278  LOG(ERROR) << "Failed to add Live info.";
279  return base::nullopt;
280  }
281  // TODO(rkuroiwa): It is likely that all representations have the exact same
282  // SegmentTemplate. Optimize and propagate the tag up to AdaptationSet level.
283 
284  output_suppression_flags_ = 0;
285  return std::move(representation);
286 }
287 
288 void Representation::SuppressOnce(SuppressFlag flag) {
289  output_suppression_flags_ |= flag;
290 }
291 
293  double presentation_time_offset) {
294  int64_t pto = presentation_time_offset * media_info_.reference_time_scale();
295  if (pto <= 0)
296  return;
297  media_info_.set_presentation_time_offset(pto);
298 }
299 
301  double* start_timestamp_seconds,
302  double* end_timestamp_seconds) const {
303  if (segment_infos_.empty())
304  return false;
305 
306  if (start_timestamp_seconds) {
307  *start_timestamp_seconds =
308  static_cast<double>(segment_infos_.begin()->start_time) /
309  GetTimeScale(media_info_);
310  }
311  if (end_timestamp_seconds) {
312  *end_timestamp_seconds =
313  static_cast<double>(segment_infos_.rbegin()->start_time +
314  segment_infos_.rbegin()->duration *
315  (segment_infos_.rbegin()->repeat + 1)) /
316  GetTimeScale(media_info_);
317  }
318  return true;
319 }
320 
321 bool Representation::HasRequiredMediaInfoFields() const {
322  if (HasVODOnlyFields(media_info_) && HasLiveOnlyFields(media_info_)) {
323  LOG(ERROR) << "MediaInfo cannot have both VOD and Live fields.";
324  return false;
325  }
326 
327  if (!media_info_.has_container_type()) {
328  LOG(ERROR) << "MediaInfo missing required field: container_type.";
329  return false;
330  }
331 
332  return true;
333 }
334 
335 void Representation::AddSegmentInfo(int64_t start_time, int64_t duration) {
336  const uint64_t kNoRepeat = 0;
337  const int64_t adjusted_duration = AdjustDuration(duration);
338 
339  if (!segment_infos_.empty()) {
340  // Contiguous segment.
341  const SegmentInfo& previous = segment_infos_.back();
342  const int64_t previous_segment_end_time =
343  previous.start_time + previous.duration * (previous.repeat + 1);
344  // Make it continuous if the segment start time is close to previous segment
345  // end time.
346  if (ApproximiatelyEqual(previous_segment_end_time, start_time)) {
347  const int64_t segment_end_time_for_same_duration =
348  previous_segment_end_time + previous.duration;
349  const int64_t actual_segment_end_time = start_time + duration;
350  // Consider the segments having identical duration if the segment end time
351  // is close to calculated segment end time by assuming identical duration.
352  if (ApproximiatelyEqual(segment_end_time_for_same_duration,
353  actual_segment_end_time)) {
354  ++segment_infos_.back().repeat;
355  } else {
356  segment_infos_.push_back(
357  {previous_segment_end_time,
358  actual_segment_end_time - previous_segment_end_time, kNoRepeat});
359  }
360  return;
361  }
362 
363  // A gap since previous.
364  const int64_t kRoundingErrorGrace = 5;
365  if (previous_segment_end_time + kRoundingErrorGrace < start_time) {
366  LOG(WARNING) << RepresentationAsString() << " Found a gap of size "
367  << (start_time - previous_segment_end_time)
368  << " > kRoundingErrorGrace (" << kRoundingErrorGrace
369  << "). The new segment starts at " << start_time
370  << " but the previous segment ends at "
371  << previous_segment_end_time << ".";
372  }
373 
374  // No overlapping segments.
375  if (start_time < previous_segment_end_time - kRoundingErrorGrace) {
376  LOG(WARNING)
377  << RepresentationAsString()
378  << " Segments should not be overlapping. The new segment starts at "
379  << start_time << " but the previous segment ends at "
380  << previous_segment_end_time << ".";
381  }
382  }
383 
384  segment_infos_.push_back({start_time, adjusted_duration, kNoRepeat});
385 }
386 
387 bool Representation::ApproximiatelyEqual(int64_t time1, int64_t time2) const {
388  if (!allow_approximate_segment_timeline_)
389  return time1 == time2;
390 
391  // It is not always possible to align segment duration to target duration
392  // exactly. For example, for AAC with sampling rate of 44100, there are always
393  // 1024 audio samples per frame, so the frame duration is 1024/44100. For a
394  // target duration of 2 seconds, the closest segment duration would be 1.984
395  // or 2.00533.
396 
397  // An arbitrary error threshold cap. This makes sure that the error is not too
398  // large for large samples.
399  const double kErrorThresholdSeconds = 0.05;
400 
401  // So we consider two times equal if they differ by less than one sample.
402  const uint32_t error_threshold =
403  std::min(frame_duration_,
404  static_cast<uint32_t>(kErrorThresholdSeconds *
405  media_info_.reference_time_scale()));
406  return std::abs(time1 - time2) <= error_threshold;
407 }
408 
409 int64_t Representation::AdjustDuration(int64_t duration) const {
410  if (!allow_approximate_segment_timeline_)
411  return duration;
412  const int64_t scaled_target_duration =
413  mpd_options_.mpd_params.target_segment_duration *
414  media_info_.reference_time_scale();
415  return ApproximiatelyEqual(scaled_target_duration, duration)
416  ? scaled_target_duration
417  : duration;
418 }
419 
420 void Representation::SlideWindow() {
421  if (mpd_options_.mpd_params.time_shift_buffer_depth <= 0.0 ||
422  mpd_options_.mpd_type == MpdType::kStatic)
423  return;
424 
425  const uint32_t time_scale = GetTimeScale(media_info_);
426  DCHECK_GT(time_scale, 0u);
427 
428  const int64_t time_shift_buffer_depth = static_cast<int64_t>(
429  mpd_options_.mpd_params.time_shift_buffer_depth * time_scale);
430 
431  if (current_buffer_depth_ <= time_shift_buffer_depth)
432  return;
433 
434  std::list<SegmentInfo>::iterator first = segment_infos_.begin();
435  std::list<SegmentInfo>::iterator last = first;
436  for (; last != segment_infos_.end(); ++last) {
437  // Remove the current segment only if it falls completely out of time shift
438  // buffer range.
439  while (last->repeat >= 0 &&
440  current_buffer_depth_ - last->duration >= time_shift_buffer_depth) {
441  current_buffer_depth_ -= last->duration;
442  RemoveOldSegment(&*last);
443  start_number_++;
444  }
445  if (last->repeat >= 0)
446  break;
447  }
448  segment_infos_.erase(first, last);
449 }
450 
451 void Representation::RemoveOldSegment(SegmentInfo* segment_info) {
452  int64_t segment_start_time = segment_info->start_time;
453  segment_info->start_time += segment_info->duration;
454  segment_info->repeat--;
455 
456  if (mpd_options_.mpd_params.preserved_segments_outside_live_window == 0)
457  return;
458 
459  segments_to_be_removed_.push_back(
460  media::GetSegmentName(media_info_.segment_template(), segment_start_time,
461  start_number_ - 1, media_info_.bandwidth()));
462  while (segments_to_be_removed_.size() >
463  mpd_options_.mpd_params.preserved_segments_outside_live_window) {
464  VLOG(2) << "Deleting " << segments_to_be_removed_.front();
465  if (!File::Delete(segments_to_be_removed_.front().c_str())) {
466  LOG(WARNING) << "Failed to delete " << segments_to_be_removed_.front()
467  << "; Will retry later.";
468  break;
469  }
470  segments_to_be_removed_.pop_front();
471  }
472 }
473 
474 std::string Representation::GetVideoMimeType() const {
475  return GetMimeType("video", media_info_.container_type());
476 }
477 
478 std::string Representation::GetAudioMimeType() const {
479  return GetMimeType("audio", media_info_.container_type());
480 }
481 
482 std::string Representation::GetTextMimeType() const {
483  CHECK(media_info_.has_text_info());
484  if (media_info_.text_info().codec() == "ttml") {
485  switch (media_info_.container_type()) {
486  case MediaInfo::CONTAINER_TEXT:
487  return "application/ttml+xml";
488  case MediaInfo::CONTAINER_MP4:
489  return "application/mp4";
490  default:
491  LOG(ERROR) << "Failed to determine MIME type for TTML container: "
492  << media_info_.container_type();
493  return "";
494  }
495  }
496  if (media_info_.text_info().codec() == "wvtt") {
497  if (media_info_.container_type() == MediaInfo::CONTAINER_TEXT) {
498  return "text/vtt";
499  } else if (media_info_.container_type() == MediaInfo::CONTAINER_MP4) {
500  return "application/mp4";
501  }
502  LOG(ERROR) << "Failed to determine MIME type for VTT container: "
503  << media_info_.container_type();
504  return "";
505  }
506 
507  LOG(ERROR) << "Cannot determine MIME type for format: "
508  << media_info_.text_info().codec()
509  << " container: " << media_info_.container_type();
510  return "";
511 }
512 
513 std::string Representation::RepresentationAsString() const {
514  std::string s = base::StringPrintf("Representation (id=%d,", id_);
515  if (media_info_.has_video_info()) {
516  const MediaInfo_VideoInfo& video_info = media_info_.video_info();
517  base::StringAppendF(&s, "codec='%s',width=%d,height=%d",
518  video_info.codec().c_str(), video_info.width(),
519  video_info.height());
520  } else if (media_info_.has_audio_info()) {
521  const MediaInfo_AudioInfo& audio_info = media_info_.audio_info();
522  base::StringAppendF(
523  &s, "codec='%s',frequency=%d,language='%s'", audio_info.codec().c_str(),
524  audio_info.sampling_frequency(), audio_info.language().c_str());
525  } else if (media_info_.has_text_info()) {
526  const MediaInfo_TextInfo& text_info = media_info_.text_info();
527  base::StringAppendF(&s, "codec='%s',language='%s'",
528  text_info.codec().c_str(),
529  text_info.language().c_str());
530  }
531  base::StringAppendF(&s, ")");
532  return s;
533 }
534 
535 } // namespace shaka
void AddBlock(uint64_t size_in_bytes, double duration)
static bool Delete(const char *file_name)
Definition: file.cc:212
virtual void SetSampleDuration(uint32_t sample_duration)
virtual void AddContentProtectionElement(const ContentProtectionElement &element)
virtual void UpdateContentProtectionPssh(const std::string &drm_uuid, const std::string &pssh)
void SuppressOnce(SuppressFlag flag)
virtual const MediaInfo & GetMediaInfo() const
bool GetStartAndEndTimestamps(double *start_timestamp_seconds, double *end_timestamp_seconds) const
Representation(const MediaInfo &media_info, const MpdOptions &mpd_options, uint32_t representation_id, std::unique_ptr< RepresentationStateChangeListener > state_change_listener)
void SetPresentationTimeOffset(double presentation_time_offset)
Set @presentationTimeOffset in SegmentBase / SegmentTemplate.
base::Optional< xml::XmlNode > GetXml()
virtual void AddNewSegment(int64_t start_time, int64_t duration, uint64_t size)
RepresentationType in MPD.
Definition: xml_node.h:182
bool AddVideoInfo(const MediaInfo::VideoInfo &video_info, bool set_width, bool set_height, bool set_frame_rate) WARN_UNUSED_RESULT
Definition: xml_node.cc:338
bool AddLiveOnlyInfo(const MediaInfo &media_info, const std::list< SegmentInfo > &segment_infos, uint32_t start_number) WARN_UNUSED_RESULT
Definition: xml_node.cc:460
bool AddVODOnlyInfo(const MediaInfo &media_info, bool use_segment_list, double target_segment_duration) WARN_UNUSED_RESULT
Definition: xml_node.cc:380
bool AddAudioInfo(const MediaInfo::AudioInfo &audio_info) WARN_UNUSED_RESULT
Definition: xml_node.cc:375
bool SetIntegerAttribute(const std::string &attribute_name, uint64_t number) WARN_UNUSED_RESULT
Definition: xml_node.cc:191
bool SetStringAttribute(const std::string &attribute_name, const std::string &attribute) WARN_UNUSED_RESULT
Definition: xml_node.cc:184
bool SetId(uint32_t id) WARN_UNUSED_RESULT
Definition: xml_node.cc:205
All the methods that are virtual are virtual for mocking.
Defines Mpd Options.
Definition: mpd_options.h:25
bool use_segment_list
Definition: mpd_params.h:93
size_t preserved_segments_outside_live_window
Definition: mpd_params.h:46
double target_segment_duration
Definition: mpd_params.h:82
double time_shift_buffer_depth
Definition: mpd_params.h:39