Shaka Packager SDK
representation.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/mpd/base/representation.h"
8 
9 #include <gflags/gflags.h>
10 
11 #include <algorithm>
12 
13 #include "packager/base/logging.h"
14 #include "packager/file/file.h"
15 #include "packager/media/base/muxer_util.h"
16 #include "packager/mpd/base/mpd_options.h"
17 #include "packager/mpd/base/mpd_utils.h"
18 #include "packager/mpd/base/xml/xml_node.h"
19 
20 namespace shaka {
21 namespace {
22 
23 std::string GetMimeType(const std::string& prefix,
24  MediaInfo::ContainerType container_type) {
25  switch (container_type) {
26  case MediaInfo::CONTAINER_MP4:
27  return prefix + "/mp4";
28  case MediaInfo::CONTAINER_MPEG2_TS:
29  // NOTE: DASH MPD spec uses lowercase but RFC3555 says uppercase.
30  return prefix + "/MP2T";
31  case MediaInfo::CONTAINER_WEBM:
32  return prefix + "/webm";
33  default:
34  break;
35  }
36 
37  // Unsupported container types should be rejected/handled by the caller.
38  LOG(ERROR) << "Unrecognized container type: " << container_type;
39  return std::string();
40 }
41 
42 // Check whether the video info has width and height.
43 // DASH IOP also requires several other fields for video representations, namely
44 // width, height, framerate, and sar.
45 bool HasRequiredVideoFields(const MediaInfo_VideoInfo& video_info) {
46  if (!video_info.has_height() || !video_info.has_width()) {
47  LOG(ERROR)
48  << "Width and height are required fields for generating a valid MPD.";
49  return false;
50  }
51  // These fields are not required for a valid MPD, but required for DASH IOP
52  // compliant MPD. MpdBuilder can keep generating MPDs without these fields.
53  LOG_IF(WARNING, !video_info.has_time_scale())
54  << "Video info does not contain timescale required for "
55  "calculating framerate. @frameRate is required for DASH IOP.";
56  LOG_IF(WARNING, !video_info.has_pixel_width())
57  << "Video info does not contain pixel_width to calculate the sample "
58  "aspect ratio required for DASH IOP.";
59  LOG_IF(WARNING, !video_info.has_pixel_height())
60  << "Video info does not contain pixel_height to calculate the sample "
61  "aspect ratio required for DASH IOP.";
62  return true;
63 }
64 
65 uint32_t GetTimeScale(const MediaInfo& media_info) {
66  if (media_info.has_reference_time_scale()) {
67  return media_info.reference_time_scale();
68  }
69 
70  if (media_info.has_video_info()) {
71  return media_info.video_info().time_scale();
72  }
73 
74  if (media_info.has_audio_info()) {
75  return media_info.audio_info().time_scale();
76  }
77 
78  LOG(WARNING) << "No timescale specified, using 1 as timescale.";
79  return 1;
80 }
81 
82 uint64_t LastSegmentStartTime(const SegmentInfo& segment_info) {
83  return segment_info.start_time + segment_info.duration * segment_info.repeat;
84 }
85 
86 // This is equal to |segment_info| end time
87 uint64_t LastSegmentEndTime(const SegmentInfo& segment_info) {
88  return segment_info.start_time +
89  segment_info.duration * (segment_info.repeat + 1);
90 }
91 
92 uint64_t LatestSegmentStartTime(const std::list<SegmentInfo>& segments) {
93  DCHECK(!segments.empty());
94  const SegmentInfo& latest_segment = segments.back();
95  return LastSegmentStartTime(latest_segment);
96 }
97 
98 // Given |timeshift_limit|, finds out the number of segments that are no longer
99 // valid and should be removed from |segment_info|.
100 uint64_t SearchTimedOutRepeatIndex(uint64_t timeshift_limit,
101  const SegmentInfo& segment_info) {
102  DCHECK_LE(timeshift_limit, LastSegmentEndTime(segment_info));
103  if (timeshift_limit < segment_info.start_time)
104  return 0;
105 
106  return (timeshift_limit - segment_info.start_time) / segment_info.duration;
107 }
108 
109 } // namespace
110 
112  const MediaInfo& media_info,
113  const MpdOptions& mpd_options,
114  uint32_t id,
115  std::unique_ptr<RepresentationStateChangeListener> state_change_listener)
116  : media_info_(media_info),
117  id_(id),
118  bandwidth_estimator_(BandwidthEstimator::kUseAllBlocks),
119  mpd_options_(mpd_options),
120  state_change_listener_(std::move(state_change_listener)),
121  allow_approximate_segment_timeline_(
122  // TODO(kqyang): Need a better check. $Time is legitimate but not a
123  // template.
124  media_info.segment_template().find("$Time") == std::string::npos &&
125  mpd_options_.mpd_params.allow_approximate_segment_timeline) {}
126 
128  const Representation& representation,
129  std::unique_ptr<RepresentationStateChangeListener> state_change_listener)
130  : Representation(representation.media_info_,
131  representation.mpd_options_,
132  representation.id_,
133  std::move(state_change_listener)) {
134  mime_type_ = representation.mime_type_;
135  codecs_ = representation.codecs_;
136 
137  start_number_ = representation.start_number_;
138  for (const SegmentInfo& segment_info : representation.segment_infos_)
139  start_number_ += segment_info.repeat + 1;
140 }
141 
142 Representation::~Representation() {}
143 
145  if (!AtLeastOneTrue(media_info_.has_video_info(),
146  media_info_.has_audio_info(),
147  media_info_.has_text_info())) {
148  // This is an error. Segment information can be in AdaptationSet, Period, or
149  // MPD but the interface does not provide a way to set them.
150  // See 5.3.9.1 ISO 23009-1:2012 for segment info.
151  LOG(ERROR) << "Representation needs one of video, audio, or text.";
152  return false;
153  }
154 
155  if (MoreThanOneTrue(media_info_.has_video_info(),
156  media_info_.has_audio_info(),
157  media_info_.has_text_info())) {
158  LOG(ERROR) << "Only one of VideoInfo, AudioInfo, or TextInfo can be set.";
159  return false;
160  }
161 
162  if (media_info_.container_type() == MediaInfo::CONTAINER_UNKNOWN) {
163  LOG(ERROR) << "'container_type' in MediaInfo cannot be CONTAINER_UNKNOWN.";
164  return false;
165  }
166 
167  if (media_info_.has_video_info()) {
168  mime_type_ = GetVideoMimeType();
169  if (!HasRequiredVideoFields(media_info_.video_info())) {
170  LOG(ERROR) << "Missing required fields to create a video Representation.";
171  return false;
172  }
173  } else if (media_info_.has_audio_info()) {
174  mime_type_ = GetAudioMimeType();
175  } else if (media_info_.has_text_info()) {
176  mime_type_ = GetTextMimeType();
177  }
178 
179  if (mime_type_.empty())
180  return false;
181 
182  codecs_ = GetCodecs(media_info_);
183  return true;
184 }
185 
187  const ContentProtectionElement& content_protection_element) {
188  content_protection_elements_.push_back(content_protection_element);
189  RemoveDuplicateAttributes(&content_protection_elements_.back());
190 }
191 
192 void Representation::UpdateContentProtectionPssh(const std::string& drm_uuid,
193  const std::string& pssh) {
194  UpdateContentProtectionPsshHelper(drm_uuid, pssh,
195  &content_protection_elements_);
196 }
197 
198 void Representation::AddNewSegment(uint64_t start_time,
199  uint64_t duration,
200  uint64_t size) {
201  if (start_time == 0 && duration == 0) {
202  LOG(WARNING) << "Got segment with start_time and duration == 0. Ignoring.";
203  return;
204  }
205 
206  if (state_change_listener_)
207  state_change_listener_->OnNewSegmentForRepresentation(start_time, duration);
208 
209  AddSegmentInfo(start_time, duration);
210 
211  bandwidth_estimator_.AddBlock(
212  size, static_cast<double>(duration) / media_info_.reference_time_scale());
213 
214  SlideWindow();
215  DCHECK_GE(segment_infos_.size(), 1u);
216 }
217 
218 void Representation::SetSampleDuration(uint32_t frame_duration) {
219  // Sample duration is used to generate approximate SegmentTimeline.
220  // Text is required to have exactly the same segment duration.
221  if (media_info_.has_audio_info() || media_info_.has_video_info())
222  frame_duration_ = frame_duration;
223 
224  if (media_info_.has_video_info()) {
225  media_info_.mutable_video_info()->set_frame_duration(frame_duration);
226  if (state_change_listener_) {
227  state_change_listener_->OnSetFrameRateForRepresentation(
228  frame_duration, media_info_.video_info().time_scale());
229  }
230  }
231 }
232 
233 const MediaInfo& Representation::GetMediaInfo() const {
234  return media_info_;
235 }
236 
237 // Uses info in |media_info_| and |content_protection_elements_| to create a
238 // "Representation" node.
239 // MPD schema has strict ordering. The following must be done in order.
240 // AddVideoInfo() (possibly adds FramePacking elements), AddAudioInfo() (Adds
241 // AudioChannelConfig elements), AddContentProtectionElements*(), and
242 // AddVODOnlyInfo() (Adds segment info).
243 xml::scoped_xml_ptr<xmlNode> Representation::GetXml() {
244  if (!HasRequiredMediaInfoFields()) {
245  LOG(ERROR) << "MediaInfo missing required fields.";
246  return xml::scoped_xml_ptr<xmlNode>();
247  }
248 
249  const uint64_t bandwidth = media_info_.has_bandwidth()
250  ? media_info_.bandwidth()
251  : bandwidth_estimator_.Max();
252 
253  DCHECK(!(HasVODOnlyFields(media_info_) && HasLiveOnlyFields(media_info_)));
254 
255  xml::RepresentationXmlNode representation;
256  // Mandatory fields for Representation.
257  representation.SetId(id_);
258  representation.SetIntegerAttribute("bandwidth", bandwidth);
259  if (!codecs_.empty())
260  representation.SetStringAttribute("codecs", codecs_);
261  representation.SetStringAttribute("mimeType", mime_type_);
262 
263  const bool has_video_info = media_info_.has_video_info();
264  const bool has_audio_info = media_info_.has_audio_info();
265 
266  if (has_video_info &&
267  !representation.AddVideoInfo(
268  media_info_.video_info(),
269  !(output_suppression_flags_ & kSuppressWidth),
270  !(output_suppression_flags_ & kSuppressHeight),
271  !(output_suppression_flags_ & kSuppressFrameRate))) {
272  LOG(ERROR) << "Failed to add video info to Representation XML.";
273  return xml::scoped_xml_ptr<xmlNode>();
274  }
275 
276  if (has_audio_info &&
277  !representation.AddAudioInfo(media_info_.audio_info())) {
278  LOG(ERROR) << "Failed to add audio info to Representation XML.";
279  return xml::scoped_xml_ptr<xmlNode>();
280  }
281 
282  if (!representation.AddContentProtectionElements(
283  content_protection_elements_)) {
284  return xml::scoped_xml_ptr<xmlNode>();
285  }
286 
287  if (HasVODOnlyFields(media_info_) &&
288  !representation.AddVODOnlyInfo(media_info_)) {
289  LOG(ERROR) << "Failed to add VOD info.";
290  return xml::scoped_xml_ptr<xmlNode>();
291  }
292 
293  if (HasLiveOnlyFields(media_info_) &&
294  !representation.AddLiveOnlyInfo(media_info_, segment_infos_,
295  start_number_)) {
296  LOG(ERROR) << "Failed to add Live info.";
297  return xml::scoped_xml_ptr<xmlNode>();
298  }
299  // TODO(rkuroiwa): It is likely that all representations have the exact same
300  // SegmentTemplate. Optimize and propagate the tag up to AdaptationSet level.
301 
302  output_suppression_flags_ = 0;
303  return representation.PassScopedPtr();
304 }
305 
306 void Representation::SuppressOnce(SuppressFlag flag) {
307  output_suppression_flags_ |= flag;
308 }
309 
311  double presentation_time_offset) {
312  uint64_t pto = presentation_time_offset * media_info_.reference_time_scale();
313  if (pto <= 0)
314  return;
315  media_info_.set_presentation_time_offset(pto);
316 }
317 
319  double* start_timestamp_seconds,
320  double* end_timestamp_seconds) const {
321  if (segment_infos_.empty())
322  return false;
323 
324  if (start_timestamp_seconds) {
325  *start_timestamp_seconds =
326  static_cast<double>(segment_infos_.begin()->start_time) /
327  GetTimeScale(media_info_);
328  }
329  if (end_timestamp_seconds) {
330  *end_timestamp_seconds =
331  static_cast<double>(segment_infos_.rbegin()->start_time +
332  segment_infos_.rbegin()->duration *
333  (segment_infos_.rbegin()->repeat + 1)) /
334  GetTimeScale(media_info_);
335  }
336  return true;
337 }
338 
339 bool Representation::HasRequiredMediaInfoFields() const {
340  if (HasVODOnlyFields(media_info_) && HasLiveOnlyFields(media_info_)) {
341  LOG(ERROR) << "MediaInfo cannot have both VOD and Live fields.";
342  return false;
343  }
344 
345  if (!media_info_.has_container_type()) {
346  LOG(ERROR) << "MediaInfo missing required field: container_type.";
347  return false;
348  }
349 
350  return true;
351 }
352 
353 void Representation::AddSegmentInfo(uint64_t start_time, uint64_t duration) {
354  const uint64_t kNoRepeat = 0;
355  const uint64_t adjusted_duration = AdjustDuration(duration);
356 
357  if (!segment_infos_.empty()) {
358  // Contiguous segment.
359  const SegmentInfo& previous = segment_infos_.back();
360  const uint64_t previous_segment_end_time =
361  previous.start_time + previous.duration * (previous.repeat + 1);
362  // Make it continuous if the segment start time is close to previous segment
363  // end time.
364  if (ApproximiatelyEqual(previous_segment_end_time, start_time)) {
365  const uint64_t segment_end_time_for_same_duration =
366  previous_segment_end_time + previous.duration;
367  const uint64_t actual_segment_end_time = start_time + duration;
368  // Consider the segments having identical duration if the segment end time
369  // is close to calculated segment end time by assuming identical duration.
370  if (ApproximiatelyEqual(segment_end_time_for_same_duration,
371  actual_segment_end_time)) {
372  ++segment_infos_.back().repeat;
373  } else {
374  segment_infos_.push_back(
375  {previous_segment_end_time,
376  actual_segment_end_time - previous_segment_end_time, kNoRepeat});
377  }
378  return;
379  }
380 
381  // A gap since previous.
382  const uint64_t kRoundingErrorGrace = 5;
383  if (previous_segment_end_time + kRoundingErrorGrace < start_time) {
384  LOG(WARNING) << "Found a gap of size "
385  << (start_time - previous_segment_end_time)
386  << " > kRoundingErrorGrace (" << kRoundingErrorGrace
387  << "). The new segment starts at " << start_time
388  << " but the previous segment ends at "
389  << previous_segment_end_time << ".";
390  }
391 
392  // No overlapping segments.
393  if (start_time < previous_segment_end_time - kRoundingErrorGrace) {
394  LOG(WARNING)
395  << "Segments should not be overlapping. The new segment starts at "
396  << start_time << " but the previous segment ends at "
397  << previous_segment_end_time << ".";
398  }
399  }
400 
401  segment_infos_.push_back({start_time, adjusted_duration, kNoRepeat});
402 }
403 
404 bool Representation::ApproximiatelyEqual(uint64_t time1, uint64_t time2) const {
405  if (!allow_approximate_segment_timeline_)
406  return time1 == time2;
407 
408  // It is not always possible to align segment duration to target duration
409  // exactly. For example, for AAC with sampling rate of 44100, there are always
410  // 1024 audio samples per frame, so the frame duration is 1024/44100. For a
411  // target duration of 2 seconds, the closest segment duration would be 1.984
412  // or 2.00533.
413 
414  // An arbitrary error threshold cap. This makes sure that the error is not too
415  // large for large samples.
416  const double kErrorThresholdSeconds = 0.05;
417 
418  // So we consider two times equal if they differ by less than one sample.
419  const uint32_t error_threshold =
420  std::min(frame_duration_,
421  static_cast<uint32_t>(kErrorThresholdSeconds *
422  media_info_.reference_time_scale()));
423  return time1 <= time2 + error_threshold && time2 <= time1 + error_threshold;
424 }
425 
426 uint64_t Representation::AdjustDuration(uint64_t duration) const {
427  if (!allow_approximate_segment_timeline_)
428  return duration;
429  const uint64_t scaled_target_duration =
430  mpd_options_.target_segment_duration * media_info_.reference_time_scale();
431  return ApproximiatelyEqual(scaled_target_duration, duration)
432  ? scaled_target_duration
433  : duration;
434 }
435 
436 void Representation::SlideWindow() {
437  DCHECK(!segment_infos_.empty());
438  if (mpd_options_.mpd_params.time_shift_buffer_depth <= 0.0 ||
439  mpd_options_.mpd_type == MpdType::kStatic)
440  return;
441 
442  const uint32_t time_scale = GetTimeScale(media_info_);
443  DCHECK_GT(time_scale, 0u);
444 
445  uint64_t time_shift_buffer_depth = static_cast<uint64_t>(
446  mpd_options_.mpd_params.time_shift_buffer_depth * time_scale);
447 
448  // The start time of the latest segment is considered the current_play_time,
449  // and this should guarantee that the latest segment will stay in the list.
450  const uint64_t current_play_time = LatestSegmentStartTime(segment_infos_);
451  if (current_play_time <= time_shift_buffer_depth)
452  return;
453 
454  const uint64_t timeshift_limit = current_play_time - time_shift_buffer_depth;
455 
456  // First remove all the SegmentInfos that are completely out of range, by
457  // looking at the very last segment's end time.
458  std::list<SegmentInfo>::iterator first = segment_infos_.begin();
459  std::list<SegmentInfo>::iterator last = first;
460  for (; last != segment_infos_.end(); ++last) {
461  const uint64_t last_segment_end_time = LastSegmentEndTime(*last);
462  if (timeshift_limit < last_segment_end_time)
463  break;
464  RemoveSegments(last->start_time, last->duration, last->repeat + 1);
465  start_number_ += last->repeat + 1;
466  }
467  segment_infos_.erase(first, last);
468 
469  // Now some segment in the first SegmentInfo should be left in the list.
470  SegmentInfo* first_segment_info = &segment_infos_.front();
471  DCHECK_LE(timeshift_limit, LastSegmentEndTime(*first_segment_info));
472 
473  // Identify which segments should still be in the SegmentInfo.
474  const uint64_t repeat_index =
475  SearchTimedOutRepeatIndex(timeshift_limit, *first_segment_info);
476  if (repeat_index == 0)
477  return;
478 
479  RemoveSegments(first_segment_info->start_time, first_segment_info->duration,
480  repeat_index);
481 
482  first_segment_info->start_time = first_segment_info->start_time +
483  first_segment_info->duration * repeat_index;
484  first_segment_info->repeat = first_segment_info->repeat - repeat_index;
485  start_number_ += repeat_index;
486 }
487 
488 void Representation::RemoveSegments(uint64_t start_time,
489  uint64_t duration,
490  uint64_t num_segments) {
491  if (mpd_options_.mpd_params.preserved_segments_outside_live_window == 0)
492  return;
493 
494  for (size_t i = 0; i < num_segments; ++i) {
495  segments_to_be_removed_.push_back(media::GetSegmentName(
496  media_info_.segment_template(), start_time + i * duration,
497  start_number_ - 1 + i, media_info_.bandwidth()));
498  }
499  while (segments_to_be_removed_.size() >
500  mpd_options_.mpd_params.preserved_segments_outside_live_window) {
501  VLOG(2) << "Deleting " << segments_to_be_removed_.front();
502  File::Delete(segments_to_be_removed_.front().c_str());
503  segments_to_be_removed_.pop_front();
504  }
505 }
506 
507 std::string Representation::GetVideoMimeType() const {
508  return GetMimeType("video", media_info_.container_type());
509 }
510 
511 std::string Representation::GetAudioMimeType() const {
512  return GetMimeType("audio", media_info_.container_type());
513 }
514 
515 std::string Representation::GetTextMimeType() const {
516  CHECK(media_info_.has_text_info());
517  if (media_info_.text_info().codec() == "ttml") {
518  switch (media_info_.container_type()) {
519  case MediaInfo::CONTAINER_TEXT:
520  return "application/ttml+xml";
521  case MediaInfo::CONTAINER_MP4:
522  return "application/mp4";
523  default:
524  LOG(ERROR) << "Failed to determine MIME type for TTML container: "
525  << media_info_.container_type();
526  return "";
527  }
528  }
529  if (media_info_.text_info().codec() == "wvtt") {
530  if (media_info_.container_type() == MediaInfo::CONTAINER_TEXT) {
531  return "text/vtt";
532  } else if (media_info_.container_type() == MediaInfo::CONTAINER_MP4) {
533  return "application/mp4";
534  }
535  LOG(ERROR) << "Failed to determine MIME type for VTT container: "
536  << media_info_.container_type();
537  return "";
538  }
539 
540  LOG(ERROR) << "Cannot determine MIME type for format: "
541  << media_info_.text_info().codec()
542  << " container: " << media_info_.container_type();
543  return "";
544 }
545 
546 } // namespace shaka
bool AddVideoInfo(const MediaInfo::VideoInfo &video_info, bool set_width, bool set_height, bool set_frame_rate)
Definition: xml_node.cc:266
virtual const MediaInfo & GetMediaInfo() const
RepresentationType in MPD.
Definition: xml_node.h:139
virtual void AddNewSegment(uint64_t start_time, uint64_t duration, uint64_t size)
static bool Delete(const char *file_name)
Definition: file.cc:198
size_t preserved_segments_outside_live_window
Definition: mpd_params.h:46
Representation(const MediaInfo &media_info, const MpdOptions &mpd_options, uint32_t representation_id, std::unique_ptr< RepresentationStateChangeListener > state_change_listener)
virtual void SetSampleDuration(uint32_t sample_duration)
STL namespace.
scoped_xml_ptr< xmlNode > PassScopedPtr()
Definition: xml_node.cc:169
All the methods that are virtual are virtual for mocking.
bool AddVODOnlyInfo(const MediaInfo &media_info)
Definition: xml_node.cc:311
void AddBlock(uint64_t size, double duration)
void SetStringAttribute(const char *attribute_name, const std::string &attribute)
Definition: xml_node.cc:137
bool AddLiveOnlyInfo(const MediaInfo &media_info, const std::list< SegmentInfo > &segment_infos, uint32_t start_number)
Definition: xml_node.cc:357
void SetId(uint32_t id)
Definition: xml_node.cc:160
xml::scoped_xml_ptr< xmlNode > GetXml()
void SetPresentationTimeOffset(double presentation_time_offset)
Set in SegmentBase / SegmentTemplate.
double target_segment_duration
Definition: mpd_options.h:33
virtual void AddContentProtectionElement(const ContentProtectionElement &element)
void SetIntegerAttribute(const char *attribute_name, uint64_t number)
Definition: xml_node.cc:144
Defines Mpd Options.
Definition: mpd_options.h:25
virtual void UpdateContentProtectionPssh(const std::string &drm_uuid, const std::string &pssh)
double time_shift_buffer_depth
Definition: mpd_params.h:39
void SuppressOnce(SuppressFlag flag)
bool AddAudioInfo(const MediaInfo::AudioInfo &audio_info)
Definition: xml_node.cc:303
bool GetStartAndEndTimestamps(double *start_timestamp_seconds, double *end_timestamp_seconds) const