Shaka Packager SDK
representation.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/mpd/base/representation.h"
8 
9 #include <gflags/gflags.h>
10 
11 #include <algorithm>
12 
13 #include "packager/base/logging.h"
14 #include "packager/file/file.h"
15 #include "packager/media/base/muxer_util.h"
16 #include "packager/mpd/base/mpd_options.h"
17 #include "packager/mpd/base/mpd_utils.h"
18 #include "packager/mpd/base/xml/xml_node.h"
19 
20 namespace shaka {
21 namespace {
22 
23 std::string GetMimeType(const std::string& prefix,
24  MediaInfo::ContainerType container_type) {
25  switch (container_type) {
26  case MediaInfo::CONTAINER_MP4:
27  return prefix + "/mp4";
28  case MediaInfo::CONTAINER_MPEG2_TS:
29  // NOTE: DASH MPD spec uses lowercase but RFC3555 says uppercase.
30  return prefix + "/MP2T";
31  case MediaInfo::CONTAINER_WEBM:
32  return prefix + "/webm";
33  default:
34  break;
35  }
36 
37  // Unsupported container types should be rejected/handled by the caller.
38  LOG(ERROR) << "Unrecognized container type: " << container_type;
39  return std::string();
40 }
41 
42 // Check whether the video info has width and height.
43 // DASH IOP also requires several other fields for video representations, namely
44 // width, height, framerate, and sar.
45 bool HasRequiredVideoFields(const MediaInfo_VideoInfo& video_info) {
46  if (!video_info.has_height() || !video_info.has_width()) {
47  LOG(ERROR)
48  << "Width and height are required fields for generating a valid MPD.";
49  return false;
50  }
51  // These fields are not required for a valid MPD, but required for DASH IOP
52  // compliant MPD. MpdBuilder can keep generating MPDs without these fields.
53  LOG_IF(WARNING, !video_info.has_time_scale())
54  << "Video info does not contain timescale required for "
55  "calculating framerate. @frameRate is required for DASH IOP.";
56  LOG_IF(WARNING, !video_info.has_pixel_width())
57  << "Video info does not contain pixel_width to calculate the sample "
58  "aspect ratio required for DASH IOP.";
59  LOG_IF(WARNING, !video_info.has_pixel_height())
60  << "Video info does not contain pixel_height to calculate the sample "
61  "aspect ratio required for DASH IOP.";
62  return true;
63 }
64 
65 uint32_t GetTimeScale(const MediaInfo& media_info) {
66  if (media_info.has_reference_time_scale()) {
67  return media_info.reference_time_scale();
68  }
69 
70  if (media_info.has_video_info()) {
71  return media_info.video_info().time_scale();
72  }
73 
74  if (media_info.has_audio_info()) {
75  return media_info.audio_info().time_scale();
76  }
77 
78  LOG(WARNING) << "No timescale specified, using 1 as timescale.";
79  return 1;
80 }
81 
82 int64_t LastSegmentStartTime(const SegmentInfo& segment_info) {
83  return segment_info.start_time + segment_info.duration * segment_info.repeat;
84 }
85 
86 // This is equal to |segment_info| end time
87 int64_t LastSegmentEndTime(const SegmentInfo& segment_info) {
88  return segment_info.start_time +
89  segment_info.duration * (segment_info.repeat + 1);
90 }
91 
92 int64_t LatestSegmentStartTime(const std::list<SegmentInfo>& segments) {
93  DCHECK(!segments.empty());
94  const SegmentInfo& latest_segment = segments.back();
95  return LastSegmentStartTime(latest_segment);
96 }
97 
98 // Given |timeshift_limit|, finds out the number of segments that are no longer
99 // valid and should be removed from |segment_info|.
100 uint64_t SearchTimedOutRepeatIndex(int64_t timeshift_limit,
101  const SegmentInfo& segment_info) {
102  DCHECK_LE(timeshift_limit, LastSegmentEndTime(segment_info));
103  if (timeshift_limit < segment_info.start_time)
104  return 0;
105 
106  return (timeshift_limit - segment_info.start_time) / segment_info.duration;
107 }
108 
109 } // namespace
110 
112  const MediaInfo& media_info,
113  const MpdOptions& mpd_options,
114  uint32_t id,
115  std::unique_ptr<RepresentationStateChangeListener> state_change_listener)
116  : media_info_(media_info),
117  id_(id),
118  mpd_options_(mpd_options),
119  state_change_listener_(std::move(state_change_listener)),
120  allow_approximate_segment_timeline_(
121  // TODO(kqyang): Need a better check. $Time is legitimate but not a
122  // template.
123  media_info.segment_template().find("$Time") == std::string::npos &&
124  mpd_options_.mpd_params.allow_approximate_segment_timeline) {}
125 
127  const Representation& representation,
128  std::unique_ptr<RepresentationStateChangeListener> state_change_listener)
129  : Representation(representation.media_info_,
130  representation.mpd_options_,
131  representation.id_,
132  std::move(state_change_listener)) {
133  mime_type_ = representation.mime_type_;
134  codecs_ = representation.codecs_;
135 
136  start_number_ = representation.start_number_;
137  for (const SegmentInfo& segment_info : representation.segment_infos_)
138  start_number_ += segment_info.repeat + 1;
139 }
140 
141 Representation::~Representation() {}
142 
144  if (!AtLeastOneTrue(media_info_.has_video_info(),
145  media_info_.has_audio_info(),
146  media_info_.has_text_info())) {
147  // This is an error. Segment information can be in AdaptationSet, Period, or
148  // MPD but the interface does not provide a way to set them.
149  // See 5.3.9.1 ISO 23009-1:2012 for segment info.
150  LOG(ERROR) << "Representation needs one of video, audio, or text.";
151  return false;
152  }
153 
154  if (MoreThanOneTrue(media_info_.has_video_info(),
155  media_info_.has_audio_info(),
156  media_info_.has_text_info())) {
157  LOG(ERROR) << "Only one of VideoInfo, AudioInfo, or TextInfo can be set.";
158  return false;
159  }
160 
161  if (media_info_.container_type() == MediaInfo::CONTAINER_UNKNOWN) {
162  LOG(ERROR) << "'container_type' in MediaInfo cannot be CONTAINER_UNKNOWN.";
163  return false;
164  }
165 
166  if (media_info_.has_video_info()) {
167  mime_type_ = GetVideoMimeType();
168  if (!HasRequiredVideoFields(media_info_.video_info())) {
169  LOG(ERROR) << "Missing required fields to create a video Representation.";
170  return false;
171  }
172  } else if (media_info_.has_audio_info()) {
173  mime_type_ = GetAudioMimeType();
174  } else if (media_info_.has_text_info()) {
175  mime_type_ = GetTextMimeType();
176  }
177 
178  if (mime_type_.empty())
179  return false;
180 
181  codecs_ = GetCodecs(media_info_);
182  return true;
183 }
184 
186  const ContentProtectionElement& content_protection_element) {
187  content_protection_elements_.push_back(content_protection_element);
188  RemoveDuplicateAttributes(&content_protection_elements_.back());
189 }
190 
191 void Representation::UpdateContentProtectionPssh(const std::string& drm_uuid,
192  const std::string& pssh) {
193  UpdateContentProtectionPsshHelper(drm_uuid, pssh,
194  &content_protection_elements_);
195 }
196 
197 void Representation::AddNewSegment(int64_t start_time,
198  int64_t duration,
199  uint64_t size) {
200  if (start_time == 0 && duration == 0) {
201  LOG(WARNING) << "Got segment with start_time and duration == 0. Ignoring.";
202  return;
203  }
204 
205  if (state_change_listener_)
206  state_change_listener_->OnNewSegmentForRepresentation(start_time, duration);
207 
208  AddSegmentInfo(start_time, duration);
209 
210  bandwidth_estimator_.AddBlock(
211  size, static_cast<double>(duration) / media_info_.reference_time_scale());
212 
213  SlideWindow();
214  DCHECK_GE(segment_infos_.size(), 1u);
215 }
216 
217 void Representation::SetSampleDuration(uint32_t frame_duration) {
218  // Sample duration is used to generate approximate SegmentTimeline.
219  // Text is required to have exactly the same segment duration.
220  if (media_info_.has_audio_info() || media_info_.has_video_info())
221  frame_duration_ = frame_duration;
222 
223  if (media_info_.has_video_info()) {
224  media_info_.mutable_video_info()->set_frame_duration(frame_duration);
225  if (state_change_listener_) {
226  state_change_listener_->OnSetFrameRateForRepresentation(
227  frame_duration, media_info_.video_info().time_scale());
228  }
229  }
230 }
231 
232 const MediaInfo& Representation::GetMediaInfo() const {
233  return media_info_;
234 }
235 
236 // Uses info in |media_info_| and |content_protection_elements_| to create a
237 // "Representation" node.
238 // MPD schema has strict ordering. The following must be done in order.
239 // AddVideoInfo() (possibly adds FramePacking elements), AddAudioInfo() (Adds
240 // AudioChannelConfig elements), AddContentProtectionElements*(), and
241 // AddVODOnlyInfo() (Adds segment info).
242 xml::scoped_xml_ptr<xmlNode> Representation::GetXml() {
243  if (!HasRequiredMediaInfoFields()) {
244  LOG(ERROR) << "MediaInfo missing required fields.";
245  return xml::scoped_xml_ptr<xmlNode>();
246  }
247 
248  const uint64_t bandwidth = media_info_.has_bandwidth()
249  ? media_info_.bandwidth()
250  : bandwidth_estimator_.Max();
251 
252  DCHECK(!(HasVODOnlyFields(media_info_) && HasLiveOnlyFields(media_info_)));
253 
254  xml::RepresentationXmlNode representation;
255  // Mandatory fields for Representation.
256  representation.SetId(id_);
257  representation.SetIntegerAttribute("bandwidth", bandwidth);
258  if (!codecs_.empty())
259  representation.SetStringAttribute("codecs", codecs_);
260  representation.SetStringAttribute("mimeType", mime_type_);
261 
262  const bool has_video_info = media_info_.has_video_info();
263  const bool has_audio_info = media_info_.has_audio_info();
264 
265  if (has_video_info &&
266  !representation.AddVideoInfo(
267  media_info_.video_info(),
268  !(output_suppression_flags_ & kSuppressWidth),
269  !(output_suppression_flags_ & kSuppressHeight),
270  !(output_suppression_flags_ & kSuppressFrameRate))) {
271  LOG(ERROR) << "Failed to add video info to Representation XML.";
272  return xml::scoped_xml_ptr<xmlNode>();
273  }
274 
275  if (has_audio_info &&
276  !representation.AddAudioInfo(media_info_.audio_info())) {
277  LOG(ERROR) << "Failed to add audio info to Representation XML.";
278  return xml::scoped_xml_ptr<xmlNode>();
279  }
280 
281  if (!representation.AddContentProtectionElements(
282  content_protection_elements_)) {
283  return xml::scoped_xml_ptr<xmlNode>();
284  }
285 
286  if (HasVODOnlyFields(media_info_) &&
287  !representation.AddVODOnlyInfo(media_info_)) {
288  LOG(ERROR) << "Failed to add VOD info.";
289  return xml::scoped_xml_ptr<xmlNode>();
290  }
291 
292  if (HasLiveOnlyFields(media_info_) &&
293  !representation.AddLiveOnlyInfo(media_info_, segment_infos_,
294  start_number_)) {
295  LOG(ERROR) << "Failed to add Live info.";
296  return xml::scoped_xml_ptr<xmlNode>();
297  }
298  // TODO(rkuroiwa): It is likely that all representations have the exact same
299  // SegmentTemplate. Optimize and propagate the tag up to AdaptationSet level.
300 
301  output_suppression_flags_ = 0;
302  return representation.PassScopedPtr();
303 }
304 
305 void Representation::SuppressOnce(SuppressFlag flag) {
306  output_suppression_flags_ |= flag;
307 }
308 
310  double presentation_time_offset) {
311  int64_t pto = presentation_time_offset * media_info_.reference_time_scale();
312  if (pto <= 0)
313  return;
314  media_info_.set_presentation_time_offset(pto);
315 }
316 
318  double* start_timestamp_seconds,
319  double* end_timestamp_seconds) const {
320  if (segment_infos_.empty())
321  return false;
322 
323  if (start_timestamp_seconds) {
324  *start_timestamp_seconds =
325  static_cast<double>(segment_infos_.begin()->start_time) /
326  GetTimeScale(media_info_);
327  }
328  if (end_timestamp_seconds) {
329  *end_timestamp_seconds =
330  static_cast<double>(segment_infos_.rbegin()->start_time +
331  segment_infos_.rbegin()->duration *
332  (segment_infos_.rbegin()->repeat + 1)) /
333  GetTimeScale(media_info_);
334  }
335  return true;
336 }
337 
338 bool Representation::HasRequiredMediaInfoFields() const {
339  if (HasVODOnlyFields(media_info_) && HasLiveOnlyFields(media_info_)) {
340  LOG(ERROR) << "MediaInfo cannot have both VOD and Live fields.";
341  return false;
342  }
343 
344  if (!media_info_.has_container_type()) {
345  LOG(ERROR) << "MediaInfo missing required field: container_type.";
346  return false;
347  }
348 
349  return true;
350 }
351 
352 void Representation::AddSegmentInfo(int64_t start_time, int64_t duration) {
353  const uint64_t kNoRepeat = 0;
354  const int64_t adjusted_duration = AdjustDuration(duration);
355 
356  if (!segment_infos_.empty()) {
357  // Contiguous segment.
358  const SegmentInfo& previous = segment_infos_.back();
359  const int64_t previous_segment_end_time =
360  previous.start_time + previous.duration * (previous.repeat + 1);
361  // Make it continuous if the segment start time is close to previous segment
362  // end time.
363  if (ApproximiatelyEqual(previous_segment_end_time, start_time)) {
364  const int64_t segment_end_time_for_same_duration =
365  previous_segment_end_time + previous.duration;
366  const int64_t actual_segment_end_time = start_time + duration;
367  // Consider the segments having identical duration if the segment end time
368  // is close to calculated segment end time by assuming identical duration.
369  if (ApproximiatelyEqual(segment_end_time_for_same_duration,
370  actual_segment_end_time)) {
371  ++segment_infos_.back().repeat;
372  } else {
373  segment_infos_.push_back(
374  {previous_segment_end_time,
375  actual_segment_end_time - previous_segment_end_time, kNoRepeat});
376  }
377  return;
378  }
379 
380  // A gap since previous.
381  const int64_t kRoundingErrorGrace = 5;
382  if (previous_segment_end_time + kRoundingErrorGrace < start_time) {
383  LOG(WARNING) << "Found a gap of size "
384  << (start_time - previous_segment_end_time)
385  << " > kRoundingErrorGrace (" << kRoundingErrorGrace
386  << "). The new segment starts at " << start_time
387  << " but the previous segment ends at "
388  << previous_segment_end_time << ".";
389  }
390 
391  // No overlapping segments.
392  if (start_time < previous_segment_end_time - kRoundingErrorGrace) {
393  LOG(WARNING)
394  << "Segments should not be overlapping. The new segment starts at "
395  << start_time << " but the previous segment ends at "
396  << previous_segment_end_time << ".";
397  }
398  }
399 
400  segment_infos_.push_back({start_time, adjusted_duration, kNoRepeat});
401 }
402 
403 bool Representation::ApproximiatelyEqual(int64_t time1, int64_t time2) const {
404  if (!allow_approximate_segment_timeline_)
405  return time1 == time2;
406 
407  // It is not always possible to align segment duration to target duration
408  // exactly. For example, for AAC with sampling rate of 44100, there are always
409  // 1024 audio samples per frame, so the frame duration is 1024/44100. For a
410  // target duration of 2 seconds, the closest segment duration would be 1.984
411  // or 2.00533.
412 
413  // An arbitrary error threshold cap. This makes sure that the error is not too
414  // large for large samples.
415  const double kErrorThresholdSeconds = 0.05;
416 
417  // So we consider two times equal if they differ by less than one sample.
418  const uint32_t error_threshold =
419  std::min(frame_duration_,
420  static_cast<uint32_t>(kErrorThresholdSeconds *
421  media_info_.reference_time_scale()));
422  return std::abs(time1 - time2) <= error_threshold;
423 }
424 
425 int64_t Representation::AdjustDuration(int64_t duration) const {
426  if (!allow_approximate_segment_timeline_)
427  return duration;
428  const int64_t scaled_target_duration =
429  mpd_options_.target_segment_duration * media_info_.reference_time_scale();
430  return ApproximiatelyEqual(scaled_target_duration, duration)
431  ? scaled_target_duration
432  : duration;
433 }
434 
435 void Representation::SlideWindow() {
436  DCHECK(!segment_infos_.empty());
437  if (mpd_options_.mpd_params.time_shift_buffer_depth <= 0.0 ||
438  mpd_options_.mpd_type == MpdType::kStatic)
439  return;
440 
441  const uint32_t time_scale = GetTimeScale(media_info_);
442  DCHECK_GT(time_scale, 0u);
443 
444  int64_t time_shift_buffer_depth = static_cast<int64_t>(
445  mpd_options_.mpd_params.time_shift_buffer_depth * time_scale);
446 
447  // The start time of the latest segment is considered the current_play_time,
448  // and this should guarantee that the latest segment will stay in the list.
449  const int64_t current_play_time = LatestSegmentStartTime(segment_infos_);
450  if (current_play_time <= time_shift_buffer_depth)
451  return;
452 
453  const int64_t timeshift_limit = current_play_time - time_shift_buffer_depth;
454 
455  // First remove all the SegmentInfos that are completely out of range, by
456  // looking at the very last segment's end time.
457  std::list<SegmentInfo>::iterator first = segment_infos_.begin();
458  std::list<SegmentInfo>::iterator last = first;
459  for (; last != segment_infos_.end(); ++last) {
460  const int64_t last_segment_end_time = LastSegmentEndTime(*last);
461  if (timeshift_limit < last_segment_end_time)
462  break;
463  RemoveSegments(last->start_time, last->duration, last->repeat + 1);
464  start_number_ += last->repeat + 1;
465  }
466  segment_infos_.erase(first, last);
467 
468  // Now some segment in the first SegmentInfo should be left in the list.
469  SegmentInfo* first_segment_info = &segment_infos_.front();
470  DCHECK_LE(timeshift_limit, LastSegmentEndTime(*first_segment_info));
471 
472  // Identify which segments should still be in the SegmentInfo.
473  const uint64_t repeat_index =
474  SearchTimedOutRepeatIndex(timeshift_limit, *first_segment_info);
475  if (repeat_index == 0)
476  return;
477 
478  RemoveSegments(first_segment_info->start_time, first_segment_info->duration,
479  repeat_index);
480 
481  first_segment_info->start_time = first_segment_info->start_time +
482  first_segment_info->duration * repeat_index;
483  first_segment_info->repeat = first_segment_info->repeat - repeat_index;
484  start_number_ += repeat_index;
485 }
486 
487 void Representation::RemoveSegments(int64_t start_time,
488  int64_t duration,
489  uint64_t num_segments) {
490  if (mpd_options_.mpd_params.preserved_segments_outside_live_window == 0)
491  return;
492 
493  for (size_t i = 0; i < num_segments; ++i) {
494  segments_to_be_removed_.push_back(media::GetSegmentName(
495  media_info_.segment_template(), start_time + i * duration,
496  start_number_ - 1 + i, media_info_.bandwidth()));
497  }
498  while (segments_to_be_removed_.size() >
499  mpd_options_.mpd_params.preserved_segments_outside_live_window) {
500  VLOG(2) << "Deleting " << segments_to_be_removed_.front();
501  File::Delete(segments_to_be_removed_.front().c_str());
502  segments_to_be_removed_.pop_front();
503  }
504 }
505 
506 std::string Representation::GetVideoMimeType() const {
507  return GetMimeType("video", media_info_.container_type());
508 }
509 
510 std::string Representation::GetAudioMimeType() const {
511  return GetMimeType("audio", media_info_.container_type());
512 }
513 
514 std::string Representation::GetTextMimeType() const {
515  CHECK(media_info_.has_text_info());
516  if (media_info_.text_info().codec() == "ttml") {
517  switch (media_info_.container_type()) {
518  case MediaInfo::CONTAINER_TEXT:
519  return "application/ttml+xml";
520  case MediaInfo::CONTAINER_MP4:
521  return "application/mp4";
522  default:
523  LOG(ERROR) << "Failed to determine MIME type for TTML container: "
524  << media_info_.container_type();
525  return "";
526  }
527  }
528  if (media_info_.text_info().codec() == "wvtt") {
529  if (media_info_.container_type() == MediaInfo::CONTAINER_TEXT) {
530  return "text/vtt";
531  } else if (media_info_.container_type() == MediaInfo::CONTAINER_MP4) {
532  return "application/mp4";
533  }
534  LOG(ERROR) << "Failed to determine MIME type for VTT container: "
535  << media_info_.container_type();
536  return "";
537  }
538 
539  LOG(ERROR) << "Cannot determine MIME type for format: "
540  << media_info_.text_info().codec()
541  << " container: " << media_info_.container_type();
542  return "";
543 }
544 
545 } // namespace shaka
bool AddVideoInfo(const MediaInfo::VideoInfo &video_info, bool set_width, bool set_height, bool set_frame_rate)
Definition: xml_node.cc:266
virtual const MediaInfo & GetMediaInfo() const
RepresentationType in MPD.
Definition: xml_node.h:139
static bool Delete(const char *file_name)
Definition: file.cc:198
size_t preserved_segments_outside_live_window
Definition: mpd_params.h:46
Representation(const MediaInfo &media_info, const MpdOptions &mpd_options, uint32_t representation_id, std::unique_ptr< RepresentationStateChangeListener > state_change_listener)
virtual void AddNewSegment(int64_t start_time, int64_t duration, uint64_t size)
virtual void SetSampleDuration(uint32_t sample_duration)
STL namespace.
scoped_xml_ptr< xmlNode > PassScopedPtr()
Definition: xml_node.cc:169
All the methods that are virtual are virtual for mocking.
bool AddVODOnlyInfo(const MediaInfo &media_info)
Definition: xml_node.cc:311
void SetStringAttribute(const char *attribute_name, const std::string &attribute)
Definition: xml_node.cc:137
void AddBlock(uint64_t size_in_bytes, double duration)
bool AddLiveOnlyInfo(const MediaInfo &media_info, const std::list< SegmentInfo > &segment_infos, uint32_t start_number)
Definition: xml_node.cc:357
void SetId(uint32_t id)
Definition: xml_node.cc:160
xml::scoped_xml_ptr< xmlNode > GetXml()
void SetPresentationTimeOffset(double presentation_time_offset)
Set in SegmentBase / SegmentTemplate.
double target_segment_duration
Definition: mpd_options.h:33
virtual void AddContentProtectionElement(const ContentProtectionElement &element)
void SetIntegerAttribute(const char *attribute_name, uint64_t number)
Definition: xml_node.cc:144
Defines Mpd Options.
Definition: mpd_options.h:25
virtual void UpdateContentProtectionPssh(const std::string &drm_uuid, const std::string &pssh)
double time_shift_buffer_depth
Definition: mpd_params.h:39
void SuppressOnce(SuppressFlag flag)
bool AddAudioInfo(const MediaInfo::AudioInfo &audio_info)
Definition: xml_node.cc:303
bool GetStartAndEndTimestamps(double *start_timestamp_seconds, double *end_timestamp_seconds) const