Shaka Packager SDK
adaptation_set.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/mpd/base/adaptation_set.h"
8 
9 #include <cmath>
10 
11 #include "packager/base/logging.h"
12 #include "packager/base/strings/string_number_conversions.h"
13 #include "packager/mpd/base/media_info.pb.h"
14 #include "packager/mpd/base/mpd_options.h"
15 #include "packager/mpd/base/mpd_utils.h"
16 #include "packager/mpd/base/representation.h"
17 #include "packager/mpd/base/xml/xml_node.h"
18 
19 namespace shaka {
20 namespace {
21 
22 AdaptationSet::Role MediaInfoTextTypeToRole(
23  MediaInfo::TextInfo::TextType type) {
24  switch (type) {
25  case MediaInfo::TextInfo::UNKNOWN:
26  LOG(WARNING) << "Unknown text type, assuming subtitle.";
27  return AdaptationSet::kRoleSubtitle;
28  case MediaInfo::TextInfo::CAPTION:
29  return AdaptationSet::kRoleCaption;
30  case MediaInfo::TextInfo::SUBTITLE:
31  return AdaptationSet::kRoleSubtitle;
32  default:
33  NOTREACHED() << "Unknown MediaInfo TextType: " << type
34  << " assuming subtitle.";
35  return AdaptationSet::kRoleSubtitle;
36  }
37 }
38 
39 std::string RoleToText(AdaptationSet::Role role) {
40  // Using switch so that the compiler can detect whether there is a case that's
41  // not being handled.
42  switch (role) {
43  case AdaptationSet::kRoleCaption:
44  return "caption";
45  case AdaptationSet::kRoleSubtitle:
46  return "subtitle";
47  case AdaptationSet::kRoleMain:
48  return "main";
49  case AdaptationSet::kRoleAlternate:
50  return "alternate";
51  case AdaptationSet::kRoleSupplementary:
52  return "supplementary";
53  case AdaptationSet::kRoleCommentary:
54  return "commentary";
55  case AdaptationSet::kRoleDub:
56  return "dub";
57  default:
58  break;
59  }
60 
61  NOTREACHED();
62  return "";
63 }
64 
65 // Returns the picture aspect ratio string e.g. "16:9", "4:3".
66 // "Reducing the quotient to minimal form" does not work well in practice as
67 // there may be some rounding performed in the input, e.g. the resolution of
68 // 480p is 854:480 for 16:9 aspect ratio, can only be reduced to 427:240.
69 // The algorithm finds out the pair of integers, num and den, where num / den is
70 // the closest ratio to scaled_width / scaled_height, by looping den through
71 // common values.
72 std::string GetPictureAspectRatio(uint32_t width,
73  uint32_t height,
74  uint32_t pixel_width,
75  uint32_t pixel_height) {
76  const uint32_t scaled_width = pixel_width * width;
77  const uint32_t scaled_height = pixel_height * height;
78  const double par = static_cast<double>(scaled_width) / scaled_height;
79 
80  // Typical aspect ratios have par_y less than or equal to 19:
81  // https://en.wikipedia.org/wiki/List_of_common_resolutions
82  const uint32_t kLargestPossibleParY = 19;
83 
84  uint32_t par_num = 0;
85  uint32_t par_den = 0;
86  double min_error = 1.0;
87  for (uint32_t den = 1; den <= kLargestPossibleParY; ++den) {
88  uint32_t num = par * den + 0.5;
89  double error = fabs(par - static_cast<double>(num) / den);
90  if (error < min_error) {
91  min_error = error;
92  par_num = num;
93  par_den = den;
94  if (error == 0)
95  break;
96  }
97  }
98  VLOG(2) << "width*pix_width : height*pixel_height (" << scaled_width << ":"
99  << scaled_height << ") reduced to " << par_num << ":" << par_den
100  << " with error " << min_error << ".";
101 
102  return base::IntToString(par_num) + ":" + base::IntToString(par_den);
103 }
104 
105 // Adds an entry to picture_aspect_ratio if the size of picture_aspect_ratio is
106 // less than 2 and video_info has both pixel width and pixel height.
107 void AddPictureAspectRatio(const MediaInfo::VideoInfo& video_info,
108  std::set<std::string>* picture_aspect_ratio) {
109  // If there are more than one entries in picture_aspect_ratio, the @par
110  // attribute cannot be set, so skip.
111  if (picture_aspect_ratio->size() > 1)
112  return;
113 
114  if (video_info.width() == 0 || video_info.height() == 0 ||
115  video_info.pixel_width() == 0 || video_info.pixel_height() == 0) {
116  // If there is even one Representation without a @sar attribute, @par cannot
117  // be calculated.
118  // Just populate the set with at least 2 bogus strings so that further call
119  // to this function will bail out immediately.
120  picture_aspect_ratio->insert("bogus");
121  picture_aspect_ratio->insert("entries");
122  return;
123  }
124 
125  const std::string par = GetPictureAspectRatio(
126  video_info.width(), video_info.height(), video_info.pixel_width(),
127  video_info.pixel_height());
128  DVLOG(1) << "Setting par as: " << par
129  << " for video with width: " << video_info.width()
130  << " height: " << video_info.height()
131  << " pixel_width: " << video_info.pixel_width() << " pixel_height; "
132  << video_info.pixel_height();
133  picture_aspect_ratio->insert(par);
134 }
135 
136 class RepresentationStateChangeListenerImpl
137  : public RepresentationStateChangeListener {
138  public:
139  // |adaptation_set| is not owned by this class.
140  RepresentationStateChangeListenerImpl(uint32_t representation_id,
141  AdaptationSet* adaptation_set)
142  : representation_id_(representation_id), adaptation_set_(adaptation_set) {
143  DCHECK(adaptation_set_);
144  }
145  ~RepresentationStateChangeListenerImpl() override {}
146 
147  // RepresentationStateChangeListener implementation.
148  void OnNewSegmentForRepresentation(int64_t start_time,
149  int64_t duration) override {
150  adaptation_set_->OnNewSegmentForRepresentation(representation_id_,
151  start_time, duration);
152  }
153 
154  void OnSetFrameRateForRepresentation(uint32_t frame_duration,
155  uint32_t timescale) override {
156  adaptation_set_->OnSetFrameRateForRepresentation(representation_id_,
157  frame_duration, timescale);
158  }
159 
160  private:
161  const uint32_t representation_id_;
162  AdaptationSet* const adaptation_set_;
163 
164  DISALLOW_COPY_AND_ASSIGN(RepresentationStateChangeListenerImpl);
165 };
166 
167 } // namespace
168 
169 AdaptationSet::AdaptationSet(const std::string& language,
170  const MpdOptions& mpd_options,
171  uint32_t* counter)
172  : representation_counter_(counter),
173  language_(language),
174  mpd_options_(mpd_options),
175  segments_aligned_(kSegmentAlignmentUnknown),
176  force_set_segment_alignment_(false) {
177  DCHECK(counter);
178 }
179 
180 AdaptationSet::~AdaptationSet() {}
181 
182 Representation* AdaptationSet::AddRepresentation(const MediaInfo& media_info) {
183  const uint32_t representation_id = (*representation_counter_)++;
184  // Note that AdaptationSet outlive Representation, so this object
185  // will die before AdaptationSet.
186  std::unique_ptr<RepresentationStateChangeListener> listener(
187  new RepresentationStateChangeListenerImpl(representation_id, this));
188  std::unique_ptr<Representation> new_representation(new Representation(
189  media_info, mpd_options_, representation_id, std::move(listener)));
190 
191  if (!new_representation->Init()) {
192  LOG(ERROR) << "Failed to initialize Representation.";
193  return NULL;
194  }
195  UpdateFromMediaInfo(media_info);
196  Representation* representation_ptr = new_representation.get();
197  representation_map_[representation_ptr->id()] = std::move(new_representation);
198  return representation_ptr;
199 }
200 
202  const Representation& representation) {
203  // Note that AdaptationSet outlive Representation, so this object
204  // will die before AdaptationSet.
205  std::unique_ptr<RepresentationStateChangeListener> listener(
206  new RepresentationStateChangeListenerImpl(representation.id(), this));
207  std::unique_ptr<Representation> new_representation(
208  new Representation(representation, std::move(listener)));
209 
210  UpdateFromMediaInfo(new_representation->GetMediaInfo());
211  Representation* representation_ptr = new_representation.get();
212  representation_map_[representation_ptr->id()] = std::move(new_representation);
213  return representation_ptr;
214 }
215 
217  const ContentProtectionElement& content_protection_element) {
218  content_protection_elements_.push_back(content_protection_element);
219  RemoveDuplicateAttributes(&content_protection_elements_.back());
220 }
221 
222 void AdaptationSet::UpdateContentProtectionPssh(const std::string& drm_uuid,
223  const std::string& pssh) {
224  UpdateContentProtectionPsshHelper(drm_uuid, pssh,
225  &content_protection_elements_);
226 }
227 
228 void AdaptationSet::AddRole(Role role) {
229  roles_.insert(role);
230 }
231 
232 // Creates a copy of <AdaptationSet> xml element, iterate thru all the
233 // <Representation> (child) elements and add them to the copy.
234 // Set all the attributes first and then add the children elements so that flags
235 // can be passed to Representation to avoid setting redundant attributes. For
236 // example, if AdaptationSet@width is set, then Representation@width is
237 // redundant and should not be set.
238 xml::scoped_xml_ptr<xmlNode> AdaptationSet::GetXml() {
239  xml::AdaptationSetXmlNode adaptation_set;
240 
241  bool suppress_representation_width = false;
242  bool suppress_representation_height = false;
243  bool suppress_representation_frame_rate = false;
244 
245  if (id_)
246  adaptation_set.SetId(id_.value());
247  adaptation_set.SetStringAttribute("contentType", content_type_);
248  if (!language_.empty() && language_ != "und") {
249  adaptation_set.SetStringAttribute("lang", language_);
250  }
251 
252  // Note that std::{set,map} are ordered, so the last element is the max value.
253  if (video_widths_.size() == 1) {
254  suppress_representation_width = true;
255  adaptation_set.SetIntegerAttribute("width", *video_widths_.begin());
256  } else if (video_widths_.size() > 1) {
257  adaptation_set.SetIntegerAttribute("maxWidth", *video_widths_.rbegin());
258  }
259  if (video_heights_.size() == 1) {
260  suppress_representation_height = true;
261  adaptation_set.SetIntegerAttribute("height", *video_heights_.begin());
262  } else if (video_heights_.size() > 1) {
263  adaptation_set.SetIntegerAttribute("maxHeight", *video_heights_.rbegin());
264  }
265 
266  if (video_frame_rates_.size() == 1) {
267  suppress_representation_frame_rate = true;
268  adaptation_set.SetStringAttribute("frameRate",
269  video_frame_rates_.begin()->second);
270  } else if (video_frame_rates_.size() > 1) {
271  adaptation_set.SetStringAttribute("maxFrameRate",
272  video_frame_rates_.rbegin()->second);
273  }
274 
275  // Note: must be checked before checking segments_aligned_ (below). So that
276  // segments_aligned_ is set before checking below.
277  if (mpd_options_.mpd_type == MpdType::kStatic) {
278  CheckStaticSegmentAlignment();
279  }
280 
281  if (segments_aligned_ == kSegmentAlignmentTrue) {
282  adaptation_set.SetStringAttribute(
283  mpd_options_.dash_profile == DashProfile::kOnDemand
284  ? "subsegmentAlignment"
285  : "segmentAlignment",
286  "true");
287  }
288 
289  if (picture_aspect_ratio_.size() == 1)
290  adaptation_set.SetStringAttribute("par", *picture_aspect_ratio_.begin());
291 
292  if (!adaptation_set.AddContentProtectionElements(
293  content_protection_elements_)) {
294  return xml::scoped_xml_ptr<xmlNode>();
295  }
296 
297  std::string trick_play_reference_ids;
298  for (const AdaptationSet* adaptation_set : trick_play_references_) {
299  if (!trick_play_reference_ids.empty())
300  trick_play_reference_ids += ',';
301  CHECK(adaptation_set->has_id());
302  trick_play_reference_ids += std::to_string(adaptation_set->id());
303  }
304  if (!trick_play_reference_ids.empty()) {
305  adaptation_set.AddEssentialProperty(
306  "http://dashif.org/guidelines/trickmode", trick_play_reference_ids);
307  }
308 
309  std::string switching_ids;
310  for (const AdaptationSet* adaptation_set : switchable_adaptation_sets_) {
311  if (!switching_ids.empty())
312  switching_ids += ',';
313  CHECK(adaptation_set->has_id());
314  switching_ids += std::to_string(adaptation_set->id());
315  }
316  if (!switching_ids.empty()) {
317  adaptation_set.AddSupplementalProperty(
318  "urn:mpeg:dash:adaptation-set-switching:2016", switching_ids);
319  }
320 
321  for (AdaptationSet::Role role : roles_)
322  adaptation_set.AddRoleElement("urn:mpeg:dash:role:2011", RoleToText(role));
323 
324  for (const auto& representation_pair : representation_map_) {
325  const auto& representation = representation_pair.second;
326  if (suppress_representation_width)
327  representation->SuppressOnce(Representation::kSuppressWidth);
328  if (suppress_representation_height)
329  representation->SuppressOnce(Representation::kSuppressHeight);
330  if (suppress_representation_frame_rate)
331  representation->SuppressOnce(Representation::kSuppressFrameRate);
332  xml::scoped_xml_ptr<xmlNode> child(representation->GetXml());
333  if (!child || !adaptation_set.AddChild(std::move(child)))
334  return xml::scoped_xml_ptr<xmlNode>();
335  }
336 
337  return adaptation_set.PassScopedPtr();
338 }
339 
340 void AdaptationSet::ForceSetSegmentAlignment(bool segment_alignment) {
341  segments_aligned_ =
342  segment_alignment ? kSegmentAlignmentTrue : kSegmentAlignmentFalse;
343  force_set_segment_alignment_ = true;
344 }
345 
347  const AdaptationSet* adaptation_set) {
348  switchable_adaptation_sets_.push_back(adaptation_set);
349 }
350 
351 // For dynamic MPD, storing all start_time and duration will out-of-memory
352 // because there's no way of knowing when it will end. Static MPD
353 // subsegmentAlignment check is *not* done here because it is possible that some
354 // Representations might not have been added yet (e.g. a thread is assigned per
355 // muxer so one might run faster than others). To be clear, for dynamic MPD, all
356 // Representations should be added before a segment is added.
357 void AdaptationSet::OnNewSegmentForRepresentation(uint32_t representation_id,
358  uint64_t start_time,
359  uint64_t duration) {
360  if (mpd_options_.mpd_type == MpdType::kDynamic) {
361  CheckDynamicSegmentAlignment(representation_id, start_time, duration);
362  } else {
363  representation_segment_start_times_[representation_id].push_back(
364  start_time);
365  }
366 }
367 
368 void AdaptationSet::OnSetFrameRateForRepresentation(uint32_t representation_id,
369  uint32_t frame_duration,
370  uint32_t timescale) {
371  RecordFrameRate(frame_duration, timescale);
372 }
373 
375  trick_play_references_.push_back(adaptation_set);
376 }
377 
378 const std::list<Representation*> AdaptationSet::GetRepresentations() const {
379  std::list<Representation*> representations;
380  for (const auto& representation_pair : representation_map_) {
381  representations.push_back(representation_pair.second.get());
382  }
383  return representations;
384 }
385 
// NOTE(review): the line that opens this definition (listing line 386) is
// missing from this listing, so the function's name and qualifiers cannot be
// confirmed from here. The body reports whether content_type_ is "video", the
// value UpdateFromMediaInfo() assigns for media that carries video info.
387  return content_type_ == "video";
388 }
389 
390 void AdaptationSet::UpdateFromMediaInfo(const MediaInfo& media_info) {
391  // For videos, record the width, height, and the frame rate to calculate the
392  // max {width,height,framerate} required for DASH IOP.
393  if (media_info.has_video_info()) {
394  const MediaInfo::VideoInfo& video_info = media_info.video_info();
395  DCHECK(video_info.has_width());
396  DCHECK(video_info.has_height());
397  video_widths_.insert(video_info.width());
398  video_heights_.insert(video_info.height());
399 
400  if (video_info.has_time_scale() && video_info.has_frame_duration())
401  RecordFrameRate(video_info.frame_duration(), video_info.time_scale());
402 
403  AddPictureAspectRatio(video_info, &picture_aspect_ratio_);
404  }
405 
406  if (media_info.has_video_info()) {
407  content_type_ = "video";
408  } else if (media_info.has_audio_info()) {
409  content_type_ = "audio";
410  } else if (media_info.has_text_info()) {
411  content_type_ = "text";
412 
413  if (media_info.text_info().has_type() &&
414  (media_info.text_info().type() != MediaInfo::TextInfo::UNKNOWN)) {
415  roles_.insert(MediaInfoTextTypeToRole(media_info.text_info().type()));
416  }
417  }
418 }
419 
420 // This implementation assumes that each representations' segments' are
421 // contiguous.
422 // Also assumes that all Representations are added before this is called.
423 // This checks whether the first elements of the lists in
424 // representation_segment_start_times_ are aligned.
425 // For example, suppose this method was just called with args rep_id=2
426 // start_time=1.
427 // 1 -> [1, 100, 200]
428 // 2 -> [1]
429 // The timestamps of the first elements match, so this flags
430 // segments_aligned_=true.
431 // Also since the first segment start times match, the first element of all the
432 // lists are removed, so the map of lists becomes:
433 // 1 -> [100, 200]
434 // 2 -> []
435 // Note that there could be false positives.
436 // e.g. just got rep_id=3 start_time=1 duration=300, and the duration of the
437 // whole AdaptationSet is 300.
438 // 1 -> [1, 100, 200]
439 // 2 -> [1, 90, 100]
440 // 3 -> [1]
441 // They are not aligned but this will be marked as aligned.
442 // But since this is unlikely to happen in the packager (and to save
443 // computation), this isn't handled at the moment.
444 void AdaptationSet::CheckDynamicSegmentAlignment(uint32_t representation_id,
445  uint64_t start_time,
446  uint64_t /* duration */) {
447  if (segments_aligned_ == kSegmentAlignmentFalse ||
448  force_set_segment_alignment_) {
449  return;
450  }
451 
452  std::list<uint64_t>& current_representation_start_times =
453  representation_segment_start_times_[representation_id];
454  current_representation_start_times.push_back(start_time);
455  // There's no way to detemine whether the segments are aligned if some
456  // representations do not have any segments.
457  if (representation_segment_start_times_.size() != representation_map_.size())
458  return;
459 
460  DCHECK(!current_representation_start_times.empty());
461  const uint64_t expected_start_time =
462  current_representation_start_times.front();
463  for (const auto& key_value : representation_segment_start_times_) {
464  const std::list<uint64_t>& representation_start_time = key_value.second;
465  // If there are no entries in a list, then there is no way for the
466  // segment alignment status to change.
467  // Note that it can be empty because entries get deleted below.
468  if (representation_start_time.empty())
469  return;
470 
471  if (expected_start_time != representation_start_time.front()) {
472  VLOG(1) << "Seeing Misaligned segments with different start_times: "
473  << expected_start_time << " vs "
474  << representation_start_time.front();
475  // Flag as false and clear the start times data, no need to keep it
476  // around.
477  segments_aligned_ = kSegmentAlignmentFalse;
478  representation_segment_start_times_.clear();
479  return;
480  }
481  }
482  segments_aligned_ = kSegmentAlignmentTrue;
483 
484  for (auto& key_value : representation_segment_start_times_) {
485  std::list<uint64_t>& representation_start_time = key_value.second;
486  representation_start_time.pop_front();
487  }
488 }
489 
490 // Make sure all segements start times match for all Representations.
491 // This assumes that the segments are contiguous.
492 void AdaptationSet::CheckStaticSegmentAlignment() {
493  if (segments_aligned_ == kSegmentAlignmentFalse ||
494  force_set_segment_alignment_) {
495  return;
496  }
497  if (representation_segment_start_times_.empty())
498  return;
499  if (representation_segment_start_times_.size() == 1) {
500  segments_aligned_ = kSegmentAlignmentTrue;
501  return;
502  }
503 
504  // This is not the most efficient implementation to compare the values
505  // because expected_time_line is compared against all other time lines, but
506  // probably the most readable.
507  const std::list<uint64_t>& expected_time_line =
508  representation_segment_start_times_.begin()->second;
509 
510  bool all_segment_time_line_same_length = true;
511  // Note that the first entry is skipped because it is expected_time_line.
512  RepresentationTimeline::const_iterator it =
513  representation_segment_start_times_.begin();
514  for (++it; it != representation_segment_start_times_.end(); ++it) {
515  const std::list<uint64_t>& other_time_line = it->second;
516  if (expected_time_line.size() != other_time_line.size()) {
517  all_segment_time_line_same_length = false;
518  }
519 
520  const std::list<uint64_t>* longer_list = &other_time_line;
521  const std::list<uint64_t>* shorter_list = &expected_time_line;
522  if (expected_time_line.size() > other_time_line.size()) {
523  shorter_list = &other_time_line;
524  longer_list = &expected_time_line;
525  }
526 
527  if (!std::equal(shorter_list->begin(), shorter_list->end(),
528  longer_list->begin())) {
529  // Some segments are definitely unaligned.
530  segments_aligned_ = kSegmentAlignmentFalse;
531  representation_segment_start_times_.clear();
532  return;
533  }
534  }
535 
536  // TODO(rkuroiwa): The right way to do this is to also check the durations.
537  // For example:
538  // (a) 3 4 5
539  // (b) 3 4 5 6
540  // could be true or false depending on the length of the third segment of (a).
541  // i.e. if length of the third segment is 2, then this is not aligned.
542  if (!all_segment_time_line_same_length) {
543  segments_aligned_ = kSegmentAlignmentUnknown;
544  return;
545  }
546 
547  segments_aligned_ = kSegmentAlignmentTrue;
548 }
549 
550 // Since all AdaptationSet cares about is the maxFrameRate, representation_id
551 // is not passed to this method.
552 void AdaptationSet::RecordFrameRate(uint32_t frame_duration,
553  uint32_t timescale) {
554  if (frame_duration == 0) {
555  LOG(ERROR) << "Frame duration is 0 and cannot be set.";
556  return;
557  }
558  video_frame_rates_[static_cast<double>(timescale) / frame_duration] =
559  base::IntToString(timescale) + "/" + base::IntToString(frame_duration);
560 }
561 
562 } // namespace shaka
void OnSetFrameRateForRepresentation(uint32_t representation_id, uint32_t frame_duration, uint32_t timescale)
AdaptationSetType specified in MPD.
Definition: xml_node.h:124
virtual Representation * AddRepresentation(const MediaInfo &media_info)
uint32_t id() const
scoped_xml_ptr< xmlNode > PassScopedPtr()
Definition: xml_node.cc:169
All the methods that are virtual are virtual for mocking.
void AddEssentialProperty(const std::string &scheme_id_uri, const std::string &value)
Definition: xml_node.cc:211
virtual void AddContentProtectionElement(const ContentProtectionElement &element)
virtual Representation * CopyRepresentation(const Representation &representation)
void SetStringAttribute(const char *attribute_name, const std::string &attribute)
Definition: xml_node.cc:137
bool AddChild(scoped_xml_ptr< xmlNode > child)
Definition: xml_node.cc:95
virtual void AddRole(Role role)
virtual void UpdateContentProtectionPssh(const std::string &drm_uuid, const std::string &pssh)
void SetId(uint32_t id)
Definition: xml_node.cc:160
AdaptationSet(const std::string &language, const MpdOptions &mpd_options, uint32_t *representation_counter)
virtual void ForceSetSegmentAlignment(bool segment_alignment)
void AddSupplementalProperty(const std::string &scheme_id_uri, const std::string &value)
Definition: xml_node.cc:202
xml::scoped_xml_ptr< xmlNode > GetXml()
virtual void AddAdaptationSetSwitching(const AdaptationSet *adaptation_set)
void SetIntegerAttribute(const char *attribute_name, uint64_t number)
Definition: xml_node.cc:144
void AddRoleElement(const std::string &scheme_id_uri, const std::string &value)
Definition: xml_node.cc:254
Defines Mpd Options.
Definition: mpd_options.h:25
void OnNewSegmentForRepresentation(uint32_t representation_id, uint64_t start_time, uint64_t duration)
virtual void AddTrickPlayReference(const AdaptationSet *adaptation_set)