From b3fdafcf06e884b3c4e4b24d59e41e306924e949 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Tue, 7 Mar 2023 11:16:48 +0000 Subject: [PATCH] Simplify Base URL joining and calculation on DASH This also fixes some DASH manifests where it uses multiple BaseURL definitions that must be joined together. --- devine/core/manifests/dash.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/devine/core/manifests/dash.py b/devine/core/manifests/dash.py index ff35276..f3b915b 100644 --- a/devine/core/manifests/dash.py +++ b/devine/core/manifests/dash.py @@ -315,13 +315,14 @@ class DASH: manifest = load_xml(session.get(manifest_url).text) manifest_url_query = urlparse(manifest_url).query - period_base_url = period.findtext("BaseURL") or manifest.findtext("BaseURL") - if not period_base_url or not re.match("^https?://", period_base_url, re.IGNORECASE): - period_base_url = urljoin(manifest_url, period_base_url) - period_duration = period.get("duration") or manifest.get("mediaPresentationDuration") + manifest_base_url = manifest.findtext("BaseURL") + if not manifest_base_url or not re.match("^https?://", manifest_base_url, re.IGNORECASE): + manifest_base_url = urljoin(manifest_url, "./", manifest_base_url) + period_base_url = urljoin(manifest_base_url, period.findtext("BaseURL")) + rep_base_url = urljoin(period_base_url, representation.findtext("BaseURL")) + period_duration = period.get("duration") or manifest.get("mediaPresentationDuration") init_data: Optional[bytes] = None - base_url = representation.findtext("BaseURL") or period_base_url segment_template = representation.find("SegmentTemplate") if segment_template is None: @@ -331,11 +332,11 @@ class DASH: if segment_list is None: segment_list = adaptation_set.find("SegmentList") - if segment_template is None and segment_list is None and base_url: + if segment_template is None and segment_list is None and rep_base_url: # If there's no SegmentTemplate and no SegmentList, then SegmentBase is used or just BaseURL # Regardless which of the two is used, we can just directly grab the BaseURL # Players would normally calculate segments via Byte-Ranges, but we don't care - track.url = urljoin(period_base_url, base_url) + track.url = rep_base_url track.descriptor = track.Descriptor.URL else: segments: list[tuple[str, Optional[str]]] = [] @@ -350,9 +351,9 @@ class DASH: if not value: continue if not re.match("^https?://", value, re.IGNORECASE): - if not base_url: + if not rep_base_url: raise ValueError("Resolved Segment URL is not absolute, and no Base URL is available.") - value = urljoin(base_url, value) + value = urljoin(rep_base_url, value) if not urlparse(value).query and manifest_url_query: value += f"?{manifest_url_query}" segment_template.set(item, value) @@ -407,14 +408,12 @@ class DASH: ), None )) elif segment_list is not None: - base_media_url = urljoin(period_base_url, base_url) - init_data = None initialization = segment_list.find("Initialization") if initialization: source_url = initialization.get("sourceURL") if source_url is None: - source_url = base_media_url + source_url = rep_base_url res = session.get(source_url) res.raise_for_status() @@ -424,7 +423,7 @@ class DASH: for segment_url in segment_urls: media_url = segment_url.get("media") if media_url is None: - media_url = base_media_url + media_url = rep_base_url segments.append(( media_url,