Use urljoin instead of an if check and + concatenation in HLS

urljoin() was already in use here even before devine was public, but the code was constantly changed back and forth between urljoin(), another form of urljoin (something custom, or something else I can't remember), and an if check plus string concatenation.

However, I can confirm that a simple if check will not work, as the base URI might not even be in the same relative root. The if checks have also been inconsistent, with some checking whether the URI starts with http(s):// and others checking whether it does not have the base URI at the start of the string.

The if-check method does not work as well as urljoin() has the potential to. Switching to urljoin() also fixes some services, as some HLS playlists would have the m3u8 URL on a completely different root, subdomain, or even domain, which caused the download to break completely when trying to fetch segments.
This commit is contained in:
rlaphoenix 2023-05-21 00:06:30 +01:00
parent 301c026ca9
commit df2f9b85ae
1 changed files with 9 additions and 20 deletions

View File

@ -12,6 +12,7 @@ from pathlib import Path
from queue import Queue from queue import Queue
from threading import Event, Lock from threading import Event, Lock
from typing import Any, Callable, Optional, Union from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin
import m3u8 import m3u8
import requests import requests
@ -96,10 +97,6 @@ class HLS:
tracks = Tracks() tracks = Tracks()
for playlist in self.manifest.playlists: for playlist in self.manifest.playlists:
url = playlist.uri
if not re.match("^https?://", url):
url = playlist.base_uri + url
audio_group = playlist.stream_info.audio audio_group = playlist.stream_info.audio
if audio_group: if audio_group:
audio_codec = Audio.Codec.from_codecs(playlist.stream_info.codecs) audio_codec = Audio.Codec.from_codecs(playlist.stream_info.codecs)
@ -115,7 +112,7 @@ class HLS:
tracks.add(primary_track_type( tracks.add(primary_track_type(
id_=md5(str(playlist).encode()).hexdigest()[0:7], # 7 chars only for filename length id_=md5(str(playlist).encode()).hexdigest()[0:7], # 7 chars only for filename length
url=url, url=urljoin(playlist.base_uri, playlist.uri),
codec=primary_track_type.Codec.from_codecs(playlist.stream_info.codecs), codec=primary_track_type.Codec.from_codecs(playlist.stream_info.codecs),
language=language, # HLS manifests do not seem to have language info language=language, # HLS manifests do not seem to have language info
is_original_lang=True, # TODO: All we can do is assume Yes is_original_lang=True, # TODO: All we can do is assume Yes
@ -136,13 +133,9 @@ class HLS:
)) ))
for media in self.manifest.media: for media in self.manifest.media:
url = media.uri if not media.uri:
if not url:
continue continue
if not re.match("^https?://", url):
url = media.base_uri + url
joc = 0 joc = 0
if media.type == "AUDIO": if media.type == "AUDIO":
track_type = Audio track_type = Audio
@ -156,7 +149,7 @@ class HLS:
tracks.add(track_type( tracks.add(track_type(
id_=md5(str(media).encode()).hexdigest()[0:6], # 6 chars only for filename length id_=md5(str(media).encode()).hexdigest()[0:6], # 6 chars only for filename length
url=url, url=urljoin(media.base_uri, media.uri),
codec=codec, codec=codec,
language=media.language or language, # HLS media may not have language info, fallback if needed language=media.language or language, # HLS media may not have language info, fallback if needed
is_original_lang=language and is_close_match(media.language, [language]), is_original_lang=language and is_close_match(media.language, [language]),
@ -371,9 +364,6 @@ class HLS:
# or if EXT-X-DISCONTINUITY is reached at the same time as EXT-X-MAP. # or if EXT-X-DISCONTINUITY is reached at the same time as EXT-X-MAP.
# Even if a new EXT-X-MAP is supplied, it may just be duplicate and would # Even if a new EXT-X-MAP is supplied, it may just be duplicate and would
# be unnecessary and slow to re-download the init data each time. # be unnecessary and slow to re-download the init data each time.
if not segment.init_section.uri.startswith(segment.init_section.base_uri):
segment.init_section.uri = segment.init_section.base_uri + segment.init_section.uri
if segment.init_section.byterange: if segment.init_section.byterange:
previous_range_offset = range_offset.get() previous_range_offset = range_offset.get()
byte_range = HLS.calculate_byte_range(segment.init_section.byterange, previous_range_offset) byte_range = HLS.calculate_byte_range(segment.init_section.byterange, previous_range_offset)
@ -383,8 +373,10 @@ class HLS:
} }
else: else:
range_header = {} range_header = {}
res = session.get(
res = session.get(segment.init_section.uri, headers=range_header) url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=range_header
)
res.raise_for_status() res.raise_for_status()
newest_init_data = res.content newest_init_data = res.content
finally: finally:
@ -416,9 +408,6 @@ class HLS:
if skip_event.is_set(): if skip_event.is_set():
return -1 return -1
if not segment.uri.startswith(segment.base_uri):
segment.uri = segment.base_uri + segment.uri
attempts = 1 attempts = 1
while True: while True:
try: try:
@ -433,7 +422,7 @@ class HLS:
else: else:
downloader_ = downloader downloader_ = downloader
downloader_( downloader_(
uri=segment.uri, uri=urljoin(segment.base_uri, segment.uri),
out=out_path, out=out_path,
headers=headers_, headers=headers_,
proxy=proxy, proxy=proxy,