fix(HLS): Use filtered out segment key info

Also simplifies calculation of wanted segment range when decrypting. Instead of storing the starting segment index number with the encryption_data variable, we just grab the first segment that isn't already merged.

Fixes #77
This commit is contained in:
rlaphoenix 2024-03-04 12:51:00 +00:00
parent 499fc67ea0
commit 6e8efc3f63
1 changed files with 71 additions and 58 deletions

View File

@ -239,19 +239,22 @@ class HLS:
else:
session_drm = None
segments = [
unwanted_segments = [
segment for segment in master.segments
if not callable(track.OnSegmentFilter) or not track.OnSegmentFilter(segment)
if callable(track.OnSegmentFilter) and track.OnSegmentFilter(segment)
]
total_segments = len(segments)
total_segments = len(master.segments) - len(unwanted_segments)
progress(total=total_segments)
downloader_ = downloader
urls: list[dict[str, Any]] = []
range_offset = 0
for segment in segments:
for segment in master.segments:
if segment in unwanted_segments:
continue
if segment.byterange:
if downloader_.__name__ == "aria2c":
# aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
@ -260,6 +263,7 @@ class HLS:
range_offset = byte_range.split("-")[0]
else:
byte_range = None
urls.append({
"url": urljoin(segment.base_uri, segment.uri),
"headers": {
@ -272,7 +276,7 @@ class HLS:
for status_update in downloader_(
urls=urls,
output_dir=segment_save_dir,
filename="{i:0%d}{ext}" % len(str(len(segments))),
filename="{i:0%d}{ext}" % len(str(len(urls))),
headers=session.headers,
cookies=session.cookies,
proxy=proxy,
@ -289,19 +293,21 @@ class HLS:
progress(total=total_segments, completed=0, downloaded="Merging")
name_len = len(str(total_segments))
discon_i = 0
range_offset = 0
map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
if session_drm:
encryption_data: Optional[tuple[int, Optional[m3u8.Key], DRM_T]] = (0, None, session_drm)
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (None, session_drm)
else:
encryption_data: Optional[tuple[int, Optional[m3u8.Key], DRM_T]] = None
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
for i, segment in enumerate(segments):
is_last_segment = (i + 1) == total_segments
name_len = len(str(total_segments))
segment_file_ext = get_extension(segment.uri)
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
i = -1
for real_i, segment in enumerate(master.segments):
if segment not in unwanted_segments:
i += 1
is_last_segment = (real_i + 1) == len(master.segments)
def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
"""
@ -339,13 +345,17 @@ class HLS:
Returns the decrypted path.
"""
drm = encryption_data[2]
first_segment_i = encryption_data[0]
drm = encryption_data[1]
first_segment_i = next(
int(file.stem)
for file in sorted(segment_save_dir.iterdir())
if file.stem.isdigit()
)
last_segment_i = max(0, i - int(not include_this_segment))
range_len = (last_segment_i - first_segment_i) + 1
segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
merged_path = segment_save_dir / f"{segment_range}{get_extension(segments[last_segment_i].uri)}"
merged_path = segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"
files = [
@ -405,57 +415,60 @@ class HLS:
include_map_data=include_map_data
)
if isinstance(track, Subtitle):
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
segment_data = segment_data.decode("utf8"). \
replace("‎", html.unescape("‎")). \
replace("‏", html.unescape("‏")). \
encode("utf8")
segment_file_path.write_bytes(segment_data)
if segment not in unwanted_segments:
if isinstance(track, Subtitle):
segment_file_ext = get_extension(segment.uri)
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
segment_data = segment_data.decode("utf8"). \
replace("‎", html.unescape("‎")). \
replace("‏", html.unescape("‏")). \
encode("utf8")
segment_file_path.write_bytes(segment_data)
if segment.discontinuity and i != 0:
if encryption_data:
decrypt(include_this_segment=False)
merge_discontinuity(
include_this_segment=False,
include_map_data=not encryption_data or not encryption_data[2]
)
discon_i += 1
range_offset = 0 # TODO: Should this be reset or not?
map_data = None
if encryption_data:
encryption_data = (i, encryption_data[1], encryption_data[2])
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
if segment.init_section.byterange:
init_byte_range = HLS.calculate_byte_range(
segment.init_section.byterange,
range_offset
if segment.discontinuity and i != 0:
if encryption_data:
decrypt(include_this_segment=False)
merge_discontinuity(
include_this_segment=False,
include_map_data=not encryption_data or not encryption_data[1]
)
range_offset = init_byte_range.split("-")[0]
init_range_header = {
"Range": f"bytes={init_byte_range}"
}
else:
init_range_header = {}
res = session.get(
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=init_range_header
)
res.raise_for_status()
map_data = (segment.init_section, res.content)
discon_i += 1
range_offset = 0 # TODO: Should this be reset or not?
map_data = None
if encryption_data:
encryption_data = (encryption_data[0], encryption_data[1])
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
if segment.init_section.byterange:
init_byte_range = HLS.calculate_byte_range(
segment.init_section.byterange,
range_offset
)
range_offset = init_byte_range.split("-")[0]
init_range_header = {
"Range": f"bytes={init_byte_range}"
}
else:
init_range_header = {}
res = session.get(
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=init_range_header
)
res.raise_for_status()
map_data = (segment.init_section, res.content)
if segment.keys:
key = HLS.get_supported_key(segment.keys)
if encryption_data and encryption_data[1] != key and i != 0:
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
decrypt(include_this_segment=False)
if key is None:
encryption_data = None
elif not encryption_data or encryption_data[1] != key:
elif not encryption_data or encryption_data[0] != key:
drm = HLS.get_drm(key, proxy)
if isinstance(drm, Widevine):
try:
@ -470,7 +483,7 @@ class HLS:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
encryption_data = (i, key, drm)
encryption_data = (key, drm)
# TODO: This wont work as we already downloaded
if DOWNLOAD_LICENCE_ONLY.is_set():
@ -482,7 +495,7 @@ class HLS:
decrypt(include_this_segment=True)
merge_discontinuity(
include_this_segment=True,
include_map_data=not encryption_data or not encryption_data[2]
include_map_data=not encryption_data or not encryption_data[1]
)
progress(advance=1)