From 97efb59e5f56e4476386dab2d5febed3e23a34a8 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Thu, 29 Feb 2024 22:06:57 +0000 Subject: [PATCH] Only decode text direction entities in Sub files (cont.) Already did this for HLS, but somehow forgot to for DASH and direct URLs. --- devine/commands/dl.py | 8 ++++++-- devine/core/manifests/dash.py | 5 ++++- devine/core/manifests/hls.py | 1 - 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/devine/commands/dl.py b/devine/commands/dl.py index f578f32..68c32d3 100644 --- a/devine/commands/dl.py +++ b/devine/commands/dl.py @@ -906,10 +906,14 @@ class dl: track.OnDecrypted(drm) progress(downloaded="Decrypted", completed=100) - if isinstance(track, Subtitle): + if isinstance(track, Subtitle) and \ + track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML): track_data = track.path.read_bytes() track_data = try_ensure_utf8(track_data) - track_data = html.unescape(track_data.decode("utf8")).encode("utf8") + track_data = track_data.decode("utf8"). \ + replace("&lrm;", html.unescape("&lrm;")). \ + replace("&rlm;", html.unescape("&rlm;")). \ + encode("utf8") track.path.write_bytes(track_data) progress(downloaded="Downloaded") diff --git a/devine/core/manifests/dash.py b/devine/core/manifests/dash.py index af1008f..a5ea946 100644 --- a/devine/core/manifests/dash.py +++ b/devine/core/manifests/dash.py @@ -482,7 +482,10 @@ class DASH: track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) ): segment_data = try_ensure_utf8(segment_data) - segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8") + segment_data = segment_data.decode("utf8"). \ + replace("&lrm;", html.unescape("&lrm;")). \ + replace("&rlm;", html.unescape("&rlm;")). 
\ + encode("utf8") f.write(segment_data) f.flush() segment_file.unlink() diff --git a/devine/core/manifests/hls.py b/devine/core/manifests/hls.py index 611f89a..ccbd5ad 100644 --- a/devine/core/manifests/hls.py +++ b/devine/core/manifests/hls.py @@ -400,7 +400,6 @@ class HLS: if isinstance(track, Subtitle): segment_data = try_ensure_utf8(segment_file_path.read_bytes()) if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML): - # decode text direction entities or SubtitleEdit's /ReverseRtlStartEnd won't work segment_data = segment_data.decode("utf8"). \ replace("&lrm;", html.unescape("&lrm;")). \ replace("&rlm;", html.unescape("&rlm;")). \