Only decode text direction entities in Sub files (cont.)

This was already done for HLS, but was missed for DASH and direct URLs.
This commit is contained in:
rlaphoenix 2024-02-29 22:06:57 +00:00
parent 4073cefc74
commit 97efb59e5f
3 changed files with 10 additions and 4 deletions

View File

@@ -906,10 +906,14 @@ class dl:
track.OnDecrypted(drm) track.OnDecrypted(drm)
progress(downloaded="Decrypted", completed=100) progress(downloaded="Decrypted", completed=100)
if isinstance(track, Subtitle): if isinstance(track, Subtitle) and \
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
track_data = track.path.read_bytes() track_data = track.path.read_bytes()
track_data = try_ensure_utf8(track_data) track_data = try_ensure_utf8(track_data)
track_data = html.unescape(track_data.decode("utf8")).encode("utf8") track_data = track_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
encode("utf8")
track.path.write_bytes(track_data) track.path.write_bytes(track_data)
progress(downloaded="Downloaded") progress(downloaded="Downloaded")

View File

@@ -482,7 +482,10 @@ class DASH:
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
): ):
segment_data = try_ensure_utf8(segment_data) segment_data = try_ensure_utf8(segment_data)
segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8") segment_data = segment_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
encode("utf8")
f.write(segment_data) f.write(segment_data)
f.flush() f.flush()
segment_file.unlink() segment_file.unlink()

View File

@@ -400,7 +400,6 @@ class HLS:
if isinstance(track, Subtitle): if isinstance(track, Subtitle):
segment_data = try_ensure_utf8(segment_file_path.read_bytes()) segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML): if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
# decode text direction entities or SubtitleEdit's /ReverseRtlStartEnd won't work
segment_data = segment_data.decode("utf8"). \ segment_data = segment_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \ replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \ replace("&rlm;", html.unescape("&rlm;")). \