forked from DRMTalks/devine
Unescape HTML Entities in Subtitles after Downloading
This fixes some Subtitles having e.g., `&amp;` instead of just `&`, but especially for special entities like `&rlm;` which enables Right-to-Left mode on Hebrew and Arabic Subtitles.
This commit is contained in:
parent
26d067915f
commit
2056e056a4
|
@ -913,6 +913,7 @@ class dl:
|
||||||
if isinstance(track, Subtitle):
|
if isinstance(track, Subtitle):
|
||||||
track_data = track.path.read_bytes()
|
track_data = track.path.read_bytes()
|
||||||
track_data = try_ensure_utf8(track_data)
|
track_data = try_ensure_utf8(track_data)
|
||||||
|
track_data = html.unescape(track_data.decode("utf8")).encode("utf8")
|
||||||
track.path.write_bytes(track_data)
|
track.path.write_bytes(track_data)
|
||||||
|
|
||||||
progress(downloaded="Downloaded")
|
progress(downloaded="Downloaded")
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import html
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
@ -473,6 +474,7 @@ class DASH:
|
||||||
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
||||||
):
|
):
|
||||||
segment_data = try_ensure_utf8(segment_data)
|
segment_data = try_ensure_utf8(segment_data)
|
||||||
|
segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8")
|
||||||
f.write(segment_data)
|
f.write(segment_data)
|
||||||
segment_file.unlink()
|
segment_file.unlink()
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import html
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
@ -314,6 +315,8 @@ class HLS:
|
||||||
segment_data = segment_file.read_bytes()
|
segment_data = segment_file.read_bytes()
|
||||||
if isinstance(track, Subtitle):
|
if isinstance(track, Subtitle):
|
||||||
segment_data = try_ensure_utf8(segment_data)
|
segment_data = try_ensure_utf8(segment_data)
|
||||||
|
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
|
||||||
|
segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8")
|
||||||
f.write(segment_data)
|
f.write(segment_data)
|
||||||
segment_file.unlink()
|
segment_file.unlink()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue