forked from DRMTalks/devine
Exclude fragmented subtitle codecs from DASH UTF-8 checks
Chardet was detecting a mixture of mostly cp1252 and MacRoman encodings in these segments, when the data should simply be left as-is for parsing. The actual text within a box may need to go through `try_ensure_utf8` once it has been parsed, but the entire box should not.
This commit is contained in:
parent
0be62541ba
commit
e87de50940
|
@@ -473,7 +473,10 @@ class DASH:
|
||||||
for segment_file in sorted(save_dir.iterdir()):
|
for segment_file in sorted(save_dir.iterdir()):
|
||||||
segment_data = segment_file.read_bytes()
|
segment_data = segment_file.read_bytes()
|
||||||
# TODO: fix encoding after decryption?
|
# TODO: fix encoding after decryption?
|
||||||
if not drm and isinstance(track, Subtitle):
|
if (
|
||||||
|
not drm and isinstance(track, Subtitle) and
|
||||||
|
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
||||||
|
):
|
||||||
segment_data = try_ensure_utf8(segment_data)
|
segment_data = try_ensure_utf8(segment_data)
|
||||||
f.write(segment_data)
|
f.write(segment_data)
|
||||||
segment_file.unlink()
|
segment_file.unlink()
|
||||||
|
|
Loading…
Reference in New Issue