From e87de509408ecc23284b4f0d68a02c558e6eecf6 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Sat, 2 Dec 2023 17:44:47 +0000 Subject: [PATCH] Exclude fragmented Sub Codecs from DASH UTF-8 checks Chardet was detecting the segment data as a mixture of mostly cp1252 and MacRoman encodings, when it should just be left as-is when parsing. The actual text within it may need to go through `try_ensure_utf8` when parsed, but not the entire box. --- devine/core/manifests/dash.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/devine/core/manifests/dash.py b/devine/core/manifests/dash.py index 3dffc8b..7390695 100644 --- a/devine/core/manifests/dash.py +++ b/devine/core/manifests/dash.py @@ -473,7 +473,10 @@ class DASH: for segment_file in sorted(save_dir.iterdir()): segment_data = segment_file.read_bytes() # TODO: fix encoding after decryption? - if not drm and isinstance(track, Subtitle): + if ( + not drm and isinstance(track, Subtitle) and + track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) + ): segment_data = try_ensure_utf8(segment_data) f.write(segment_data) segment_file.unlink()