Fix subtitle conversion error where WEBVTT header is kept

This happened because, when segmented WebVTT data was merged, each segment's WEBVTT header was appended after the previous segment's last caption without enough newline separation, so pycaption treated the header as part of a caption and kept it in the output.
2023-02-11 22:17:43 +00:00 · 2023-02-11 22:17:43 +00:00 · 4b5a2c703b
parent 47448aac3c
commit 4b5a2c703b
1 changed files with 7 additions and 1 deletions
--- a/devine/core/tracks/subtitle.py
+++ b/devine/core/tracks/subtitle.py
@@ -170,7 +170,13 @@ class Subtitle(Track):
                caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
                return caption_set
            if codec == Subtitle.Codec.WebVTT:
-                text = data.decode("utf8").replace("\r", "").replace("\n\n\n", "\n \n\n").replace("\n\n<", "\n<")
+                # Segmented VTT when merged may have the WEBVTT headers part of the next caption
+                # if they are not separated far enough from the previous caption, hence the \n\n
+                text = data.decode("utf8"). \
+                    replace("WEBVTT", "\n\nWEBVTT"). \
+                    replace("\r", ""). \
+                    replace("\n\n\n", "\n \n\n"). \
+                    replace("\n\n<", "\n<")
                captions: pycaption.CaptionSet = pycaption.WebVTTReader().read(text)
                return captions
        except pycaption.exceptions.CaptionReadSyntaxError: