Fix subtitle conversion error where WEBVTT header is kept

This happened because the segments of a segmented WebVTT subtitle, each beginning with its own WEBVTT header, were appended to each other without enough newline separation, so pycaption treated the later headers as actual captions and kept them.
This commit is contained in:
rlaphoenix 2023-02-11 22:17:43 +00:00
parent 47448aac3c
commit 4b5a2c703b
1 changed file with 7 additions and 1 deletion

View File

@@ -170,7 +170,13 @@ class Subtitle(Track):
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists) caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
return caption_set return caption_set
if codec == Subtitle.Codec.WebVTT: if codec == Subtitle.Codec.WebVTT:
text = data.decode("utf8").replace("\r", "").replace("\n\n\n", "\n \n\n").replace("\n\n<", "\n<") # Segmented VTT when merged may have the WEBVTT headers part of the next caption
# if they are not separated far enough from the previous caption, hence the \n\n
text = data.decode("utf8"). \
replace("WEBVTT", "\n\nWEBVTT"). \
replace("\r", ""). \
replace("\n\n\n", "\n \n\n"). \
replace("\n\n<", "\n<")
captions: pycaption.CaptionSet = pycaption.WebVTTReader().read(text) captions: pycaption.CaptionSet = pycaption.WebVTTReader().read(text)
return captions return captions
except pycaption.exceptions.CaptionReadSyntaxError: except pycaption.exceptions.CaptionReadSyntaxError: