forked from DRMTalks/devine
Fix subtitle conversion error where WEBVTT header is kept
This happened because the segmented WEBVTT files were merged by appending them to each other without enough newline separation, so pycaption treated each subsequent WEBVTT header as an actual caption and kept it.
This commit is contained in:
parent
47448aac3c
commit
4b5a2c703b
|
@ -170,7 +170,13 @@ class Subtitle(Track):
|
||||||
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
|
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
|
||||||
return caption_set
|
return caption_set
|
||||||
if codec == Subtitle.Codec.WebVTT:
|
if codec == Subtitle.Codec.WebVTT:
|
||||||
text = data.decode("utf8").replace("\r", "").replace("\n\n\n", "\n \n\n").replace("\n\n<", "\n<")
|
# Segmented VTT, when merged, may have the WEBVTT headers treated as part of the next caption
|
||||||
|
# if they are not separated far enough from the previous caption, hence the \n\n
|
||||||
|
text = data.decode("utf8"). \
|
||||||
|
replace("WEBVTT", "\n\nWEBVTT"). \
|
||||||
|
replace("\r", ""). \
|
||||||
|
replace("\n\n\n", "\n \n\n"). \
|
||||||
|
replace("\n\n<", "\n<")
|
||||||
captions: pycaption.CaptionSet = pycaption.WebVTTReader().read(text)
|
captions: pycaption.CaptionSet = pycaption.WebVTTReader().read(text)
|
||||||
return captions
|
return captions
|
||||||
except pycaption.exceptions.CaptionReadSyntaxError:
|
except pycaption.exceptions.CaptionReadSyntaxError:
|
||||||
|
|
Loading…
Reference in New Issue