Replace negative size values in TTML text with 0

The TTML spec does not allow negative size values practically anywhere in the document. Some services appear to specify a negative value by accident, which sends pycaption on the fritz (it malfunctions on such input).
This commit is contained in:
rlaphoenix 2023-03-17 19:28:55 +00:00
parent 41018d4574
commit f4a9d6c0b1
1 changed file with 3 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import re
import subprocess
from collections import defaultdict
from enum import Enum
@@ -161,6 +162,8 @@ class Subtitle(Track):
return captions
if codec == Subtitle.Codec.TimedTextMarkupLang:
text = data.decode("utf8").replace("tt:", "")
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text)
return pycaption.DFXPReader().read(text)
if codec == Subtitle.Codec.fVTT:
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)