forked from DRMTalks/devine
Add convert() method to Subtitle class
This commit is contained in:
parent
f4d8bc8dd0
commit
e76bc7201d
|
@ -5,6 +5,7 @@ import subprocess
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Iterable, Optional
|
from typing import Any, Iterable, Optional
|
||||||
|
|
||||||
import pycaption
|
import pycaption
|
||||||
|
@ -143,6 +144,76 @@ class Subtitle(Track):
|
||||||
track_name += flag
|
track_name += flag
|
||||||
return track_name or None
|
return track_name or None
|
||||||
|
|
||||||
|
def convert(self, codec: Subtitle.Codec) -> Path:
|
||||||
|
"""
|
||||||
|
Convert this Subtitle to another Format.
|
||||||
|
|
||||||
|
The file path location of the Subtitle data will be kept at the same
|
||||||
|
location but the file extension will be changed appropriately.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- SubRip - SubtitleEdit or pycaption.SRTWriter
|
||||||
|
- TimedTextMarkupLang - SubtitleEdit or pycaption.DFXPWriter
|
||||||
|
- WebVTT - SubtitleEdit or pycaption.WebVTTWriter
|
||||||
|
- SubStationAlphav4 - SubtitleEdit
|
||||||
|
- fTTML* - custom code using some pycaption functions
|
||||||
|
- fVTT* - custom code using some pycaption functions
|
||||||
|
*: Can read from format, but cannot convert to format
|
||||||
|
|
||||||
|
Note: It currently prioritizes using SubtitleEdit over PyCaption as
|
||||||
|
I have personally noticed more oddities with PyCaption parsing over
|
||||||
|
SubtitleEdit. Especially when working with TTML/DFXP where it would
|
||||||
|
often have timecodes and stuff mixed in/duplicated.
|
||||||
|
|
||||||
|
Returns the new file path of the Subtitle.
|
||||||
|
"""
|
||||||
|
if not self.path or not self.path.exists():
|
||||||
|
raise ValueError("You must download the subtitle track first.")
|
||||||
|
|
||||||
|
if self.codec == codec:
|
||||||
|
return self.path
|
||||||
|
|
||||||
|
output_path = self.path.with_suffix(f".{codec.value.lower()}")
|
||||||
|
|
||||||
|
sub_edit_executable = get_binary_path("SubtitleEdit")
|
||||||
|
if sub_edit_executable and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
|
||||||
|
sub_edit_format = {
|
||||||
|
Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
|
||||||
|
Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0"
|
||||||
|
}.get(codec, codec.name)
|
||||||
|
subprocess.run(
|
||||||
|
[
|
||||||
|
sub_edit_executable,
|
||||||
|
"/Convert", self.path, sub_edit_format,
|
||||||
|
f"/outputfilename:{output_path.name}",
|
||||||
|
f"/outputfolder:{output_path.parent}",
|
||||||
|
"/encoding:utf8"
|
||||||
|
],
|
||||||
|
check=True,
|
||||||
|
stdout=subprocess.DEVNULL,
|
||||||
|
stderr=subprocess.DEVNULL
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
writer = {
|
||||||
|
# pycaption generally only supports these subtitle formats
|
||||||
|
Subtitle.Codec.SubRip: pycaption.SRTWriter,
|
||||||
|
Subtitle.Codec.TimedTextMarkupLang: pycaption.DFXPWriter,
|
||||||
|
Subtitle.Codec.WebVTT: pycaption.WebVTTWriter,
|
||||||
|
}.get(codec)
|
||||||
|
if writer is None:
|
||||||
|
raise NotImplementedError(f"Cannot yet convert {self.codec.name} to {codec.name}.")
|
||||||
|
|
||||||
|
caption_set = self.parse(self.path.read_bytes(), self.codec)
|
||||||
|
Subtitle.merge_same_cues(caption_set)
|
||||||
|
subtitle_text = writer().write(caption_set)
|
||||||
|
|
||||||
|
output_path.write_text(subtitle_text, encoding="utf8")
|
||||||
|
|
||||||
|
self.swap(output_path)
|
||||||
|
self.codec = codec
|
||||||
|
|
||||||
|
return output_path
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
|
def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
|
||||||
if not isinstance(data, bytes):
|
if not isinstance(data, bytes):
|
||||||
|
|
Loading…
Reference in New Issue