Add convert() method to Subtitle class

2024-01-12 00:50:27 +00:00 · 2024-01-12 00:50:27 +00:00 · e76bc7201d
parent f4d8bc8dd0
commit e76bc7201d
1 changed files with 71 additions and 0 deletions
--- a/devine/core/tracks/subtitle.py
+++ b/devine/core/tracks/subtitle.py
@ -5,6 +5,7 @@ import subprocess
 from collections import defaultdict
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
 from typing import Any, Iterable, Optional
 import pycaption
@ -143,6 +144,76 @@ class Subtitle(Track):
            track_name += flag
        return track_name or None
    def convert(self, codec: Subtitle.Codec) -> Path:
        """
        Convert this Subtitle to another Format.
        The file path location of the Subtitle data will be kept at the same
        location but the file extension will be changed appropriately.
        Supported formats:
        - SubRip - SubtitleEdit or pycaption.SRTWriter
        - TimedTextMarkupLang - SubtitleEdit or pycaption.DFXPWriter
        - WebVTT - SubtitleEdit or pycaption.WebVTTWriter
        - SubStationAlphav4 - SubtitleEdit
        - fTTML* - custom code using some pycaption functions
        - fVTT* - custom code using some pycaption functions
        *: Can read from format, but cannot convert to format
        Note: It currently prioritizes using SubtitleEdit over PyCaption as
        I have personally noticed more oddities with PyCaption parsing over
        SubtitleEdit. Especially when working with TTML/DFXP where it would
        often have timecodes and stuff mixed in/duplicated.
        Returns the new file path of the Subtitle.
        """
        if not self.path or not self.path.exists():
            raise ValueError("You must download the subtitle track first.")
        if self.codec == codec:
            return self.path
        output_path = self.path.with_suffix(f".{codec.value.lower()}")
        sub_edit_executable = get_binary_path("SubtitleEdit")
        if sub_edit_executable and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
            sub_edit_format = {
                Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
                Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0"
            }.get(codec, codec.name)
            subprocess.run(
                [
                    sub_edit_executable,
                    "/Convert", self.path, sub_edit_format,
                    f"/outputfilename:{output_path.name}",
                    f"/outputfolder:{output_path.parent}",
                    "/encoding:utf8"
                ],
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
        else:
            writer = {
                # pycaption generally only supports these subtitle formats
                Subtitle.Codec.SubRip: pycaption.SRTWriter,
                Subtitle.Codec.TimedTextMarkupLang: pycaption.DFXPWriter,
                Subtitle.Codec.WebVTT: pycaption.WebVTTWriter,
            }.get(codec)
            if writer is None:
                raise NotImplementedError(f"Cannot yet convert {self.codec.name} to {codec.name}.")
            caption_set = self.parse(self.path.read_bytes(), self.codec)
            Subtitle.merge_same_cues(caption_set)
            subtitle_text = writer().write(caption_set)
            output_path.write_text(subtitle_text, encoding="utf8")
        self.swap(output_path)
        self.codec = codec
        return output_path
    @staticmethod
    def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
        if not isinstance(data, bytes):