Rework Chapter System, add `Chapters` class

Overall this commit is to just make working with Chapters a lot less manual and convoluted. The current system has you specify information that can easily be automated, like Chapter order and numbers, which is one of the main changes in this commit.

Note: This is a Breaking change and requires updates to your Service code. The `get_chapters()` method must be updated. For more information see the updated doc-string for `Service.get_chapters()`.

- Added new Chapters class which automatically sorts Chapters by timestamp.
- Chapter class has been significantly reworked to be much more generic. Most operations have been moved to the new Chapters class.
- Chapter objects can no longer specify a Chapter number. The number is now automatically set based on its sorted order in the Chapters object, which is all done automatically.
- Chapter objects can now provide a timestamp in more formats. Timestamps are now verified more efficiently.
- A Chapter object's ID is now a crc32 hash of the timestamp and name instead of basically just its number.
- The Chapters object now also has an ID which is also a crc32 hash of all of the Chapter IDs it holds. This ID can be used for stuff like temp paths.
- `Service.get_chapters()` must now return a Chapters object. The Chapters object may be empty. The Chapters object must hold Chapter objects.
- Using `Chapter {N}` or `Act {N}` Chapters and so on is no longer permitted. You should instead leave the name blank if there's no descriptive name to use for it.
- If you or a user want `Chapter {N}` names, then you can use the config option `chapter_fallback_name` set to `"Chapter {i:02}"`. See the config documentation for more info.
- Do not add a `00:00:00.000` Chapter, at all. This is automatically added for you if there's at least 1 Chapter with a timestamp after `00:00:00.000`.
2024-02-05 01:42:43 +00:00 · 2024-02-05 01:42:43 +00:00 · c06ea4cea8
parent 2affb62ad0
commit c06ea4cea8
8 changed files with 250 additions and 111 deletions
--- a/CONFIG.md
+++ b/CONFIG.md
@ -67,6 +67,21 @@ DSNP:
 default: chromecdm_903_l3
 ```
 ## chapter_fallback_name (str)
 The Chapter Name to use when exporting a Chapter without a Name.
 The default is no fallback name at all and no Chapter name will be set.
 The fallback name can use the following variables in f-string style:
 - `{i}`: The Chapter number starting at 1.
         E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
 - `{j}`: A number starting at 1 that increments any time a Chapter has no title.
         E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".
 These are formatted with Python's `str.format()`, so format specifiers are supported.
 For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
 ## credentials (dict[str, str|list|dict])
 Specify login credentials to use for each Service, and optionally per-profile.
--- a/devine/commands/dl.py
+++ b/devine/commands/dl.py
@ -328,7 +328,7 @@ class dl:
            with console.status("Getting tracks...", spinner="dots"):
                title.tracks.add(service.get_tracks(title), warn_only=True)
-                title.tracks.add(service.get_chapters(title))
+                title.tracks.chapters = service.get_chapters(title)
            # strip SDH subs to non-SDH if no equivalent same-lang non-SDH is available
            # uses a loose check, e.g, wont strip en-US SDH sub if a non-SDH en-GB is available
@ -348,7 +348,6 @@ class dl:
                title.tracks.sort_videos(by_language=v_lang or lang)
                title.tracks.sort_audio(by_language=lang)
                title.tracks.sort_subtitles(by_language=s_lang)
                title.tracks.sort_chapters()
            if list_:
                available_tracks, _ = title.tracks.tree()
--- a/devine/core/config.py
+++ b/devine/core/config.py
@ -39,6 +39,7 @@ class Config:
        self.dl: dict = kwargs.get("dl") or {}
        self.aria2c: dict = kwargs.get("aria2c") or {}
        self.cdm: dict = kwargs.get("cdm") or {}
        self.chapter_fallback_name: str = kwargs.get("chapter_fallback_name") or ""
        self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {}
        self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or []
        self.credentials: dict = kwargs.get("credentials") or {}
--- a/devine/core/service.py
+++ b/devine/core/service.py
@ -17,7 +17,7 @@ from devine.core.console import console
 from devine.core.constants import AnyTrack
 from devine.core.credential import Credential
 from devine.core.titles import Title_T, Titles_T
-from devine.core.tracks import Chapter, Tracks
+from devine.core.tracks import Chapters, Tracks
 from devine.core.utilities import get_ip_info
@ -207,24 +207,22 @@ class Service(metaclass=ABCMeta):
        """
    @abstractmethod
-    def get_chapters(self, title: Title_T) -> list[Chapter]:
+    def get_chapters(self, title: Title_T) -> Chapters:
        """
-        Get Chapter objects of the Title.
+        Get Chapters for the Title.
-        Return a list of Chapter objects. This will be run after get_tracks. If there's anything
+        Parameters:
-        from the get_tracks that may be needed, e.g. "device_id" or a-like, store it in the class
+            title: The current Title from `get_titles` that is being processed.
        via `self` and re-use the value in get_chapters.
-        How it's used is generally the same as get_titles. These are only separated as to reduce
+        You must return a Chapters object containing 0 or more Chapter objects.
        function complexity and keep them focused on simple tasks.
-        You do not need to sort or order the chapters in any way. However, you do need to filter
+        You do not need to set a Chapter number or sort/order the chapters in any way as
-        and alter them as needed by the service. No modification is made after get_chapters is
+        the Chapters class automatically handles all of that for you. If there's no
-        ran. So that means ensure that the Chapter objects returned have consistent Chapter Titles
+        descriptive name for a Chapter then do not set a name at all.
        and Chapter Numbers.
-        :param title: The current `Title` from get_titles that is being executed.
+        You must not set Chapter names to "Chapter {n}" or such. If you (or the user)
-        :return: List of Chapter objects, if available, empty list otherwise.
+        wants "Chapter {n}" style Chapter names (or similar) then they can use the config
        option `chapter_fallback_name`. For example, `"Chapter {i:02}"` for "Chapter 01".
        """
--- a/devine/core/tracks/init.py
+++ b/devine/core/tracks/init.py
@ -1,8 +1,9 @@
 from .audio import Audio
 from .chapter import Chapter
 from .chapters import Chapters
 from .subtitle import Subtitle
 from .track import Track
 from .tracks import Tracks
 from .video import Video
-__all__ = ("Audio", "Chapter", "Subtitle", "Track", "Tracks", "Video")
+__all__ = ("Audio", "Chapter", "Chapters", "Subtitle", "Track", "Tracks", "Video")
--- a/devine/core/tracks/chapter.py
+++ b/devine/core/tracks/chapter.py
@ -1,95 +1,82 @@
 from __future__ import annotations
 import re
 from pathlib import Path
 from typing import Optional, Union
 from zlib import crc32
 TIMESTAMP_FORMAT = re.compile(r"^(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<ms>.\d{3}|)$")
 class Chapter:
-    line_1 = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timecode>[\d\\.]+)$")
+    def __init__(self, timestamp: Union[str, int], name: Optional[str] = None):
-    line_2 = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<title>[\d\\.]+)$")
+        """
        Create a new Chapter with a Timestamp and optional name.
-    def __init__(self, number: int, timecode: str, title: Optional[str] = None):
+        The timestamp may be in the following formats:
-        self.id = f"chapter-{number}"
+        - "HH:MM:SS" string, e.g., `25:05:23`.
-        self.number = number
+        - "HH:MM:SS.mss" string, e.g., `25:05:23.120`.
-        self.timecode = timecode
+        - a timecode integer in milliseconds, e.g., `90323120` is `25:05:23.120`.
-        self.title = title
+        - a timecode float in seconds, e.g., `90323.12` is `25:05:23.120`.
-        if "." not in self.timecode:
+        If you have a timecode integer in seconds, just multiply it by 1000.
-            self.timecode += ".000"
+        If you have a timecode float in milliseconds (no decimal value), just convert
        it to an integer.
        """
        if timestamp is None:
            raise ValueError("The timestamp must be provided.")
-    def __bool__(self) -> bool:
+        if not isinstance(timestamp, (str, int)):
-        return self.number and self.number >= 0 and self.timecode
+            raise TypeError(f"Expected timestamp to be {str} or {int}, not {type(timestamp)}")
        if not isinstance(name, (str, type(None))):
            raise TypeError(f"Expected name to be {str}, not {type(name)}")
        if not isinstance(timestamp, str):
            if isinstance(timestamp, int):  # ms
                hours, remainder = divmod(timestamp, 1000 * 60 * 60)
                minutes, remainder = divmod(remainder, 1000 * 60)
                seconds, ms = divmod(remainder, 1000)
            elif isinstance(timestamp, float):  # seconds.ms
                hours, remainder = divmod(timestamp, 60 * 60)
                minutes, remainder = divmod(remainder, 60)
                seconds, ms = divmod(int(remainder * 1000), 1000)
            else:
                raise TypeError
            timestamp = f"{hours:02}:{minutes:02}:{seconds:02}.{str(ms).zfill(3)[:3]}"
        timestamp_m = TIMESTAMP_FORMAT.match(timestamp)
        if not timestamp_m:
            raise ValueError(f"The timestamp format is invalid: {timestamp}")
        hour, minute, second, ms = timestamp_m.groups()
        if not ms:
            timestamp += ".000"
        self.timestamp = timestamp
        self.name = name
    def __repr__(self) -> str:
-        """
+        return "{name}({items})".format(
-        OGM-based Simple Chapter Format intended for use with MKVToolNix.
+            name=self.__class__.__name__,
-
+            items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
        This format is not officially part of the Matroska spec. This was a format
        designed for OGM tools that MKVToolNix has since re-used. More Information:
        https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple
        """
        return "CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
            num=f"{self.number:02}",
            time=self.timecode,
            name=self.title or ""
        )
    def __str__(self) -> str:
        return " | ".join(filter(bool, [
            "CHP",
-            f"[{self.number:02}]",
+            self.timestamp,
-            self.timecode,
+            self.name
            self.title
        ]))
    @property
    def id(self) -> str:
        """Compute an ID from the Chapter data."""
        checksum = crc32(str(self).encode("utf8"))
        return hex(checksum)
    @property
    def named(self) -> bool:
        """Check if Chapter is named."""
-        return bool(self.title)
+        return bool(self.name)
    @classmethod
    def loads(cls, data: str) -> Chapter:
        """Load chapter data from a string."""
        lines = [x.strip() for x in data.strip().splitlines(keepends=False)]
        if len(lines) > 2:
            return cls.loads("\n".join(lines))
        one, two = lines
        one_m = cls.line_1.match(one)
        two_m = cls.line_2.match(two)
        if not one_m or not two_m:
            raise SyntaxError(f"An unexpected syntax error near:\n{one}\n{two}")
        one_str, timecode = one_m.groups()
        two_str, title = two_m.groups()
        one_num, two_num = int(one_str.lstrip("0")), int(two_str.lstrip("0"))
        if one_num != two_num:
            raise SyntaxError(f"The chapter numbers ({one_num},{two_num}) does not match.")
        if not timecode:
            raise SyntaxError("The timecode is missing.")
        if not title:
            title = None
        return cls(number=one_num, timecode=timecode, title=title)
    @classmethod
    def load(cls, path: Union[Path, str]) -> Chapter:
        """Load chapter data from a file."""
        if isinstance(path, str):
            path = Path(path)
        return cls.loads(path.read_text(encoding="utf8"))
    def dumps(self) -> str:
        """Return chapter data as a string."""
        return repr(self)
    def dump(self, path: Union[Path, str]) -> int:
        """Write chapter data to a file."""
        if isinstance(path, str):
            path = Path(path)
        return path.write_text(self.dumps(), encoding="utf8")
 __all__ = ("Chapter",)
--- a/devine/core/tracks/chapters.py
+++ b/devine/core/tracks/chapters.py
@ -0,0 +1,156 @@
 from __future__ import annotations
 import re
 from abc import ABC
 from pathlib import Path
 from typing import Any, Iterable, Optional, Union
 from zlib import crc32
 from sortedcontainers import SortedKeyList
 from devine.core.tracks import Chapter
 # OGM Simple Chapter format stores each Chapter as a pair of lines, e.g.:
 #   CHAPTER01=00:00:00.000
 #   CHAPTER01NAME=Intro
 # The timestamp group must accept ":" as well as digits and "." or no real
 # timestamp would ever match. The name group accepts any text, including an
 # empty string, because names are free-form and may be left blank.
 OGM_SIMPLE_LINE_1_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timestamp>[\d:.]+)$")
 OGM_SIMPLE_LINE_2_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<name>.*)$")
 class Chapters(SortedKeyList, ABC):
     """
     A collection of Chapter objects that is kept sorted by timestamp.

     Chapter numbers are never stored; they are derived from each Chapter's
     position in the sorted collection whenever the data is rendered.
     """
     def __init__(self, iterable: Optional[Iterable] = None):
         """
         Create a new Chapters collection.

         Parameters:
             iterable: Optional Chapter objects to add. Each one goes through
                 `add()`, so the same validation applies to every item.
         """
         super().__init__(key=lambda x: x.timestamp or 0)
         for chapter in iterable or []:
             self.add(chapter)
     def __repr__(self) -> str:
         return "{name}({items})".format(
             name=self.__class__.__name__,
             items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
         )
     def __str__(self) -> str:
         """Return one `CHP | [NN] | timestamp | name` line per Chapter, numbered from 1."""
         return "\n".join([
             " | ".join(filter(bool, [
                 "CHP",
                 f"[{i:02}]",
                 chapter.timestamp,
                 chapter.name
             ]))
             for i, chapter in enumerate(self, start=1)
         ])
     @classmethod
     def loads(cls, data: str) -> Chapters:
         """
         Load chapter data in OGM-based Simple Chapter format from a string.

         Blank lines are ignored. The remaining lines are read in pairs of
         `CHAPTER{n}={timestamp}` followed by `CHAPTER{n}NAME={name}`.

         Raises:
             ValueError: If the number of non-blank lines is odd. `add()` also
                 raises this if two Chapters share a timestamp.
             SyntaxError: If a line does not match the expected format or the
                 two lines of a pair carry different chapter numbers.
         """
         stripped = (line.strip() for line in data.splitlines(keepends=False))
         # Drop blank lines so that spacing between entries cannot shift the
         # line pairing below.
         lines = [line for line in stripped if line]
         if len(lines) % 2 != 0:
             raise ValueError("The number of chapter lines must be even.")
         chapters = []
         for line_1, line_2 in zip(lines[::2], lines[1::2]):
             line_1_match = OGM_SIMPLE_LINE_1_FORMAT.match(line_1)
             if not line_1_match:
                 raise SyntaxError(f"An unexpected syntax error occurred on: {line_1}")
             line_2_match = OGM_SIMPLE_LINE_2_FORMAT.match(line_2)
             if not line_2_match:
                 raise SyntaxError(f"An unexpected syntax error occurred on: {line_2}")
             line_1_number, timestamp = line_1_match.groups()
             line_2_number, name = line_2_match.groups()
             # Compare numerically so that differing zero-padding ("1" vs "01")
             # is not reported as a mismatch.
             if int(line_1_number) != int(line_2_number):
                 raise SyntaxError(
                     f"The chapter numbers {line_1_number} and {line_2_number} do not match on:\n{line_1}\n{line_2}")
             # An empty name means the Chapter is unnamed.
             chapters.append(Chapter(timestamp, name or None))
         return cls(chapters)
     @classmethod
     def load(cls, path: Union[Path, str]) -> Chapters:
         """Load chapter data in OGM-based Simple Chapter format from a UTF-8 file."""
         if isinstance(path, str):
             path = Path(path)
         return cls.loads(path.read_text(encoding="utf8"))
     def dumps(self, fallback_name: str = "") -> str:
         """
         Return chapter data in OGM-based Simple Chapter format.
         https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple

         Parameters:
             fallback_name: Name used for Chapters without a Name set.

         The fallback name is rendered with `str.format()` and can use the
         following variables:
         - {i}: The Chapter number starting at 1.
                E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
         - {j}: A number starting at 1 that increments any time a Chapter has no name.
                E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".
         Format specifiers are supported.
         For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
         """
         chapters = []
         j = 0
         for i, chapter in enumerate(self, start=1):
             if not chapter.name:
                 j += 1
             chapters.append("CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
                 num=f"{i:02}",
                 time=chapter.timestamp,
                 # The fallback is only rendered when the Chapter has no name.
                 name=chapter.name or fallback_name.format(
                     i=i,
                     j=j
                 )
             ))
         return "\n".join(chapters)
     def dump(self, path: Union[Path, str], *args: Any, **kwargs: Any) -> int:
         """
         Write chapter data in OGM-based Simple Chapter format to a file.

         Parameters:
             path: The file path to write the Chapter data to, overwriting
                 any existing data. Missing parent directories are created.

         Extra arguments are passed on to `Chapters.dumps`; see it for more
         parameter documentation.

         Returns the value returned by `Path.write_text`.
         """
         if isinstance(path, str):
             path = Path(path)
         path.parent.mkdir(parents=True, exist_ok=True)
         ogm_text = self.dumps(*args, **kwargs)
         return path.write_text(ogm_text, encoding="utf8")
     def add(self, value: Chapter) -> None:
         """
         Add a Chapter, keeping the collection sorted by timestamp.

         If no Chapter at `00:00:00.000` exists after the insert, one without
         a name is added automatically.

         Raises:
             TypeError: If the value is not a Chapter object.
             ValueError: If a Chapter with the same timestamp already exists.
         """
         if not isinstance(value, Chapter):
             raise TypeError(f"Can only add {Chapter} objects, not {type(value)}")
         if any(chapter.timestamp == value.timestamp for chapter in self):
             raise ValueError(f"A Chapter with the Timestamp {value.timestamp} already exists")
         super().add(value)
         # The nested add() call stops after one level: once the zero Chapter
         # has been inserted this check no longer passes.
         if not any(chapter.timestamp == "00:00:00.000" for chapter in self):
             self.add(Chapter(0))
     @property
     def id(self) -> str:
         """Compute an ID as the hex crc32 of all Chapter IDs joined by newlines."""
         checksum = crc32("\n".join([
             chapter.id
             for chapter in self
         ]).encode("utf8"))
         return hex(checksum)
 __all__ = ("Chapters", "Chapter")
--- a/devine/core/tracks/tracks.py
+++ b/devine/core/tracks/tracks.py
@ -6,7 +6,6 @@ from functools import partial
 from pathlib import Path
 from typing import Callable, Iterator, Optional, Sequence, Union
 from Cryptodome.Random import get_random_bytes
 from langcodes import Language, closest_supported_match
 from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn
 from rich.table import Table
@ -16,7 +15,7 @@ from devine.core.config import config
 from devine.core.console import console
 from devine.core.constants import LANGUAGE_MAX_DISTANCE, LANGUAGE_MUX_MAP, AnyTrack, TrackT
 from devine.core.tracks.audio import Audio
-from devine.core.tracks.chapter import Chapter
+from devine.core.tracks.chapters import Chapter, Chapters
 from devine.core.tracks.subtitle import Subtitle
 from devine.core.tracks.track import Track
 from devine.core.tracks.video import Video
@ -41,7 +40,7 @@ class Tracks:
        self.videos: list[Video] = []
        self.audio: list[Audio] = []
        self.subtitles: list[Subtitle] = []
-        self.chapters: list[Chapter] = []
+        self.chapters = Chapters()
        if args:
            self.add(args)
@ -137,7 +136,7 @@ class Tracks:
    def add(
        self,
-        tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter]], Track, Chapter],
+        tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter, Chapters]], Track, Chapter, Chapters],
        warn_only: bool = False
    ) -> None:
        """Add a provided track to its appropriate array and ensuring it's not a duplicate."""
@ -166,7 +165,7 @@ class Tracks:
            elif isinstance(track, Subtitle):
                self.subtitles.append(track)
            elif isinstance(track, Chapter):
-                self.chapters.append(track)
+                self.chapters.add(track)
            else:
                raise ValueError("Track type was not set or is invalid.")
@ -243,13 +242,6 @@ class Tracks:
                continue
            self.subtitles.sort(key=lambda x: is_close_match(language, [x.language]), reverse=True)
    def sort_chapters(self) -> None:
        """Sort chapter tracks by chapter number."""
        if not self.chapters:
            return
        # number
        self.chapters.sort(key=lambda x: x.number)
    def select_video(self, x: Callable[[Video], bool]) -> None:
        self.videos = list(filter(x, self.videos))
@ -289,16 +281,6 @@ class Tracks:
            ][:per_language or None])
        return selected
    def export_chapters(self, to_file: Optional[Union[Path, str]] = None) -> str:
        """Export all chapters in order to a string or file."""
        self.sort_chapters()
        data = "\n".join(map(repr, self.chapters))
        if to_file:
            to_file = Path(to_file)
            to_file.parent.mkdir(parents=True, exist_ok=True)
            to_file.write_text(data, encoding="utf8")
        return data
    def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]:
        """
        Multiplex all the Tracks into a Matroska Container file.
@ -373,9 +355,9 @@ class Tracks:
        if self.chapters:
            chapters_path = config.directories.temp / config.filenames.chapters.format(
                title=sanitize_filename(title),
-                random=get_random_bytes(16).hex()
+                random=self.chapters.id
            )
-            self.export_chapters(chapters_path)
+            self.chapters.dump(chapters_path, fallback_name=config.chapter_fallback_name)
            cl.extend(["--chapter-charset", "UTF-8", "--chapters", str(chapters_path)])
        else:
            chapters_path = None