diff --git a/CONFIG.md b/CONFIG.md index b5f5cb6..318f9eb 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -67,6 +67,21 @@ DSNP: default: chromecdm_903_l3 ``` +## chapter_fallback_name (str) + +The Chapter Name to use when exporting a Chapter without a Name. +By default there is no fallback name, so no Chapter name will be set. + +The fallback name can use the following variables in f-string style: + +- `{i}`: The Chapter number starting at 1. + E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3". +- `{j}`: A number starting at 1 that increments any time a Chapter has no name. + E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2". + +These are formatted with `str.format()`, so format directives are supported. +For example, `"Chapter {i:02}"` will result in `"Chapter 01"`. + ## credentials (dict[str, str|list|dict]) Specify login credentials to use for each Service, and optionally per-profile. diff --git a/devine/commands/dl.py b/devine/commands/dl.py index 1827a06..0790882 100644 --- a/devine/commands/dl.py +++ b/devine/commands/dl.py @@ -328,7 +328,7 @@ class dl: with console.status("Getting tracks...", spinner="dots"): title.tracks.add(service.get_tracks(title), warn_only=True) - title.tracks.add(service.get_chapters(title)) + title.tracks.chapters = service.get_chapters(title) # strip SDH subs to non-SDH if no equivalent same-lang non-SDH is available # uses a loose check, e.g, wont strip en-US SDH sub if a non-SDH en-GB is available @@ -348,7 +348,6 @@ class dl: title.tracks.sort_videos(by_language=v_lang or lang) title.tracks.sort_audio(by_language=lang) title.tracks.sort_subtitles(by_language=s_lang) - title.tracks.sort_chapters() if list_: available_tracks, _ = title.tracks.tree() diff --git a/devine/core/config.py b/devine/core/config.py index 226a3a2..2c00756 100644 --- a/devine/core/config.py +++ b/devine/core/config.py @@ -39,6 +39,7 @@ class Config: self.dl: dict = kwargs.get("dl") or {} self.aria2c: dict = kwargs.get("aria2c") or {} self.cdm: dict = 
kwargs.get("cdm") or {} + self.chapter_fallback_name: str = kwargs.get("chapter_fallback_name") or "" self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {} self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or [] self.credentials: dict = kwargs.get("credentials") or {} diff --git a/devine/core/service.py b/devine/core/service.py index 335b458..b021d91 100644 --- a/devine/core/service.py +++ b/devine/core/service.py @@ -17,7 +17,7 @@ from devine.core.console import console from devine.core.constants import AnyTrack from devine.core.credential import Credential from devine.core.titles import Title_T, Titles_T -from devine.core.tracks import Chapter, Tracks +from devine.core.tracks import Chapters, Tracks from devine.core.utilities import get_ip_info @@ -207,24 +207,22 @@ class Service(metaclass=ABCMeta): """ @abstractmethod - def get_chapters(self, title: Title_T) -> list[Chapter]: + def get_chapters(self, title: Title_T) -> Chapters: """ - Get Chapter objects of the Title. + Get Chapters for the Title. - Return a list of Chapter objects. This will be run after get_tracks. If there's anything - from the get_tracks that may be needed, e.g. "device_id" or a-like, store it in the class - via `self` and re-use the value in get_chapters. + Parameters: + title: The current Title from `get_titles` that is being processed. - How it's used is generally the same as get_titles. These are only separated as to reduce - function complexity and keep them focused on simple tasks. + You must return a Chapters object containing 0 or more Chapter objects. - You do not need to sort or order the chapters in any way. However, you do need to filter - and alter them as needed by the service. No modification is made after get_chapters is - ran. So that means ensure that the Chapter objects returned have consistent Chapter Titles - and Chapter Numbers. 
+ You do not need to set a Chapter number or sort/order the chapters in any way as + the Chapters class automatically handles all of that for you. If there's no + descriptive name for a Chapter then do not set a name at all. - :param title: The current `Title` from get_titles that is being executed. - :return: List of Chapter objects, if available, empty list otherwise. + You must not set Chapter names to "Chapter {n}" or such. If you (or the user) + want "Chapter {n}" style Chapter names (or similar) then they can use the config + option `chapter_fallback_name`. For example, `"Chapter {i:02}"` for "Chapter 01". """ diff --git a/devine/core/tracks/__init__.py b/devine/core/tracks/__init__.py index 071a99c..bcb1afd 100644 --- a/devine/core/tracks/__init__.py +++ b/devine/core/tracks/__init__.py @@ -1,8 +1,9 @@ from .audio import Audio from .chapter import Chapter +from .chapters import Chapters from .subtitle import Subtitle from .track import Track from .tracks import Tracks from .video import Video -__all__ = ("Audio", "Chapter", "Subtitle", "Track", "Tracks", "Video") +__all__ = ("Audio", "Chapter", "Chapters", "Subtitle", "Track", "Tracks", "Video") diff --git a/devine/core/tracks/chapter.py b/devine/core/tracks/chapter.py index cb929e5..4ceefc4 100644 --- a/devine/core/tracks/chapter.py +++ b/devine/core/tracks/chapter.py @@ -1,95 +1,82 @@ from __future__ import annotations import re -from pathlib import Path from typing import Optional, Union +from zlib import crc32 + +TIMESTAMP_FORMAT = re.compile(r"^(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<ms>.\d{3}|)$") class Chapter: - line_1 = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timecode>[\d\\.]+)$") - line_2 = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<title>[\d\\.]+)$") + def __init__(self, timestamp: Union[str, int], name: Optional[str] = None): + """ + Create a new Chapter with a Timestamp and optional name. 
- def __init__(self, number: int, timecode: str, title: Optional[str] = None): - self.id = f"chapter-{number}" - self.number = number - self.timecode = timecode - self.title = title + The timestamp may be in the following formats: + - "HH:MM:SS" string, e.g., `25:05:23`. + - "HH:MM:SS.mss" string, e.g., `25:05:23.120`. + - a timecode integer in milliseconds, e.g., `90323120` is `25:05:23.120`. + - a timecode float in seconds, e.g., `90323.12` is `25:05:23.120`. - if "." not in self.timecode: - self.timecode += ".000" + If you have a timecode integer in seconds, just multiply it by 1000. + If you have a timecode float in milliseconds (no decimal value), just convert + it to an integer. + """ + if timestamp is None: + raise ValueError("The timestamp must be provided.") - def __bool__(self) -> bool: - return self.number and self.number >= 0 and self.timecode + if not isinstance(timestamp, (str, int)): + raise TypeError(f"Expected timestamp to be {str} or {int}, not {type(timestamp)}") + if not isinstance(name, (str, type(None))): + raise TypeError(f"Expected name to be {str}, not {type(name)}") + + if not isinstance(timestamp, str): + if isinstance(timestamp, int): # ms + hours, remainder = divmod(timestamp, 1000 * 60 * 60) + minutes, remainder = divmod(remainder, 1000 * 60) + seconds, ms = divmod(remainder, 1000) + elif isinstance(timestamp, float): # seconds.ms + hours, remainder = divmod(timestamp, 60 * 60) + minutes, remainder = divmod(remainder, 60) + seconds, ms = divmod(int(remainder * 1000), 1000) + else: + raise TypeError + timestamp = f"{hours:02}:{minutes:02}:{seconds:02}.{str(ms).zfill(3)[:3]}" + + timestamp_m = TIMESTAMP_FORMAT.match(timestamp) + if not timestamp_m: + raise ValueError(f"The timestamp format is invalid: {timestamp}") + + hour, minute, second, ms = timestamp_m.groups() + if not ms: + timestamp += ".000" + + self.timestamp = timestamp + self.name = name def __repr__(self) -> str: - """ - OGM-based Simple Chapter Format intended for use with 
MKVToolNix. - - This format is not officially part of the Matroska spec. This was a format - designed for OGM tools that MKVToolNix has since re-used. More Information: - https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple - """ - return "CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format( - num=f"{self.number:02}", - time=self.timecode, - name=self.title or "" + return "{name}({items})".format( + name=self.__class__.__name__, + items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()]) ) def __str__(self) -> str: return " | ".join(filter(bool, [ "CHP", - f"[{self.number:02}]", - self.timecode, - self.title + self.timestamp, + self.name ])) + @property + def id(self) -> str: + """Compute an ID from the Chapter data.""" + checksum = crc32(str(self).encode("utf8")) + return hex(checksum) + @property def named(self) -> bool: """Check if Chapter is named.""" - return bool(self.title) - - @classmethod - def loads(cls, data: str) -> Chapter: - """Load chapter data from a string.""" - lines = [x.strip() for x in data.strip().splitlines(keepends=False)] - if len(lines) > 2: - return cls.loads("\n".join(lines)) - one, two = lines - - one_m = cls.line_1.match(one) - two_m = cls.line_2.match(two) - if not one_m or not two_m: - raise SyntaxError(f"An unexpected syntax error near:\n{one}\n{two}") - - one_str, timecode = one_m.groups() - two_str, title = two_m.groups() - one_num, two_num = int(one_str.lstrip("0")), int(two_str.lstrip("0")) - - if one_num != two_num: - raise SyntaxError(f"The chapter numbers ({one_num},{two_num}) does not match.") - if not timecode: - raise SyntaxError("The timecode is missing.") - if not title: - title = None - - return cls(number=one_num, timecode=timecode, title=title) - - @classmethod - def load(cls, path: Union[Path, str]) -> Chapter: - """Load chapter data from a file.""" - if isinstance(path, str): - path = Path(path) - return cls.loads(path.read_text(encoding="utf8")) - - def dumps(self) -> str: - """Return 
chapter data as a string.""" - return repr(self) - - def dump(self, path: Union[Path, str]) -> int: - """Write chapter data to a file.""" - if isinstance(path, str): - path = Path(path) - return path.write_text(self.dumps(), encoding="utf8") + return bool(self.name) __all__ = ("Chapter",) diff --git a/devine/core/tracks/chapters.py b/devine/core/tracks/chapters.py new file mode 100644 index 0000000..9b2a32a --- /dev/null +++ b/devine/core/tracks/chapters.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import re +from abc import ABC +from pathlib import Path +from typing import Any, Iterable, Optional, Union +from zlib import crc32 + +from sortedcontainers import SortedKeyList + +from devine.core.tracks import Chapter + +OGM_SIMPLE_LINE_1_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timestamp>[\d\\.]+)$") +OGM_SIMPLE_LINE_2_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<name>[\d\\.]+)$") + + +class Chapters(SortedKeyList, ABC): + def __init__(self, iterable: Optional[Iterable] = None): + super().__init__(key=lambda x: x.timestamp or 0) + for chapter in iterable or []: + self.add(chapter) + + def __repr__(self) -> str: + return "{name}({items})".format( + name=self.__class__.__name__, + items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()]) + ) + + def __str__(self) -> str: + return "\n".join([ + " | ".join(filter(bool, [ + "CHP", + f"[{i:02}]", + chapter.timestamp, + chapter.name + ])) + for i, chapter in enumerate(self, start=1) + ]) + + @classmethod + def loads(cls, data: str) -> Chapters: + """Load chapter data from a string.""" + lines = [ + line.strip() + for line in data.strip().splitlines(keepends=False) + ] + + if len(lines) % 2 != 0: + raise ValueError("The number of chapter lines must be even.") + + chapters = [] + + for line_1, line_2 in zip(lines[::2], lines[1::2]): + line_1_match = OGM_SIMPLE_LINE_1_FORMAT.match(line_1) + if not line_1_match: + raise SyntaxError(f"An unexpected syntax error occurred on: {line_1}") 
+ line_2_match = OGM_SIMPLE_LINE_2_FORMAT.match(line_2) + if not line_2_match: + raise SyntaxError(f"An unexpected syntax error occurred on: {line_2}") + + line_1_number, timestamp = line_1_match.groups() + line_2_number, name = line_2_match.groups() + + if line_1_number != line_2_number: + raise SyntaxError( + f"The chapter numbers {line_1_number} and {line_2_number} do not match on:\n{line_1}\n{line_2}") + + if not timestamp: + raise SyntaxError(f"The timestamp is missing on: {line_1}") + + chapters.append(Chapter(timestamp, name)) + + return cls(chapters) + + @classmethod + def load(cls, path: Union[Path, str]) -> Chapters: + """Load chapter data from a file.""" + if isinstance(path, str): + path = Path(path) + return cls.loads(path.read_text(encoding="utf8")) + + def dumps(self, fallback_name: str = "") -> str: + """ + Return chapter data in OGM-based Simple Chapter format. + https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple + + Parameters: + fallback_name: Name used for Chapters without a Name set. + + The fallback name can use the following variables in f-string style: + + - {i}: The Chapter number starting at 1. + E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3". + - {j}: A number starting at 1 that increments any time a Chapter has no name. + E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2". + + These are formatted with `str.format()`, so format directives are supported. + For example, `"Chapter {i:02}"` will result in `"Chapter 01"`. + """ + chapters = [] + j = 0 + + for i, chapter in enumerate(self, start=1): + if not chapter.name: + j += 1 + chapters.append("CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format( + num=f"{i:02}", + time=chapter.timestamp, + name=chapter.name or fallback_name.format( + i=i, + j=j + ) + )) + + return "\n".join(chapters) + + def dump(self, path: Union[Path, str], *args: Any, **kwargs: Any) -> int: + """ + Write chapter data in OGM-based Simple Chapter format to a file. 
+ + Parameters: + path: The file path to write the Chapter data to, overwriting + any existing data. + + See `Chapters.dumps` for more parameter documentation. + """ + if isinstance(path, str): + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + + ogm_text = self.dumps(*args, **kwargs) + return path.write_text(ogm_text, encoding="utf8") + + def add(self, value: Chapter) -> None: + if not isinstance(value, Chapter): + raise TypeError(f"Can only add {Chapter} objects, not {type(value)}") + + if any(chapter.timestamp == value.timestamp for chapter in self): + raise ValueError(f"A Chapter with the Timestamp {value.timestamp} already exists") + + super().add(value) + + if not any(chapter.timestamp == "00:00:00.000" for chapter in self): + self.add(Chapter(0)) + + @property + def id(self) -> str: + """Compute an ID from the Chapter data.""" + checksum = crc32("\n".join([ + chapter.id + for chapter in self + ]).encode("utf8")) + return hex(checksum) + + +__all__ = ("Chapters", "Chapter") diff --git a/devine/core/tracks/tracks.py b/devine/core/tracks/tracks.py index cc53a40..65c945c 100644 --- a/devine/core/tracks/tracks.py +++ b/devine/core/tracks/tracks.py @@ -6,7 +6,6 @@ from functools import partial from pathlib import Path from typing import Callable, Iterator, Optional, Sequence, Union -from Cryptodome.Random import get_random_bytes from langcodes import Language, closest_supported_match from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn from rich.table import Table @@ -16,7 +15,7 @@ from devine.core.config import config from devine.core.console import console from devine.core.constants import LANGUAGE_MAX_DISTANCE, LANGUAGE_MUX_MAP, AnyTrack, TrackT from devine.core.tracks.audio import Audio -from devine.core.tracks.chapter import Chapter +from devine.core.tracks.chapters import Chapter, Chapters from devine.core.tracks.subtitle import Subtitle from devine.core.tracks.track import Track from 
devine.core.tracks.video import Video @@ -41,7 +40,7 @@ class Tracks: self.videos: list[Video] = [] self.audio: list[Audio] = [] self.subtitles: list[Subtitle] = [] - self.chapters: list[Chapter] = [] + self.chapters = Chapters() if args: self.add(args) @@ -137,7 +136,7 @@ class Tracks: def add( self, - tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter]], Track, Chapter], + tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter, Chapters]], Track, Chapter, Chapters], warn_only: bool = False ) -> None: """Add a provided track to its appropriate array and ensuring it's not a duplicate.""" @@ -166,7 +165,7 @@ class Tracks: elif isinstance(track, Subtitle): self.subtitles.append(track) elif isinstance(track, Chapter): - self.chapters.append(track) + self.chapters.add(track) else: raise ValueError("Track type was not set or is invalid.") @@ -243,13 +242,6 @@ class Tracks: continue self.subtitles.sort(key=lambda x: is_close_match(language, [x.language]), reverse=True) - def sort_chapters(self) -> None: - """Sort chapter tracks by chapter number.""" - if not self.chapters: - return - # number - self.chapters.sort(key=lambda x: x.number) - def select_video(self, x: Callable[[Video], bool]) -> None: self.videos = list(filter(x, self.videos)) @@ -289,16 +281,6 @@ class Tracks: ][:per_language or None]) return selected - def export_chapters(self, to_file: Optional[Union[Path, str]] = None) -> str: - """Export all chapters in order to a string or file.""" - self.sort_chapters() - data = "\n".join(map(repr, self.chapters)) - if to_file: - to_file = Path(to_file) - to_file.parent.mkdir(parents=True, exist_ok=True) - to_file.write_text(data, encoding="utf8") - return data - def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]: """ Multiplex all the Tracks into a Matroska Container file. 
@@ -373,9 +355,9 @@ class Tracks: if self.chapters: chapters_path = config.directories.temp / config.filenames.chapters.format( title=sanitize_filename(title), - random=get_random_bytes(16).hex() + random=self.chapters.id ) - self.export_chapters(chapters_path) + self.chapters.dump(chapters_path, fallback_name=config.chapter_fallback_name) cl.extend(["--chapter-charset", "UTF-8", "--chapters", str(chapters_path)]) else: chapters_path = None