Rework Chapter System, add `Chapters` class

Overall this commit is to just make working with Chapters a lot less manual and convoluted. The current system has you specify information that can easily be automated, like Chapter order and numbers, which is one of the main changes in this commit.

Note: This is a Breaking change and requires updates to your Service code. The `get_chapters()` method must be updated. For more information see the updated doc-string for `Service.get_chapters()`.

- Added new Chapters class which automatically sorts Chapters by timestamp.
- Chapter class has been significantly reworked to be much more generic. Most operations have been moved to the new Chapters class.
- Chapter objects can no longer specify a Chapter number. The number is now set automatically based on its sorted order in the Chapters object.
- Chapter objects can now provide a timestamp in more formats. Timestamps are now verified more efficiently.
- A Chapter object's ID is now a crc32 hash of its timestamp and name instead of being derived from its number.
- The Chapters object now also has an ID which is also a crc32 hash of all of the Chapter IDs it holds. This ID can be used for stuff like temp paths.
- `Service.get_chapters()` must now return a Chapters object. The Chapters object may be empty. The Chapters object must hold Chapter objects.
- Using `Chapter {N}` or `Act {N}` Chapters and so on is no longer permitted. You should instead leave the name blank if there's no descriptive name to use for it.
- If you or a user wants `Chapter {N}` names, then they can use the config option `chapter_fallback_name` set to `"Chapter {i:02}"`. See the config documentation for more info.
- Do not add a `00:00:00.000` Chapter, at all. This is automatically added for you if there's at least 1 Chapter with a timestamp after `00:00:00.000`.
This commit is contained in:
rlaphoenix 2024-02-05 01:42:43 +00:00
parent 2affb62ad0
commit c06ea4cea8
8 changed files with 250 additions and 111 deletions

View File

@ -67,6 +67,21 @@ DSNP:
default: chromecdm_903_l3 default: chromecdm_903_l3
``` ```
## chapter_fallback_name (str)
The Chapter Name to use when exporting a Chapter without a Name.
The default is no fallback name at all and no Chapter name will be set.
The fallback name can use the following variables in f-string style:
- `{i}`: The Chapter number starting at 1.
E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
- `{j}`: A number starting at 1 that increments any time a Chapter has no title.
E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".
These are formatted with `str.format()`, so format-spec directives are supported.
For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
## credentials (dict[str, str|list|dict]) ## credentials (dict[str, str|list|dict])
Specify login credentials to use for each Service, and optionally per-profile. Specify login credentials to use for each Service, and optionally per-profile.

View File

@ -328,7 +328,7 @@ class dl:
with console.status("Getting tracks...", spinner="dots"): with console.status("Getting tracks...", spinner="dots"):
title.tracks.add(service.get_tracks(title), warn_only=True) title.tracks.add(service.get_tracks(title), warn_only=True)
title.tracks.add(service.get_chapters(title)) title.tracks.chapters = service.get_chapters(title)
# strip SDH subs to non-SDH if no equivalent same-lang non-SDH is available # strip SDH subs to non-SDH if no equivalent same-lang non-SDH is available
# uses a loose check, e.g, wont strip en-US SDH sub if a non-SDH en-GB is available # uses a loose check, e.g, wont strip en-US SDH sub if a non-SDH en-GB is available
@ -348,7 +348,6 @@ class dl:
title.tracks.sort_videos(by_language=v_lang or lang) title.tracks.sort_videos(by_language=v_lang or lang)
title.tracks.sort_audio(by_language=lang) title.tracks.sort_audio(by_language=lang)
title.tracks.sort_subtitles(by_language=s_lang) title.tracks.sort_subtitles(by_language=s_lang)
title.tracks.sort_chapters()
if list_: if list_:
available_tracks, _ = title.tracks.tree() available_tracks, _ = title.tracks.tree()

View File

@ -39,6 +39,7 @@ class Config:
self.dl: dict = kwargs.get("dl") or {} self.dl: dict = kwargs.get("dl") or {}
self.aria2c: dict = kwargs.get("aria2c") or {} self.aria2c: dict = kwargs.get("aria2c") or {}
self.cdm: dict = kwargs.get("cdm") or {} self.cdm: dict = kwargs.get("cdm") or {}
self.chapter_fallback_name: str = kwargs.get("chapter_fallback_name") or ""
self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {} self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {}
self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or [] self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or []
self.credentials: dict = kwargs.get("credentials") or {} self.credentials: dict = kwargs.get("credentials") or {}

View File

@ -17,7 +17,7 @@ from devine.core.console import console
from devine.core.constants import AnyTrack from devine.core.constants import AnyTrack
from devine.core.credential import Credential from devine.core.credential import Credential
from devine.core.titles import Title_T, Titles_T from devine.core.titles import Title_T, Titles_T
from devine.core.tracks import Chapter, Tracks from devine.core.tracks import Chapters, Tracks
from devine.core.utilities import get_ip_info from devine.core.utilities import get_ip_info
@ -207,24 +207,22 @@ class Service(metaclass=ABCMeta):
""" """
@abstractmethod @abstractmethod
def get_chapters(self, title: Title_T) -> list[Chapter]: def get_chapters(self, title: Title_T) -> Chapters:
""" """
Get Chapter objects of the Title. Get Chapters for the Title.
Return a list of Chapter objects. This will be run after get_tracks. If there's anything Parameters:
from the get_tracks that may be needed, e.g. "device_id" or a-like, store it in the class title: The current Title from `get_titles` that is being processed.
via `self` and re-use the value in get_chapters.
How it's used is generally the same as get_titles. These are only separated as to reduce You must return a Chapters object containing 0 or more Chapter objects.
function complexity and keep them focused on simple tasks.
You do not need to sort or order the chapters in any way. However, you do need to filter You do not need to set a Chapter number or sort/order the chapters in any way as
and alter them as needed by the service. No modification is made after get_chapters is the Chapters class automatically handles all of that for you. If there's no
ran. So that means ensure that the Chapter objects returned have consistent Chapter Titles descriptive name for a Chapter then do not set a name at all.
and Chapter Numbers.
:param title: The current `Title` from get_titles that is being executed. You must not set Chapter names to "Chapter {n}" or such. If you (or the user)
:return: List of Chapter objects, if available, empty list otherwise. wants "Chapter {n}" style Chapter names (or similar) then they can use the config
option `chapter_fallback_name`. For example, `"Chapter {i:02}"` for "Chapter 01".
""" """

View File

@ -1,8 +1,9 @@
from .audio import Audio from .audio import Audio
from .chapter import Chapter from .chapter import Chapter
from .chapters import Chapters
from .subtitle import Subtitle from .subtitle import Subtitle
from .track import Track from .track import Track
from .tracks import Tracks from .tracks import Tracks
from .video import Video from .video import Video
__all__ = ("Audio", "Chapter", "Subtitle", "Track", "Tracks", "Video") __all__ = ("Audio", "Chapter", "Chapters", "Subtitle", "Track", "Tracks", "Video")

View File

@ -1,95 +1,82 @@
from __future__ import annotations from __future__ import annotations
import re import re
from pathlib import Path
from typing import Optional, Union from typing import Optional, Union
from zlib import crc32
# Accepts "HH:MM:SS" with an optional ".mss" suffix. The dot is escaped so that only a
# literal "." may separate seconds from milliseconds; the `ms` group keeps the leading
# dot and is empty when no milliseconds are given.
TIMESTAMP_FORMAT = re.compile(r"^(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<ms>\.\d{3}|)$")
class Chapter: class Chapter:
line_1 = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timecode>[\d\\.]+)$") def __init__(self, timestamp: Union[str, int], name: Optional[str] = None):
line_2 = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<title>[\d\\.]+)$") """
Create a new Chapter with a Timestamp and optional name.
def __init__(self, number: int, timecode: str, title: Optional[str] = None): The timestamp may be in the following formats:
self.id = f"chapter-{number}" - "HH:MM:SS" string, e.g., `25:05:23`.
self.number = number - "HH:MM:SS.mss" string, e.g., `25:05:23.120`.
self.timecode = timecode - a timecode integer in milliseconds, e.g., `90323120` is `25:05:23.120`.
self.title = title - a timecode float in seconds, e.g., `90323.12` is `25:05:23.120`.
if "." not in self.timecode: If you have a timecode integer in seconds, just multiply it by 1000.
self.timecode += ".000" If you have a timecode float in milliseconds (no decimal value), just convert
it to an integer.
"""
if timestamp is None:
raise ValueError("The timestamp must be provided.")
def __bool__(self) -> bool: if not isinstance(timestamp, (str, int)):
return self.number and self.number >= 0 and self.timecode raise TypeError(f"Expected timestamp to be {str} or {int}, not {type(timestamp)}")
if not isinstance(name, (str, type(None))):
raise TypeError(f"Expected name to be {str}, not {type(name)}")
if not isinstance(timestamp, str):
if isinstance(timestamp, int): # ms
hours, remainder = divmod(timestamp, 1000 * 60 * 60)
minutes, remainder = divmod(remainder, 1000 * 60)
seconds, ms = divmod(remainder, 1000)
elif isinstance(timestamp, float): # seconds.ms
hours, remainder = divmod(timestamp, 60 * 60)
minutes, remainder = divmod(remainder, 60)
seconds, ms = divmod(int(remainder * 1000), 1000)
else:
raise TypeError
timestamp = f"{hours:02}:{minutes:02}:{seconds:02}.{str(ms).zfill(3)[:3]}"
timestamp_m = TIMESTAMP_FORMAT.match(timestamp)
if not timestamp_m:
raise ValueError(f"The timestamp format is invalid: {timestamp}")
hour, minute, second, ms = timestamp_m.groups()
if not ms:
timestamp += ".000"
self.timestamp = timestamp
self.name = name
def __repr__(self) -> str: def __repr__(self) -> str:
""" return "{name}({items})".format(
OGM-based Simple Chapter Format intended for use with MKVToolNix. name=self.__class__.__name__,
items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
This format is not officially part of the Matroska spec. This was a format
designed for OGM tools that MKVToolNix has since re-used. More Information:
https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple
"""
return "CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
num=f"{self.number:02}",
time=self.timecode,
name=self.title or ""
) )
def __str__(self) -> str: def __str__(self) -> str:
return " | ".join(filter(bool, [ return " | ".join(filter(bool, [
"CHP", "CHP",
f"[{self.number:02}]", self.timestamp,
self.timecode, self.name
self.title
])) ]))
@property
def id(self) -> str:
"""Compute an ID from the Chapter data."""
checksum = crc32(str(self).encode("utf8"))
return hex(checksum)
@property @property
def named(self) -> bool: def named(self) -> bool:
"""Check if Chapter is named.""" """Check if Chapter is named."""
return bool(self.title) return bool(self.name)
@classmethod
def loads(cls, data: str) -> Chapter:
"""Load chapter data from a string."""
lines = [x.strip() for x in data.strip().splitlines(keepends=False)]
if len(lines) > 2:
return cls.loads("\n".join(lines))
one, two = lines
one_m = cls.line_1.match(one)
two_m = cls.line_2.match(two)
if not one_m or not two_m:
raise SyntaxError(f"An unexpected syntax error near:\n{one}\n{two}")
one_str, timecode = one_m.groups()
two_str, title = two_m.groups()
one_num, two_num = int(one_str.lstrip("0")), int(two_str.lstrip("0"))
if one_num != two_num:
raise SyntaxError(f"The chapter numbers ({one_num},{two_num}) does not match.")
if not timecode:
raise SyntaxError("The timecode is missing.")
if not title:
title = None
return cls(number=one_num, timecode=timecode, title=title)
@classmethod
def load(cls, path: Union[Path, str]) -> Chapter:
"""Load chapter data from a file."""
if isinstance(path, str):
path = Path(path)
return cls.loads(path.read_text(encoding="utf8"))
def dumps(self) -> str:
"""Return chapter data as a string."""
return repr(self)
def dump(self, path: Union[Path, str]) -> int:
"""Write chapter data to a file."""
if isinstance(path, str):
path = Path(path)
return path.write_text(self.dumps(), encoding="utf8")
__all__ = ("Chapter",) __all__ = ("Chapter",)

View File

@ -0,0 +1,156 @@
from __future__ import annotations
import re
from abc import ABC
from pathlib import Path
from typing import Any, Iterable, Optional, Union
from zlib import crc32
from sortedcontainers import SortedKeyList
from devine.core.tracks import Chapter
# Line formats of the OGM-based Simple Chapter format used by MKVToolNix:
#   CHAPTER01=00:00:00.000
#   CHAPTER01NAME=Intro
# The timestamp class has to accept ":" (every HH:MM:SS timestamp contains it), and the
# name may be any text at all, including nothing, because Chapter names are optional.
OGM_SIMPLE_LINE_1_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timestamp>[\d:.]+)$")
OGM_SIMPLE_LINE_2_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<name>.*)$")
class Chapters(SortedKeyList, ABC):
    """
    A collection of Chapter objects that is kept sorted by timestamp.

    Chapter numbers are never stored. They are derived from each Chapter's
    position in the sorted collection whenever the data is rendered.
    """

    def __init__(self, iterable: Optional[Iterable[Chapter]] = None):
        """
        Create a Chapters collection, optionally pre-filled from an iterable.

        Every item goes through `add()`, so the type and duplicate-timestamp
        checks apply to the initial items as well.
        """
        super().__init__(key=lambda x: x.timestamp or 0)
        for chapter in iterable or []:
            self.add(chapter)

    def __repr__(self) -> str:
        # Show the held Chapters rather than the instance __dict__, which only
        # contains the sorted-list's private bookkeeping attributes.
        return "{name}([{items}])".format(
            name=self.__class__.__name__,
            items=", ".join(repr(chapter) for chapter in self)
        )

    def __str__(self) -> str:
        """Return one `CHP | [NN] | timestamp | name` line per Chapter."""
        return "\n".join([
            " | ".join(filter(bool, [
                "CHP",
                f"[{i:02}]",
                chapter.timestamp,
                chapter.name
            ]))
            for i, chapter in enumerate(self, start=1)
        ])

    @classmethod
    def loads(cls, data: str) -> Chapters:
        """
        Load chapter data from a string in OGM-based Simple Chapter format.

        The data is read as pairs of lines, a `CHAPTERnn=` timestamp line
        followed by its `CHAPTERnnNAME=` line. Blank lines are ignored.

        Raises:
            ValueError: If the number of non-blank lines is odd.
            SyntaxError: If a line does not match the expected format, the
                numbers of a line pair differ, or the timestamp is missing.
        """
        lines = [
            stripped
            for stripped in (line.strip() for line in data.splitlines())
            if stripped
        ]

        if len(lines) % 2 != 0:
            raise ValueError("The number of chapter lines must be even.")

        chapters = []

        for line_1, line_2 in zip(lines[::2], lines[1::2]):
            line_1_match = OGM_SIMPLE_LINE_1_FORMAT.match(line_1)
            if not line_1_match:
                raise SyntaxError(f"An unexpected syntax error occurred on: {line_1}")
            line_2_match = OGM_SIMPLE_LINE_2_FORMAT.match(line_2)
            if not line_2_match:
                raise SyntaxError(f"An unexpected syntax error occurred on: {line_2}")

            line_1_number, timestamp = line_1_match.groups()
            line_2_number, name = line_2_match.groups()

            if line_1_number != line_2_number:
                raise SyntaxError(
                    f"The chapter numbers {line_1_number} and {line_2_number} do not match on:\n{line_1}\n{line_2}")
            if not timestamp:
                raise SyntaxError(f"The timestamp is missing on: {line_1}")

            # An empty name means the Chapter is unnamed, so pass no name at all.
            chapters.append(Chapter(timestamp, name or None))

        return cls(chapters)

    @classmethod
    def load(cls, path: Union[Path, str]) -> Chapters:
        """Load chapter data from a UTF-8 file in OGM-based Simple Chapter format."""
        if isinstance(path, str):
            path = Path(path)
        return cls.loads(path.read_text(encoding="utf8"))

    def dumps(self, fallback_name: str = "") -> str:
        """
        Return chapter data in OGM-based Simple Chapter format.
        https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple

        Parameters:
            fallback_name: Name used for Chapters without a Name set.

        The fallback name can use the following variables:

        - {i}: The Chapter number starting at 1.
               E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
        - {j}: A number starting at 1 that increments any time a Chapter has no name.
               E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".

        These are formatted with `str.format()`, so format-spec directives are supported.
        For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
        """
        chapters = []
        j = 0  # running count of unnamed Chapters seen so far

        for i, chapter in enumerate(self, start=1):
            if not chapter.name:
                j += 1
            chapters.append("CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
                num=f"{i:02}",
                time=chapter.timestamp,
                name=chapter.name or fallback_name.format(
                    i=i,
                    j=j
                )
            ))

        return "\n".join(chapters)

    def dump(self, path: Union[Path, str], *args: Any, **kwargs: Any) -> int:
        """
        Write chapter data in OGM-based Simple Chapter format to a file.

        Parameters:
            path: The file path to write the Chapter data to, overwriting
                any existing data. Missing parent directories are created.

        Extra arguments are passed to `Chapters.dumps`, see it for their
        documentation. Returns the value of `Path.write_text`.
        """
        if isinstance(path, str):
            path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        ogm_text = self.dumps(*args, **kwargs)
        return path.write_text(ogm_text, encoding="utf8")

    def add(self, value: Chapter) -> None:
        """
        Add a Chapter, keeping the collection sorted by timestamp.

        If the collection has no `00:00:00.000` Chapter after the insert, an
        unnamed one is added as well.

        Raises:
            TypeError: If the value is not a Chapter object.
            ValueError: If a Chapter with the same timestamp already exists.
        """
        if not isinstance(value, Chapter):
            raise TypeError(f"Can only add {Chapter} objects, not {type(value)}")

        if any(chapter.timestamp == value.timestamp for chapter in self):
            raise ValueError(f"A Chapter with the Timestamp {value.timestamp} already exists")

        super().add(value)

        # Recurses at most once: the Chapter added here makes the check pass.
        if not any(chapter.timestamp == "00:00:00.000" for chapter in self):
            self.add(Chapter(0))

    @property
    def id(self) -> str:
        """Compute an ID from the Chapter data."""
        checksum = crc32("\n".join([
            chapter.id
            for chapter in self
        ]).encode("utf8"))
        return hex(checksum)
__all__ = ("Chapters", "Chapter")

View File

@ -6,7 +6,6 @@ from functools import partial
from pathlib import Path from pathlib import Path
from typing import Callable, Iterator, Optional, Sequence, Union from typing import Callable, Iterator, Optional, Sequence, Union
from Cryptodome.Random import get_random_bytes
from langcodes import Language, closest_supported_match from langcodes import Language, closest_supported_match
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn
from rich.table import Table from rich.table import Table
@ -16,7 +15,7 @@ from devine.core.config import config
from devine.core.console import console from devine.core.console import console
from devine.core.constants import LANGUAGE_MAX_DISTANCE, LANGUAGE_MUX_MAP, AnyTrack, TrackT from devine.core.constants import LANGUAGE_MAX_DISTANCE, LANGUAGE_MUX_MAP, AnyTrack, TrackT
from devine.core.tracks.audio import Audio from devine.core.tracks.audio import Audio
from devine.core.tracks.chapter import Chapter from devine.core.tracks.chapters import Chapter, Chapters
from devine.core.tracks.subtitle import Subtitle from devine.core.tracks.subtitle import Subtitle
from devine.core.tracks.track import Track from devine.core.tracks.track import Track
from devine.core.tracks.video import Video from devine.core.tracks.video import Video
@ -41,7 +40,7 @@ class Tracks:
self.videos: list[Video] = [] self.videos: list[Video] = []
self.audio: list[Audio] = [] self.audio: list[Audio] = []
self.subtitles: list[Subtitle] = [] self.subtitles: list[Subtitle] = []
self.chapters: list[Chapter] = [] self.chapters = Chapters()
if args: if args:
self.add(args) self.add(args)
@ -137,7 +136,7 @@ class Tracks:
def add( def add(
self, self,
tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter]], Track, Chapter], tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter, Chapters]], Track, Chapter, Chapters],
warn_only: bool = False warn_only: bool = False
) -> None: ) -> None:
"""Add a provided track to its appropriate array and ensuring it's not a duplicate.""" """Add a provided track to its appropriate array and ensuring it's not a duplicate."""
@ -166,7 +165,7 @@ class Tracks:
elif isinstance(track, Subtitle): elif isinstance(track, Subtitle):
self.subtitles.append(track) self.subtitles.append(track)
elif isinstance(track, Chapter): elif isinstance(track, Chapter):
self.chapters.append(track) self.chapters.add(track)
else: else:
raise ValueError("Track type was not set or is invalid.") raise ValueError("Track type was not set or is invalid.")
@ -243,13 +242,6 @@ class Tracks:
continue continue
self.subtitles.sort(key=lambda x: is_close_match(language, [x.language]), reverse=True) self.subtitles.sort(key=lambda x: is_close_match(language, [x.language]), reverse=True)
def sort_chapters(self) -> None:
"""Sort chapter tracks by chapter number."""
if not self.chapters:
return
# number
self.chapters.sort(key=lambda x: x.number)
def select_video(self, x: Callable[[Video], bool]) -> None: def select_video(self, x: Callable[[Video], bool]) -> None:
self.videos = list(filter(x, self.videos)) self.videos = list(filter(x, self.videos))
@ -289,16 +281,6 @@ class Tracks:
][:per_language or None]) ][:per_language or None])
return selected return selected
def export_chapters(self, to_file: Optional[Union[Path, str]] = None) -> str:
"""Export all chapters in order to a string or file."""
self.sort_chapters()
data = "\n".join(map(repr, self.chapters))
if to_file:
to_file = Path(to_file)
to_file.parent.mkdir(parents=True, exist_ok=True)
to_file.write_text(data, encoding="utf8")
return data
def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]: def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]:
""" """
Multiplex all the Tracks into a Matroska Container file. Multiplex all the Tracks into a Matroska Container file.
@ -373,9 +355,9 @@ class Tracks:
if self.chapters: if self.chapters:
chapters_path = config.directories.temp / config.filenames.chapters.format( chapters_path = config.directories.temp / config.filenames.chapters.format(
title=sanitize_filename(title), title=sanitize_filename(title),
random=get_random_bytes(16).hex() random=self.chapters.id
) )
self.export_chapters(chapters_path) self.chapters.dump(chapters_path, fallback_name=config.chapter_fallback_name)
cl.extend(["--chapter-charset", "UTF-8", "--chapters", str(chapters_path)]) cl.extend(["--chapter-charset", "UTF-8", "--chapters", str(chapters_path)])
else: else:
chapters_path = None chapters_path = None