Compare commits

...

3 Commits

Author SHA1 Message Date
rlaphoenix 08c497da0a Override Subtitle download method to convert fTTML/fVTT to TTML/VTT
We want to force-convert these Subtitle formats to their respective normal formats, as the way they are downloaded is not actually usable as-is. That is, even if the user wanted to keep the original Subtitle format, these formats wouldn't be usable as-is.
2024-01-21 18:47:49 +00:00
rlaphoenix f978f7f404 Pass Service Session to Track.download instead of Service 2024-01-21 18:47:49 +00:00
rlaphoenix 2b8f601074 Move dl command's download_track code to Track.download() 2024-01-21 18:47:49 +00:00
3 changed files with 167 additions and 156 deletions

View File

@ -40,18 +40,16 @@ from rich.tree import Tree
from devine.core.config import config from devine.core.config import config
from devine.core.console import console from devine.core.console import console
from devine.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack, context_settings from devine.core.constants import DOWNLOAD_LICENCE_ONLY, AnyTrack, context_settings
from devine.core.credential import Credential from devine.core.credential import Credential
from devine.core.downloaders import downloader
from devine.core.drm import DRM_T, Widevine from devine.core.drm import DRM_T, Widevine
from devine.core.manifests import DASH, HLS
from devine.core.proxies import Basic, Hola, NordVPN from devine.core.proxies import Basic, Hola, NordVPN
from devine.core.service import Service from devine.core.service import Service
from devine.core.services import Services from devine.core.services import Services
from devine.core.titles import Movie, Song, Title_T from devine.core.titles import Movie, Song, Title_T
from devine.core.titles.episode import Episode from devine.core.titles.episode import Episode
from devine.core.tracks import Audio, Subtitle, Tracks, Video from devine.core.tracks import Audio, Subtitle, Tracks, Video
from devine.core.utilities import get_binary_path, is_close_match, time_elapsed_since, try_ensure_utf8 from devine.core.utilities import get_binary_path, is_close_match, time_elapsed_since
from devine.core.utils.click_types import LANGUAGE_RANGE, QUALITY_LIST, SEASON_RANGE, ContextData from devine.core.utils.click_types import LANGUAGE_RANGE, QUALITY_LIST, SEASON_RANGE, ContextData
from devine.core.utils.collections import merge_dict from devine.core.utils.collections import merge_dict
from devine.core.utils.subprocess import ffprobe from devine.core.utils.subprocess import ffprobe
@ -477,9 +475,8 @@ class dl:
with ThreadPoolExecutor(workers) as pool: with ThreadPoolExecutor(workers) as pool:
for download in futures.as_completed(( for download in futures.as_completed((
pool.submit( pool.submit(
self.download_track, track.download,
service=service, session=service.session,
track=track,
prepare_drm=partial( prepare_drm=partial(
partial( partial(
self.prepare_drm, self.prepare_drm,
@ -796,151 +793,6 @@ class dl:
keys[str(title)][str(track)].update(drm.content_keys) keys[str(title)][str(track)].update(drm.content_keys)
export.write_text(jsonpickle.dumps(keys, indent=4), encoding="utf8") export.write_text(jsonpickle.dumps(keys, indent=4), encoding="utf8")
def download_track(
    self,
    service: Service,
    track: AnyTrack,
    prepare_drm: Callable,
    progress: partial
):
    """
    Download a single track into the configured temp directory, decrypting it if needed.

    Parameters:
        service: Service whose `session` supplies the headers, cookies and the first
            configured proxy used for the download.
        track: The track to download. Its `descriptor` decides whether the HLS, DASH
            or direct URL download path is taken.
        prepare_drm: Callable invoked as `prepare_drm(drm, track_kid=...)` to license a
            Widevine DRM object. It is also handed to the HLS/DASH downloaders as
            `license_widevine`.
        progress: Callable used to report status, e.g. `progress(downloaded="...")`.

    Raises:
        ValueError: The track has Widevine DRM but no `prepare_drm` was supplied.
        IOError: The downloaded file is 3 bytes or smaller.

    Any exception raised while downloading is re-raised after the temp files for this
    track have been removed (unless only licensing was requested).
    """
    if DOWNLOAD_LICENCE_ONLY.is_set():
        progress(downloaded="[yellow]SKIPPING")

    if DOWNLOAD_CANCELLED.is_set():
        progress(downloaded="[yellow]CANCELLED")
        return

    session = service.session
    first_proxy = next(iter(session.proxies.values()), None)

    save_path = config.directories.temp / f"{track.__class__.__name__}_{track.id}.mp4"
    if isinstance(track, Subtitle):
        save_path = save_path.with_suffix(f".{track.codec.extension}")

    # Segmented (non-URL) tracks get a dedicated "<file>_segments" directory.
    is_segmented = track.descriptor != track.Descriptor.URL
    save_dir = save_path.with_name(save_path.name + "_segments") if is_segmented else save_path.parent

    def remove_temp_files():
        # track file (e.g., "foo.mp4")
        save_path.unlink(missing_ok=True)
        # aria2c control file (e.g., "foo.mp4.aria2")
        aria2_control_file = save_path.with_suffix(f"{save_path.suffix}.aria2")
        aria2_control_file.unlink(missing_ok=True)
        # only ever remove a directory that is a dedicated segments directory
        if save_dir.exists() and save_dir.name.endswith("_segments"):
            shutil.rmtree(save_dir)

    if not DOWNLOAD_LICENCE_ONLY.is_set():
        temp_dir = config.directories.temp
        if temp_dir.is_file():
            self.log.error(f"Temp Directory '{config.directories.temp}' must be a Directory, not a file")
            sys.exit(1)

        temp_dir.mkdir(parents=True, exist_ok=True)

        # Start from a clean slate: pre-existing temp files for this track cannot be
        # re-used or resumed as (most of) the downloaders do not use a lock file, and
        # even lock files have been seen corrupted by sudden interruptions.
        remove_temp_files()

    manifest_kwargs = dict(
        track=track,
        save_path=save_path,
        save_dir=save_dir,
        progress=progress,
        session=session,
        proxy=first_proxy,
        license_widevine=prepare_drm
    )

    try:
        if track.descriptor == track.Descriptor.M3U:
            HLS.download_track(**manifest_kwargs)
        elif track.descriptor == track.Descriptor.MPD:
            DASH.download_track(**manifest_kwargs)

        # deliberately not an else-if: DASH may convert the track to URL descriptor
        if track.descriptor == track.Descriptor.URL:
            try:
                if not track.drm and isinstance(track, (Video, Audio)):
                    # the service might not have explicitly defined the `drm` property,
                    # so try to find Widevine DRM information in the init data of the URL
                    try:
                        track.drm = [Widevine.from_track(track, session)]
                    except Widevine.Exceptions.PSSHNotFound:
                        # it might not have Widevine DRM, or might not have found the PSSH
                        self.log.warning("No Widevine PSSH was found for this track, is it DRM free?")

                drm = None
                if track.drm:
                    track_kid = track.get_key_id(session=session)
                    drm = track.drm[0]  # just use the first supported DRM system for now
                    if isinstance(drm, Widevine):
                        # license and grab content keys
                        if not prepare_drm:
                            raise ValueError("prepare_drm func must be supplied to use Widevine DRM")
                        progress(downloaded="LICENSING")
                        prepare_drm(drm, track_kid=track_kid)
                        progress(downloaded="[yellow]LICENSED")

                if not DOWNLOAD_LICENCE_ONLY.is_set():
                    downloader(
                        uri=track.url,
                        out=save_path,
                        headers=session.headers,
                        cookies=session.cookies,
                        proxy=first_proxy,
                        progress=progress
                    )

                    track.path = save_path

                    if drm:
                        progress(downloaded="Decrypting", completed=0, total=100)
                        drm.decrypt(save_path)
                        track.drm = None
                        if callable(track.OnDecrypted):
                            track.OnDecrypted(track)
                        progress(downloaded="Decrypted", completed=100)

                    if isinstance(track, Subtitle):
                        # normalise subtitle data to UTF-8 with HTML entities unescaped
                        raw = try_ensure_utf8(track.path.read_bytes())
                        unescaped = html.unescape(raw.decode("utf8"))
                        track.path.write_bytes(unescaped.encode("utf8"))

                    progress(downloaded="Downloaded")
                else:
                    progress(downloaded="[yellow]SKIPPED")
            except KeyboardInterrupt:
                DOWNLOAD_CANCELLED.set()
                progress(downloaded="[yellow]CANCELLED")
                raise
            except Exception:
                DOWNLOAD_CANCELLED.set()
                progress(downloaded="[red]FAILED")
                raise
    except (Exception, KeyboardInterrupt):
        if not DOWNLOAD_LICENCE_ONLY.is_set():
            remove_temp_files()
        raise

    # we stopped during the download, or only licensing was requested; nothing to verify
    if DOWNLOAD_CANCELLED.is_set() or DOWNLOAD_LICENCE_ONLY.is_set():
        return

    if track.path.stat().st_size <= 3:  # Empty UTF-8 BOM == 3 bytes
        raise IOError("Download failed, the downloaded file is empty.")

    if callable(track.OnDownloaded):
        track.OnDownloaded(track)
@staticmethod @staticmethod
def get_profile(service: str) -> Optional[str]: def get_profile(service: str) -> Optional[str]:
"""Get profile for Service from config.""" """Get profile for Service from config."""

View File

@ -4,11 +4,13 @@ import re
import subprocess import subprocess
from collections import defaultdict from collections import defaultdict
from enum import Enum from enum import Enum
from functools import partial
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Any, Iterable, Optional from typing import Any, Callable, Iterable, Optional
import pycaption import pycaption
import requests
from construct import Container from construct import Container
from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader
from pycaption.geometry import Layout from pycaption.geometry import Layout
@ -144,6 +146,16 @@ class Subtitle(Track):
track_name += flag track_name += flag
return track_name or None return track_name or None
def download(self, session: requests.Session, prepare_drm: Callable, progress: partial) -> None:
    """
    Download the Subtitle, then force-convert fTTML/fVTT to TTML/VTT.

    The parent class download is run first with the same arguments. If no `path`
    is set afterwards, nothing is converted. Subtitles with any other codec are
    left untouched.
    """
    super().download(session, prepare_drm, progress)

    if self.path:
        if self.codec == Subtitle.Codec.fTTML:
            target_codec = Subtitle.Codec.TimedTextMarkupLang
        elif self.codec == Subtitle.Codec.fVTT:
            target_codec = Subtitle.Codec.WebVTT
        else:
            target_codec = None

        if target_codec is not None:
            self.convert(target_codec)
def convert(self, codec: Subtitle.Codec) -> Path: def convert(self, codec: Subtitle.Codec) -> Path:
""" """
Convert this Subtitle to another Format. Convert this Subtitle to another Format.

View File

@ -1,8 +1,11 @@
import base64 import base64
import html
import logging
import re import re
import shutil import shutil
import subprocess import subprocess
from enum import Enum from enum import Enum
from functools import partial
from pathlib import Path from pathlib import Path
from typing import Any, Callable, Iterable, Optional, Union from typing import Any, Callable, Iterable, Optional, Union
from uuid import UUID from uuid import UUID
@ -10,9 +13,11 @@ from uuid import UUID
import requests import requests
from langcodes import Language from langcodes import Language
from devine.core.constants import TERRITORY_MAP from devine.core.config import config
from devine.core.drm import DRM_T from devine.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, TERRITORY_MAP
from devine.core.utilities import get_binary_path, get_boxes from devine.core.downloaders import downloader
from devine.core.drm import DRM_T, Widevine
from devine.core.utilities import get_binary_path, get_boxes, try_ensure_utf8
from devine.core.utils.subprocess import ffprobe from devine.core.utils.subprocess import ffprobe
@ -216,6 +221,148 @@ class Track:
return init_data return init_data
def download(self, session: requests.Session, prepare_drm: Callable, progress: partial) -> None:
    """
    Download this track into the configured temp directory, decrypting it if needed.

    Parameters:
        session: Requests session whose headers, cookies and first configured proxy
            are used for the download.
        prepare_drm: Callable invoked as `prepare_drm(drm, track_kid=...)` to license a
            Widevine DRM object. It is also handed to the HLS/DASH downloaders as
            `license_widevine`.
        progress: Callable used to report status, e.g. `progress(downloaded="...")`.

    Raises:
        EnvironmentError: The configured temp directory path is a file.
        ValueError: The track has Widevine DRM but no `prepare_drm` was supplied.
        IOError: The downloaded file is 3 bytes or smaller.

    Any exception raised while downloading is re-raised after the temp files for this
    track have been removed (unless only licensing was requested).
    """
    if DOWNLOAD_LICENCE_ONLY.is_set():
        progress(downloaded="[yellow]SKIPPING")

    if DOWNLOAD_CANCELLED.is_set():
        progress(downloaded="[yellow]CANCELLED")
        return

    logger = logging.getLogger("Track")
    first_proxy = next(iter(session.proxies.values()), None)

    # Track kinds are told apart by class name here rather than by isinstance.
    kind_name = self.__class__.__name__

    save_path = config.directories.temp / f"{self.__class__.__name__}_{self.id}.mp4"
    if kind_name == "Subtitle":
        save_path = save_path.with_suffix(f".{self.codec.extension}")

    # Segmented (non-URL) tracks get a dedicated "<file>_segments" directory.
    is_segmented = self.descriptor != self.Descriptor.URL
    save_dir = save_path.with_name(save_path.name + "_segments") if is_segmented else save_path.parent

    def remove_temp_files():
        # track file (e.g., "foo.mp4")
        save_path.unlink(missing_ok=True)
        # aria2c control file (e.g., "foo.mp4.aria2")
        aria2_control_file = save_path.with_suffix(f"{save_path.suffix}.aria2")
        aria2_control_file.unlink(missing_ok=True)
        # only ever remove a directory that is a dedicated segments directory
        if save_dir.exists() and save_dir.name.endswith("_segments"):
            shutil.rmtree(save_dir)

    if not DOWNLOAD_LICENCE_ONLY.is_set():
        temp_dir = config.directories.temp
        if temp_dir.is_file():
            raise EnvironmentError(f"Temp Directory '{config.directories.temp}' must be a Directory, not a file")

        temp_dir.mkdir(parents=True, exist_ok=True)

        # Start from a clean slate: pre-existing temp files for this track cannot be
        # re-used or resumed as (most of) the downloaders do not use a lock file, and
        # even lock files have been seen corrupted by sudden interruptions.
        remove_temp_files()

    manifest_kwargs = dict(
        track=self,
        save_path=save_path,
        save_dir=save_dir,
        progress=progress,
        session=session,
        proxy=first_proxy,
        license_widevine=prepare_drm
    )

    try:
        if self.descriptor == self.Descriptor.M3U:
            from devine.core.manifests import HLS
            HLS.download_track(**manifest_kwargs)
        elif self.descriptor == self.Descriptor.MPD:
            from devine.core.manifests import DASH
            DASH.download_track(**manifest_kwargs)

        # deliberately not an else-if: DASH may convert the track to URL descriptor
        if self.descriptor == self.Descriptor.URL:
            try:
                if not self.drm and kind_name in ("Video", "Audio"):
                    # the service might not have explicitly defined the `drm` property,
                    # so try to find Widevine DRM information in the init data of the URL
                    try:
                        self.drm = [Widevine.from_track(self, session)]
                    except Widevine.Exceptions.PSSHNotFound:
                        # it might not have Widevine DRM, or might not have found the PSSH
                        logger.warning("No Widevine PSSH was found for this track, is it DRM free?")

                drm = None
                if self.drm:
                    track_kid = self.get_key_id(session=session)
                    drm = self.drm[0]  # just use the first supported DRM system for now
                    if isinstance(drm, Widevine):
                        # license and grab content keys
                        if not prepare_drm:
                            raise ValueError("prepare_drm func must be supplied to use Widevine DRM")
                        progress(downloaded="LICENSING")
                        prepare_drm(drm, track_kid=track_kid)
                        progress(downloaded="[yellow]LICENSED")

                if not DOWNLOAD_LICENCE_ONLY.is_set():
                    downloader(
                        uri=self.url,
                        out=save_path,
                        headers=session.headers,
                        cookies=session.cookies,
                        proxy=first_proxy,
                        progress=progress
                    )

                    self.path = save_path

                    if drm:
                        progress(downloaded="Decrypting", completed=0, total=100)
                        drm.decrypt(save_path)
                        self.drm = None
                        if callable(self.OnDecrypted):
                            self.OnDecrypted(self)
                        progress(downloaded="Decrypted", completed=100)

                    if kind_name == "Subtitle":
                        # normalise subtitle data to UTF-8 with HTML entities unescaped
                        raw = try_ensure_utf8(self.path.read_bytes())
                        unescaped = html.unescape(raw.decode("utf8"))
                        self.path.write_bytes(unescaped.encode("utf8"))

                    progress(downloaded="Downloaded")
                else:
                    progress(downloaded="[yellow]SKIPPED")
            except KeyboardInterrupt:
                DOWNLOAD_CANCELLED.set()
                progress(downloaded="[yellow]CANCELLED")
                raise
            except Exception:
                DOWNLOAD_CANCELLED.set()
                progress(downloaded="[red]FAILED")
                raise
    except (Exception, KeyboardInterrupt):
        if not DOWNLOAD_LICENCE_ONLY.is_set():
            remove_temp_files()
        raise

    # we stopped during the download, or only licensing was requested; nothing to verify
    if DOWNLOAD_CANCELLED.is_set() or DOWNLOAD_LICENCE_ONLY.is_set():
        return

    if self.path.stat().st_size <= 3:  # Empty UTF-8 BOM == 3 bytes
        raise IOError("Download failed, the downloaded file is empty.")

    if callable(self.OnDownloaded):
        self.OnDownloaded(self)
def delete(self) -> None: def delete(self) -> None:
if self.path: if self.path:
self.path.unlink() self.path.unlink()