Compare commits


2 Commits

Author SHA1 Message Date
rlaphoenix 0530d24110 Translate SSL/TLS and connection refused logs to ConnectionRefusedErrors 2023-03-17 22:02:29 +00:00
rlaphoenix 1ef7419966 Dynamically reduce DASH worker pool on connection errors 2023-03-17 22:02:29 +00:00
60 changed files with 3400 additions and 6011 deletions

View File

@@ -1,5 +1,9 @@
version = 1
exclude_patterns = [
"**_pb2.py" # protobuf files
]
[[analyzers]]
name = "python"
enabled = true

View File

@@ -1,15 +0,0 @@
root = true
[*]
end_of_line = lf
charset = utf-8
insert_final_newline = true
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
[*.{feature,json,md,yaml,yml,toml}]
indent_size = 2
[*.md]
trim_trailing_whitespace = false

3
.flake8 Normal file
View File

@@ -0,0 +1,3 @@
[flake8]
exclude = .venv,build,dist,*_pb2.py,*.pyi
max-line-length = 120

1
.gitattributes vendored
View File

@@ -1 +0,0 @@
* text=auto eol=lf

View File

@@ -1,9 +1,4 @@
name: cd
permissions:
contents: "write"
id-token: "write"
packages: "write"
pull-requests: "read"
on:
push:
@@ -15,32 +10,32 @@ jobs:
name: Tagged Release
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install Poetry
uses: abatilo/actions-poetry@v2
with:
poetry-version: 1.6.1
- name: Install project
run: poetry install --only main
- name: Build project
run: poetry build
- name: Upload wheel
uses: actions/upload-artifact@v3
with:
name: Python Wheel
path: "dist/*.whl"
- name: Deploy release
uses: marvinpinto/action-automatic-releases@latest
with:
prerelease: false
repo_token: "${{ secrets.GITHUB_TOKEN }}"
files: |
dist/*.whl
- name: Publish to PyPI
env:
POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
run: poetry publish
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10.x'
- name: Install Poetry
uses: abatilo/actions-poetry@v2.2.0
with:
poetry-version: '1.3.2'
- name: Install dependencies
run: poetry install
- name: Build wheel
run: poetry build -f wheel
- name: Upload wheel
uses: actions/upload-artifact@v2.2.4
with:
name: Python Wheel
path: "dist/*.whl"
- name: Deploy release
uses: marvinpinto/action-automatic-releases@latest
with:
prerelease: false
repo_token: "${{ secrets.GITHUB_TOKEN }}"
files: |
dist/*.whl
- name: Publish to PyPI
env:
POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }}
run: poetry publish

View File

@@ -7,38 +7,32 @@ on:
branches: [ master ]
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install poetry
uses: abatilo/actions-poetry@v2
with:
poetry-version: 1.6.1
- name: Install project
run: poetry install --all-extras
- name: Run pre-commit which does various checks
run: poetry run pre-commit run --all-files --show-diff-on-failure
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ['3.9', '3.10', '3.11']
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
uses: abatilo/actions-poetry@v2
with:
poetry-version: 1.6.1
- name: Install project
run: poetry install --all-extras --only main
- name: Build project
run: poetry build
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install flake8
run: python -m pip install flake8
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Install poetry
uses: abatilo/actions-poetry@v2.2.0
with:
poetry-version: 1.3.2
- name: Install project
run: poetry install --no-dev
- name: Build project
run: poetry build

65
.gitignore vendored
View File

@@ -36,7 +36,6 @@ parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
@@ -55,17 +54,14 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
@@ -75,7 +71,6 @@ cover/
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
@@ -88,49 +83,16 @@ instance/
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
# celery beat schedule file
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
@@ -151,26 +113,13 @@ venv.bak/
# Rope project settings
.ropeproject
# JetBrains project settings
.idea
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.directory
.idea/dataSources.local.xml

View File

@@ -2,22 +2,17 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/compilerla/conventional-pre-commit
rev: v3.1.0
hooks:
- id: conventional-pre-commit
stages: [commit-msg]
- repo: https://github.com/mtkennerly/pre-commit-hooks
rev: v0.3.0
hooks:
- id: poetry-ruff
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: end-of-file-fixer

View File

@@ -1,13 +0,0 @@
{
"recommendations": [
"EditorConfig.EditorConfig",
"streetsidesoftware.code-spell-checker",
"ms-python.python",
"ms-python.vscode-pylance",
"charliermarsh.ruff",
"ms-python.isort",
"ms-python.mypy-type-checker",
"redhat.vscode-yaml",
"tamasfe.even-better-toml"
]
}

View File

@@ -2,324 +2,8 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
Versions [3.0.0] and older use a format based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
but versions thereafter use a custom changelog format using [git-cliff](https://git-cliff.org).
## [3.1.0] - 2024-03-05
### Features
- *cli*: Implement MultipleChoice click param based on Choice param
- *dl*: Skip video lang filter if --v-lang unused & only 1 video lang
- *dl*: Change --vcodec default to None, use any codec
- *dl*: Support multiple -r/--range and mux ranges separately
- *Subtitle*: Convert from fTTML->TTML & fVTT->WebVTT post-download
- *Track*: Make ID optional, Automatically compute one if not provided
- *Track*: Add a name property to use for the Track Name
### Bug Fixes
- *dl*: Have --sub-format default to None to keep original sub format
- *HLS*: Use filtered out segment key info
- *Track*: Don't modify lang when getting name
- *Track*: Don't use fallback values "Zzzz"/"ZZ" for track name
- *version*: The `__version__` variable forgot to be updated
### Changes
- Move dl command's download_track() to Track.download()
- *dl*: Remove unused `get_profiles()` method
- *DASH*: Move data values from track url to track data property
- *DASH*: Change how Video FPS is gotten to remove FutureWarning log
- *Track*: Add type checks, improve typing
- *Track*: Remove swap() method and its uses
- *Track*: Remove unused DRM enum
- *Track*: Rename Descriptor's M3U & MPD to HLS & DASH
- *Track*: Remove unnecessary bool casting
- *Track*: Move the path class instance variable with the rest
- *Track*: Return new path on move(), raise exceptions on errors
- *Track*: Move delete and move methods near start of Class
- *Track*: Rename extra to data, enforce type as dict
### Builds
- Explicitly use marisa-trie==1.1.0 for Python 3.12 wheels
## [3.0.0] - 2024-03-01
### Added
- Support for Python 3.12.
- Audio track's Codec Enum now has [FLAC](https://en.wikipedia.org/wiki/FLAC) defined.
- The Downloader to use can now be set in the config under the [downloader key](CONFIG.md#downloader-str).
- New Multi-Threaded Downloader, `requests`, that makes HTTP(S) calls using [Python-requests](https://requests.readthedocs.io).
- New Multi-Threaded Downloader, `curl_impersonate`, that makes HTTP(S) calls using [Curl-Impersonate](https://github.com/yifeikong/curl-impersonate) via [Curl_CFFI](https://github.com/yifeikong/curl_cffi).
- HLS manifests specifying a Byte range value without starting offsets are now supported.
- HLS segments that use `EXT-X-DISCONTINUITY` are now supported.
- DASH manifests with SegmentBase or only BaseURL are now supported.
- Subtitle tracks from DASH manifests are now automatically marked as SDH if `urn:tva:metadata:cs:AudioPurposeCS:2007 = 2`.
- The `--audio-only/--subs-only/--chapters-only` flags can now be used simultaneously. For example, `--subs-only`
with `--chapters-only` will get just Subtitles and Chapters.
- Added `--video-only` flag, which can also still be simultaneously used with the other "only" flags. Using all four
of these flags will have the same effect as not using any of them.
- Added `--no-proxy` flag, disabling all uses of proxies, even if `--proxy` is set.
- Added `--sub-format` option, which sets the wanted output subtitle format, defaulting to SubRip (SRT).
- Added `Subtitle.reverse_rtl()` method to use SubtitleEdit's `/ReverseRtlStartEnd` functionality.
- Added `Subtitle.convert()` method to convert the loaded Subtitle to another format. Note that you cannot convert to
fTTML or fVTT, but you can convert from them. SubtitleEdit will be used in precedence over pycaption if available.
Converting to SubStationAlphav4 requires SubtitleEdit, but you may want to manually alter the Canvas resolution after
the download.
- Added support for SubRip (SRT) format subtitles in `Subtitle.parse()` via pycaption.
- Added `API` Vault Client aiming for a RESTful like API.
- Added `Chapters` Class to hold the new reworked `Chapter` objects, automatically handling stuff like order of the
Chapters, Chapter numbers, loading from a chapter file or string, and saving to a chapter file or string.
- Added new `chapter_fallback_name` config option allowing you to set a Chapter Name Template used when muxing Chapters
into an MKV Container with MKVMerge. Do note, it defaults to no Chapter Fallback Name at all, but MKVMerge will force
`Chapter {i:02}` at least for me on Windows with the program language set to English. You may want to instead use
`Chapter {j:02}` which will do `Chapter 01, Intro, Chapter 02` instead of `Chapter 01, Intro, Chapter 03` (an Intro
is not a Chapter of the story, but it is the 2nd Chapter marker, so it's up to you how you want to interpret it).
- Added new `Track.OnSegmentDownloaded` Event, called any time one of the Track's segments is downloaded.
- Added new `Subtitle.OnConverted` Event, called any time that Subtitle is converted.
- Implemented `__add__` method to `Tracks` class, allowing you to add to the first Tracks object. For example, making
it handy to merge HLS video tracks with DASH tracks, `tracks = dash_tracks + hls_tracks.videos`, or for iterating:
`for track in dash.videos + hls.videos: ...`.
- Added new utility `get_free_port()` to get a free local port to use, though it may be taken by the time it's used.
### Changed
- Moved from my forked release of pymp4 (`rlaphoenix-pymp4`) back to the original `pymp4` release as it is
now up-to-date with some of my needed fixes.
- The DASH manifest is now stored in the Track `url` property to be reused by `DASH.download_track()`.
- Encrypted DASH streams are now downloaded in full and then decrypted, instead of downloading and decrypting
each individual segment. Unlike HLS, DASH cannot dynamically switch out the DRM/Protection information.
This brings both CPU and Disk IOPS improvements, as well as fixing rare weird decryption anomalies like broken
or odd timestamps, decryption failures, or broken a/v continuity.
- When a track is being decrypted, it now displays "Decrypting" and afterward "Decrypted" in place of the download
speed.
- When a track finishes downloading, it now displays "Downloaded" in place of the download speed.
- When licensing is needed and fails, the track will display "FAILED" in place of the download speed. The track
download will cancel and all other track downloads will be skipped/cancelled; downloading will end.
- The fancy smart quotes (`“` and `”`) are now stripped from filenames.
- All available services are now listed if you provide an invalid service tag/alias.
- If a WVD file fails to load and looks to be in the older unsupported v1 format, then instructions on migrating to
v2 will be displayed.
- If Shaka-Packager prints an error (i.e., `:ERROR:` log message) it will now raise a `subprocess.CalledProcessError`
exception, even if the process return code is 0.
- The Video classes' Primaries, Transfer, and Matrix classes had changes to their enum names to better represent their
values and uses. See the changed names in the [commit](https://github.com/devine-dl/devine/commit/c159672181ee3bd07b06612f256fa8590d61795c).
- SubRip (SRT) Subtitles no longer have the `MULTI-LANGUAGE SRT` header forcefully removed. The root cause of the error
was identified and fixed in this release.
- Since `Range.Transfer.SDR_BT_601_625 = 5` has been removed, `Range.from_cicp()` now internally remaps CICP transfer
values of `5` to `6` (which is now `Range.Transfer.BT_601 = 6`).
- Referer and User-Agent Header values passed to the aria2(c) downloader are now set via the dedicated `--referer` and
`--user-agent` options respectively, instead of `--header`.
- The aria2(c) `-j`, `-x`, and `-s` option values can now be set in the config under the `aria2c` key, using the options'
full names as keys.
- The aria2(c) `-x`, and `-s` option values now use aria2(c)'s own default values for them instead of `16`. The `j`
option value defaults to ThreadPoolExecutor's algorithm of `min(32,(cpu_count+4))`.
- The download progress bar now states `LICENSING` on the speed text when licensing DRM, and `LICENSED` once finished.
- The download progress bar now states `CANCELLING`/`CANCELLED` on the speed text when cancelling downloads. This is to
make it more clear that it didn't just stop, but stopped as it was cancelled.
- The download cancel/skip events were moved to `constants.py` so they can be used across the codebase more easily without
argument drilling. `DL_POOL_STOP` was renamed to `DOWNLOAD_CANCELLED` and `DL_POOL_SKIP` to `DOWNLOAD_LICENCE_ONLY`.
- The Cookie header is now calculated for each URL passed to the aria2(c) downloader based on the URL. Instead of
passing every single cookie, which could have two cookies with the same name aimed for different host names, we now
pass only cookies intended for the URL.
- The aria2(c) process no longer prints output to the terminal directly. Devine now only prints contents of the
captured log messages to the terminal. This allows filtering out of errors and warnings that aren't a problem.
- DASH and HLS no longer download segments silencing errors on all but the last retry as the downloader rework makes
this unnecessary. The errors will only be printed on the final retry regardless.
- `Track.repackage()` now saves as `{name}_repack.{ext}` instead of `{name}.repack.{ext}`.
- `Video.change_color_range()` now saves as `{name}_{limited|full}_range.{ext}` instead of `{name}.range{0|1}.{ext}`.
- `Widevine.decrypt()` now saves as `{name}_decrypted.{ext}` instead of `{name}.decrypted.{ext}`.
- Files starting with the save path's name and using the save path's extension, but not the save path, are no longer
deleted on download finish/stop/failure.
- The output container format is now explicitly specified as `MP4` when calling `shaka-packager`.
- The default downloader is now `requests` instead of `aria2c` to reduce required external dependencies.
- Reworked the `Chapter` class to only hold a timestamp and name value with an ID automatically generated as a CRC32 of
the Chapter representation.
- The `--group` option has been renamed to `--tag`.
- The config file is now read from three more locations in the following order:
1) The Devine Namespace Folder (e.g., `%appdata%/Python/Python311/site-packages/devine/devine.yaml`).
2) The Parent Folder to the Devine Namespace Folder (e.g., `%appdata%/Python/Python311/site-packages/devine.yaml`).
3) The AppDirs User Config Folder (e.g., `%localappdata%/devine/devine.yaml`).
Location 2 allows having a config at the root of a portable folder.
- An empty config file is no longer created when no config file is found.
- You can now set a default cookie file for a Service, [see README](README.md#cookies--credentials).
- You can now set a default credential for a Service, [see config](CONFIG.md#credentials-dictstr-strlistdict).
- Services are now auth-less by default and the error for not having at least a cookie or credential is removed.
Cookies/Credentials will only be loaded if a default one for the service is available, or if you use `-p/--profile`
and the profile exists.
- Subtitles when converting to SubRip (SRT) via SubtitleEdit will now use the `/ConvertColorsToDialog` option.
- HLS segments are now merged by discontinuity instead of all at once. The merged discontinuities are then finally
merged to one file using `ffmpeg`. Doing the final merge by byte concatenation did not work for some playlists.
- The Track is no longer passed through Event Callables. If you are able to set a function on an Event Callable, then
you should have access to the track reference to call it directly if needed.
- The Track.OnDecrypted event callable is now passed the DRM and Segment objects used to Decrypt. The segment object is
only passed from HLS downloads.
- The Track.OnDownloaded event callable is now called BEFORE decryption, right after downloading, not after decryption.
- All generated Track ID values across the codebase have moved from md5 to crc32 values as code processors complain
about md5's use surrounding security, and its length is too large for our use case anyway.
- HLS segments are now downloaded multi-threaded first and then processed in sequence thereafter.
- HLS segments are no longer decrypted one-by-one, requiring a lot of shaka-packager processes to run and close.
They are now merged and decrypted in groups based on their EXT-X-KEY, before being merged per discontinuity.
- The DASH and HLS downloaders now pass multiple URLs to the downloader instead of one-by-one, heavily increasing speed
and reliability as connections are kept alive and re-used.
- Downloaders now yield back progress information in the same convention used by `rich`'s `Progress.update()` method.
DASH and HLS now pass the yielded information to their progress callable instead of passing the progress callable to
the downloader.
- The aria2(c) downloader now uses the aria2(c) JSON-RPC interface to query for download progress updates instead of
parsing the stdout data in an extremely hacky way.
- The aria2(c) downloader now re-routes non-HTTP proxies via `pproxy` by a subprocess instead of the now-removed
`start_pproxy` utility. This way has proven to be easier, more reliable, and prevents pproxy from messing with rich's
terminal output in strange ways.
- All downloader functions have an altered but ultimately similar signature. `uri` became `urls`, `out` (path) was removed,
and we now calculate the save path by passing an `output_dir` and `filename`. The `silent`, `segmented`, and `progress`
parameters were completely removed.
- All downloader `urls` can now be a string or a dictionary containing extra URL-specific options to use like
URL-specific headers. It can also be a list of the two types of URLs to download multi-threaded.
- All downloader `filenames` can be a static string, or a filename string template with a few variables to use. The
template system used is f-string, e.g., `"file_{i:03}{ext}"` (ext starts with `.` if there's an extension). A sketch
of both conventions follows this list.
- DASH now updates the progress bar when merging segments.
- The `Widevine.decrypt()` method now also searches for shaka-packager as just `packager` as it is the default build
name. (#74)
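A rough illustration of the `urls` and `filenames` conventions described above; the dict keys and values below are
assumptions made purely for demonstration, not the downloaders' actual signature.

```python
# Hypothetical illustration only; the real downloaders' parameter names and
# accepted dict keys may differ from what is shown here.
urls = [
    "https://cdn.example.com/video/seg_000.mp4",
    {   # a dict entry can carry URL-specific options, e.g. its own headers
        "url": "https://cdn.example.com/video/seg_001.mp4",
        "headers": {"Referer": "https://example.com/watch"},
    },
]

# f-string style filename template; {ext} starts with "." when there's an extension
filename = "file_{i:03}{ext}"
for i, _url in enumerate(urls):
    print(filename.format(i=i, ext=".mp4"))  # file_000.mp4, file_001.mp4
```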
### Removed
- The `devine auth` command and sub-commands due to lack of support, risk of data, and general quirks with it.
- Removed `profiles` config, you must now specify which profile you wish to use each time with `-p/--profile`. If you
use a specific profile a lot more than others, you should make it the default.
- The `saldl` downloader has been removed as their binary distribution is whack and development has seemed to stall.
It was only used as an alternative to what was at the time the only downloader, aria2(c), as it did not support any
form of Byte Range, but `saldl` did, which was crucial for resuming extremely large downloads or complex playlists.
However, now we have the requests downloader which does support the Range header.
- The `Track.needs_proxy` property was removed for a few design architectural reasons.
1) Design-wise it isn't valid to have --proxy (or via config/otherwise) set a proxy, then unpredictably have it
bypassed or disabled. If I specify `--proxy 127.0.0.1:8080`, I would expect it to use that proxy for all
communication indefinitely, not switch in and out depending on the track or service.
2) With reason 1, it's also a security problem. The only reason I implemented it in the first place was so I could
download faster on my home connection. This means I would authenticate and call APIs under a proxy, then suddenly
download manifests and segments e.t.c under my home connection. A competent service could see that as an indicator
of bad play and flag you.
3) Maintaining this setup across the codebase is extremely annoying, especially because of how proxies are set up/used
by Requests in the Session. There's no way to tell a request session to temporarily disable the proxy and turn it
back on later, without having to get the proxy from the session (in an annoying way), store it, then remove it,
make the calls, then assuming you're still in the same function you can add it back. If you're not in the same
function, well, time for some spaghetti code.
- The `Range.Transfer.SDR_BT_601_625 = 5` key and value has been removed as I cannot find any official source to verify
it as the correct use. However, usually a `transfer` value of `5` would be PAL SD material so it better matches `6`,
which is (now named) `Range.Transfer.BT_601 = 6`. If you have something specifying transfer=5, just remap it to 6.
- The warning log `There's no ... Audio Tracks, likely part of an invariant playlist, continuing...` message has been
removed. So long as your playlist is expecting no audio tracks, or the audio is part of the video transport, then
this wouldn't be a problem whatsoever. Therefore, having it log this annoying warning all the time is pointless.
- The `--min-split-size` argument to the aria2(c) downloader as it was only used to disable splitting on
segmented downloads, but the newer downloader system wouldn't really need or want this to be done. If aria2 has
decided based on its other settings to have split a segment file, then it likely would benefit from doing so.
- The `--remote-time` argument from the aria2(c) downloader as it may need to do a GET and a HEAD request to
get the remote time information, slowing the download down. We don't need this information anyway as it will likely
be repacked with `ffmpeg` or multiplexed with `mkvmerge`, discarding/losing that information.
- DASH and HLS's 5-attempt retry loop as the downloaders will retry for us.
- The `start_pproxy` utility has been removed as all uses of it now call `pproxy` via subprocess instead.
- The `LANGUAGE_MUX_MAP` constant and its usage have been removed as it is no longer necessary as of MKVToolNix v54.
### Fixed
- Uses of `__ALL__` with Class objects have been corrected to `__all__` with string objects, following PEP8.
- Fixed value of URL passed to `Track.get_key_id()` as it was a tuple rather than the URL string.
- The `--skip-dl` flag now works again after breaking in v[1.3.0].
- Move WVD file to correct location on new installations in the `wvd add` command.
- Cookie data is now passed to downloaders and applied per-URL based on the URI it will be used for, just like a browser.
- Failure to get FPS in DASH when SegmentBase isn't used.
- An error message is now returned if a WVD file fails to load instead of raising an exception.
- Track language information within M3U playlists is now validated with langcodes before use. Some manifests use the
property for arbitrary data that their apps/players use for their own purposes.
- Attempt to fix non-UTF-8 and mixed-encoding Subtitle downloads by automatically converting to UTF-8. (#43)
Decoding is attempted in the following order: UTF-8, CP-1252, then finally chardet detection. If it's neither UTF-8
nor CP-1252 and chardet could not detect the encoding, then it is left as-is. Conversion is done per-segment if the
Subtitle is segmented, unless it's the fVTT or fTTML formats, which are binary. A sketch of this decode order follows
this list.
- Chapter Character Encoding is now explicitly set to UTF-8 when muxing to an MKV container as Windows seems to default
to latin1 or something, breaking Chapter names with any sort of special character within.
- Subtitles passed through SubtitleEdit now explicitly use UTF-8 character encoding as it usually defaulted to UTF-8
with Byte Order Marks (aka UTF-8-SIG/UTF-8-BOM).
- Subtitles passed through SubtitleEdit now use the same output format as the subtitle being processed instead of SRT.
- Fixed rare infinite loop when the Server hosting the init/header data/segment file responds with a `Content-Length`
header with a value of `0` or smaller.
- Removed empty caption lists/languages when parsing Subtitles with `Subtitle.parse()`. This stopped conversions to SRT
containing the `MULTI-LANGUAGE SRT` header when there were multiple caption lists, even though only one of them
actually contained captions.
- Text-based Subtitle formats now try to automatically convert to UTF-8 when run through `Subtitle.parse()`.
- Text-based Subtitle formats now have `‎` and `‏` HTML entities unescaped post-download as some rendering
libraries seem to not decode them for us. SubtitleEdit also has problems with `/ReverseRtlStartEnd` unless it's
already decoded.
- Fixed two concatenation errors surrounding DASH's BaseURL, sourceURL, and media values that start with or use `../`.
- Fixed the number values in the `Newly added to x/y Vaults` log, which now states `Cached n Key(s) to x/y Vaults`.
- File write handler now flushes after appending a new segment to the final save path or checkpoint file, reducing
memory usage by quite a bit in some scenarios.
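A hedged sketch of the decode order described in the subtitle conversion fix above (UTF-8, then CP-1252, then chardet
detection, otherwise left as-is); this is an illustration of the described behaviour, not the actual implementation.

```python
import chardet  # third-party detector, only consulted as a last resort


def to_utf8(data: bytes) -> bytes:
    """Best-effort re-encode of subtitle bytes to UTF-8, per the order above."""
    for encoding in ("utf-8", "cp1252"):
        try:
            return data.decode(encoding).encode("utf-8")
        except UnicodeDecodeError:
            continue
    detected = chardet.detect(data).get("encoding")
    if detected:
        return data.decode(detected, errors="replace").encode("utf-8")
    return data  # encoding unknown, leave the data as-is
```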
### New Contributors
- [Shivelight](https://github.com/Shivelight)
## [2.2.0] - 2023-04-23
### Breaking Changes
Since `-q/--quality` has been reworked to support specifying multiple qualities, the type of this value is
no longer `None|int`. It is now `list[int]` and the list may be empty. It is no longer ever a `None` value.
Please make sure any Service code that uses `quality` via `ctx.parent.params` reflects this change. You may
need to go from an `if quality: ...` to `for res in quality: ...`, or such. You may still use `if quality`
to check if it has 1 or more resolution specified, but make sure that the code within that if tree supports
more than 1 value in the `quality` variable, which is now a list. Note that the list will always be in
descending order regardless of how the user specified them.
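For example, a minimal sketch of adapting Service code to the list-based value (the literal list stands in for what you
would read from `ctx.parent.params`):

```python
# Illustration only; in a Service this would come from ctx.parent.params["quality"].
quality: list[int] = [1080, 720]

if quality:  # one or more resolutions were requested
    for res in quality:  # always in descending order
        print(f"selecting {res}p tracks")
else:
    print("no resolution filter requested")
```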
### Added
- Added the ability to specify and download multiple resolutions with `-q/--quality`. E.g., `-q 1080p,720p`.
- Added support for DASH manifests that use SegmentList with range values on the Initialization definition (#47).
- Added a check for `uuid` mp4 boxes containing `tenc` box data when getting the Track's Key ID to improve
chances of finding a Key ID.
### Changed
- The download path is no longer printed after each download. The simple reason is it felt unnecessary.
It filled up a fair amount of vertical space for information you should already know.
- The logs after a download finishes have been split into two logs. One after the actual downloading process
and the other after the multiplexing process. The downloading process has its own timer as well, so you can
see how long the download itself took.
- I've switched from using the official pymp4 (for now) to my fork. At the time this change was made the
original bearypig pymp4 repo was stagnant and the PyPI releases were old. I forked it, added some fixes
by TrueDread and released my own update to PyPI, so it's no longer outdated. This was needed for some
mp4 box parsing fixes. Since then the original repo is no longer stagnant, and a new release was made on
PyPI. However, my repo still has some of TrueDread's fixes that are not yet on the original repository nor
on PyPI.
### Removed
- Removed the `with_resolution` method in the Tracks class. It has been replaced with `by_resolutions`. The
new replacement method supports getting all or n tracks by resolution, instead of the original which
always got all tracks by resolution.
- Removed the `select_per_language` method in the Tracks class. It has been replaced with `by_language`. The
new replacement method supports getting all or n tracks by language, instead of the original which was only
able to get one track by language. It now defaults to getting all tracks by language.
### Fixed
- Prevented some duplicate Widevine tree logs under specific edge-cases.
- The Subtitle parse method no longer absorbs the syntax error message.
- Replaced all negative size values with 0 on TTML subtitles as a negative value would cause syntax errors.
- Fixed crash during decryption when shaka-packager skips decryption of a segment as it had no actual data and
was just headers.
- Fixed CCExtractor crash in some scenarios by repacking the video stream prior to extraction.
- Fixed rare crash when calculating download speed of DASH and HLS downloads where a segment immediately finished
after the previous segment. This seemed to only happen on the very last segment in rare situations.
- Fixed some failures parsing `tenc` mp4 boxes when obtaining the track's Key ID by using my own fork of pymp4
with up-to-date code and further fixes.
- Fixed crashes when parsing some `tenc` mp4 boxes by simply skipping `tenc` boxes that fail to parse. This happens
because some services seem to mix up the data of the `tenc` box with that of another type of box.
- Fixed using invalid `tenc` boxes by skipping ones with a version number greater than 1.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2.1.0] - 2023-03-16
@@ -685,9 +369,6 @@ This release brings a huge change to the fundamentals of Devine's logging, UI, a
Initial public release under the name Devine.
[3.1.0]: https://github.com/devine-dl/devine/releases/tag/v3.1.0
[3.0.0]: https://github.com/devine-dl/devine/releases/tag/v3.0.0
[2.2.0]: https://github.com/devine-dl/devine/releases/tag/v2.2.0
[2.1.0]: https://github.com/devine-dl/devine/releases/tag/v2.1.0
[2.0.1]: https://github.com/devine-dl/devine/releases/tag/v2.0.1
[2.0.0]: https://github.com/devine-dl/devine/releases/tag/v2.0.0

126
CONFIG.md
View File

@@ -10,13 +10,6 @@ which does not keep comments.
## aria2c (dict)
- `max_concurrent_downloads`
Maximum number of parallel downloads. Default: `min(32,(cpu_count+4))`
Note: Overrides the `max_workers` parameter of the aria2(c) downloader function.
- `max_connection_per_server`
Maximum number of connections to one server for each download. Default: `1`
- `split`
Split a file into N chunks and download each chunk on its own connection. Default: `5`
- `file_allocation`
Specify file allocation method. Default: `"prealloc"`
@@ -66,52 +59,27 @@ DSNP:
default: chromecdm_903_l3
```
## chapter_fallback_name (str)
## credentials (dict)
The Chapter Name to use when exporting a Chapter without a Name.
The default is no fallback name at all and no Chapter name will be set.
Specify login credentials to use for each Service by Profile as Key (case-sensitive).
The fallback name can use the following variables in f-string style:
- `{i}`: The Chapter number starting at 1.
E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
- `{j}`: A number starting at 1 that increments any time a Chapter has no title.
E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".
These are formatted with f-strings; format directives are supported.
For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
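As a rough illustration of how the `{i}` and `{j}` counters behave (just the semantics described above, not the actual
implementation):

```python
# Hypothetical demonstration of the {i}/{j} counters described above.
chapter_names = ["", "Intro", ""]  # "" = a Chapter without a Name
fallback_name = "Chapter {j:02}"

j = 0
for i, name in enumerate(chapter_names, start=1):
    if not name:
        j += 1
        name = fallback_name.format(i=i, j=j)
    print(name)
# -> Chapter 01, Intro, Chapter 02
```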
## credentials (dict[str, str|list|dict])
Specify login credentials to use for each Service, and optionally per-profile.
The value should be `email:password` or `username:password` (with some exceptions).
The first section does not have to be an email or username. It may also be a Phone number.
For example,
```yaml
ALL4: jane@gmail.com:LoremIpsum100 # directly
AMZN: # or per-profile, optionally with a default
default: jane@example.tld:LoremIpsum99 # <-- used by default if -p/--profile is not used
AMZN:
james: james@gmail.com:TheFriend97
jane: jane@example.tld:LoremIpsum99
john: john@example.tld:LoremIpsum98
NF: # the `default` key is not necessary, but no credential will be used by default
NF:
john: john@gmail.com:TheGuyWhoPaysForTheNetflix69420
```
The value should be in string form, e.g., `john@gmail.com:password123` or `john:password123`.
Any arbitrary values can be used on the left (username/email/phone) and right (password/secret).
You can also specify these in list form, i.e., `["john@gmail.com", ":PasswordWithAColon"]`.
If you specify multiple credentials with keys like the `AMZN` and `NF` example above, then you should
use a `default` key or no credential will be loaded automatically unless you use `-p/--profile`. You
do not have to use a `default` key at all.
Credentials must be specified per-profile. You cannot specify a fallback or default credential.
Please be aware that this information is sensitive, so keep it safe. Do not share your config.
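One plausible reading of the string and list forms above, sketched in Python; the real loader in devine may normalise
these differently.

```python
# Illustrative only; splits "user:pass" strings on the first colon and treats
# list form as an already-split (username, password) pair.
from __future__ import annotations


def parse_credential(value: str | list[str]) -> tuple[str, str]:
    if isinstance(value, list):   # e.g. ["john@gmail.com", ":PasswordWithAColon"]
        username, password = value
    else:                         # e.g. "jane@example.tld:LoremIpsum99"
        username, _, password = value.partition(":")
    return username, password


print(parse_credential("jane@example.tld:LoremIpsum99"))
print(parse_credential(["john@gmail.com", ":PasswordWithAColon"]))
```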
## curl_impersonate (dict)
- `browser` - The Browser to impersonate as. A list of available Browsers and Versions is available here:
<https://github.com/yifeikong/curl_cffi#sessions>
## directories (dict)
Override the default directories used across devine.
@@ -152,7 +120,7 @@ For example to set the default primary language to download to German,
lang: de
```
to set `--bitrate=CVBR` for the AMZN service,
or to set `--bitrate=CVBR` for the AMZN service,
```yaml
lang: de
@@ -160,26 +128,6 @@ AMZN:
bitrate: CVBR
```
or to change the output subtitle format from the default (original format) to WebVTT,
```yaml
sub_format: vtt
```
## downloader (str)
Choose what software to use to download data throughout Devine where needed.
Options:
- `requests` (default) - https://github.com/psf/requests
- `aria2c` - https://github.com/aria2/aria2
- `curl_impersonate` - https://github.com/yifeikong/curl-impersonate (via https://github.com/yifeikong/curl_cffi)
Note that aria2c can reach the highest speeds as it utilizes threading and more connections than the other
downloaders. However, aria2c can also be one of the more unstable downloaders. It will work one day, then
not another day. It also does not support HTTP(S) proxies while the other downloaders do.
## headers (dict)
Case-Insensitive dictionary of headers that all Services begin their Request Session state with.
@@ -207,28 +155,12 @@ provide the same Key ID and CEK for both Video and Audio, as well as for multipl
You can have as many Key Vaults as you would like. It's nice to share Key Vaults or use a unified Vault on
Teams as sharing CEKs immediately can help reduce License calls drastically.
Three types of Vaults are in the Core codebase, API, SQLite and MySQL. API makes HTTP requests to a RESTful API,
whereas SQLite and MySQL directly connect to an SQLite or MySQL Database.
Two types of Vaults are in the Core codebase, SQLite and MySQL Vaults. Both directly connect to an SQLite or MySQL
Server. It has to connect directly to the Host/IP. It cannot be in front of a PHP API or such. Beware that some Hosts
do not let you access the MySQL server outside their intranet (aka Don't port forward or use permissive network
interfaces).
Note: SQLite and MySQL vaults have to connect directly to the Host/IP. It cannot be in front of a PHP API or such.
Beware that some Hosting Providers do not let you access the MySQL server outside their intranet and may not be
accessible outside their hosting platform.
### Using an API Vault
API vaults use a specific HTTP request format, therefore API or HTTP Key Vault APIs from other projects or services may
not work in Devine. The API format can be seen in the [API Vault Code](devine/vaults/API.py).
```yaml
- type: API
name: "John#0001's Vault" # arbitrary vault name
uri: "https://key-vault.example.com" # api base uri (can also be an IP or IP:Port)
# uri: "127.0.0.1:80/key-vault"
# uri: "https://api.example.com/key-vault"
token: "random secret key" # authorization token
```
### Using a MySQL Vault
### Connecting to a MySQL Vault
MySQL vaults can be either MySQL or MariaDB servers. I recommend MariaDB.
A MySQL Vault can be on a local or remote network, but I recommend SQLite for local Vaults.
@@ -254,7 +186,7 @@ make tables yourself.
- You may give trusted users CREATE permission so devine can create tables if needed.
- Other users should only be given SELECT and INSERT permissions.
### Using an SQLite Vault
### Connecting to an SQLite Vault
SQLite Vaults are usually only used for locally stored vaults. This vault may be stored on a mounted Cloud storage
drive, but I recommend using SQLite exclusively as an offline-only vault. Effectively this is your backup vault in
@@ -279,6 +211,34 @@ together.
- `set_title`
Set the container title to `Show SXXEXX Episode Name` or `Movie (Year)`. Default: `true`
## profiles (dict)
Pre-define Profiles to use Per-Service.
For example,
```yaml
AMZN: jane
DSNP: john
```
You can also specify a fallback value to pre-define if a match was not made.
This can be done using the `default` key. This can help reduce redundancy in your specifications.
```yaml
AMZN: jane
DSNP: john
default: james
```
If a Service doesn't require a profile (as it does not require Credentials or Authorization of any kind), you can
disable the profile checks by specifying `false` as the profile for the Service.
```yaml
ALL4: false
CTV: false
```
## proxy_providers (dict)
Enable external proxy provider services.

View File

@@ -1,49 +0,0 @@
# Development
This project is managed using [Poetry](https://python-poetry.org), a fantastic Python packaging and dependency manager.
Install the latest version of Poetry before continuing. Development currently requires Python 3.9+.
## Set up
Starting from Zero? Not sure where to begin? Here are the steps for setting up this Python project using Poetry. Note that
Poetry installation instructions should be followed from the Poetry Docs: https://python-poetry.org/docs/#installation
1. While optional, it's recommended to configure Poetry to install Virtual environments within project folders:
```shell
poetry config virtualenvs.in-project true
```
This makes it easier for Visual Studio Code to detect the Virtual Environment, as well as other IDEs and systems.
I've also had issues with Poetry creating duplicate Virtual environments in the default folder for an unknown
reason which quickly filled up my System storage.
2. Clone the Repository:
```shell
git clone https://github.com/devine-dl/devine
cd devine
```
3. Install the Project with Poetry:
```shell
poetry install
```
This creates a Virtual environment and then installs all project dependencies and executables into the Virtual
environment. Your System Python environment is not affected at all.
4. Now activate the Virtual environment:
```shell
poetry shell
```
Note:
- You can alternatively just prefix `poetry run` to any command you wish to run under the Virtual environment.
- I recommend entering the Virtual environment; all further instructions assume you did.
- JetBrains PyCharm has integrated support for Poetry and automatically enters Poetry Virtual environments, assuming
the Python Interpreter on the bottom right is set up correctly.
- For more information, see: https://python-poetry.org/docs/basic-usage/#using-your-virtual-environment
5. Install Pre-commit tooling to ensure safe and quality commits:
```shell
pre-commit install
```
## Building Source and Wheel distributions
```shell
poetry build
```
You can optionally specify `-f` to build `sdist` or `wheel` only.
Built files can be found in the `/dist` directory.

View File

@@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
<https://www.gnu.org/licenses/why-not-lgpl.html>.

381
README.md
View File

@@ -2,11 +2,7 @@
<img src="https://user-images.githubusercontent.com/17136956/216880837-478f3ec7-6af6-4cca-8eef-5c98ff02104c.png">
<a href="https://github.com/devine-dl/devine">Devine</a>
<br/>
<sup><em>Modular Movie, TV, and Music Archival Software</em></sup>
<br/>
<a href="https://discord.gg/34K2MGDrBN">
<img src="https://img.shields.io/discord/841055398240059422?label=&logo=discord&logoColor=ffffff&color=7289DA&labelColor=7289DA" alt="Discord">
</a>
<sup><em>Open-Source Movie, TV, and Music Downloading Solution</em></sup>
</p>
<p align="center">
@@ -19,268 +15,170 @@
<a href="https://deepsource.io/gh/devine-dl/devine/?ref=repository-badge">
<img src="https://deepsource.io/gh/devine-dl/devine.svg/?label=active+issues&token=1ADCbjJ3FPiGT_s0Y0rlugGU" alt="DeepSource">
</a>
<br/>
<a href="https://github.com/astral-sh/ruff">
<img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json" alt="Linter: Ruff">
</a>
<a href="https://python-poetry.org">
<img src="https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json" alt="Dependency management: Poetry">
</a>
</p>
## Features
- 🚀 Seamless Installation via [pip](#installation)
- 🎥 Movie, Episode, and Song Service Frameworks
- 🛠️ Built-in [DASH] and [HLS] Parsers
- 🔒 Widevine DRM integration via [pywidevine](https://github.com/devine-dl/pywidevine)
- 💾 Local & Remote DRM Key-vaults
- 🌍 Local & Remote Widevine CDMs
- 👥 Multi-profile Authentication per-service with Credentials and/or Cookies
- 🎥 Supports Movies, TV shows, and Music
- 🧩 Easy installation via PIP/PyPI
- 👥 Multi-profile authentication per-service with credentials or cookies
- 🤖 Automatic P2P filename structure with Group Tag
- 🛠️ Flexible Service framework system
- 📦 Portable Installations
- 🗃️ Local and Remote SQL-based Key Vault database
- ⚙️ YAML for Configuration
- 🌍 Local and Remote Widevine CDMs
- ❤️ Fully Open-Source! Pull Requests Welcome
[DASH]: <devine/core/manifests/dash.py>
[HLS]: <devine/core/manifests/hls.py>
## Installation
```shell
$ pip install devine
```
> [!NOTE]
> If pip gives you a warning about a path not being in your PATH environment variable, then promptly add that path and
> close all open command prompt/terminal windows, or `devine` won't work as it will not be found.
> __Note__ If you see warnings about a path not being in your PATH environment variable, add it, or `devine` won't run.
Voilà 🎉 — You now have the `devine` package installed!
A command-line interface is now available, try `devine --help`.
Voilà 🎉! You now have the `devine` package installed and a `devine` executable is now available.
Check it out with `devine --help`!
### Dependencies
The following is a list of programs that need to be installed by you manually.
The following is a list of programs that need to be installed manually. I recommend installing these with [winget],
[chocolatey] or such where possible as they automatically add them to your `PATH` environment variable and make them
easier to update in the future.
- [aria2(c)] for downloading streams and large manifests.
- [CCExtractor] for extracting Closed Caption data like EIA-608 from video streams and converting as SRT.
- [FFmpeg] (and ffprobe) for repacking/remuxing streams on specific services, and evaluating stream data.
- [MKVToolNix] v54+ for muxing individual streams to an `.mkv` file.
- [shaka-packager] for decrypting CENC-CTR and CENC-CBCS video and audio streams.
- (optional) [aria2(c)] to use as a [downloader](CONFIG.md#downloader-str).
> [!TIP]
> You should install these from a Package Repository if you can, including winget/chocolatey on Windows. They will
> automatically add the binary's path to your `PATH` environment variable and will be easier to update in the future.
> [!IMPORTANT]
> Most of these dependencies are portable utilities and therefore do not use installers. If you do not install them
> from a package repository like winget/choco/pacman then make sure you put them in your current working directory, in
> Devine's installation directory, or the binary's path into your `PATH` environment variable. If you do not do this
> then Devine will not be able to find the binaries.
For portable downloads, make sure you put them in your current working directory, in the installation directory,
or put the directory path in your `PATH` environment variable. If you do not do this then their binaries will not
be found.
[winget]: <https://winget.run>
[chocolatey]: <https://chocolatey.org>
[aria2(c)]: <https://aria2.github.io>
[CCExtractor]: <https://github.com/CCExtractor/ccextractor>
[FFmpeg]: <https://ffmpeg.org>
[FFmpeg]: <https://fmpeg.org>
[MKVToolNix]: <https://mkvtoolnix.download/downloads.html>
[shaka-packager]: <https://github.com/google/shaka-packager/releases/latest>
## Usage
### Portable installation
First, take a look at `devine --help` for a full help document, listing all commands available and giving you more
information on what can be done with Devine.
1. Download a Python Embeddable Package of a supported Python version (the `.zip` download).
(make sure it's either x64/x86 and not ARM unless you're on an ARM device).
2. Extract the `.zip` and rename the folder, if you wish.
3. Open Terminal and `cd` to the extracted folder.
4. Run the following on Windows:
```
(Invoke-WebRequest -Uri https://gist.githubusercontent.com/rlaphoenix/5ef250e61ceeb123c6696c05ad4dee8b/raw -UseBasicParsing).Content | .\python -
```
or the following on Linux/macOS:
```
curl -sSL https://gist.githubusercontent.com/rlaphoenix/5ef250e61ceeb123c6696c05ad4dee8b/raw | ./python -
```
5. Run `.\python -m pip install devine`
Here's a checklist on what I recommend getting started with, in no particular order,
You can now call `devine` by,
- [ ] Add [Services](#services), these will be used in `devine dl`.
- [ ] Add [Profiles](#profiles-cookies--credentials), these are your cookies and credentials.
- [ ] Add [Widevine Provisions](#widevine-provisions), also known as CDMs, these are used for DRM-protected content.
- [ ] Set your Group Tag, the text at the end of the final filename, e.g., `devine cfg tag NOGRP` for `...-NOGRP`.
- [ ] Set Up a Local Key Vault, take a look at the [Key Vaults Config](CONFIG.md#keyvaults-listdict).
- running `./python -m devine --help`, or,
- running `./Scripts/devine.exe --help`, or,
- symlinking the `/Scripts/devine.exe` binary to the root of the folder, for `./devine --help`, or,
- zipping the entire folder to `devine.zip`, for `python devine.zip --help`.
And here's some more advanced things you could take a look at,
The last method of calling devine, by archiving to a zip file, is incredibly useful for sharing and portability!
I urge you to give it a try!
- [ ] Setting default Headers that the Request Session uses.
- [ ] Setting default Profiles and CDM Provisions to use for services.
- [ ] NordVPN and Hola Proxy Providers for automatic proxies.
- [ ] Hosting and/or Using Remote Key Vaults.
- [ ] Serving and/or Using Remote CDM Provisions.
### Services
Documentation on the config is available in the [CONFIG.md](CONFIG.md) file; it has a lot of handy settings.
If you start to get sick of putting something in your CLI call, then I recommend taking a look at it!
Devine does not come with any infringing Service code. You must develop your own Service code and place it in
the `/devine/services` directory. There are different ways to add services depending on your installation type.
In some cases you may use multiple of these methods to have separate copies.
## Services
Please refrain from making or using Service code unless you have full rights to do so. I also recommend ensuring that
you keep the Service code private and secure, i.e. a private repository or keeping it offline.
Unlike similar projects such as [youtube-dl], Devine does not currently come with any Services. You must develop your
own Services and only use Devine with Services you have the legal right to use.
No matter which method you use, make sure that you install any further dependencies needed by the services. There's
currently no way to have these dependencies automatically installed apart from within the Fork method.
> [!NOTE]
> If you made a Service for Devine that does not use Widevine or any other DRM systems, feel free to make a Pull Request
> and make your service available to others. Any Service on [youtube-dl] (or [yt-dlp]) could be added to the
> Devine repository as they both use the [Unlicense license], therefore direct reading and porting of their code would be
> legal.
[youtube-dl]: <https://github.com/ytdl-org/youtube-dl>
[yt-dlp]: <https://github.com/yt-dlp/yt-dlp>
[Unlicense license]: <https://choosealicense.com/licenses/unlicense>
### Creating a Service
> [!WARNING]
> Only create or use Service Code with Services you have full legal right to do so.
A Service consists of a folder with an `__init__.py` file. The file must contain a class of the same name as the folder.
The class must inherit the [Service] class and implement all the abstract methods. It must finally implement a new
method named `cli` where you define CLI arguments.
1. Make a new folder within `/devine/services`. The folder name you choose will be what's known as the [Service Tag].
This "tag" is used in the final output filename of downloaded files, for various code-checks, lookup keys in
key-vault databases, and more.
2. Within the new folder create an `__init__.py` file and write a class inheriting the [Service] class. It must be named
the exact same as the folder. It is case-sensitive.
3. Implement all the methods of the Service class you are inheriting that are marked as abstract.
4. Define CLI arguments by implementing a `cli` method. This method must be static (i.e. `@staticmethod`). For example
to implement the bare minimum to receive a Title ID of sorts:
```python
@staticmethod
@click.command(name="YT", short_help="https://youtube.com", help=__doc__)
@click.argument("title", type=str)
@click.pass_context
def cli(ctx, **kwargs):
return YT(ctx, **kwargs)
```
You must implement this `cli` method, even if you do not want or need any CLI arguments. It is required for the core
CLI functionality to be able to find and call the class.
5. Accept the CLI arguments by overriding the constructor (the `__init__()` method):
```python
def __init__(self, ctx, title):
self.title = title
super().__init__(ctx) # important
# ... the title is now available across all methods by calling self.title
```
> [!NOTE]
> - All methods of your class inherited from `Service` marked as abstract (`@abstractmethod`) MUST be implemented by
> your class.
> - When overriding any method (e.g., `__init__()` method) you MUST super call it, e.g., `super().__init__()` at the
> top of the override. This does not apply to any abstract methods, as they are unimplemented.
> - If preparing your Requests Session with global headers or such, then you should override the `get_session` method,
>   then modify `self.session`. Do not manually make `self.session` from scratch (see the sketch after these notes).
> [!TIP]
> 1. To make web requests use the `self.session` class instance variable, e.g. `self.session.get(url)`.
> 2. If you make a `config.yaml` file next to your `__init__.py`, you can access it with `self.config`.
> 3. You can include any arbitrary file within your Service folder for use by your Service. For example TLS certificate
> files, or other python files with helper functions and classes.
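Putting the notes above together, here is a hedged skeleton of what a Service could look like. It assumes the `Service`
base class from `devine/core/service.py` provides `get_session()` and `self.config` as described above; the abstract
methods you still have to implement are omitted.

```python
import click

from devine.core.service import Service


class YT(Service):
    """Skeleton only; every abstract Service method still needs implementing."""

    @staticmethod
    @click.command(name="YT", short_help="https://youtube.com")
    @click.argument("title", type=str)
    @click.pass_context
    def cli(ctx, **kwargs):
        return YT(ctx, **kwargs)

    def __init__(self, ctx, title):
        self.title = title
        super().__init__(ctx)  # important

    def get_session(self):
        # keep the prepared session, never build self.session from scratch
        session = super().get_session()
        # per tip 2, values from an optional config.yaml would be available via self.config
        session.headers.update({"Origin": "https://youtube.com"})
        return session
```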
[Service]: <devine/core/service.py>
[Service Tag]: <#service-tags>
### Service Tags
Service tags generally follow these rules:
- Tag must be between 2-4 characters long, consisting of just `[A-Z0-9i]{2,4}`.
- Lower-case `i` is only used for select services. Specifically BBC iPlayer and iTunes.
- If the Service's commercial name has a `+` or `Plus`, the last character should be a `P`.
E.g., `ATVP` for `Apple TV+`, `DSCP` for `Discovery+`, `DSNP` for `Disney+`, and `PMTP` for `Paramount+`.
These rules are not exhaustive and should only be used as a guide. You don't strictly have to follow these rules, but
I recommend doing so for consistency.
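The guideline above can be sanity-checked with a simple pattern; this is just an illustration, not an official validator.

```python
import re

# 2-4 characters of A-Z, 0-9, or a lower-case "i", per the guideline above
TAG_PATTERN = re.compile(r"^[A-Z0-9i]{2,4}$")

for tag in ("ATVP", "DSNP", "PMTP", "iP", "NetflixUS"):
    print(tag, "follows the guideline" if TAG_PATTERN.match(tag) else "does not follow the guideline")
```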
### Sharing Services
Sending and receiving zipped Service folders is quite cumbersome. Let's explore alternative routes to collaborating on
Service Code.
> [!WARNING]
> Please be careful with who you trust and what you run. The users you collaborate with on Service
> __Warning__ Please be careful with who you trust and what you run. The users you collaborate with on Service
> code could update it with malicious code that you would run via devine on the next call.
#### via Copy & Paste

If you have service code already and wish to just install and use it locally, then simply putting it into the Services
directory of your local pip installation will do the job. However, this method is the worst in terms of collaboration.

1. Get the installation directory by running the following in terminal,
   `python -c 'import os,devine.__main__ as a;print(os.path.dirname(a.__file__))'`
2. Head to the installation directory and create a `services` folder if one is not yet created.
3. Within that `services` folder you may install or create service code.

> [!WARNING]
> Uninstalling Python or Devine may result in the Services you installed being deleted. Make sure you back
> up the services before uninstalling.

#### via a Forked Repository

If you are collaborating with a team on multiple services then forking the project is the best way to go. I recommend
forking the project and then hard resetting to the latest stable version by tag, e.g., `git reset --hard v1.0.0`. Once
a new stable update comes out you can easily rebase your fork to that commit to update:

1. `git fetch upstream`
2. `git rebase upstream/master`

However, please make sure you look at changes between each version before rebasing and resolve any breaking changes and
deprecations when rebasing to a new version.

If you are new to `git` then take a look at [GitHub Desktop](https://desktop.github.com).

1. Fork the project with `git` or GitHub [(fork)](https://github.com/devine-dl/devine/fork).
2. Head inside the root `devine` directory and create a `services` directory.
3. Within that `services` folder you may install or create service code.

If you would rather keep your fork private, mirror the project manually instead of using the GitHub Fork button:

1. Create a new Private GitHub Repository without README, .gitignore, or LICENSE files.
   Note: Do NOT use the GitHub Fork button, or you will not be able to make the repository private.
2. `git clone <your repo url here>` and then `cd` into it.
3. `git remote add upstream https://github.com/devine-dl/devine`
4. `git remote set-url --push upstream DISABLE`
5. `git fetch upstream`
6. `git pull upstream master`
7. (optionally) Hard reset to the latest stable version by tag. E.g., `git reset --hard v1.0.0`.

> [!TIP]
> A huge benefit with this method is that you can also sync dependencies used by your own Services as well!
> Just use `poetry` to add or modify dependencies appropriately and commit the changed `poetry.lock`.
> However, if the core project also has dependency changes your `poetry.lock` changes will conflict and you
> will need to learn how to do conflict resolution/rebasing. It is worth it though!

You may now commit your Services or other changes within that `services` folder to your forked repository.
Once committed all your other team members can easily pull changes as well as push new changes.
#### via Cloud storage (symlink)

This is a great option for those who wish to do something like the forking method, but without the need of constantly
rebasing their fork to the latest version, or who may not care what changes happened or when and just want changes
synced across a team. Overall less knowledge of git is required, but each user would need to do a bit of symlinking
compared to the fork method.

This also opens up the ways you can host or collaborate on Service code. As long as you can receive a directory that
updates with just the services within it, then you're good to go. Options could include an FTP server, Shared Google
Drive, a non-fork repository with just services, and more.

1. Follow the steps in the [Copy & Paste method](#via-copy--paste) to create the `services` folder.
2. Use any Cloud Source that gives you a pseudo-directory to access the Service files like a normal drive. E.g., rclone,
   Google Drive Desktop (aka File Stream), Air Drive, CloudPool, etc.
3. [Symlink](https://en.wikipedia.org/wiki/Symbolic_link) the `services` directory from your Cloud Source to the new
   `services` folder you made (you may need to delete it first). You should end up with a `services` folder containing
   services, not `services/services`.

You have to make sure the original folder keeps receiving and downloading/streaming those changes, or that you keep git
pulling those changes. You must also make sure that the version of devine you have locally is supported by the Service
code.

> [!NOTE]
> If you're using a cloud source that downloads the file once it gets opened, you don't have to worry as those will
> automatically download. Python importing the files triggers the download to begin. However, it may cause a delay on
> startup.
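
As an illustrative sketch of the symlinking step using Python's standard library (the paths are hypothetical; on
Windows, creating symlinks may require Developer Mode or Administrator rights):

```python
from pathlib import Path

cloud_services = Path("G:/My Drive/devine-services")  # hypothetical cloud-synced folder
local_services = Path("devine/services")              # services folder inside your devine checkout

if local_services.is_dir() and not local_services.is_symlink():
    local_services.rmdir()  # remove the empty placeholder folder first
local_services.symlink_to(cloud_services, target_is_directory=True)
```
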
## Cookies & Credentials

Devine can authenticate with Services using Cookies and/or Credentials. Credentials are stored in the config, and
Cookies are stored in the data directory. You can find the location of these by running `devine env info`.

Just like a streaming service, devine associates both a cookie and/or credential as a Profile. You can associate up to
one cookie and one credential per-profile, depending on which (or both) are needed by the Service. This system allows
you to configure multiple accounts per-service and choose which to use at any time.

To add a Credential to a Service, take a look at the [Credentials Config](CONFIG.md#credentials-dictstr-strlistdict)
for information on setting up one or more credentials per-service. You can add one or more Credential per-service and
use `-p/--profile` to choose which Credential to use.

You can also manage profiles with `devine auth --help`. E.g., to add a new John profile to Netflix with a Cookie and
Credential, take a look at the following CLI call,
`devine auth add John NF --cookie "C:\Users\John\Downloads\netflix.com.txt" --credential "john@gmail.com:pass123"`
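
That call moves the Cookie file into the Cookies directory (e.g., `/Cookies/NF/john.txt`) and writes the Credential
into your config, roughly like the following sketch (profile names are stored lower-cased):

```yaml
credentials:
  NF:
    john: john@gmail.com:pass123
```
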
To add a Cookie to a Service manually, use a Cookie exporter extension to make a `cookies.txt` file and move it into
the Cookies directory. You must rename the `cookies.txt` file to that of the Service tag (case-sensitive), e.g.,
`NF.txt`. You can also place it in a Service Cookie folder, e.g., `/Cookies/NF/default.txt` or `/Cookies/NF/.txt`.

You can also delete a credential with `devine auth delete`. E.g., to delete the cookie for John that we just added, run
`devine auth delete John NF --cookie`. Take a look at `devine auth delete --help` for more information.

You can add multiple Cookies to the `/Cookies/NF/` folder with their own unique name and then use `-p/--profile` to
choose which one to use. E.g., `/Cookies/NF/sam.txt` and then use it with `--profile sam`. If you make a Service Cookie
folder without a `.txt` or `default.txt`, but with another file, then no Cookies will be loaded unless you use
`-p/--profile` as shown. This allows you to opt in to authentication at whim.

> [!TIP]
> - If your Service does not require Authentication, then do not define any Credential or Cookie for that Service.
> - You can use both Cookies and Credentials at the same time, so long as your Service takes and uses both.
> - If you are using profiles, then make sure you use the same name on the Credential name and Cookie file name when
>   using `-p/--profile`.

> [!WARNING]
> Profile names are case-sensitive and unique per-service. They have no arbitrary character or length limit, but for
> convenience's sake I don't recommend using any special characters as your terminal may get confused.
### Cookie file format and Extensions
Cookies must be in the standard Netscape cookies file format.
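
For reference, a Netscape-format cookie file is a set of tab-separated lines like the following (values here are
purely illustrative):

```text
# Netscape HTTP Cookie File
.example.com	TRUE	/	TRUE	1735689600	session_id	abc123
```
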
Recommended Cookie exporter extensions:
@ -297,7 +195,7 @@ Any other extension that exports to the standard Netscape format should theoreti
> versions floating around (usually just older versions of the extension), but since there are safe alternatives I'd
> just avoid it altogether. Source: https://reddit.com/r/youtubedl/comments/10ar7o7
## Widevine Provisions
A Widevine Provision is needed for acquiring licenses containing decryption keys for DRM-protected content.
They are not needed if you will be using devine on DRM-free services. Please do not ask for any Widevine Device Files,
@ -313,9 +211,50 @@ From here you can then set which WVD to use for each specific service. It's best
provision where possible.
An alternative would be using a pywidevine Serve-compliant CDM API. Of course, you would need to know someone who is
serving one, and they would need to give you access. Take a look at the [remote_cdm](CONFIG.md#remotecdm-listdict)
config option for setup information. For further information on it see the pywidevine repository.
## Usage
First, take a look at `devine --help` for a full help document, listing all commands available and giving you more
information on what can be done with Devine.
Here's a checklist on what I recommend getting started with, in no particular order,
- [ ] Add [Services](#services), these will be used in `devine dl`.
- [ ] Add [Profiles](#cookies--credentials), these are your cookies and credentials.
- [ ] Add [Widevine Provisions](#widevine-provisions), also known as CDMs, these are used for DRM-protected content.
- [ ] Set your Group Tag, the text at the end of the final filename, e.g., `devine cfg tag NOGRP` for ...-NOGRP.
- [ ] Set Up a Local Key Vault, take a look at the [Key Vaults Config](CONFIG.md#keyvaults--listdict--).
And here's some more advanced things you could take a look at,
- [ ] Setting default Headers that the Request Session uses.
- [ ] Setting default Profiles and CDM Provisions to use for services.
- [ ] NordVPN and Hola Proxy Providers for automatic proxies.
- [ ] Hosting and/or Using Remote Key Vaults.
- [ ] Serving and/or Using Remote CDM Provisions.
Documentation on the config is available in the [CONFIG.md](CONFIG.md) file; it has a lot of handy settings.
If you start to get sick of putting something in your CLI call, then I recommend taking a look at it!
## Development
The following steps are instructions on downloading, preparing, and running the code under a [Poetry] environment.
You can skip steps 3-5 with a simple `pip install .` call instead, but you miss out on a wide array of benefits.
1. `git clone https://github.com/devine-dl/devine`
2. `cd devine`
3. (optional) `poetry config virtualenvs.in-project true`
4. `poetry install`
5. `poetry run devine --help`
As seen in Step 5, running the `devine` executable is somewhat different to a normal PIP installation.
See [Poetry's Docs] on various ways of making calls under the virtual-environment.
[Poetry]: <https://python-poetry.org>
[Poetry's Docs]: <https://python-poetry.org/docs/basic-usage/#using-your-virtual-environment>
## End User License Agreement
Devine and its community pages should be treated with the same kindness as other projects.
@ -330,23 +269,31 @@ Please refrain from spam or asking for questions that infringe upon a Service's
back immediately.
5. Be kind to one another and do not single anyone out.
## Disclaimer
1. This project requires a valid Google-provisioned Private/Public Keypair and a Device-specific Client Identification
blob; neither of which are included with this project.
2. Public testing provisions are available and provided by Google to use for testing projects such as this one.
3. License Servers have the ability to block requests from any provision, and are likely already blocking test provisions
   on production endpoints. Therefore, they have the ability to block the usage of Devine by themselves.
4. This project does not condone piracy or any action against the terms of the Service or DRM system.
5. All efforts in this project have been the result of Reverse-Engineering and Publicly available research.
## Credit
- The awesome community for their shared research and insight into the Widevine Protocol and Key Derivation.
## Contributors
<a href="https://github.com/rlaphoenix"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/17136956?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="rlaphoenix"/></a>
<a href="https://github.com/mnmll"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/22942379?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="mnmll"/></a>
<a href="https://github.com/shirt-dev"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/2660574?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="shirt-dev"/></a>
<a href="https://github.com/nyuszika7h"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/482367?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="nyuszika7h"/></a>
<a href="https://github.com/bccornfo"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/98013276?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="bccornfo"/></a>
<a href="https://github.com/Arias800"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/24809312?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="Arias800"/></a>
<a href="https://github.com/varyg1001"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/88599103?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="varyg1001"/></a>
<a href="https://github.com/Hollander-1908"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/93162595?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="Hollander-1908"/></a>
<a href="https://github.com/Shivelight"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/20620780?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="Shivelight"/></a>
<a href="https://github.com/rlaphoenix"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/17136956?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/mnmll"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/22942379?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/shirt-dev"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/2660574?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/nyuszika7h"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/482367?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/bccornfo"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/98013276?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/Arias800"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/24809312?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/varyg1001"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/88599103?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
<a href="https://github.com/Hollander-1908"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/93162595?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
## Licensing
This software is licensed under the terms of [GNU General Public License, Version 3.0](LICENSE).
You can find a copy of the license in the LICENSE file in the root folder.
* * *
© rlaphoenix 2019-2024
View File
@ -1,71 +0,0 @@
# git-cliff ~ default configuration file
# https://git-cliff.org/docs/configuration
[changelog]
header = """
# Changelog\n
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
Versions [3.0.0] and older use a format based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
but versions thereafter use a custom changelog format using [git-cliff](https://git-cliff.org).\n
"""
body = """
{% if version -%}
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else -%}
## [Unreleased]
{% endif -%}
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | striptags | trim | upper_first }}
{% for commit in commits %}
- {% if commit.scope %}*{{ commit.scope }}*: {% endif %}\
{% if commit.breaking %}[**breaking**] {% endif %}\
{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
footer = """
{% for release in releases -%}
{% if release.version -%}
{% if release.previous.version -%}
[{{ release.version | trim_start_matches(pat="v") }}]: \
https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\
/compare/{{ release.previous.version }}..{{ release.version }}
{% endif -%}
{% else -%}
[unreleased]: https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }}\
/compare/{{ release.previous.version }}..HEAD
{% endif -%}
{% endfor %}
"""
trim = true
postprocessors = [
# { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
]
[git]
conventional_commits = true
filter_unconventional = true
split_commits = false
commit_preprocessors = []
commit_parsers = [
{ message = "^feat", group = "<!-- 0 -->Features" },
{ message = "^fix|revert", group = "<!-- 1 -->Bug Fixes" },
{ message = "^docs", group = "<!-- 2 -->Documentation" },
{ message = "^style", skip = true },
{ message = "^refactor", group = "<!-- 3 -->Changes" },
{ message = "^perf", group = "<!-- 4 -->Performance Improvements" },
{ message = "^test", skip = true },
{ message = "^build", group = "<!-- 5 -->Builds" },
{ message = "^ci", skip = true },
{ message = "^chore", skip = true },
]
protect_breaking_commits = false
filter_commits = false
# tag_pattern = "v[0-9].*"
# skip_tags = ""
# ignore_tags = ""
topo_order = false
sort_commits = "oldest"
266
devine/commands/auth.py Normal file
View File
@ -0,0 +1,266 @@
import logging
import shutil
import sys
import tkinter.filedialog
from collections import defaultdict
from pathlib import Path
from typing import Optional
import click
from ruamel.yaml import YAML
from devine.core.config import Config, config
from devine.core.constants import context_settings
from devine.core.credential import Credential
@click.group(
short_help="Manage cookies and credentials for profiles of services.",
context_settings=context_settings)
@click.pass_context
def auth(ctx: click.Context) -> None:
"""Manage cookies and credentials for profiles of services."""
ctx.obj = logging.getLogger("auth")
@auth.command(
name="list",
short_help="List profiles and their state for a service or all services.",
context_settings=context_settings)
@click.argument("service", type=str, required=False)
@click.pass_context
def list_(ctx: click.Context, service: Optional[str] = None) -> None:
"""
List profiles and their state for a service or all services.
\b
Profile and Service names are case-insensitive.
"""
log = ctx.obj
service_f = service
auth_data: dict[str, dict[str, list]] = defaultdict(lambda: defaultdict(list))
if config.directories.cookies.exists():
for cookie_dir in config.directories.cookies.iterdir():
service = cookie_dir.name
for cookie in cookie_dir.glob("*.txt"):
if cookie.stem not in auth_data[service]:
auth_data[service][cookie.stem].append("Cookie")
for service, credentials in config.credentials.items():
for profile in credentials:
auth_data[service][profile].append("Credential")
for service, profiles in dict(sorted(auth_data.items())).items(): # type:ignore
if service_f and service != service_f.upper():
continue
log.info(service)
for profile, authorizations in dict(sorted(profiles.items())).items():
log.info(f' "{profile}": {", ".join(authorizations)}')
@auth.command(
short_help="View profile cookies and credentials for a service.",
context_settings=context_settings)
@click.argument("profile", type=str)
@click.argument("service", type=str)
@click.pass_context
def view(ctx: click.Context, profile: str, service: str) -> None:
"""
View profile cookies and credentials for a service.
\b
Profile and Service names are case-sensitive.
"""
log = ctx.obj
service_f = service
profile_f = profile
found = False
for cookie_dir in config.directories.cookies.iterdir():
if cookie_dir.name == service_f:
for cookie in cookie_dir.glob("*.txt"):
if cookie.stem == profile_f:
log.info(f"Cookie: {cookie}")
log.debug(cookie.read_text(encoding="utf8").strip())
found = True
break
for service, credentials in config.credentials.items():
if service == service_f:
for profile, credential in credentials.items():
if profile == profile_f:
log.info(f"Credential: {':'.join(list(credential))}")
found = True
break
if not found:
raise click.ClickException(
f"Could not find Profile '{profile_f}' for Service '{service_f}'."
f"\nThe profile and service values are case-sensitive."
)
@auth.command(
short_help="Check what profile is used by services.",
context_settings=context_settings)
@click.argument("service", type=str, required=False)
@click.pass_context
def status(ctx: click.Context, service: Optional[str] = None) -> None:
"""
Check what profile is used by services.
\b
Service names are case-sensitive.
"""
log = ctx.obj
found_profile = False
for service_, profile in config.profiles.items():
if not service or service_.upper() == service.upper():
log.info(f"{service_}: {profile or '--'}")
found_profile = True
if not found_profile:
log.info(f"No profile has been explicitly set for {service}")
default = config.profiles.get("default", "not set")
log.info(f"The default profile is {default}")
@auth.command(
short_help="Delete a profile and all of its authorization from a service.",
context_settings=context_settings)
@click.argument("profile", type=str)
@click.argument("service", type=str)
@click.option("--cookie", is_flag=True, default=False, help="Only delete the cookie.")
@click.option("--credential", is_flag=True, default=False, help="Only delete the credential.")
@click.pass_context
def delete(ctx: click.Context, profile: str, service: str, cookie: bool, credential: bool):
"""
Delete a profile and all of its authorization from a service.
\b
By default this does remove both Cookies and Credentials.
You may remove only one of them with --cookie or --credential.
\b
Profile and Service names are case-sensitive.
Comments may be removed from config!
"""
log = ctx.obj
service_f = service
profile_f = profile
found = False
if not credential:
for cookie_dir in config.directories.cookies.iterdir():
if cookie_dir.name == service_f:
for cookie_ in cookie_dir.glob("*.txt"):
if cookie_.stem == profile_f:
cookie_.unlink()
log.info(f"Deleted Cookie: {cookie_}")
found = True
break
if not cookie:
for key, credentials in config.credentials.items():
if key == service_f:
for profile, credential_ in credentials.items():
if profile == profile_f:
config_path = Config._Directories.user_configs / Config._Filenames.root_config
yaml, data = YAML(), None
yaml.default_flow_style = False
data = yaml.load(config_path)
del data["credentials"][key][profile_f]
yaml.dump(data, config_path)
log.info(f"Deleted Credential: {credential_}")
found = True
break
if not found:
raise click.ClickException(
f"Could not find Profile '{profile_f}' for Service '{service_f}'."
f"\nThe profile and service values are case-sensitive."
)
@auth.command(
short_help="Add a Credential and/or Cookies to an existing or new profile for a service.",
context_settings=context_settings)
@click.argument("profile", type=str)
@click.argument("service", type=str)
@click.option("--cookie", type=str, default=None, help="Direct path to Cookies to add.")
@click.option("--credential", type=str, default=None, help="Direct Credential string to add.")
@click.pass_context
def add(ctx: click.Context, profile: str, service: str, cookie: Optional[str] = None, credential: Optional[str] = None):
"""
Add a Credential and/or Cookies to an existing or new profile for a service.
\b
Cancel the Open File dialogue when presented if you do not wish to provide
cookies. The Credential should be in `Username:Password` form. The username
may be an email. If you do not wish to add a Credential, just hit enter.
\b
Profile and Service names are case-sensitive!
Comments may be removed from config!
"""
log = ctx.obj
service = service.upper()
profile = profile.lower()
if cookie:
cookie = Path(cookie)
if not cookie.is_file():
log.error(f"No such file or directory: {cookie}.")
sys.exit(1)
else:
print("Opening File Dialogue, select a Cookie file to import.")
cookie = tkinter.filedialog.askopenfilename(
title="Select a Cookie file (Cancel to skip)",
filetypes=[("Cookies", "*.txt"), ("All files", "*.*")]
)
if cookie:
cookie = Path(cookie)
else:
log.info("Skipped adding a Cookie...")
if credential:
try:
credential = Credential.loads(credential)
except ValueError as e:
raise click.ClickException(str(e))
else:
credential = input("Credential: ")
if credential:
try:
credential = Credential.loads(credential)
except ValueError as e:
raise click.ClickException(str(e))
else:
log.info("Skipped adding a Credential...")
if cookie:
final_path = (config.directories.cookies / service / profile).with_suffix(".txt")
final_path.parent.mkdir(parents=True, exist_ok=True)
if final_path.exists():
log.error(f"A Cookie file for the Profile {profile} on {service} already exists.")
sys.exit(1)
shutil.move(cookie, final_path)
log.info(f"Moved Cookie file to: {final_path}")
if credential:
config_path = Config._Directories.user_configs / Config._Filenames.root_config
yaml, data = YAML(), None
yaml.default_flow_style = False
data = yaml.load(config_path)
if not data:
data = {}
if "credentials" not in data:
data["credentials"] = {}
if service not in data["credentials"]:
data["credentials"][service] = {}
data["credentials"][service][profile] = credential.dumps()
yaml.dump(data, config_path)
log.info(f"Added Credential: {credential}")
File diff suppressed because it is too large
View File
@ -1,166 +0,0 @@
from __future__ import annotations
import logging
import re
import sys
from typing import Any, Optional
import click
import yaml
from rich.padding import Padding
from rich.rule import Rule
from rich.tree import Tree
from devine.commands.dl import dl
from devine.core.config import config
from devine.core.console import console
from devine.core.constants import context_settings
from devine.core.proxies import Basic, Hola, NordVPN
from devine.core.service import Service
from devine.core.services import Services
from devine.core.utilities import get_binary_path
from devine.core.utils.click_types import ContextData
from devine.core.utils.collections import merge_dict
@click.command(
short_help="Search for titles from a Service.",
cls=Services,
context_settings=dict(
**context_settings,
token_normalize_func=Services.get_tag
))
@click.option("-p", "--profile", type=str, default=None,
help="Profile to use for Credentials and Cookies (if available).")
@click.option("--proxy", type=str, default=None,
help="Proxy URI to use. If a 2-letter country is provided, it will try get a proxy from the config.")
@click.option("--no-proxy", is_flag=True, default=False,
help="Force disable all proxy use.")
@click.pass_context
def search(
ctx: click.Context,
no_proxy: bool,
profile: Optional[str] = None,
proxy: Optional[str] = None
):
if not ctx.invoked_subcommand:
raise ValueError("A subcommand to invoke was not specified, the main code cannot continue.")
log = logging.getLogger("search")
service = Services.get_tag(ctx.invoked_subcommand)
profile = profile
if profile:
log.info(f"Using profile: '{profile}'")
with console.status("Loading Service Config...", spinner="dots"):
service_config_path = Services.get_path(service) / config.filenames.config
if service_config_path.exists():
service_config = yaml.safe_load(service_config_path.read_text(encoding="utf8"))
log.info("Service Config loaded")
else:
service_config = {}
merge_dict(config.services.get(service), service_config)
proxy_providers = []
if no_proxy:
ctx.params["proxy"] = None
else:
with console.status("Loading Proxy Providers...", spinner="dots"):
if config.proxy_providers.get("basic"):
proxy_providers.append(Basic(**config.proxy_providers["basic"]))
if config.proxy_providers.get("nordvpn"):
proxy_providers.append(NordVPN(**config.proxy_providers["nordvpn"]))
if get_binary_path("hola-proxy"):
proxy_providers.append(Hola())
for proxy_provider in proxy_providers:
log.info(f"Loaded {proxy_provider.__class__.__name__}: {proxy_provider}")
if proxy:
requested_provider = None
if re.match(r"^[a-z]+:.+$", proxy, re.IGNORECASE):
# requesting proxy from a specific proxy provider
requested_provider, proxy = proxy.split(":", maxsplit=1)
if re.match(r"^[a-z]{2}(?:\d+)?$", proxy, re.IGNORECASE):
proxy = proxy.lower()
with console.status(f"Getting a Proxy to {proxy}...", spinner="dots"):
if requested_provider:
proxy_provider = next((
x
for x in proxy_providers
if x.__class__.__name__.lower() == requested_provider
), None)
if not proxy_provider:
log.error(f"The proxy provider '{requested_provider}' was not recognised.")
sys.exit(1)
proxy_uri = proxy_provider.get_proxy(proxy)
if not proxy_uri:
log.error(f"The proxy provider {requested_provider} had no proxy for {proxy}")
sys.exit(1)
proxy = ctx.params["proxy"] = proxy_uri
log.info(f"Using {proxy_provider.__class__.__name__} Proxy: {proxy}")
else:
for proxy_provider in proxy_providers:
proxy_uri = proxy_provider.get_proxy(proxy)
if proxy_uri:
proxy = ctx.params["proxy"] = proxy_uri
log.info(f"Using {proxy_provider.__class__.__name__} Proxy: {proxy}")
break
else:
log.info(f"Using explicit Proxy: {proxy}")
ctx.obj = ContextData(
config=service_config,
cdm=None,
proxy_providers=proxy_providers,
profile=profile
)
@search.result_callback()
def result(service: Service, profile: Optional[str] = None, **_: Any) -> None:
log = logging.getLogger("search")
service_tag = service.__class__.__name__
with console.status("Authenticating with Service...", spinner="dots"):
cookies = dl.get_cookie_jar(service_tag, profile)
credential = dl.get_credentials(service_tag, profile)
service.authenticate(cookies, credential)
if cookies or credential:
log.info("Authenticated with Service")
search_results = Tree("Search Results", hide_root=True)
with console.status("Searching...", spinner="dots"):
for result in service.search():
result_text = f"[bold text]{result.title}[/]"
if result.url:
result_text = f"[link={result.url}]{result_text}[/link]"
if result.label:
result_text += f" [pink]{result.label}[/]"
if result.description:
result_text += f"\n[text2]{result.description}[/]"
result_text += f"\n[bright_black]id: {result.id}[/]"
search_results.add(result_text + "\n")
# update cookies
cookie_file = dl.get_cookie_path(service_tag, profile)
if cookie_file:
dl.save_cookies(cookie_file, service.session.cookies)
console.print(Padding(
Rule(f"[rule.text]{len(search_results.children)} Search Results"),
(1, 2)
))
if search_results.children:
console.print(Padding(
search_results,
(0, 5)
))
else:
console.print(Padding(
"[bold text]No matches[/]\n[bright_black]Please check spelling and search again....[/]",
(0, 5)
))
View File
@ -1,12 +1,13 @@
import logging
import shutil
from pathlib import Path
from typing import Optional
import click
import yaml
import shutil
from google.protobuf.json_format import MessageToDict
from pywidevine.device import Device, DeviceTypes
from pywidevine.device import Device
from pywidevine.license_protocol_pb2 import FileHashes
from rich.prompt import Prompt
from unidecode import UnidecodeError, unidecode
@ -38,7 +39,7 @@ def add(paths: list[Path]) -> None:
else:
# TODO: Check for and log errors
_ = Device.load(path) # test if WVD is valid
shutil.move(path, dst_path)
shutil.move(path, config.directories.wvds)
log.info(f"Added {path.stem}")
@ -182,7 +183,7 @@ def dump(wvd_paths: list[Path], out_dir: Path) -> None:
@click.argument("private_key", type=Path)
@click.argument("client_id", type=Path)
@click.argument("file_hashes", type=Path, required=False)
@click.option("-t", "--type", "type_", type=click.Choice([x.name for x in DeviceTypes], case_sensitive=False),
@click.option("-t", "--type", "type_", type=click.Choice([x.name for x in Device.Types], case_sensitive=False),
default="Android", help="Device Type")
@click.option("-l", "--level", type=click.IntRange(1, 3), default=1, help="Device Security Level")
@click.option("-o", "--output", type=Path, default=None, help="Output Directory")
@ -223,7 +224,7 @@ def new(
raise click.UsageError("file_hashes: Not a path to a file, or it doesn't exist.", ctx)
device = Device(
type_=DeviceTypes[type_.upper()],
type_=Device.Types[type_.upper()],
security_level=level,
flags=None,
private_key=private_key.read_bytes(),
View File
@ -1 +1 @@
__version__ = "3.1.0"
__version__ = "2.1.0"
View File
@ -5,11 +5,11 @@ from pathlib import Path
import click
import urllib3
from rich import traceback
from rich.console import Group
from rich.padding import Padding
from rich.text import Text
from urllib3.exceptions import InsecureRequestWarning
from rich import traceback
from rich.padding import Padding
from devine.core import __version__
from devine.core.commands import Commands
@ -27,7 +27,7 @@ LOGGING_PATH = None
@click.option("--log", "log_path", type=Path, default=config.directories.logs / config.filenames.log,
help="Log path (or filename). Path can contain the following f-string args: {name} {time}.")
def main(version: bool, debug: bool, log_path: Path) -> None:
"""Devine—Modular Movie, TV, and Music Archival Software."""
"""Devine—Open-Source Movie, TV, and Music Downloading Solution."""
logging.basicConfig(
level=logging.DEBUG if debug else logging.INFO,
format="%(message)s",
@ -66,7 +66,7 @@ def main(version: bool, debug: bool, log_path: Path) -> None:
style="ascii.art"
),
f"v[repr.number]{__version__}[/] Copyright © 2019-{datetime.now().year} rlaphoenix",
" [bright_blue]https://github.com/devine-dl/devine[/]"
f" [bright_blue]https://github.com/devine-dl/devine[/]"
),
(1, 21, 1, 20),
expand=True
View File
@ -40,4 +40,4 @@ class Commands(click.MultiCommand):
# Hide direct access to commands from quick import form, they shouldn't be accessed directly
__all__ = ("Commands",)
__ALL__ = (Commands,)
View File
@ -2,7 +2,7 @@ from __future__ import annotations
import tempfile
from pathlib import Path
from typing import Any, Optional
from typing import Any
import yaml
from appdirs import AppDirs
@ -39,8 +39,6 @@ class Config:
self.dl: dict = kwargs.get("dl") or {}
self.aria2c: dict = kwargs.get("aria2c") or {}
self.cdm: dict = kwargs.get("cdm") or {}
self.chapter_fallback_name: str = kwargs.get("chapter_fallback_name") or ""
self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {}
self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or []
self.credentials: dict = kwargs.get("credentials") or {}
@ -51,8 +49,6 @@ class Config:
continue
setattr(self.directories, name, Path(path).expanduser())
self.downloader = kwargs.get("downloader") or "requests"
self.filenames = self._Filenames()
for name, filename in (kwargs.get("filenames") or {}).items():
setattr(self.filenames, name, filename)
@ -61,6 +57,7 @@ class Config:
self.key_vaults: list[dict[str, Any]] = kwargs.get("key_vaults", [])
self.muxing: dict = kwargs.get("muxing") or {}
self.nordvpn: dict = kwargs.get("nordvpn") or {}
self.profiles: dict = kwargs.get("profiles") or {}
self.proxy_providers: dict = kwargs.get("proxy_providers") or {}
self.serve: dict = kwargs.get("serve") or {}
self.services: dict = kwargs.get("services") or {}
@ -76,35 +73,11 @@ class Config:
return cls(**yaml.safe_load(path.read_text(encoding="utf8")) or {})
def get_config_path() -> Optional[Path]:
"""
Get Path to Config from various locations.
# noinspection PyProtectedMember
config_path = Config._Directories.user_configs / Config._Filenames.root_config
if not config_path.is_file():
Config._Directories.user_configs.mkdir(parents=True, exist_ok=True)
config_path.write_text("")
config = Config.from_yaml(config_path)
Looks for a config file in the following folders in order:
1. The Devine Namespace Folder (e.g., %appdata%/Python/Python311/site-packages/devine)
2. The Parent Folder to the Devine Namespace Folder (e.g., %appdata%/Python/Python311/site-packages)
3. The AppDirs User Config Folder (e.g., %localappdata%/devine)
Returns None if no config file could be found.
"""
# noinspection PyProtectedMember
path = Config._Directories.namespace_dir / Config._Filenames.root_config
if not path.exists():
# noinspection PyProtectedMember
path = Config._Directories.namespace_dir.parent / Config._Filenames.root_config
if not path.exists():
# noinspection PyProtectedMember
path = Config._Directories.user_configs / Config._Filenames.root_config
if not path.exists():
path = None
return path
config_path = get_config_path()
if config_path:
config = Config.from_yaml(config_path)
else:
config = Config()
__all__ = ("config",)
__ALL__ = (config,)
View File
@ -134,12 +134,9 @@ class ComfyConsole(Console):
Args:
color_system (str, optional): The color system supported by your terminal,
either ``"standard"``, ``"256"`` or ``"truecolor"``. Leave as ``"auto"`` to autodetect.
force_terminal (Optional[bool], optional): Enable/disable terminal control codes, or None to auto-detect
terminal. Defaults to None.
force_jupyter (Optional[bool], optional): Enable/disable Jupyter rendering, or None to auto-detect Jupyter.
Defaults to None.
force_interactive (Optional[bool], optional): Enable/disable interactive mode, or None to auto-detect.
Defaults to None.
force_terminal (Optional[bool], optional): Enable/disable terminal control codes, or None to auto-detect terminal. Defaults to None.
force_jupyter (Optional[bool], optional): Enable/disable Jupyter rendering, or None to auto-detect Jupyter. Defaults to None.
force_interactive (Optional[bool], optional): Enable/disable interactive mode, or None to auto detect. Defaults to None.
soft_wrap (Optional[bool], optional): Set soft wrap default on print method. Defaults to False.
theme (Theme, optional): An optional style theme object, or ``None`` for default theme.
stderr (bool, optional): Use stderr rather than stdout if ``file`` is not specified. Defaults to False.
@ -148,7 +145,7 @@ class ComfyConsole(Console):
width (int, optional): The width of the terminal. Leave as default to auto-detect width.
height (int, optional): The height of the terminal. Leave as default to auto-detect height.
style (StyleType, optional): Style to apply to all output, or None for no style. Defaults to None.
no_color (Optional[bool], optional): Enabled no color mode, or None to auto-detect. Defaults to None.
no_color (Optional[bool], optional): Enabled no color mode, or None to auto detect. Defaults to None.
tab_size (int, optional): Number of spaces used to replace a tab character. Defaults to 8.
record (bool, optional): Boolean to enable recording of terminal output,
required to call :meth:`export_html`, :meth:`export_svg`, and :meth:`export_text`. Defaults to False.
@ -158,15 +155,13 @@ class ComfyConsole(Console):
highlight (bool, optional): Enable automatic highlighting. Defaults to True.
log_time (bool, optional): Boolean to enable logging of time by :meth:`log` methods. Defaults to True.
log_path (bool, optional): Boolean to enable the logging of the caller by :meth:`log`. Defaults to True.
log_time_format (Union[str, TimeFormatterCallable], optional): If ``log_time`` is enabled, either string for
strftime or callable that formats the time. Defaults to "[%X] ".
log_time_format (Union[str, TimeFormatterCallable], optional): If ``log_time`` is enabled, either string for strftime or callable that formats the time. Defaults to "[%X] ".
highlighter (HighlighterType, optional): Default highlighter.
legacy_windows (bool, optional): Enable legacy Windows mode, or ``None`` to auto-detect. Defaults to ``None``.
legacy_windows (bool, optional): Enable legacy Windows mode, or ``None`` to auto detect. Defaults to ``None``.
safe_box (bool, optional): Restrict box options that don't render on legacy Windows.
get_datetime (Callable[[], datetime], optional): Callable that gets the current time as a datetime.datetime
object (used by Console.log), or None for datetime.now.
get_time (Callable[[], time], optional): Callable that gets the current time in seconds, default uses
time.monotonic.
get_datetime (Callable[[], datetime], optional): Callable that gets the current time as a datetime.datetime object (used by Console.log),
or None for datetime.now.
get_time (Callable[[], time], optional): Callable that gets the current time in seconds, default uses time.monotonic.
"""
def __init__(
@ -362,4 +357,4 @@ console = ComfyConsole(
)
__all__ = ("ComfyLogRenderer", "ComfyRichHandler", "ComfyConsole", "console")
View File

@ -1,10 +1,21 @@
from threading import Event
from typing import TypeVar, Union
DOWNLOAD_CANCELLED = Event()
DOWNLOAD_LICENCE_ONLY = Event()
DRM_SORT_MAP = ["ClearKey", "Widevine"]
LANGUAGE_MUX_MAP = {
# List of language tags that cannot be used by mkvmerge and need replacements.
# Try get the replacement to be as specific locale-wise as possible.
# A bcp47 as the replacement is recommended.
"cmn": "zh",
"cmn-Hant": "zh-Hant",
"cmn-Hans": "zh-Hans",
"none": "und",
"yue": "zh-yue",
"yue-Hant": "zh-yue-Hant",
"yue-Hans": "zh-yue-Hans"
}
TERRITORY_MAP = {
"Hong Kong SAR China": "Hong Kong"
}
LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU
VIDEO_CODEC_MAP = {
"AVC": "H.264",
View File
@ -1,5 +1,4 @@
from .aria2c import aria2c
from .curl_impersonate import curl_impersonate
from .requests import requests
from .saldl import saldl
__all__ = ("aria2c", "curl_impersonate", "requests")
__ALL__ = (aria2c, saldl)
View File
@ -1,252 +1,163 @@
import os
import asyncio
import subprocess
import textwrap
import time
from functools import partial
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Callable, Generator, MutableMapping, Optional, Union
from urllib.parse import urlparse
from typing import Optional, Union
import requests
from Crypto.Random import get_random_bytes
from requests import Session
from requests.cookies import cookiejar_from_dict, get_cookie_header
from rich import filesize
from rich.text import Text
from devine.core.config import config
from devine.core.console import console
from devine.core.constants import DOWNLOAD_CANCELLED
from devine.core.utilities import get_binary_path, get_extension, get_free_port
from devine.core.utilities import get_binary_path, start_pproxy
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
"""Make a call to Aria2's JSON-RPC API."""
try:
rpc_res = caller(
json={
"jsonrpc": "2.0",
"id": get_random_bytes(16).hex(),
"method": method,
"params": [f"token:{secret}", *(params or [])]
}
).json()
if rpc_res.get("code"):
# wrap to console width - padding - '[Aria2c]: '
error_pretty = "\n ".join(textwrap.wrap(
f"RPC Error: {rpc_res['message']} ({rpc_res['code']})".strip(),
width=console.width - 20,
initial_indent=""
))
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
return rpc_res["result"]
except requests.exceptions.ConnectionError:
# absorb, process likely ended as it was calling RPC
return
def download(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
async def aria2c(
uri: Union[str, list[str]],
out: Path,
headers: Optional[dict] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None
) -> Generator[dict[str, Any], None, None]:
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
silent: bool = False,
segmented: bool = False,
progress: Optional[partial] = None,
*args: str
) -> int:
"""
Download files using Aria2(c).
https://aria2.github.io
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
elif not isinstance(max_workers, int):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
If multiple URLs are provided they will be downloaded in the provided order
to the output directory. They will not be merged together.
"""
if isinstance(uri, list) and len(uri) == 1:
uri = uri[0]
if isinstance(uri, list):
segmented = True
uri = "\n".join([
f"{url}\n"
f"\tdir={out}\n"
f"\tout={i:08}.mp4"
for i, url in enumerate(uri)
])
if out.is_file():
raise ValueError("Provided multiple segments to download, expecting directory path")
elif "\t" not in uri:
uri = f"{uri}\n" \
f"\tdir={out.parent}\n" \
f"\tout={out.name}"
executable = get_binary_path("aria2c", "aria2")
if not executable:
raise EnvironmentError("Aria2c executable not found...")
if proxy and not proxy.lower().startswith("http://"):
raise ValueError("Only HTTP proxies are supported by aria2(c)")
if cookies and not isinstance(cookies, CookieJar):
cookies = cookiejar_from_dict(cookies)
url_files = []
for i, url in enumerate(urls):
if isinstance(url, str):
url_data = {
"url": url
}
else:
url_data: dict[str, Any] = url
url_filename = filename.format(
i=i,
ext=get_extension(url_data["url"])
)
url_text = url_data["url"]
url_text += f"\n\tdir={output_dir}"
url_text += f"\n\tout={url_filename}"
if cookies:
mock_request = requests.Request(url=url_data["url"])
cookie_header = get_cookie_header(cookies, mock_request)
if cookie_header:
url_text += f"\n\theader=Cookie: {cookie_header}"
for key, value in url_data.items():
if key == "url":
continue
if key == "headers":
for header_name, header_value in value.items():
url_text += f"\n\theader={header_name}: {header_value}"
else:
url_text += f"\n\t{key}={value}"
url_files.append(url_text)
url_file = "\n".join(url_files)
rpc_port = get_free_port()
rpc_secret = get_random_bytes(16).hex()
rpc_uri = f"http://127.0.0.1:{rpc_port}/jsonrpc"
rpc_session = Session()
max_concurrent_downloads = int(config.aria2c.get("max_concurrent_downloads", max_workers))
max_connection_per_server = int(config.aria2c.get("max_connection_per_server", 1))
split = int(config.aria2c.get("split", 5))
file_allocation = config.aria2c.get("file_allocation", "prealloc")
if len(urls) > 1:
split = 1
file_allocation = "none"
arguments = [
# [Basic Options]
"--input-file", "-",
"--all-proxy", proxy or "",
"--continue=true",
# [Connection Options]
f"--max-concurrent-downloads={max_concurrent_downloads}",
f"--max-connection-per-server={max_connection_per_server}",
f"--split={split}", # each split uses their own connection
"--max-file-not-found=5", # counted towards --max-tries
"--max-tries=5",
"--retry-wait=2",
# [Advanced Options]
"-c", # Continue downloading a partially downloaded file
"--remote-time", # Retrieve timestamp of the remote file from the and apply if available
"-x", "16", # The maximum number of connections to one server for each download
"-j", "16", # The maximum number of parallel downloads for every static (HTTP/FTP) URL
"-s", ("1" if segmented else "16"), # Download a file using N connections
"--min-split-size", ("1024M" if segmented else "20M"), # effectively disable split if segmented
"--allow-overwrite=true",
"--auto-file-renaming=false",
"--console-log-level=warn",
"--download-result=default",
f"--file-allocation={file_allocation}",
"--summary-interval=0",
# [RPC Options]
"--enable-rpc=true",
f"--rpc-listen-port={rpc_port}",
f"--rpc-secret={rpc_secret}"
"--retry-wait", "2", # Set the seconds to wait between retries.
"--max-tries", "5",
"--max-file-not-found", "5",
"--summary-interval", "0",
"--file-allocation", [
config.aria2c.get("file_allocation", "prealloc"),
"none"
][segmented],
"--console-log-level", "warn",
"--download-result", ["hide", "default"][bool(progress)],
*args,
"-i", "-"
]
for header, value in (headers or {}).items():
if header.lower() == "cookie":
raise ValueError("You cannot set Cookies as a header manually, please use the `cookies` param.")
if header.lower() == "accept-encoding":
# we cannot set an allowed encoding, or it will return compressed
# and the code is not set up to uncompress the data
continue
if header.lower() == "referer":
arguments.extend(["--referer", value])
continue
if header.lower() == "user-agent":
arguments.extend(["--user-agent", value])
continue
arguments.extend(["--header", f"{header}: {value}"])
yield dict(total=len(urls))
if proxy:
if proxy.lower().split(":")[0] != "http":
# HTTPS proxies are not supported by aria2(c).
# Proxy the proxy via pproxy to access it as an HTTP proxy.
async with start_pproxy(proxy) as pproxy_:
return await aria2c(uri, out, headers, pproxy_, silent, segmented, progress, *args)
arguments += ["--all-proxy", proxy]
try:
p = subprocess.Popen(
[
executable,
*arguments
],
p = await asyncio.create_subprocess_exec(
executable,
*arguments,
stdin=subprocess.PIPE,
stdout=subprocess.DEVNULL
stdout=[subprocess.PIPE, subprocess.DEVNULL][silent]
)
p.stdin.write(url_file.encode())
p.stdin.write(uri.encode())
await p.stdin.drain()
p.stdin.close()
while p.poll() is None:
global_stats: dict[str, Any] = rpc(
caller=partial(rpc_session.post, url=rpc_uri),
secret=rpc_secret,
method="aria2.getGlobalStat"
) or {}
if p.stdout:
is_dl_summary = False
log_buffer = ""
while True:
try:
chunk = await p.stdout.readuntil(b"\r")
except asyncio.IncompleteReadError as e:
chunk = e.partial
if not chunk:
break
for line in chunk.decode().strip().splitlines():
if not line:
continue
if line.startswith("Download Results"):
# we know it's 100% downloaded, but let's use the avg dl speed value
is_dl_summary = True
elif line.startswith("[") and line.endswith("]"):
if progress and "%" in line:
# id, dledMiB/totalMiB(x%), CN:xx, DL:xxMiB, ETA:Xs
# eta may not always be available
data_parts = line[1:-1].split()
perc_parts = data_parts[1].split("(")
if len(perc_parts) == 2:
# might otherwise be e.g., 0B/0B, with no % symbol provided
progress(
total=100,
completed=int(perc_parts[1][:-2]),
downloaded=f"{data_parts[3].split(':')[1]}/s"
)
elif is_dl_summary and "OK" in line and "|" in line:
gid, status, avg_speed, path_or_uri = line.split("|")
progress(total=100, completed=100, downloaded=avg_speed.strip())
elif not is_dl_summary:
if "aria2 will resume download if the transfer is restarted" in line:
continue
if "If there are any errors, then see the log file" in line:
continue
log_buffer += f"{line.strip()}\n"
number_stopped = int(global_stats.get("numStoppedTotal", 0))
download_speed = int(global_stats.get("downloadSpeed", -1))
if number_stopped:
yield dict(completed=number_stopped)
if download_speed != -1:
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
stopped_downloads: list[dict[str, Any]] = rpc(
caller=partial(rpc_session.post, url=rpc_uri),
secret=rpc_secret,
method="aria2.tellStopped",
params=[0, 999999]
) or []
for dl in stopped_downloads:
if dl["status"] == "error":
used_uri = next(
uri["uri"]
for file in dl["files"]
if file["selected"] == "true"
for uri in file["uris"]
if uri["status"] == "used"
)
error = f"Download Error (#{dl['gid']}): {dl['errorMessage']} ({dl['errorCode']}), {used_uri}"
error_pretty = "\n ".join(textwrap.wrap(
error,
width=console.width - 20,
initial_indent=""
))
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
raise ValueError(error)
if number_stopped == len(urls):
rpc(
caller=partial(rpc_session.post, url=rpc_uri),
secret=rpc_secret,
method="aria2.shutdown"
if log_buffer:
log_buffer = log_buffer.rstrip()
refused_errors = (
"the target machine actively refused it",
"SSL/TLS handshake failure"
)
break
if segmented and any(x in log_buffer for x in refused_errors):
# likely too many connections
raise ConnectionRefusedError("Aria2 could not connect as the target machine actively refused it.")
time.sleep(1)
# wrap to console width - padding - '[Aria2c]: '
log_buffer = "\n ".join(textwrap.wrap(
log_buffer,
width=console.width - 20,
initial_indent=""
))
console.log(Text.from_ansi("\n[Aria2c]: " + log_buffer))
p.wait()
await p.wait()
if p.returncode != 0:
raise subprocess.CalledProcessError(p.returncode, arguments)
@ -259,96 +170,8 @@ def download(
# 0xC000013A is when it never got the chance to
raise KeyboardInterrupt()
raise
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILED")
raise
finally:
rpc(
caller=partial(rpc_session.post, url=rpc_uri),
secret=rpc_secret,
method="aria2.shutdown"
)
return p.returncode
def aria2c(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Aria2(c).
https://aria2.github.io
Yields the following download status updates while chunks are downloading:
- {total: 100} (100% download total)
- {completed: 1} (1% download progress out of 100%)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
The data is in the same format accepted by rich's progress.update() function.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum amount of threads to use for downloads. Defaults to
min(32,(cpu_count+4)). Use for the --max-concurrent-downloads option.
"""
if proxy and not proxy.lower().startswith("http://"):
# Only HTTP proxies are supported by aria2(c)
proxy = urlparse(proxy)
port = get_free_port()
username, password = get_random_bytes(8).hex(), get_random_bytes(8).hex()
local_proxy = f"http://{username}:{password}@localhost:{port}"
scheme = {
"https": "http+ssl",
"socks5h": "socks"
}.get(proxy.scheme, proxy.scheme)
remote_server = f"{scheme}://{proxy.hostname}"
if proxy.port:
remote_server += f":{proxy.port}"
if proxy.username or proxy.password:
remote_server += "#"
if proxy.username:
remote_server += proxy.username
if proxy.password:
remote_server += f":{proxy.password}"
p = subprocess.Popen(
[
"pproxy",
"-l", f"http://:{port}#{username}:{password}",
"-r", remote_server
],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
try:
yield from download(urls, output_dir, filename, headers, cookies, local_proxy, max_workers)
finally:
p.kill()
p.wait()
return
yield from download(urls, output_dir, filename, headers, cookies, proxy, max_workers)
__all__ = ("aria2c",)
__ALL__ = (aria2c,)
View File
@ -1,289 +0,0 @@
import math
import time
from concurrent import futures
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from curl_cffi.requests import Session
from rich import filesize
from devine.core.config import config
from devine.core.constants import DOWNLOAD_CANCELLED
from devine.core.utilities import get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
CHUNK_SIZE = 1024
PROGRESS_WINDOW = 5
BROWSER = config.curl_impersonate.get("browser", "chrome120")
def download(
url: str,
save_path: Path,
session: Optional[Session] = None,
**kwargs: Any
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Curl Impersonate.
https://github.com/lwthiker/curl-impersonate
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function. The
`downloaded` key is custom and is not natively accepted by all rich progress bars.
Parameters:
url: Web URL of a file to download.
save_path: The path to save the file to. If the save path's directory does not
exist then it will be made automatically.
session: The Requests or Curl-Impersonate Session to make HTTP requests with.
Useful to set Header, Cookie, and Proxy data. Connections are saved and
re-used with the session so long as the server keeps the connection alive.
kwargs: Any extra keyword arguments to pass to the session.get() call. Use this
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
if not session:
session = Session(impersonate=BROWSER)
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
save_dir.mkdir(parents=True, exist_ok=True)
if control_file.exists():
# consider the file corrupt if the control file exists
save_path.unlink(missing_ok=True)
control_file.unlink()
elif save_path.exists():
# if it exists, and no control file, then it should be safe
yield dict(
file_downloaded=save_path,
written=save_path.stat().st_size
)
# TODO: Design a control file format so we know how much of the file is missing
control_file.write_bytes(b"")
attempts = 1
try:
while True:
written = 0
download_sizes = []
last_speed_refresh = time.time()
try:
stream = session.get(url, stream=True, **kwargs)
stream.raise_for_status()
try:
content_length = int(stream.headers.get("Content-Length", "0"))
except ValueError:
content_length = 0
if content_length > 0:
yield dict(total=math.ceil(content_length / CHUNK_SIZE))
else:
# we have no data to calculate total chunks
yield dict(total=None) # indeterminate mode
with open(save_path, "wb") as f:
for chunk in stream.iter_content(chunk_size=CHUNK_SIZE):
download_size = len(chunk)
f.write(chunk)
written += download_size
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
yield dict(
file_downloaded=save_path,
written=written
)
break
except Exception as e:
save_path.unlink(missing_ok=True)
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
def curl_impersonate(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Curl Impersonate.
https://github.com/lwthiker/curl-impersonate
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function.
However, the `downloaded`, `file_downloaded` and `written` keys are custom and not
natively accepted by rich progress bars.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum amount of threads to use for downloads. Defaults to
min(32,(cpu_count+4)).
"""
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
urls = [
dict(
save_path=save_path,
**url
) if isinstance(url, dict) else dict(
url=url,
save_path=save_path
)
for i, url in enumerate(urls)
for save_path in [output_dir / filename.format(
i=i,
ext=get_extension(url["url"] if isinstance(url, dict) else url)
)]
]
session = Session(impersonate=BROWSER)
if headers:
headers = {
k: v
for k, v in headers.items()
if k.lower() != "accept-encoding"
}
session.headers.update(headers)
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({
"http": proxy.replace("https://", "http://"),
"https": proxy.replace("https://", "http://")
})
yield dict(total=len(urls))
download_sizes = []
last_speed_refresh = time.time()
with ThreadPoolExecutor(max_workers=max_workers) as pool:
for i, future in enumerate(futures.as_completed((
pool.submit(
download,
session=session,
**url
)
for url in urls
))):
file_path, download_size = None, None
try:
for status_update in future.result():
if status_update.get("file_downloaded") and status_update.get("written"):
file_path = status_update["file_downloaded"]
download_size = status_update["written"]
elif len(urls) == 1:
# these are per-chunk updates, only useful if it's one big file
yield status_update
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[yellow]CANCELLED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
else:
yield dict(file_downloaded=file_path)
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
if download_size: # no size == skipped dl
download_sizes.append(download_size)
if download_sizes and (time_since > PROGRESS_WINDOW or i == len(urls)):
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
__all__ = ("curl_impersonate",)

View File

@ -1,295 +0,0 @@
import math
import os
import time
from concurrent import futures
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from requests import Session
from requests.adapters import HTTPAdapter
from rich import filesize
from devine.core.constants import DOWNLOAD_CANCELLED
from devine.core.utilities import get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
CHUNK_SIZE = 1024
PROGRESS_WINDOW = 5
def download(
url: str,
save_path: Path,
session: Optional[Session] = None,
**kwargs: Any
) -> Generator[dict[str, Any], None, None]:
"""
Download a file using Python Requests.
https://requests.readthedocs.io
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function. The
`downloaded` key is custom and is not natively accepted by all rich progress bars.
Parameters:
url: Web URL of a file to download.
save_path: The path to save the file to. If the save path's directory does not
exist then it will be made automatically.
session: The Requests Session to make HTTP requests with. Useful to set Header,
Cookie, and Proxy data. Connections are saved and re-used with the session
so long as the server keeps the connection alive.
kwargs: Any extra keyword arguments to pass to the session.get() call. Use this
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
session = session or Session()
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
save_dir.mkdir(parents=True, exist_ok=True)
if control_file.exists():
# consider the file corrupt if the control file exists
save_path.unlink(missing_ok=True)
control_file.unlink()
elif save_path.exists():
# if it exists, and no control file, then it should be safe
yield dict(
file_downloaded=save_path,
written=save_path.stat().st_size
)
# TODO: Design a control file format so we know how much of the file is missing
control_file.write_bytes(b"")
attempts = 1
try:
while True:
written = 0
download_sizes = []
last_speed_refresh = time.time()
try:
stream = session.get(url, stream=True, **kwargs)
stream.raise_for_status()
try:
content_length = int(stream.headers.get("Content-Length", "0"))
except ValueError:
content_length = 0
if content_length > 0:
yield dict(total=math.ceil(content_length / CHUNK_SIZE))
else:
# we have no data to calculate total chunks
yield dict(total=None) # indeterminate mode
with open(save_path, "wb") as f:
for chunk in stream.iter_content(chunk_size=CHUNK_SIZE):
download_size = len(chunk)
f.write(chunk)
written += download_size
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
yield dict(
file_downloaded=save_path,
written=written
)
break
except Exception as e:
save_path.unlink(missing_ok=True)
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
def requests(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None
) -> Generator[dict[str, Any], None, None]:
"""
Download a file using Python Requests.
https://requests.readthedocs.io
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function.
However, the `downloaded`, `file_downloaded` and `written` keys are custom and not
natively accepted by rich progress bars.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum amount of threads to use for downloads. Defaults to
min(32,(cpu_count+4)).
"""
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
urls = [
dict(
save_path=save_path,
**url
) if isinstance(url, dict) else dict(
url=url,
save_path=save_path
)
for i, url in enumerate(urls)
for save_path in [output_dir / filename.format(
i=i,
ext=get_extension(url["url"] if isinstance(url, dict) else url)
)]
]
session = Session()
session.mount("https://", HTTPAdapter(
pool_connections=max_workers,
pool_maxsize=max_workers,
pool_block=True
))
session.mount("http://", session.adapters["https://"])
if headers:
headers = {
k: v
for k, v in headers.items()
if k.lower() != "accept-encoding"
}
session.headers.update(headers)
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({"all": proxy})
yield dict(total=len(urls))
download_sizes = []
last_speed_refresh = time.time()
with ThreadPoolExecutor(max_workers=max_workers) as pool:
for i, future in enumerate(futures.as_completed((
pool.submit(
download,
session=session,
**url
)
for url in urls
))):
file_path, download_size = None, None
try:
for status_update in future.result():
if status_update.get("file_downloaded") and status_update.get("written"):
file_path = status_update["file_downloaded"]
download_size = status_update["written"]
elif len(urls) == 1:
# these are per-chunk updates, only useful if it's one big file
yield status_update
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[yellow]CANCELLED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
else:
yield dict(file_downloaded=file_path, written=download_size)
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
if download_size: # no size == skipped dl
download_sizes.append(download_size)
if download_sizes and (time_since > PROGRESS_WINDOW or i == len(urls)):
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
__all__ = ("requests",)

View File

@ -0,0 +1,51 @@
import subprocess
from pathlib import Path
from typing import Optional, Union
from devine.core.utilities import get_binary_path
async def saldl(
uri: Union[str, list[str]],
out: Union[Path, str],
headers: Optional[dict] = None,
proxy: Optional[str] = None
) -> int:
out = Path(out)
if headers:
headers.update({k: v for k, v in headers.items() if k.lower() != "accept-encoding"})
executable = get_binary_path("saldl", "saldl-win64", "saldl-win32")
if not executable:
raise EnvironmentError("Saldl executable not found...")
arguments = [
executable,
# "--no-status",
"--skip-TLS-verification",
"--resume",
"--merge-in-order",
"-c8",
"--auto-size", "1",
"-D", str(out.parent),
"-o", out.name
]
if headers:
arguments.extend([
"--custom-headers",
"\r\n".join([f"{k}: {v}" for k, v in headers.items()])
])
if proxy:
arguments.extend(["--proxy", proxy])
if isinstance(uri, list):
raise ValueError("Saldl code does not yet support multiple uri (e.g. segmented) downloads.")
arguments.append(uri)
return subprocess.check_call(arguments)
__ALL__ = (saldl,)
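A hypothetical invocation of the coroutine above; it needs a saldl binary on PATH, and the URL and output path are placeholders. Although declared async, the body contains no awaits, so asyncio.run() effectively executes it synchronously.

import asyncio

return_code = asyncio.run(saldl(
    uri="https://example.com/file.bin",   # placeholder URL
    out="downloads/file.bin",
    headers={"User-Agent": "smartexoplayer/1.1.0 (Linux;Android 8.0.0) ExoPlayerLib/2.13.3"}
))
print("saldl exited with", return_code)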

View File

@ -6,4 +6,4 @@ from devine.core.drm.widevine import Widevine
DRM_T = Union[ClearKey, Widevine]
__all__ = ("ClearKey", "Widevine", "DRM_T")
__ALL__ = (ClearKey, Widevine, DRM_T)

View File

@ -6,10 +6,10 @@ from pathlib import Path
from typing import Optional, Union
from urllib.parse import urljoin
import requests
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import pad, unpad
from m3u8.model import Key
from requests import Session
class ClearKey:
@ -58,33 +58,14 @@ class ClearKey:
shutil.move(decrypted_path, path)
@classmethod
def from_m3u_key(cls, m3u_key: Key, session: Optional[Session] = None) -> ClearKey:
"""
Load a ClearKey from an M3U(8) Playlist's EXT-X-KEY.
Parameters:
m3u_key: A Key object parsed from a m3u(8) playlist using
the `m3u8` library.
session: Optional session used to request external URIs with.
Useful to set headers, proxies, cookies, and so forth.
"""
def from_m3u_key(cls, m3u_key: Key, proxy: Optional[str] = None) -> ClearKey:
if not isinstance(m3u_key, Key):
raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}")
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not a {type(session)}")
if not m3u_key.method.startswith("AES"):
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")
if not m3u_key.uri:
raise ValueError("No URI in M3U Key, unable to get Key.")
if not session:
session = Session()
if not session.headers.get("User-Agent"):
# commonly needed default for HLS playlists
session.headers["User-Agent"] = "smartexoplayer/1.1.0 (Linux;Android 8.0.0) ExoPlayerLib/2.13.3"
if m3u_key.uri.startswith("data:"):
media_types, data = m3u_key.uri[5:].split(",")
media_types = media_types.split(";")
@ -93,7 +74,13 @@ class ClearKey:
key = data
else:
url = urljoin(m3u_key.base_uri, m3u_key.uri)
res = session.get(url)
res = requests.get(
url=url,
headers={
"User-Agent": "smartexoplayer/1.1.0 (Linux;Android 8.0.0) ExoPlayerLib/2.13.3"
},
proxies={"all": proxy} if proxy else None
)
res.raise_for_status()
if not res.content:
raise EOFError("Unexpected Empty Response by M3U Key URI.")
@ -109,4 +96,4 @@ class ClearKey:
return cls(key=key, iv=iv)
__all__ = ("ClearKey",)
__ALL__ = (ClearKey,)
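A sketch only (placeholder playlist URL): load a playlist with the m3u8 library and build a ClearKey from its first AES-128 EXT-X-KEY. The two sides of this diff disagree on whether from_m3u_key() takes a Session or a proxy string, so the optional argument is omitted here.

import m3u8

playlist = m3u8.load("https://example.com/media.m3u8")  # placeholder URL
key = next((k for k in playlist.keys if k and k.method == "AES-128"), None)
if key:
    clearkey = ClearKey.from_m3u_key(key)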

View File

@ -78,7 +78,7 @@ class Widevine:
pssh_boxes: list[Container] = []
tenc_boxes: list[Container] = []
if track.descriptor == track.Descriptor.HLS:
if track.descriptor == track.Descriptor.M3U:
m3u_url = track.url
master = m3u8.loads(session.get(m3u_url).text, uri=m3u_url)
pssh_boxes.extend(
@ -224,18 +224,18 @@ class Widevine:
raise ValueError("Cannot decrypt a Track without any Content Keys...")
platform = {"win32": "win", "darwin": "osx"}.get(sys.platform, sys.platform)
executable = get_binary_path("shaka-packager", "packager", f"packager-{platform}", f"packager-{platform}-x64")
executable = get_binary_path("shaka-packager", f"packager-{platform}", f"packager-{platform}-x64")
if not executable:
raise EnvironmentError("Shaka Packager executable not found but is required.")
if not path or not path.exists():
raise ValueError("Tried to decrypt a file that does not exist.")
output_path = path.with_stem(f"{path.stem}_decrypted")
decrypted_path = path.with_suffix(f".decrypted{path.suffix}")
config.directories.temp.mkdir(parents=True, exist_ok=True)
try:
arguments = [
f"input={path},stream=0,output={output_path},output_format=MP4",
f"input={path},stream=0,output={decrypted_path}",
"--enable_raw_key_decryption", "--keys",
",".join([
*[
@ -259,7 +259,6 @@ class Widevine:
)
stream_skipped = False
had_error = False
shaka_log_buffer = ""
for line in iter(p.stderr.readline, ""):
@ -271,8 +270,6 @@ class Widevine:
stream_skipped = True
if ":INFO:" in line:
continue
if ":ERROR:" in line:
had_error = True
if "Insufficient bits in bitstream for given AVC profile" in line:
# this is a warning and is something we don't have to worry about
continue
@ -289,12 +286,12 @@ class Widevine:
p.wait()
if p.returncode != 0 or had_error:
if p.returncode != 0:
raise subprocess.CalledProcessError(p.returncode, arguments)
path.unlink()
if not stream_skipped:
shutil.move(output_path, path)
shutil.move(decrypted_path, path)
except subprocess.CalledProcessError as e:
if e.returncode == 0xC000013A: # STATUS_CONTROL_C_EXIT
raise KeyboardInterrupt()
@ -314,4 +311,4 @@ class Widevine:
"""License returned no Content Encryption Keys."""
__all__ = ("Widevine",)
__ALL__ = (Widevine,)
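Purely for illustration, the argument list assembled for Shaka Packager above expands to roughly the following; the file names and KID/key hex values are made up, and the exact --keys label formatting is an assumption based on shaka-packager's documented raw-key syntax rather than taken from this diff.

arguments = [
    "packager",
    "input=encrypted.mp4,stream=0,output=encrypted_decrypted.mp4",
    "--enable_raw_key_decryption",
    "--keys",
    "label=:key_id=00000000000000000000000000000001:key=00000000000000000000000000000002",
]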

View File

@ -1,4 +1,4 @@
from .dash import DASH
from .hls import HLS
__all__ = ("DASH", "HLS")
__ALL__ = (DASH, HLS)

File diff suppressed because it is too large

View File

@ -1,29 +1,33 @@
from __future__ import annotations
import html
import asyncio
import logging
import shutil
import subprocess
import re
import sys
import time
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from hashlib import md5
from pathlib import Path
from queue import Queue
from threading import Event, Lock
from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin
from zlib import crc32
import m3u8
import requests
from langcodes import Language, tag_is_valid
from langcodes import Language
from m3u8 import M3U8
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH
from requests import Session
from rich import filesize
from devine.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
from devine.core.downloaders import requests as requests_downloader
from devine.core.constants import AnyTrack
from devine.core.downloaders import aria2c
from devine.core.drm import DRM_T, ClearKey, Widevine
from devine.core.tracks import Audio, Subtitle, Tracks, Video
from devine.core.utilities import get_binary_path, get_extension, is_close_match, try_ensure_utf8
from devine.core.utilities import is_close_match
class HLS:
@ -87,12 +91,16 @@ class HLS:
All Track objects' URL will be to another M3U(8) document. However, these documents
will be Invariant Playlists and contain the list of segments URIs among other metadata.
"""
session_drm = HLS.get_all_drm(self.manifest.session_keys)
session_drm = HLS.get_drm(self.manifest.session_keys)
audio_codecs_by_group_id: dict[str, Audio.Codec] = {}
tracks = Tracks()
for playlist in self.manifest.playlists:
url = playlist.uri
if not re.match("^https?://", url):
url = playlist.base_uri + url
audio_group = playlist.stream_info.audio
if audio_group:
audio_codec = Audio.Codec.from_codecs(playlist.stream_info.codecs)
@ -107,19 +115,15 @@ class HLS:
primary_track_type = Video
tracks.add(primary_track_type(
id_=hex(crc32(str(playlist).encode()))[2:],
url=urljoin(playlist.base_uri, playlist.uri),
id_=md5(str(playlist).encode()).hexdigest()[0:7], # 7 chars only for filename length
url=url,
codec=primary_track_type.Codec.from_codecs(playlist.stream_info.codecs),
language=language, # HLS manifests do not seem to have language info
is_original_lang=True, # TODO: All we can do is assume Yes
bitrate=playlist.stream_info.average_bandwidth or playlist.stream_info.bandwidth,
descriptor=Video.Descriptor.HLS,
descriptor=Video.Descriptor.M3U,
drm=session_drm,
data={
"hls": {
"playlist": playlist
}
},
extra=playlist,
# video track args
**(dict(
range_=Video.Range.DV if any(
@ -133,9 +137,13 @@ class HLS:
))
for media in self.manifest.media:
if not media.uri:
url = media.uri
if not url:
continue
if not re.match("^https?://", url):
url = media.base_uri + url
joc = 0
if media.type == "AUDIO":
track_type = Audio
@ -147,33 +155,15 @@ class HLS:
track_type = Subtitle
codec = Subtitle.Codec.WebVTT # assuming WebVTT, codec info isn't shown
track_lang = next((
Language.get(option)
for x in (media.language, language)
for option in [(str(x) or "").strip()]
if tag_is_valid(option) and not option.startswith("und")
), None)
if not track_lang:
msg = "Language information could not be derived for a media."
if language is None:
msg += " No fallback language was provided when calling HLS.to_tracks()."
elif not tag_is_valid((str(language) or "").strip()) or str(language).startswith("und"):
msg += f" The fallback language provided is also invalid: {language}"
raise ValueError(msg)
tracks.add(track_type(
id_=hex(crc32(str(media).encode()))[2:],
url=urljoin(media.base_uri, media.uri),
id_=md5(str(media).encode()).hexdigest()[0:6], # 6 chars only for filename length
url=url,
codec=codec,
language=track_lang, # HLS media may not have language info, fallback if needed
is_original_lang=language and is_close_match(track_lang, [language]),
descriptor=Audio.Descriptor.HLS,
language=media.language or language, # HLS media may not have language info, fallback if needed
is_original_lang=language and is_close_match(media.language, [language]),
descriptor=Audio.Descriptor.M3U,
drm=session_drm if media.type == "AUDIO" else None,
data={
"hls": {
"media": media
}
},
extra=media,
# audio track args
**(dict(
bitrate=0, # TODO: M3U doesn't seem to state bitrate?
@ -193,6 +183,7 @@ class HLS:
track: AnyTrack,
save_path: Path,
save_dir: Path,
stop_event: Event,
progress: partial,
session: Optional[Session] = None,
proxy: Optional[str] = None,
@ -203,6 +194,9 @@ class HLS:
elif not isinstance(session, Session):
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
if not track.needs_proxy and proxy:
proxy = None
if proxy:
session.proxies.update({
"all": proxy
@ -220,460 +214,265 @@ class HLS:
log.error("Track's HLS playlist has no segments, expecting an invariant M3U8 playlist.")
sys.exit(1)
drm_lock = Lock()
def download_segment(filename: str, segment: m3u8.Segment, init_data: Queue, segment_key: Queue) -> int:
if stop_event.is_set():
# the track already started downloading, but another failed or was stopped
raise KeyboardInterrupt()
if callable(track.OnSegmentFilter) and track.OnSegmentFilter(segment):
return 0
segment_save_path = (save_dir / filename).with_suffix(".mp4")
newest_init_data = init_data.get()
try:
if segment.init_section and (not newest_init_data or segment.discontinuity):
# Only use the init data if there's no init data yet (e.g., start of file)
# or if EXT-X-DISCONTINUITY is reached at the same time as EXT-X-MAP.
# Even if a new EXT-X-MAP is supplied, it may just be duplicate and would
# be unnecessary and slow to re-download the init data each time.
if not segment.init_section.uri.startswith(segment.init_section.base_uri):
segment.init_section.uri = segment.init_section.base_uri + segment.init_section.uri
if segment.init_section.byterange:
byte_range = HLS.calculate_byte_range(segment.init_section.byterange)
_ = range_offset.get()
range_offset.put(byte_range.split("-")[0])
headers = {
"Range": f"bytes={byte_range}"
}
else:
headers = {}
log.debug("Got new init segment, %s", segment.init_section.uri)
res = session.get(segment.init_section.uri, headers=headers)
res.raise_for_status()
newest_init_data = res.content
finally:
init_data.put(newest_init_data)
with drm_lock:
newest_segment_key = segment_key.get()
try:
if segment.keys and newest_segment_key[1] != segment.keys:
try:
drm = HLS.get_drm(
keys=segment.keys,
proxy=proxy
)
except NotImplementedError as e:
log.error(str(e))
sys.exit(1)
else:
if drm:
track.drm = drm
drm = drm[0] # just use the first supported DRM system for now
log.debug("Got segment key, %s", drm)
if isinstance(drm, Widevine):
# license and grab content keys
track_kid = track.get_key_id(newest_init_data)
if not license_widevine:
raise ValueError("license_widevine func must be supplied to use Widevine DRM")
license_widevine(drm, track_kid=track_kid)
newest_segment_key = (drm, segment.keys)
finally:
segment_key.put(newest_segment_key)
if not segment.uri.startswith(segment.base_uri):
segment.uri = segment.base_uri + segment.uri
attempts = 1
while True:
try:
if segment.byterange:
# aria2(c) doesn't support byte ranges, let's use python-requests (likely slower)
previous_range_offset = range_offset.get()
byte_range = HLS.calculate_byte_range(segment.byterange, previous_range_offset)
range_offset.put(byte_range.split("-")[0])
res = session.get(
url=segment.uri,
headers={
"Range": f"bytes={byte_range}"
}
)
res.raise_for_status()
segment_save_path.parent.mkdir(parents=True, exist_ok=True)
segment_save_path.write_bytes(res.content)
else:
asyncio.run(aria2c(
uri=segment.uri,
out=segment_save_path,
headers=session.headers,
proxy=proxy,
silent=attempts != 5,
segmented=True
))
break
except Exception as ee:
if stop_event.is_set() or attempts == 5:
raise ee
time.sleep(2)
attempts += 1
data_size = segment_save_path.stat().st_size
if isinstance(track, Audio) or newest_init_data:
with open(segment_save_path, "rb+") as f:
segment_data = f.read()
if isinstance(track, Audio):
# fix audio decryption on ATVP by fixing the sample description index
# TODO: Is this in mpeg data, or init data?
segment_data = re.sub(
b"(tfhd\x00\x02\x00\x1a\x00\x00\x00\x01\x00\x00\x00)\x02",
b"\\g<1>\x01",
segment_data
)
# prepend the init data to be able to decrypt
if newest_init_data:
f.seek(0)
f.write(newest_init_data)
f.write(segment_data)
if newest_segment_key[0]:
newest_segment_key[0].decrypt(segment_save_path)
track.drm = None
if callable(track.OnDecrypted):
track.OnDecrypted(track)
return data_size
segment_key = Queue(maxsize=1)
init_data = Queue(maxsize=1)
range_offset = Queue(maxsize=1)
if track.drm:
# TODO: What if we don't want to use the first DRM system?
session_drm = track.drm[0]
session_drm = track.drm[0] # just use the first supported DRM system for now
if isinstance(session_drm, Widevine):
# license and grab content keys
try:
if not license_widevine:
raise ValueError("license_widevine func must be supplied to use Widevine DRM")
progress(downloaded="LICENSING")
license_widevine(session_drm)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
if not license_widevine:
raise ValueError("license_widevine func must be supplied to use Widevine DRM")
license_widevine(session_drm)
else:
session_drm = None
unwanted_segments = [
segment for segment in master.segments
if callable(track.OnSegmentFilter) and track.OnSegmentFilter(segment)
]
# have data to begin with, or it will be stuck waiting on the first pool forever
segment_key.put((session_drm, None))
init_data.put(None)
range_offset.put(0)
total_segments = len(master.segments) - len(unwanted_segments)
progress(total=total_segments)
progress(total=len(master.segments))
downloader = track.downloader
finished_threads = 0
download_sizes = []
last_speed_refresh = time.time()
urls: list[dict[str, Any]] = []
range_offset = 0
for segment in master.segments:
if segment in unwanted_segments:
continue
if segment.byterange:
if downloader.__name__ == "aria2c":
# aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
downloader = requests_downloader
byte_range = HLS.calculate_byte_range(segment.byterange, range_offset)
range_offset = byte_range.split("-")[0]
else:
byte_range = None
urls.append({
"url": urljoin(segment.base_uri, segment.uri),
"headers": {
"Range": f"bytes={byte_range}"
} if byte_range else {}
})
segment_save_dir = save_dir / "segments"
for status_update in downloader(
urls=urls,
output_dir=segment_save_dir,
filename="{i:0%d}{ext}" % len(str(len(urls))),
headers=session.headers,
cookies=session.cookies,
proxy=proxy,
max_workers=16
):
file_downloaded = status_update.get("file_downloaded")
if file_downloaded and callable(track.OnSegmentDownloaded):
track.OnSegmentDownloaded(file_downloaded)
else:
downloaded = status_update.get("downloaded")
if downloaded and downloaded.endswith("/s"):
status_update["downloaded"] = f"HLS {downloaded}"
progress(**status_update)
# see https://github.com/devine-dl/devine/issues/71
for control_file in segment_save_dir.glob("*.aria2__temp"):
control_file.unlink()
progress(total=total_segments, completed=0, downloaded="Merging")
name_len = len(str(total_segments))
discon_i = 0
range_offset = 0
map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
if session_drm:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (None, session_drm)
else:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
i = -1
for real_i, segment in enumerate(master.segments):
if segment not in unwanted_segments:
i += 1
is_last_segment = (real_i + 1) == len(master.segments)
def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
"""
Merge all files to a given path, optionally including map data.
Parameters:
to: The output file with all merged data.
via: List of files to merge, in sequence.
delete: Delete the file once it's been merged.
include_map_data: Whether to include the init map data.
"""
with open(to, "wb") as x:
if include_map_data and map_data and map_data[1]:
x.write(map_data[1])
for file in via:
x.write(file.read_bytes())
x.flush()
if delete:
file.unlink()
def decrypt(include_this_segment: bool) -> Path:
"""
Decrypt all segments that uses the currently set DRM.
All segments that will be decrypted with this DRM will be merged together
in sequence, prefixed with the init data (if any), and then deleted. Once
merged they will be decrypted. The merged and decrypted file names state
the range of segments that were used.
Parameters:
include_this_segment: Whether to include the current segment in the
list of segments to merge and decrypt. This should be False if
decrypting on EXT-X-KEY changes, or True when decrypting on the
last segment.
Returns the decrypted path.
"""
drm = encryption_data[1]
first_segment_i = next(
int(file.stem)
for file in sorted(segment_save_dir.iterdir())
if file.stem.isdigit()
with ThreadPoolExecutor(max_workers=16) as pool:
for download in futures.as_completed((
pool.submit(
download_segment,
filename=str(i).zfill(len(str(len(master.segments)))),
segment=segment,
init_data=init_data,
segment_key=segment_key
)
last_segment_i = max(0, i - int(not include_this_segment))
range_len = (last_segment_i - first_segment_i) + 1
for i, segment in enumerate(master.segments)
)):
finished_threads += 1
segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
merged_path = segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"
try:
download_size = download.result()
except KeyboardInterrupt:
stop_event.set() # skip pending track downloads
progress(downloaded="[yellow]STOPPING")
pool.shutdown(wait=True, cancel_futures=True)
progress(downloaded="[yellow]STOPPED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception as e:
stop_event.set() # skip pending track downloads
progress(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
progress(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise e
else:
# it successfully downloaded, and it was not cancelled
progress(advance=1)
files = [
file
for file in sorted(segment_save_dir.iterdir())
if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
]
if not files:
raise ValueError(f"None of the segment files for {segment_range} exist...")
elif len(files) != range_len:
raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
now = time.time()
time_since = now - last_speed_refresh
merge(
to=merged_path,
via=files,
delete=True,
include_map_data=True
)
if download_size: # no size == skipped dl
download_sizes.append(download_size)
drm.decrypt(merged_path)
merged_path.rename(decrypted_path)
if download_sizes and (time_since > 5 or finished_threads == len(master.segments)):
data_size = sum(download_sizes)
download_speed = data_size / time_since
progress(downloaded=f"HLS {filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
if callable(track.OnDecrypted):
track.OnDecrypted(drm, decrypted_path)
return decrypted_path
def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True):
"""
Merge all segments of the discontinuity.
All segment files for this discontinuity must already be downloaded and
already decrypted (if it needs to be decrypted).
Parameters:
include_this_segment: Whether to include the current segment in the
list of segments to merge and decrypt. This should be False if
decrypting on EXT-X-KEY changes, or True when decrypting on the
last segment.
include_map_data: Whether to prepend the init map data before the
segment files when merging.
"""
last_segment_i = max(0, i - int(not include_this_segment))
files = [
file
for file in sorted(segment_save_dir.iterdir())
if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i
]
if files:
to_dir = segment_save_dir.parent
to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}"
merge(
to=to_path,
via=files,
delete=True,
include_map_data=include_map_data
)
if segment not in unwanted_segments:
if isinstance(track, Subtitle):
segment_file_ext = get_extension(segment.uri)
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
segment_data = segment_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
encode("utf8")
segment_file_path.write_bytes(segment_data)
if segment.discontinuity and i != 0:
if encryption_data:
decrypt(include_this_segment=False)
merge_discontinuity(
include_this_segment=False,
include_map_data=not encryption_data or not encryption_data[1]
)
discon_i += 1
range_offset = 0 # TODO: Should this be reset or not?
map_data = None
if encryption_data:
encryption_data = (encryption_data[0], encryption_data[1])
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
if segment.init_section.byterange:
init_byte_range = HLS.calculate_byte_range(
segment.init_section.byterange,
range_offset
)
range_offset = init_byte_range.split("-")[0]
init_range_header = {
"Range": f"bytes={init_byte_range}"
}
else:
init_range_header = {}
res = session.get(
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=init_range_header
)
res.raise_for_status()
map_data = (segment.init_section, res.content)
if segment.keys:
key = HLS.get_supported_key(segment.keys)
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
decrypt(include_this_segment=False)
if key is None:
encryption_data = None
elif not encryption_data or encryption_data[0] != key:
drm = HLS.get_drm(key, session)
if isinstance(drm, Widevine):
try:
if map_data:
track_kid = track.get_key_id(map_data[1])
else:
track_kid = None
progress(downloaded="LICENSING")
license_widevine(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
encryption_data = (key, drm)
# TODO: This wont work as we already downloaded
if DOWNLOAD_LICENCE_ONLY.is_set():
continue
if is_last_segment:
# required as it won't end with EXT-X-DISCONTINUITY nor a new key
if encryption_data:
decrypt(include_this_segment=True)
merge_discontinuity(
include_this_segment=True,
include_map_data=not encryption_data or not encryption_data[1]
)
progress(advance=1)
# TODO: Again still wont work, we've already downloaded
if DOWNLOAD_LICENCE_ONLY.is_set():
return
segment_save_dir.rmdir()
# finally merge all the discontinuity save files together to the final path
segments_to_merge = [
x
for x in sorted(save_dir.iterdir())
if x.is_file()
]
if len(segments_to_merge) == 1:
shutil.move(segments_to_merge[0], save_path)
else:
progress(downloaded="Merging")
if isinstance(track, (Video, Audio)):
HLS.merge_segments(
segments=segments_to_merge,
save_path=save_path
)
else:
with open(save_path, "wb") as f:
for discontinuity_file in segments_to_merge:
discontinuity_data = discontinuity_file.read_bytes()
f.write(discontinuity_data)
f.flush()
save_dir.rmdir()
progress(downloaded="Downloaded")
with open(save_path, "wb") as f:
for segment_file in sorted(save_dir.iterdir()):
f.write(segment_file.read_bytes())
segment_file.unlink()
track.path = save_path
if callable(track.OnDownloaded):
track.OnDownloaded()
@staticmethod
def merge_segments(segments: list[Path], save_path: Path) -> int:
"""
Concatenate Segments using FFmpeg's concat demuxer.
Returns the file size of the merged file.
"""
ffmpeg = get_binary_path("ffmpeg")
if not ffmpeg:
raise EnvironmentError("FFmpeg executable was not found but is required to merge HLS segments.")
demuxer_file = segments[0].parent / "ffmpeg_concat_demuxer.txt"
demuxer_file.write_text("\n".join([
f"file '{segment}'"
for segment in segments
]))
subprocess.check_call([
ffmpeg, "-hide_banner",
"-loglevel", "panic",
"-f", "concat",
"-safe", "0",
"-i", demuxer_file,
"-map", "0",
"-c", "copy",
save_path
])
demuxer_file.unlink()
return save_path.stat().st_size
@staticmethod
def get_supported_key(keys: list[Union[m3u8.model.SessionKey, m3u8.model.Key]]) -> Optional[m3u8.Key]:
"""
Get a supported Key System from a list of Key Systems.
Note that the key systems are chosen in an opinionated order.
Returns None if one of the key systems is method=NONE, which means all segments
from then on should be treated as plain text until another key system is
encountered, unless it's also method=NONE.
Raises NotImplementedError if none of the key systems are supported.
"""
if any(key.method == "NONE" for key in keys):
return None
unsupported_systems = []
for key in keys:
if not key:
continue
# TODO: Add a way to specify which supported key system to use
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
elif key.method == "AES-128":
return key
elif key.method == "ISO-23001-7":
return key
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
return key
else:
unsupported_systems.append(key.method + (f" ({key.keyformat})" if key.keyformat else ""))
else:
raise NotImplementedError(f"None of the key systems are supported: {', '.join(unsupported_systems)}")
save_dir.rmdir()
@staticmethod
def get_drm(
key: Union[m3u8.model.SessionKey, m3u8.model.Key],
session: Optional[requests.Session] = None
) -> DRM_T:
"""
Convert HLS EXT-X-KEY data to an initialized DRM object.
Parameters:
key: m3u8 key system (EXT-X-KEY) object.
session: Optional session used to request AES-128 URIs.
Useful to set headers, proxies, cookies, and so forth.
Raises a NotImplementedError if the key system is not supported.
"""
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not {type(session)}")
if not session:
session = Session()
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
if key.method == "AES-128":
drm = ClearKey.from_m3u_key(key, session)
elif key.method == "ISO-23001-7":
drm = Widevine(
pssh=PSSH.new(
key_ids=[key.uri.split(",")[-1]],
system_id=PSSH.SystemId.Widevine
)
)
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
drm = Widevine(
pssh=PSSH(key.uri.split(",")[-1]),
**key._extra_params # noqa
)
else:
raise NotImplementedError(f"The key system is not supported: {key}")
return drm
@staticmethod
def get_all_drm(
keys: list[Union[m3u8.model.SessionKey, m3u8.model.Key]],
proxy: Optional[str] = None
) -> list[DRM_T]:
"""
Convert HLS EXT-X-KEY data to initialized DRM objects.
Parameters:
keys: m3u8 key system (EXT-X-KEY) objects.
proxy: Optional proxy string used for requesting AES-128 URIs.
You can supply key data for a single segment or for the entire manifest.
This lets you narrow the results down to each specific segment's DRM status.
Raises a NotImplementedError if none of the key systems are supported.
Returns an empty list if there were no supplied EXT-X-KEY data, or if all the
EXT-X-KEY's were of blank data. An empty list signals a DRM-free stream or segment.
Will raise a NotImplementedError if EXT-X-KEY data was supplied and none of them
were supported. A DRM-free track will never raise NotImplementedError.
"""
unsupported_keys: list[m3u8.Key] = []
drm_objects: list[DRM_T] = []
if any(key.method == "NONE" for key in keys):
return []
drm = []
unsupported_systems = []
for key in keys:
try:
drm = HLS.get_drm(key, proxy)
drm_objects.append(drm)
except NotImplementedError:
unsupported_keys.append(key)
if not key:
continue
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
if key.method == "NONE":
return []
elif key.method == "AES-128":
drm.append(ClearKey.from_m3u_key(key, proxy))
elif key.method == "ISO-23001-7":
drm.append(Widevine(
pssh=PSSH.new(
key_ids=[key.uri.split(",")[-1]],
system_id=PSSH.SystemId.Widevine
)
))
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
drm.append(Widevine(
pssh=PSSH(key.uri.split(",")[-1]),
**key._extra_params # noqa
))
else:
unsupported_systems.append(key.method + (f" ({key.keyformat})" if key.keyformat else ""))
if not drm_objects and unsupported_keys:
raise NotImplementedError(f"None of the key systems are supported: {unsupported_keys}")
if not drm and unsupported_systems:
raise NotImplementedError(f"No support for any of the key systems: {', '.join(unsupported_systems)}")
return drm_objects
return drm
@staticmethod
def calculate_byte_range(m3u_range: str, fallback_offset: int = 0) -> str:
@ -688,4 +487,4 @@ class HLS:
return f"{offset}-{offset + length - 1}"
__all__ = ("HLS",)
__ALL__ = (HLS,)
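A worked example of the byte-range arithmetic in calculate_byte_range() shown above, assuming the standard EXT-X-BYTERANGE `<length>[@<offset>]` form with fallback_offset used when no explicit offset is given:

assert HLS.calculate_byte_range("1024@2048") == "2048-3071"                    # explicit offset
assert HLS.calculate_byte_range("1024", fallback_offset=3072) == "3072-4095"   # falls back to the supplied offset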

View File

@ -2,4 +2,4 @@ from .basic import Basic
from .hola import Hola
from .nordvpn import NordVPN
__all__ = ("Basic", "Hola", "NordVPN")
__ALL__ = (Basic, Hola, NordVPN)

View File

@ -1,44 +0,0 @@
from typing import Optional, Union
class SearchResult:
def __init__(
self,
id_: Union[str, int],
title: str,
description: Optional[str] = None,
label: Optional[str] = None,
url: Optional[str] = None
):
"""
A Search Result for any supported Title Type.
Parameters:
id_: The search result's Title ID.
title: The primary display text, e.g., the Title's Name.
description: The secondary display text, e.g., the Title's Description or
further title information.
label: The tertiary display text. This will typically be used to display
an informative label or tag to the result. E.g., "unavailable", the
title's price tag, region, etc.
url: A hyperlink to the search result or title's page.
"""
if not isinstance(id_, (str, int)):
raise TypeError(f"Expected id_ to be a {str} or {int}, not {type(id_)}")
if not isinstance(title, str):
raise TypeError(f"Expected title to be a {str}, not {type(title)}")
if not isinstance(description, (str, type(None))):
raise TypeError(f"Expected description to be a {str}, not {type(description)}")
if not isinstance(label, (str, type(None))):
raise TypeError(f"Expected label to be a {str}, not {type(label)}")
if not isinstance(url, (str, type(None))):
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
self.id = id_
self.title = title
self.description = description
self.label = label
self.url = url
__all__ = ("SearchResult",)

View File

@ -1,8 +1,7 @@
import base64
import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Generator
from http.cookiejar import CookieJar
from http.cookiejar import CookieJar, MozillaCookieJar
from typing import Optional, Union
from urllib.parse import urlparse
@ -17,9 +16,8 @@ from devine.core.config import config
from devine.core.console import console
from devine.core.constants import AnyTrack
from devine.core.credential import Credential
from devine.core.search_result import SearchResult
from devine.core.titles import Title_T, Titles_T
from devine.core.tracks import Chapters, Tracks
from devine.core.tracks import Chapter, Tracks
from devine.core.utilities import get_ip_info
@ -43,40 +41,39 @@ class Service(metaclass=ABCMeta):
self.session = self.get_session()
self.cache = Cacher(self.__class__.__name__)
if not ctx.parent or not ctx.parent.params.get("no_proxy"):
if ctx.parent:
proxy = ctx.parent.params["proxy"]
else:
proxy = None
if ctx.parent:
self.proxy = ctx.parent.params["proxy"]
else:
self.proxy = None
if not proxy:
# don't override the explicit proxy set by the user, even if they may be geoblocked
with console.status("Checking if current region is Geoblocked...", spinner="dots"):
if self.GEOFENCE:
# no explicit proxy, let's get one to GEOFENCE if needed
current_region = get_ip_info(self.session)["country"].lower()
if any(x.lower() == current_region for x in self.GEOFENCE):
self.log.info("Service is not Geoblocked in your region")
else:
requested_proxy = self.GEOFENCE[0] # first is likely main region
self.log.info(f"Service is Geoblocked in your region, getting a Proxy to {requested_proxy}")
for proxy_provider in ctx.obj.proxy_providers:
proxy = proxy_provider.get_proxy(requested_proxy)
if proxy:
self.log.info(f"Got Proxy from {proxy_provider.__class__.__name__}")
break
if not self.proxy:
# don't override the explicit proxy set by the user, even if they may be geoblocked
with console.status("Checking if current region is Geoblocked...", spinner="dots"):
if self.GEOFENCE:
# no explicit proxy, let's get one to GEOFENCE if needed
current_region = get_ip_info(self.session)["country"].lower()
if any(x.lower() == current_region for x in self.GEOFENCE):
self.log.info("Service is not Geoblocked in your region")
else:
self.log.info("Service has no Geofence")
requested_proxy = self.GEOFENCE[0] # first is likely main region
self.log.info(f"Service is Geoblocked in your region, getting a Proxy to {requested_proxy}")
for proxy_provider in ctx.obj.proxy_providers:
self.proxy = proxy_provider.get_proxy(requested_proxy)
if self.proxy:
self.log.info(f"Got Proxy from {proxy_provider.__class__.__name__}")
break
else:
self.log.info("Service has no Geofence")
if proxy:
self.session.proxies.update({"all": proxy})
proxy_parse = urlparse(proxy)
if proxy_parse.username and proxy_parse.password:
self.session.headers.update({
"Proxy-Authorization": base64.b64encode(
f"{proxy_parse.username}:{proxy_parse.password}".encode("utf8")
).decode()
})
if self.proxy:
self.session.proxies.update({"all": self.proxy})
proxy_parse = urlparse(self.proxy)
if proxy_parse.username and proxy_parse.password:
self.session.headers.update({
"Proxy-Authorization": base64.b64encode(
f"{proxy_parse.username}:{proxy_parse.password}".encode("utf8")
).decode()
})
# Optional Abstract functions
# The following functions may be implemented by the Service.
@ -98,12 +95,15 @@ class Service(metaclass=ABCMeta):
backoff_factor=0.2,
status_forcelist=[429, 500, 502, 503, 504]
),
# 16 connections is used for byte-ranged downloads
# double it to allow for 16 non-related connections
pool_maxsize=16 * 2,
pool_block=True
))
session.mount("http://", session.adapters["https://"])
return session
def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
def authenticate(self, cookies: Optional[MozillaCookieJar] = None, credential: Optional[Credential] = None) -> None:
"""
Authenticate the Service with Cookies and/or Credentials (Email/Username and Password).
@ -119,22 +119,10 @@ class Service(metaclass=ABCMeta):
"""
if cookies is not None:
if not isinstance(cookies, CookieJar):
raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
raise TypeError(f"Expected cookies to be a {MozillaCookieJar}, not {cookies!r}.")
self.session.cookies.update(cookies)
def search(self) -> Generator[SearchResult, None, None]:
"""
Search by query for titles from the Service.
The query must be taken as a CLI argument by the Service class.
Ideally just re-use the title ID argument (i.e. self.title).
Search results will be displayed in the order yielded.
"""
raise NotImplementedError(f"Search functionality has not been implemented by {self.__class__.__name__}")
def get_widevine_service_certificate(self, *, challenge: bytes, title: Title_T, track: AnyTrack) \
-> Union[bytes, str]:
def get_widevine_service_certificate(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> Union[bytes, str]:
"""
Get the Widevine Service Certificate used for Privacy Mode.
@ -217,23 +205,25 @@ class Service(metaclass=ABCMeta):
"""
@abstractmethod
def get_chapters(self, title: Title_T) -> Chapters:
def get_chapters(self, title: Title_T) -> list[Chapter]:
"""
Get Chapters for the Title.
Get Chapter objects of the Title.
Parameters:
title: The current Title from `get_titles` that is being processed.
Return a list of Chapter objects. This will be run after get_tracks. If there's anything
from the get_tracks that may be needed, e.g. "device_id" or a-like, store it in the class
via `self` and re-use the value in get_chapters.
You must return a Chapters object containing 0 or more Chapter objects.
How it's used is generally the same as get_titles. These are only separated so as to reduce
function complexity and keep them focused on simple tasks.
You do not need to set a Chapter number or sort/order the chapters in any way as
the Chapters class automatically handles all of that for you. If there's no
descriptive name for a Chapter then do not set a name at all.
You do not need to sort or order the chapters in any way. However, you do need to filter
and alter them as needed by the service. No modification is made after get_chapters is
run, so ensure that the Chapter objects returned have consistent Chapter Titles
and Chapter Numbers.
You must not set Chapter names to "Chapter {n}" or such. If you (or the user)
wants "Chapter {n}" style Chapter names (or similar) then they can use the config
option `chapter_fallback_name`. For example, `"Chapter {i:02}"` for "Chapter 01".
:param title: The current `Title` from get_titles that is being executed.
:return: List of Chapter objects, if available, empty list otherwise.
"""
__all__ = ("Service",)
__ALL__ = (Service,)
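A hedged sketch of what a Service might implement for get_chapters(), following the Chapters-returning variant described above. The endpoint, the JSON field names, and the Chapter/Chapters constructor arguments are all assumptions made for illustration.

def get_chapters(self, title: Title_T) -> Chapters:
    cues = self.session.get(f"https://example.com/api/chapters/{title.id}").json()  # hypothetical endpoint
    return Chapters([
        Chapter(timestamp=cue["start"], name=cue.get("name"))  # assumed constructor arguments
        for cue in cues
    ])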

View File

@ -37,15 +37,7 @@ class Services(click.MultiCommand):
def get_command(self, ctx: click.Context, name: str) -> click.Command:
"""Load the Service and return the Click CLI method."""
tag = Services.get_tag(name)
try:
service = Services.load(tag)
except KeyError as e:
available_services = self.list_commands(ctx)
if not available_services:
raise click.ClickException(
f"There are no Services added yet, therefore the '{name}' Service could not be found."
)
raise click.ClickException(f"{e}. Available Services: {', '.join(available_services)}")
service = Services.load(tag)
if hasattr(service, "cli"):
return service.cli
@ -66,7 +58,7 @@ class Services(click.MultiCommand):
for service in _SERVICES:
if service.parent.stem == tag:
return service.parent
raise KeyError(f"There is no Service added by the Tag '{name}'")
raise click.ClickException(f"Unable to find service by the name '{name}'")
@staticmethod
def get_tag(value: str) -> str:
@ -88,8 +80,8 @@ class Services(click.MultiCommand):
"""Load a Service module by Service tag."""
module = _MODULES.get(tag)
if not module:
raise KeyError(f"There is no Service added by the Tag '{tag}'")
raise click.ClickException(f"Unable to find Service by the tag '{tag}'")
return module
__all__ = ("Services",)
__ALL__ = (Services,)

View File

@ -8,4 +8,4 @@ Title_T = Union[Movie, Episode, Song]
Titles_T = Union[Movies, Series, Album]
__all__ = ("Episode", "Series", "Movie", "Movies", "Album", "Song", "Title_T", "Titles_T")
__ALL__ = (Episode, Series, Movie, Movies, Album, Song, Title_T, Titles_T)

View File

@ -207,4 +207,4 @@ class Series(SortedKeyList, ABC):
return tree
__all__ = ("Episode", "Series")
__ALL__ = (Episode, Series)

View File

@ -153,4 +153,4 @@ class Movies(SortedKeyList, ABC):
return tree
__all__ = ("Movie", "Movies")
__ALL__ = (Movie, Movies)

View File

@ -148,4 +148,4 @@ class Album(SortedKeyList, ABC):
return tree
__all__ = ("Song", "Album")
__ALL__ = (Song, Album)

View File

@ -69,4 +69,4 @@ class Title:
"""
__all__ = ("Title",)
__ALL__ = (Title,)

View File

@ -1,9 +1,8 @@
from .audio import Audio
from .chapter import Chapter
from .chapters import Chapters
from .subtitle import Subtitle
from .track import Track
from .tracks import Tracks
from .video import Video
__all__ = ("Audio", "Chapter", "Chapters", "Subtitle", "Track", "Tracks", "Video")
__ALL__ = (Audio, Chapter, Subtitle, Track, Tracks, Video)

View File

@ -13,10 +13,9 @@ class Audio(Track):
AC3 = "DD" # https://wikipedia.org/wiki/Dolby_Digital
EC3 = "DD+" # https://wikipedia.org/wiki/Dolby_Digital_Plus
OPUS = "OPUS" # https://wikipedia.org/wiki/Opus_(audio_format)
OGG = "VORB" # https://wikipedia.org/wiki/Vorbis
DTS = "DTS" # https://en.wikipedia.org/wiki/DTS_(company)#DTS_Digital_Surround
OGG = "VORB" # https://wikipedia.org/wiki/Vorbis
DTS = "DTS" # https://en.wikipedia.org/wiki/DTS_(company)#DTS_Digital_Surround
ALAC = "ALAC" # https://en.wikipedia.org/wiki/Apple_Lossless_Audio_Codec
FLAC = "FLAC" # https://en.wikipedia.org/wiki/FLAC
@property
def extension(self) -> str:
@ -37,8 +36,6 @@ class Audio(Track):
return Audio.Codec.DTS
if mime == "alac":
return Audio.Codec.ALAC
if mime == "flac":
return Audio.Codec.FLAC
raise ValueError(f"The MIME '{mime}' is not a supported Audio Codec")
@staticmethod
@ -121,4 +118,4 @@ class Audio(Track):
]))
__all__ = ("Audio",)
__ALL__ = (Audio,)
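A quick usage sketch of the MIME mapping touched above, assuming the helper shown is the Audio.Codec.from_mime classmethod (its name is not visible in this hunk):
# Sketch only; the classmethod name is an assumption.
from devine.core.tracks import Audio

assert Audio.Codec.from_mime("alac") == Audio.Codec.ALAC  # per the "alac" branch shown above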

View File

@ -1,82 +1,95 @@
from __future__ import annotations
import re
from pathlib import Path
from typing import Optional, Union
from zlib import crc32
TIMESTAMP_FORMAT = re.compile(r"^(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<ms>.\d{3}|)$")
class Chapter:
def __init__(self, timestamp: Union[str, int, float], name: Optional[str] = None):
"""
Create a new Chapter with a Timestamp and optional name.
line_1 = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timecode>[\d\\.]+)$")
line_2 = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<title>[\d\\.]+)$")
The timestamp may be in the following formats:
- "HH:MM:SS" string, e.g., `25:05:23`.
- "HH:MM:SS.mss" string, e.g., `25:05:23.120`.
- a timecode integer in milliseconds, e.g., `90323120` is `25:05:23.120`.
- a timecode float in seconds, e.g., `90323.12` is `25:05:23.120`.
def __init__(self, number: int, timecode: str, title: Optional[str] = None):
self.id = f"chapter-{number}"
self.number = number
self.timecode = timecode
self.title = title
If you have a timecode integer in seconds, just multiply it by 1000.
If you have a timecode float in milliseconds (no decimal value), just convert
it to an integer.
"""
if timestamp is None:
raise ValueError("The timestamp must be provided.")
if "." not in self.timecode:
self.timecode += ".000"
if not isinstance(timestamp, (str, int, float)):
raise TypeError(f"Expected timestamp to be {str}, {int} or {float}, not {type(timestamp)}")
if not isinstance(name, (str, type(None))):
raise TypeError(f"Expected name to be {str}, not {type(name)}")
if not isinstance(timestamp, str):
if isinstance(timestamp, int): # ms
hours, remainder = divmod(timestamp, 1000 * 60 * 60)
minutes, remainder = divmod(remainder, 1000 * 60)
seconds, ms = divmod(remainder, 1000)
elif isinstance(timestamp, float): # seconds.ms
hours, remainder = divmod(timestamp, 60 * 60)
minutes, remainder = divmod(remainder, 60)
seconds, ms = divmod(int(remainder * 1000), 1000)
else:
raise TypeError
timestamp = f"{hours:02}:{minutes:02}:{seconds:02}.{str(ms).zfill(3)[:3]}"
timestamp_m = TIMESTAMP_FORMAT.match(timestamp)
if not timestamp_m:
raise ValueError(f"The timestamp format is invalid: {timestamp}")
hour, minute, second, ms = timestamp_m.groups()
if not ms:
timestamp += ".000"
self.timestamp = timestamp
self.name = name
def __bool__(self) -> bool:
return self.number and self.number >= 0 and self.timecode
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__,
items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
"""
OGM-based Simple Chapter Format intended for use with MKVToolNix.
This format is not officially part of the Matroska spec. This was a format
designed for OGM tools that MKVToolNix has since re-used. More Information:
https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple
"""
return "CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
num=f"{self.number:02}",
time=self.timecode,
name=self.title or ""
)
def __str__(self) -> str:
return " | ".join(filter(bool, [
"CHP",
self.timestamp,
self.name
f"[{self.number:02}]",
self.timecode,
self.title
]))
@property
def id(self) -> str:
"""Compute an ID from the Chapter data."""
checksum = crc32(str(self).encode("utf8"))
return hex(checksum)
@property
def named(self) -> bool:
"""Check if Chapter is named."""
return bool(self.name)
return bool(self.title)
@classmethod
def loads(cls, data: str) -> Chapter:
"""Load chapter data from a string."""
lines = [x.strip() for x in data.strip().splitlines(keepends=False)]
if len(lines) > 2:
return cls.loads("\n".join(lines))
one, two = lines
one_m = cls.line_1.match(one)
two_m = cls.line_2.match(two)
if not one_m or not two_m:
raise SyntaxError(f"An unexpected syntax error near:\n{one}\n{two}")
one_str, timecode = one_m.groups()
two_str, title = two_m.groups()
one_num, two_num = int(one_str.lstrip("0")), int(two_str.lstrip("0"))
if one_num != two_num:
raise SyntaxError(f"The chapter numbers ({one_num},{two_num}) does not match.")
if not timecode:
raise SyntaxError("The timecode is missing.")
if not title:
title = None
return cls(number=one_num, timecode=timecode, title=title)
@classmethod
def load(cls, path: Union[Path, str]) -> Chapter:
"""Load chapter data from a file."""
if isinstance(path, str):
path = Path(path)
return cls.loads(path.read_text(encoding="utf8"))
def dumps(self) -> str:
"""Return chapter data as a string."""
return repr(self)
def dump(self, path: Union[Path, str]) -> int:
"""Write chapter data to a file."""
if isinstance(path, str):
path = Path(path)
return path.write_text(self.dumps(), encoding="utf8")
__all__ = ("Chapter",)
__ALL__ = (Chapter,)
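A standalone sketch of the millisecond/second normalisation the new Chapter docstring describes, using the same divmod arithmetic; the helper name is made up for illustration:
# to_timestamp(90323120) == "25:05:23.120" (int milliseconds)
# to_timestamp(90323.12)  == "25:05:23.120" (float seconds)
def to_timestamp(value):
    if isinstance(value, int):    # milliseconds
        hours, remainder = divmod(value, 1000 * 60 * 60)
        minutes, remainder = divmod(remainder, 1000 * 60)
        seconds, ms = divmod(remainder, 1000)
    else:                         # float seconds
        hours, remainder = divmod(value, 60 * 60)
        minutes, remainder = divmod(remainder, 60)
        seconds, ms = divmod(int(remainder * 1000), 1000)
    return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02}.{str(int(ms)).zfill(3)[:3]}"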

View File

@ -1,156 +0,0 @@
from __future__ import annotations
import re
from abc import ABC
from pathlib import Path
from typing import Any, Iterable, Optional, Union
from zlib import crc32
from sortedcontainers import SortedKeyList
from devine.core.tracks import Chapter
OGM_SIMPLE_LINE_1_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timestamp>\d{2,}:\d{2}:\d{2}\.\d{3})$")
OGM_SIMPLE_LINE_2_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<name>.*)$")
class Chapters(SortedKeyList, ABC):
def __init__(self, iterable: Optional[Iterable[Chapter]] = None):
super().__init__(key=lambda x: x.timestamp or 0)
for chapter in iterable or []:
self.add(chapter)
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__,
items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __str__(self) -> str:
return "\n".join([
" | ".join(filter(bool, [
"CHP",
f"[{i:02}]",
chapter.timestamp,
chapter.name
]))
for i, chapter in enumerate(self, start=1)
])
@classmethod
def loads(cls, data: str) -> Chapters:
"""Load chapter data from a string."""
lines = [
line.strip()
for line in data.strip().splitlines(keepends=False)
]
if len(lines) % 2 != 0:
raise ValueError("The number of chapter lines must be even.")
chapters = []
for line_1, line_2 in zip(lines[::2], lines[1::2]):
line_1_match = OGM_SIMPLE_LINE_1_FORMAT.match(line_1)
if not line_1_match:
raise SyntaxError(f"An unexpected syntax error occurred on: {line_1}")
line_2_match = OGM_SIMPLE_LINE_2_FORMAT.match(line_2)
if not line_2_match:
raise SyntaxError(f"An unexpected syntax error occurred on: {line_2}")
line_1_number, timestamp = line_1_match.groups()
line_2_number, name = line_2_match.groups()
if line_1_number != line_2_number:
raise SyntaxError(
f"The chapter numbers {line_1_number} and {line_2_number} do not match on:\n{line_1}\n{line_2}")
if not timestamp:
raise SyntaxError(f"The timestamp is missing on: {line_1}")
chapters.append(Chapter(timestamp, name))
return cls(chapters)
@classmethod
def load(cls, path: Union[Path, str]) -> Chapters:
"""Load chapter data from a file."""
if isinstance(path, str):
path = Path(path)
return cls.loads(path.read_text(encoding="utf8"))
def dumps(self, fallback_name: str = "") -> str:
"""
Return chapter data in OGM-based Simple Chapter format.
https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple
Parameters:
fallback_name: Name used for Chapters without a Name set.
The fallback name can use the following variables in f-string style:
- {i}: The Chapter number starting at 1.
E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
- {j}: A number starting at 1 that increments any time a Chapter has no name.
E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".
These are formatted in f-string style; formatting directives are supported.
For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
"""
chapters = []
j = 0
for i, chapter in enumerate(self, start=1):
if not chapter.name:
j += 1
chapters.append("CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
num=f"{i:02}",
time=chapter.timestamp,
name=chapter.name or fallback_name.format(
i=i,
j=j
)
))
return "\n".join(chapters)
def dump(self, path: Union[Path, str], *args: Any, **kwargs: Any) -> int:
"""
Write chapter data in OGM-based Simple Chapter format to a file.
Parameters:
path: The file path to write the Chapter data to, overwriting
any existing data.
See `Chapters.dumps` for more parameter documentation.
"""
if isinstance(path, str):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
ogm_text = self.dumps(*args, **kwargs)
return path.write_text(ogm_text, encoding="utf8")
def add(self, value: Chapter) -> None:
if not isinstance(value, Chapter):
raise TypeError(f"Can only add {Chapter} objects, not {type(value)}")
if any(chapter.timestamp == value.timestamp for chapter in self):
raise ValueError(f"A Chapter with the Timestamp {value.timestamp} already exists")
super().add(value)
if not any(chapter.timestamp == "00:00:00.000" for chapter in self):
self.add(Chapter(0))
@property
def id(self) -> str:
"""Compute an ID from the Chapter data."""
checksum = crc32("\n".join([
chapter.id
for chapter in self
]).encode("utf8"))
return hex(checksum)
__all__ = ("Chapters", "Chapter")

View File

@ -4,13 +4,10 @@ import re
import subprocess
from collections import defaultdict
from enum import Enum
from functools import partial
from io import BytesIO
from pathlib import Path
from typing import Any, Callable, Iterable, Optional
from typing import Any, Iterable, Optional
import pycaption
import requests
from construct import Container
from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader
from pycaption.geometry import Layout
@ -18,7 +15,7 @@ from pymp4.parser import MP4
from subtitle_filter import Subtitles
from devine.core.tracks.track import Track
from devine.core.utilities import get_binary_path, try_ensure_utf8
from devine.core.utilities import get_binary_path
class Subtitle(Track):
@ -136,9 +133,6 @@ class Subtitle(Track):
if (self.cc or self.sdh) and self.forced:
raise ValueError("A text track cannot be CC/SDH as well as Forced.")
# Called after Track has been converted to another format
self.OnConverted: Optional[Callable[[Subtitle.Codec], None]] = None
def get_track_name(self) -> Optional[str]:
"""Return the base Track Name."""
track_name = super().get_track_name() or ""
@ -149,150 +143,50 @@ class Subtitle(Track):
track_name += flag
return track_name or None
def download(
self,
session: requests.Session,
prepare_drm: partial,
progress: Optional[partial] = None
):
super().download(session, prepare_drm, progress)
if not self.path:
return
if self.codec == Subtitle.Codec.fTTML:
self.convert(Subtitle.Codec.TimedTextMarkupLang)
elif self.codec == Subtitle.Codec.fVTT:
self.convert(Subtitle.Codec.WebVTT)
def convert(self, codec: Subtitle.Codec) -> Path:
"""
Convert this Subtitle to another Format.
The file path location of the Subtitle data will be kept at the same
location but the file extension will be changed appropriately.
Supported formats:
- SubRip - SubtitleEdit or pycaption.SRTWriter
- TimedTextMarkupLang - SubtitleEdit or pycaption.DFXPWriter
- WebVTT - SubtitleEdit or pycaption.WebVTTWriter
- SubStationAlphav4 - SubtitleEdit
- fTTML* - custom code using some pycaption functions
- fVTT* - custom code using some pycaption functions
*: Can read from format, but cannot convert to format
Note: It currently prioritizes using SubtitleEdit over PyCaption as
I have personally noticed more oddities with PyCaption's parsing than
with SubtitleEdit's, especially when working with TTML/DFXP where it
would often have timecodes and content mixed in or duplicated.
Returns the new file path of the Subtitle.
"""
if not self.path or not self.path.exists():
raise ValueError("You must download the subtitle track first.")
if self.codec == codec:
return self.path
output_path = self.path.with_suffix(f".{codec.value.lower()}")
sub_edit_executable = get_binary_path("SubtitleEdit")
if sub_edit_executable and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
sub_edit_format = {
Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0"
}.get(codec, codec.name)
sub_edit_args = [
sub_edit_executable,
"/Convert", self.path, sub_edit_format,
f"/outputfilename:{output_path.name}",
"/encoding:utf8"
]
if codec == Subtitle.Codec.SubRip:
sub_edit_args.append("/ConvertColorsToDialog")
subprocess.run(
sub_edit_args,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
else:
writer = {
# pycaption generally only supports these subtitle formats
Subtitle.Codec.SubRip: pycaption.SRTWriter,
Subtitle.Codec.TimedTextMarkupLang: pycaption.DFXPWriter,
Subtitle.Codec.WebVTT: pycaption.WebVTTWriter,
}.get(codec)
if writer is None:
raise NotImplementedError(f"Cannot yet convert {self.codec.name} to {codec.name}.")
caption_set = self.parse(self.path.read_bytes(), self.codec)
Subtitle.merge_same_cues(caption_set)
subtitle_text = writer().write(caption_set)
output_path.write_text(subtitle_text, encoding="utf8")
self.path = output_path
self.codec = codec
if callable(self.OnConverted):
self.OnConverted(codec)
return output_path
@staticmethod
def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
# TODO: Use an "enum" for subtitle codecs
if not isinstance(data, bytes):
raise ValueError(f"Subtitle data must be parsed as bytes data, not {type(data).__name__}")
try:
if codec == Subtitle.Codec.SubRip:
text = try_ensure_utf8(data).decode("utf8")
caption_set = pycaption.SRTReader().read(text)
elif codec == Subtitle.Codec.fTTML:
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
if codec == Subtitle.Codec.fTTML:
captions: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
for segment in (
Subtitle.parse(box.data, Subtitle.Codec.TimedTextMarkupLang)
for box in MP4.parse_stream(BytesIO(data))
if box.type == b"mdat"
):
for lang in segment.get_languages():
caption_lists[lang].extend(segment.get_captions(lang))
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
elif codec == Subtitle.Codec.TimedTextMarkupLang:
text = try_ensure_utf8(data).decode("utf8")
text = text.replace("tt:", "")
captions[lang].extend(segment.get_captions(lang))
captions: pycaption.CaptionSet = pycaption.CaptionSet(captions)
return captions
if codec == Subtitle.Codec.TimedTextMarkupLang:
text = data.decode("utf8").replace("tt:", "")
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text)
caption_set = pycaption.DFXPReader().read(text)
elif codec == Subtitle.Codec.fVTT:
return pycaption.DFXPReader().read(text)
if codec == Subtitle.Codec.fVTT:
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
caption_list, language = Subtitle.merge_segmented_wvtt(data)
caption_lists[language] = caption_list
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
elif codec == Subtitle.Codec.WebVTT:
text = try_ensure_utf8(data).decode("utf8")
return caption_set
if codec == Subtitle.Codec.WebVTT:
# Segmented VTT when merged may have the WEBVTT headers part of the next caption
# if they are not separated far enough from the previous caption, hence the \n\n
text = text. \
text = data.decode("utf8"). \
replace("WEBVTT", "\n\nWEBVTT"). \
replace("\r", ""). \
replace("\n\n\n", "\n \n\n"). \
replace("\n\n<", "\n<")
caption_set = pycaption.WebVTTReader().read(text)
else:
raise ValueError(f"Unknown Subtitle format \"{codec}\"...")
captions: pycaption.CaptionSet = pycaption.WebVTTReader().read(text)
return captions
except pycaption.exceptions.CaptionReadSyntaxError as e:
raise SyntaxError(f"A syntax error has occurred when reading the \"{codec}\" subtitle: {e}")
except pycaption.exceptions.CaptionReadNoCaptions:
return pycaption.CaptionSet({"en": []})
# remove empty caption lists or some code breaks, especially if it's the first list
for language in caption_set.get_languages():
if not caption_set.get_captions(language):
# noinspection PyProtectedMember
del caption_set._captions[language]
return caption_set
raise ValueError(f"Unknown Subtitle Format \"{codec}\"...")
@staticmethod
def merge_same_cues(caption_set: pycaption.CaptionSet):
@ -421,16 +315,17 @@ class Subtitle(Track):
layout: Optional[Layout] = None
nodes: list[CaptionNode] = []
for cue_box in vttc_box.children:
for cue_box in MP4.parse_stream(BytesIO(vttc_box.data)):
if cue_box.type == b"vsid":
# this is a V(?) Source ID box, we don't care
continue
cue_data = cue_box.data.decode("utf8")
if cue_box.type == b"sttg":
layout = Layout(webvtt_positioning=cue_box.settings)
layout = Layout(webvtt_positioning=cue_data)
elif cue_box.type == b"payl":
nodes.extend([
node
for line in cue_box.cue_text.split("\n")
for line in cue_data.split("\n")
for node in [
CaptionNode.create_text(WebVTTReader()._decode(line)),
CaptionNode.create_break()
@ -464,23 +359,21 @@ class Subtitle(Track):
executable = get_binary_path("SubtitleEdit")
if executable:
if self.codec == Subtitle.Codec.SubStationAlphav4:
output_format = "AdvancedSubStationAlpha"
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
output_format = "TimedText1.0"
else:
output_format = self.codec.name
subprocess.run(
[
executable,
"/Convert", self.path, output_format,
"/encoding:utf8",
"/Convert", self.path, "srt",
"/overwrite",
"/RemoveTextForHI"
],
check=True,
stdout=subprocess.DEVNULL
)
# Remove UTF-8 Byte Order Marks
self.path.write_text(
self.path.read_text(encoding="utf-8-sig"),
encoding="utf8"
)
else:
sub = Subtitles(self.path)
sub.filter(
@ -493,37 +386,6 @@ class Subtitle(Track):
)
sub.save()
def reverse_rtl(self) -> None:
"""
Reverse RTL (Right to Left) Start/End on Captions.
This can be used to fix the positioning of sentence-ending characters.
"""
if not self.path or not self.path.exists():
raise ValueError("You must download the subtitle track first.")
executable = get_binary_path("SubtitleEdit")
if not executable:
raise EnvironmentError("SubtitleEdit executable not found...")
if self.codec == Subtitle.Codec.SubStationAlphav4:
output_format = "AdvancedSubStationAlpha"
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
output_format = "TimedText1.0"
else:
output_format = self.codec.name
subprocess.run(
[
executable,
"/Convert", self.path, output_format,
"/ReverseRtlStartEnd",
"/encoding:utf8",
"/overwrite"
],
check=True,
stdout=subprocess.DEVNULL
)
def __str__(self) -> str:
return " | ".join(filter(bool, [
"SUB",
@ -533,4 +395,4 @@ class Subtitle(Track):
]))
__all__ = ("Subtitle",)
__ALL__ = (Subtitle,)
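As a usage note for convert() above (SubtitleEdit preferred, pycaption writers as fallback), a hedged sketch; `sub` stands in for an already-downloaded Subtitle track:
# Illustrative sketch only; convert() raises ValueError if the track has not been downloaded yet.
from devine.core.tracks import Subtitle

srt_path = sub.convert(Subtitle.Codec.SubRip)
print(srt_path)  # same location as before, with the extension changed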

View File

@ -1,346 +1,91 @@
import base64
import html
import logging
import re
import shutil
import subprocess
from copy import copy
from enum import Enum
from functools import partial
from pathlib import Path
from typing import Any, Callable, Iterable, Optional, Union
from uuid import UUID
from zlib import crc32
import m3u8
import requests
from langcodes import Language
from requests import Session
from devine.core.config import config
from devine.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY
from devine.core.downloaders import aria2c, curl_impersonate, requests
from devine.core.drm import DRM_T, Widevine
from devine.core.utilities import get_binary_path, get_boxes, try_ensure_utf8
from devine.core.constants import TERRITORY_MAP
from devine.core.drm import DRM_T
from devine.core.utilities import get_binary_path, get_boxes
from devine.core.utils.subprocess import ffprobe
class Track:
class DRM(Enum):
pass
class Descriptor(Enum):
URL = 1 # Direct URL, nothing fancy
HLS = 2 # https://en.wikipedia.org/wiki/HTTP_Live_Streaming
DASH = 3 # https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
M3U = 2 # https://en.wikipedia.org/wiki/M3U (and M3U8)
MPD = 3 # https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
def __init__(
self,
id_: str,
url: Union[str, list[str]],
language: Union[Language, str],
is_original_lang: bool = False,
descriptor: Descriptor = Descriptor.URL,
needs_proxy: bool = False,
needs_repack: bool = False,
name: Optional[str] = None,
drm: Optional[Iterable[DRM_T]] = None,
edition: Optional[str] = None,
downloader: Optional[Callable] = None,
data: Optional[dict] = None,
id_: Optional[str] = None,
extra: Optional[Any] = None
) -> None:
if not isinstance(url, (str, list)):
raise TypeError(f"Expected url to be a {str}, or list of {str}, not {type(url)}")
if not isinstance(language, (Language, str)):
raise TypeError(f"Expected language to be a {Language} or {str}, not {type(language)}")
if not isinstance(is_original_lang, bool):
raise TypeError(f"Expected is_original_lang to be a {bool}, not {type(is_original_lang)}")
if not isinstance(descriptor, Track.Descriptor):
raise TypeError(f"Expected descriptor to be a {Track.Descriptor}, not {type(descriptor)}")
if not isinstance(needs_repack, bool):
raise TypeError(f"Expected needs_repack to be a {bool}, not {type(needs_repack)}")
if not isinstance(name, (str, type(None))):
raise TypeError(f"Expected name to be a {str}, not {type(name)}")
if not isinstance(id_, (str, type(None))):
raise TypeError(f"Expected id_ to be a {str}, not {type(id_)}")
if not isinstance(edition, (str, type(None))):
raise TypeError(f"Expected edition to be a {str}, not {type(edition)}")
if not isinstance(downloader, (Callable, type(None))):
raise TypeError(f"Expected downloader to be a {Callable}, not {type(downloader)}")
if not isinstance(data, (dict, type(None))):
raise TypeError(f"Expected data to be a {dict}, not {type(data)}")
invalid_urls = ", ".join(set(type(x) for x in url if not isinstance(x, str)))
if invalid_urls:
raise TypeError(f"Expected all items in url to be a {str}, but found {invalid_urls}")
if drm is not None:
try:
iter(drm)
except TypeError:
raise TypeError(f"Expected drm to be an iterable, not {type(drm)}")
if downloader is None:
downloader = {
"aria2c": aria2c,
"curl_impersonate": curl_impersonate,
"requests": requests
}[config.downloader]
self.path: Optional[Path] = None
self.url = url
self.language = Language.get(language)
self.is_original_lang = is_original_lang
self.descriptor = descriptor
self.needs_repack = needs_repack
self.name = name
self.drm = drm
self.edition: str = edition
self.downloader = downloader
self.data = data or {}
if self.name is None:
lang = Language.get(self.language)
if (lang.language or "").lower() == (lang.territory or "").lower():
lang.territory = None # e.g. en-en, de-DE
reduced = lang.simplify_script()
extra_parts = []
if reduced.script is not None:
script = reduced.script_name(max_distance=25)
if script and script != "Zzzz":
extra_parts.append(script)
if reduced.territory is not None:
territory = reduced.territory_name(max_distance=25)
if territory and territory != "ZZ":
territory = territory.removesuffix(" SAR China")
extra_parts.append(territory)
self.name = ", ".join(extra_parts) or None
if not id_:
this = copy(self)
this.url = self.url.rsplit("?", maxsplit=1)[0]
checksum = crc32(repr(this).encode("utf8"))
id_ = hex(checksum)[2:]
self.id = id_
self.url = url
# required basic metadata
self.language = Language.get(language)
self.is_original_lang = bool(is_original_lang)
# optional io metadata
self.descriptor = descriptor
self.needs_proxy = bool(needs_proxy)
self.needs_repack = bool(needs_repack)
# drm
self.drm = drm
# extra data
self.edition: str = edition
self.extra: Any = extra or {} # allow anything for extra, but default to a dict
# TODO: Currently using OnFoo event naming, change to just segment_filter
# events
self.OnSegmentFilter: Optional[Callable] = None
# Called after one of the Track's segments have downloaded
self.OnSegmentDownloaded: Optional[Callable[[Path], None]] = None
# Called after the Track has downloaded
self.OnDownloaded: Optional[Callable] = None
# Called after the Track or one of its segments have been decrypted
self.OnDecrypted: Optional[Callable[[DRM_T, Optional[m3u8.Segment]], None]] = None
# Called after the Track has been repackaged
self.OnDecrypted: Optional[Callable] = None
self.OnRepacked: Optional[Callable] = None
# Called before the Track is multiplexed
self.OnMultiplex: Optional[Callable] = None
# should only be set internally
self.path: Optional[Path] = None
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__,
items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __eq__(self, other: Any) -> bool:
def __eq__(self, other: object) -> bool:
return isinstance(other, Track) and self.id == other.id
def download(
self,
session: Session,
prepare_drm: partial,
progress: Optional[partial] = None
):
"""Download and optionally Decrypt this Track."""
from devine.core.manifests import DASH, HLS
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPING")
if DOWNLOAD_CANCELLED.is_set():
progress(downloaded="[yellow]SKIPPED")
return
log = logging.getLogger("track")
proxy = next(iter(session.proxies.values()), None)
track_type = self.__class__.__name__
save_path = config.directories.temp / f"{track_type}_{self.id}.mp4"
if track_type == "Subtitle":
save_path = save_path.with_suffix(f".{self.codec.extension}")
if self.descriptor != self.Descriptor.URL:
save_dir = save_path.with_name(save_path.name + "_segments")
else:
save_dir = save_path.parent
def cleanup():
# track file (e.g., "foo.mp4")
save_path.unlink(missing_ok=True)
# aria2c control file (e.g., "foo.mp4.aria2" or "foo.mp4.aria2__temp")
save_path.with_suffix(f"{save_path.suffix}.aria2").unlink(missing_ok=True)
save_path.with_suffix(f"{save_path.suffix}.aria2__temp").unlink(missing_ok=True)
if save_dir.exists() and save_dir.name.endswith("_segments"):
shutil.rmtree(save_dir)
if not DOWNLOAD_LICENCE_ONLY.is_set():
if config.directories.temp.is_file():
raise ValueError(f"Temp Directory '{config.directories.temp}' must be a Directory, not a file")
config.directories.temp.mkdir(parents=True, exist_ok=True)
# Delete any pre-existing temp files matching this track.
# We can't re-use or continue downloading these tracks as they do not use a
# lock file. Or at least the majority don't. Even if they did I've encountered
# corruptions caused by sudden interruptions to the lock file.
cleanup()
try:
if self.descriptor == self.Descriptor.HLS:
HLS.download_track(
track=self,
save_path=save_path,
save_dir=save_dir,
progress=progress,
session=session,
proxy=proxy,
license_widevine=prepare_drm
)
elif self.descriptor == self.Descriptor.DASH:
DASH.download_track(
track=self,
save_path=save_path,
save_dir=save_dir,
progress=progress,
session=session,
proxy=proxy,
license_widevine=prepare_drm
)
elif self.descriptor == self.Descriptor.URL:
try:
if not self.drm and track_type in ("Video", "Audio"):
# the service might not have explicitly defined the `drm` property
# try to find Widevine DRM information from the init data of the URL
try:
self.drm = [Widevine.from_track(self, session)]
except Widevine.Exceptions.PSSHNotFound:
# it might not have Widevine DRM, or might not have found the PSSH
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
if self.drm:
track_kid = self.get_key_id(session=session)
drm = self.drm[0] # just use the first supported DRM system for now
if isinstance(drm, Widevine):
# license and grab content keys
if not prepare_drm:
raise ValueError("prepare_drm func must be supplied to use Widevine DRM")
progress(downloaded="LICENSING")
prepare_drm(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
else:
drm = None
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPED")
else:
for status_update in self.downloader(
urls=self.url,
output_dir=save_path.parent,
filename=save_path.name,
headers=session.headers,
cookies=session.cookies,
proxy=proxy
):
file_downloaded = status_update.get("file_downloaded")
if not file_downloaded:
progress(**status_update)
# see https://github.com/devine-dl/devine/issues/71
save_path.with_suffix(f"{save_path.suffix}.aria2__temp").unlink(missing_ok=True)
self.path = save_path
if callable(self.OnDownloaded):
self.OnDownloaded()
if drm:
progress(downloaded="Decrypting", completed=0, total=100)
drm.decrypt(save_path)
self.drm = None
if callable(self.OnDecrypted):
self.OnDecrypted(drm)
progress(downloaded="Decrypted", completed=100)
if track_type == "Subtitle" and self.codec.name not in ("fVTT", "fTTML"):
track_data = self.path.read_bytes()
track_data = try_ensure_utf8(track_data)
track_data = track_data.decode("utf8"). \
replace("&lrm;", html.unescape("&lrm;")). \
replace("&rlm;", html.unescape("&rlm;")). \
encode("utf8")
self.path.write_bytes(track_data)
progress(downloaded="Downloaded")
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set()
progress(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set()
progress(downloaded="[red]FAILED")
raise
except (Exception, KeyboardInterrupt):
if not DOWNLOAD_LICENCE_ONLY.is_set():
cleanup()
raise
if DOWNLOAD_CANCELLED.is_set():
# we stopped during the download, let's exit
return
if not DOWNLOAD_LICENCE_ONLY.is_set():
if self.path.stat().st_size <= 3: # Empty UTF-8 BOM == 3 bytes
raise IOError("Download failed, the downloaded file is empty.")
if callable(self.OnDownloaded):
self.OnDownloaded(self)
def delete(self) -> None:
if self.path:
self.path.unlink()
self.path = None
def move(self, target: Union[Path, str]) -> Path:
"""
Move the Track's file from current location, to target location.
This will overwrite anything at the target path.
Raises:
TypeError: If the target argument is not the expected type.
ValueError: If track has no file to move, or the target does not exist.
OSError: If the file somehow failed to move.
Returns the new location of the track.
"""
if not isinstance(target, (str, Path)):
raise TypeError(f"Expected {target} to be a {Path} or {str}, not {type(target)}")
if not self.path:
raise ValueError("Track has no file to move")
if not isinstance(target, Path):
target = Path(target)
if not target.exists():
raise ValueError(f"Target file {repr(target)} does not exist")
moved_to = Path(shutil.move(self.path, target))
if moved_to.resolve() != target.resolve():
raise OSError(f"Failed to move {self.path} to {target}")
self.path = target
return target
def get_track_name(self) -> Optional[str]:
"""Get the Track Name."""
return self.name
"""Return the base Track Name. This may be enhanced in sub-classes."""
if (self.language.language or "").lower() == (self.language.territory or "").lower():
self.language.territory = None # e.g. en-en, de-DE
if self.language.territory == "US":
self.language.territory = None
reduced = self.language.simplify_script()
extra_parts = []
if reduced.script is not None:
extra_parts.append(reduced.script_name(max_distance=25))
if reduced.territory is not None:
territory = reduced.territory_name(max_distance=25)
extra_parts.append(TERRITORY_MAP.get(territory, territory))
return ", ".join(extra_parts) or None
def get_key_id(self, init_data: Optional[bytes] = None, *args, **kwargs) -> Optional[UUID]:
"""
@ -366,6 +111,7 @@ class Track:
if not isinstance(init_data, bytes):
raise TypeError(f"Expected init_data to be bytes, not {init_data!r}")
# try to get it via ffprobe, needed for non-MP4 data, e.g. WEBM from Google Play
probe = ffprobe(init_data)
if probe:
for stream in probe.get("streams") or []:
@ -373,22 +119,17 @@ class Track:
if enc_key_id:
return UUID(bytes=base64.b64decode(enc_key_id))
# look for track encryption mp4 boxes
for tenc in get_boxes(init_data, b"tenc"):
if tenc.key_ID.int != 0:
return tenc.key_ID
for uuid_box in get_boxes(init_data, b"uuid"):
if uuid_box.extended_type == UUID("8974dbce-7be7-4c51-84f9-7148f9882554"): # tenc
tenc = uuid_box.data
if tenc.key_ID.int != 0:
return tenc.key_ID
def get_init_segment(
self,
maximum_size: int = 20000,
url: Optional[str] = None,
byte_range: Optional[str] = None,
session: Optional[Session] = None
session: Optional[requests.Session] = None
) -> bytes:
"""
Get the Track's Initial Segment Data Stream.
@ -412,24 +153,20 @@ class Track:
byte_range: Range of bytes to download from the explicit or implicit URL.
session: Session context, e.g., authorization and headers.
"""
if not isinstance(maximum_size, int):
raise TypeError(f"Expected maximum_size to be an {int}, not {type(maximum_size)}")
if not isinstance(url, (str, type(None))):
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
if not isinstance(byte_range, (str, type(None))):
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not {type(session)}")
if not url:
if self.descriptor != self.Descriptor.URL:
raise ValueError(f"An explicit URL must be provided for {self.descriptor.name} tracks")
if not self.url:
raise ValueError("An explicit URL must be provided as the track has no URL")
url = self.url
if not session:
session = Session()
session = requests.Session()
if self.descriptor != self.Descriptor.URL and not url:
# We cannot know which init map from the HLS or DASH playlist is actually used.
# For DASH this could be from any adaptation set, any period, etc.
# For HLS we could make some assumptions, but it's best that it is explicitly provided.
raise ValueError(
f"An explicit URL to an init map or file must be provided for {self.descriptor.name} tracks."
)
url = url or self.url
if not url:
raise ValueError("The track must have an URL to point towards it's data.")
content_length = maximum_size
@ -444,9 +181,9 @@ class Track:
else:
size_test = session.head(url)
if "Content-Length" in size_test.headers:
content_length_header = int(size_test.headers["Content-Length"])
if content_length_header > 0:
content_length = min(content_length_header, maximum_size)
content_length = int(size_test.headers["Content-Length"])
# use whichever is smaller in case this is a full file
content_length = min(content_length, maximum_size)
range_test = session.head(url, headers={"Range": "bytes=0-1"})
if range_test.status_code == 206:
byte_range = f"0-{content_length-1}"
@ -461,6 +198,8 @@ class Track:
res.raise_for_status()
init_data = res.content
else:
# Take advantage of streaming support to take just the first n bytes
# This is a hacky alternative to HTTP's Range on unsupported servers
init_data = None
with session.get(url, stream=True) as s:
for chunk in s.iter_content(content_length):
@ -471,6 +210,11 @@ class Track:
return init_data
def delete(self) -> None:
if self.path:
self.path.unlink()
self.path = None
def repackage(self) -> None:
if not self.path or not self.path.exists():
raise ValueError("Cannot repackage a Track that has not been downloaded.")
@ -479,21 +223,20 @@ class Track:
if not executable:
raise EnvironmentError("FFmpeg executable \"ffmpeg\" was not found but is required for this call.")
original_path = self.path
output_path = original_path.with_stem(f"{original_path.stem}_repack")
repacked_path = self.path.with_suffix(f".repack{self.path.suffix}")
def _ffmpeg(extra_args: list[str] = None):
subprocess.run(
[
executable, "-hide_banner",
"-loglevel", "error",
"-i", original_path,
"-i", self.path,
*(extra_args or []),
# Following are very important!
"-map_metadata", "-1", # don't transfer metadata to output file
"-fflags", "bitexact", # only have minimal tag data, reproducible mux
"-codec", "copy",
str(output_path)
str(repacked_path)
],
check=True,
stdout=subprocess.PIPE,
@ -509,7 +252,35 @@ class Track:
else:
raise
self.path = output_path
self.swap(repacked_path)
def move(self, target: Union[str, Path]) -> bool:
"""
Move the Track's file from current location, to target location.
This will overwrite anything at the target path.
"""
if not self.path:
return False
target = Path(target)
ok = Path(shutil.move(self.path, target)).resolve() == target.resolve()
if ok:
self.path = target
return ok
def swap(self, target: Union[str, Path]) -> bool:
"""
Swaps the Track's file with the Target file. The current Track's file is deleted.
Returns False if the Track is not yet downloaded, or the target path does not exist.
"""
target = Path(target)
if not target.exists() or not self.path:
return False
self.path.unlink()
ok = Path(shutil.move(target, self.path)).resolve() == self.path.resolve()
if not ok:
return False
return self.move(target)
__all__ = ("Track",)
__ALL__ = (Track,)
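A hedged usage sketch tying get_init_segment and get_key_id together, as described in the hunks above; `track` is a placeholder for a URL-descriptor Track and the proxy/header setup is elided:
# Usage sketch only: probe a track's init segment and try to read its default KID.
import requests

session = requests.Session()
init_data = track.get_init_segment(maximum_size=20000, session=session)
kid = track.get_key_id(init_data=init_data)
if kid:
    print("default KID:", kid)
else:
    print("no tenc/ffprobe key ID found; track may be DRM-free")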

View File

@ -6,16 +6,17 @@ from functools import partial
from pathlib import Path
from typing import Callable, Iterator, Optional, Sequence, Union
from Cryptodome.Random import get_random_bytes
from langcodes import Language, closest_supported_match
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn
from rich.progress import Progress, TextColumn, SpinnerColumn, BarColumn, TimeRemainingColumn
from rich.table import Table
from rich.tree import Tree
from devine.core.config import config
from devine.core.console import console
from devine.core.constants import LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
from devine.core.constants import LANGUAGE_MAX_DISTANCE, LANGUAGE_MUX_MAP, AnyTrack, TrackT
from devine.core.tracks.audio import Audio
from devine.core.tracks.chapters import Chapter, Chapters
from devine.core.tracks.chapter import Chapter
from devine.core.tracks.subtitle import Subtitle
from devine.core.tracks.track import Track
from devine.core.tracks.video import Video
@ -36,11 +37,11 @@ class Tracks:
Chapter: 3
}
def __init__(self, *args: Union[Tracks, Sequence[Union[AnyTrack, Chapter, Chapters]], Track, Chapter, Chapters]):
def __init__(self, *args: Union[Tracks, list[Track], Track]):
self.videos: list[Video] = []
self.audio: list[Audio] = []
self.subtitles: list[Subtitle] = []
self.chapters = Chapters()
self.chapters: list[Chapter] = []
if args:
self.add(args)
@ -51,13 +52,6 @@ class Tracks:
def __len__(self) -> int:
return len(self.videos) + len(self.audio) + len(self.subtitles)
def __add__(
self,
other: Union[Tracks, Sequence[Union[AnyTrack, Chapter, Chapters]], Track, Chapter, Chapters]
) -> Tracks:
self.add(other)
return self
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__,
@ -143,7 +137,7 @@ class Tracks:
def add(
self,
tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter, Chapters]], Track, Chapter, Chapters],
tracks: Union[Tracks, Sequence[Union[AnyTrack, Chapter]], Track, Chapter],
warn_only: bool = False
) -> None:
"""Add a provided track to its appropriate array and ensuring it's not a duplicate."""
@ -172,7 +166,7 @@ class Tracks:
elif isinstance(track, Subtitle):
self.subtitles.append(track)
elif isinstance(track, Chapter):
self.chapters.add(track)
self.chapters.append(track)
else:
raise ValueError("Track type was not set or is invalid.")
@ -249,6 +243,13 @@ class Tracks:
continue
self.subtitles.sort(key=lambda x: is_close_match(language, [x.language]), reverse=True)
def sort_chapters(self) -> None:
"""Sort chapter tracks by chapter number."""
if not self.chapters:
return
# number
self.chapters.sort(key=lambda x: x.number)
def select_video(self, x: Callable[[Video], bool]) -> None:
self.videos = list(filter(x, self.videos))
@ -258,35 +259,37 @@ class Tracks:
def select_subtitles(self, x: Callable[[Subtitle], bool]) -> None:
self.subtitles = list(filter(x, self.subtitles))
def by_resolutions(self, resolutions: list[int], per_resolution: int = 0) -> None:
# Note: Do not merge these list comprehensions. They must be done separately so the results
# from the 16:9 canvas check are only used if there's no exact height resolution match.
selected = []
for resolution in resolutions:
matches = [ # exact matches
x
for x in self.videos
if x.height == resolution
]
if not matches:
matches = [ # 16:9 canvas matches
x
for x in self.videos
if int(x.width * (9 / 16)) == resolution
]
selected.extend(matches[:per_resolution or None])
self.videos = selected
def with_resolution(self, resolution: int) -> None:
if resolution:
# Note: Do not merge these list comprehensions. They must be done separately so the results
# from the 16:9 canvas check are only used if there's no exact height resolution match.
videos_quality = [x for x in self.videos if x.height == resolution]
if not videos_quality:
videos_quality = [x for x in self.videos if int(x.width * (9 / 16)) == resolution]
self.videos = videos_quality
def export_chapters(self, to_file: Optional[Union[Path, str]] = None) -> str:
"""Export all chapters in order to a string or file."""
self.sort_chapters()
data = "\n".join(map(repr, self.chapters))
if to_file:
to_file = Path(to_file)
to_file.parent.mkdir(parents=True, exist_ok=True)
to_file.write_text(data, encoding="utf8")
return data
@staticmethod
def by_language(tracks: list[TrackT], languages: list[str], per_language: int = 0) -> list[TrackT]:
selected = []
def select_per_language(tracks: list[TrackT], languages: list[str]) -> list[TrackT]:
"""
Enumerate and return the first Track per language.
You should sort the list so the wanted track is closer to the start of the list.
"""
tracks_ = []
for language in languages:
selected.extend([
x
for x in tracks
if closest_supported_match(x.language, [language], LANGUAGE_MAX_DISTANCE)
][:per_language or None])
return selected
match = closest_supported_match(language, [str(x.language) for x in tracks], LANGUAGE_MAX_DISTANCE)
if match:
tracks_.append(next(x for x in tracks if str(x.language) == match))
return tracks_
def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]:
"""
@ -311,9 +314,11 @@ class Tracks:
if not vt.path or not vt.path.exists():
raise ValueError("Video Track must be downloaded before muxing...")
if callable(vt.OnMultiplex):
vt.OnMultiplex()
vt.OnMultiplex(vt)
cl.extend([
"--language", f"0:{vt.language}",
"--language", "0:{}".format(LANGUAGE_MUX_MAP.get(
str(vt.language), str(vt.language)
)),
"--default-track", f"0:{i == 0}",
"--original-flag", f"0:{vt.is_original_lang}",
"--compression", "0:none", # disable extra compression
@ -324,10 +329,12 @@ class Tracks:
if not at.path or not at.path.exists():
raise ValueError("Audio Track must be downloaded before muxing...")
if callable(at.OnMultiplex):
at.OnMultiplex()
at.OnMultiplex(at)
cl.extend([
"--track-name", f"0:{at.get_track_name() or ''}",
"--language", f"0:{at.language}",
"--language", "0:{}".format(LANGUAGE_MUX_MAP.get(
str(at.language), str(at.language)
)),
"--default-track", f"0:{i == 0}",
"--visual-impaired-flag", f"0:{at.descriptive}",
"--original-flag", f"0:{at.is_original_lang}",
@ -339,11 +346,13 @@ class Tracks:
if not st.path or not st.path.exists():
raise ValueError("Text Track must be downloaded before muxing...")
if callable(st.OnMultiplex):
st.OnMultiplex()
st.OnMultiplex(st)
default = bool(self.audio and is_close_match(st.language, [self.audio[0].language]) and st.forced)
cl.extend([
"--track-name", f"0:{st.get_track_name() or ''}",
"--language", f"0:{st.language}",
"--language", "0:{}".format(LANGUAGE_MUX_MAP.get(
str(st.language), str(st.language)
)),
"--sub-charset", "0:UTF-8",
"--forced-track", f"0:{st.forced}",
"--default-track", f"0:{default}",
@ -356,10 +365,10 @@ class Tracks:
if self.chapters:
chapters_path = config.directories.temp / config.filenames.chapters.format(
title=sanitize_filename(title),
random=self.chapters.id
random=get_random_bytes(16).hex()
)
self.chapters.dump(chapters_path, fallback_name=config.chapter_fallback_name)
cl.extend(["--chapter-charset", "UTF-8", "--chapters", str(chapters_path)])
self.export_chapters(chapters_path)
cl.extend(["--chapters", str(chapters_path)])
else:
chapters_path = None
@ -393,4 +402,4 @@ class Tracks:
track.delete()
__all__ = ("Tracks",)
__ALL__ = (Tracks,)
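A tiny worked example of the 16:9 canvas check used by the resolution selection above: a 1920x800 scope-cropped encode still counts as 1080p because its width implies a 1080-high 16:9 canvas.
# The canvas check from with_resolution()/by_resolutions(), applied to a 2.40:1 encode.
width, height = 1920, 800
target = 1080
exact_match = height == target                  # False
canvas_match = int(width * (9 / 16)) == target  # 1920 * 0.5625 == 1080 -> True
print(exact_match, canvas_match)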

View File

@ -88,40 +88,32 @@ class Video(Track):
def from_cicp(primaries: int, transfer: int, matrix: int) -> Video.Range:
"""
ISO/IEC 23001-8 Coding-independent code points to Video Range.
Sources:
https://www.itu.int/rec/T-REC-H.Sup19-202104-I
Sources for Code points:
https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.Sup19-201903-S!!PDF-E&type=items
"""
class Primaries(Enum):
Unspecified = 0
BT_709 = 1
BT_601_625 = 5
BT_601_525 = 6
BT_2020_and_2100 = 9
SMPTE_ST_2113_and_EG_4321 = 12 # P3D65
BT_2020 = 9 # BT.2100 shares the same CP
class Transfer(Enum):
Unspecified = 0
BT_709 = 1
BT_601 = 6
BT_2020 = 14
BT_2100 = 15
BT_2100_PQ = 16
BT_2100_HLG = 18
SDR_BT_709 = 1
SDR_BT_601_625 = 5
SDR_BT_601_525 = 6
SDR_BT_2020 = 14
SDR_BT_2100 = 15
PQ = 16
HLG = 18
class Matrix(Enum):
RGB = 0
YCbCr_BT_709 = 1
YCbCr_BT_601_625 = 5
YCbCr_BT_601_525 = 6
YCbCr_BT_2020_and_2100 = 9 # YCbCr BT.2100 shares the same CP
ICtCp_BT_2100 = 14
if transfer == 5:
# While not part of any standard, it is typically used as a PAL variant of Transfer.BT_601=6.
# i.e. where Transfer 6 would be for BT.601-NTSC and Transfer 5 would be for BT.601-PAL.
# The codebase is currently agnostic to either, so a manual conversion to 6 is done.
transfer = 6
YCbCr_BT_2020 = 9 # YCbCr BT.2100 shares the same CP
primaries = Primaries(primaries)
transfer = Transfer(transfer)
@ -131,11 +123,13 @@ class Video(Track):
if (primaries, transfer, matrix) == (0, 0, 0):
return Video.Range.SDR
elif primaries in (Primaries.BT_601_625, Primaries.BT_601_525):
if primaries in (Primaries.BT_601_525, Primaries.BT_601_625):
return Video.Range.SDR
elif transfer == Transfer.BT_2100_PQ:
if transfer == Transfer.PQ:
return Video.Range.HDR10
elif transfer == Transfer.BT_2100_HLG:
elif transfer == Transfer.HLG:
return Video.Range.HLG
else:
return Video.Range.SDR
@ -188,20 +182,17 @@ class Video(Track):
Video.Codec.HEVC: "hevc_metadata"
}[self.codec]
original_path = self.path
output_path = original_path.with_stem(f"{original_path.stem}_{['limited', 'full'][range_]}_range")
changed_path = self.path.with_suffix(f".range{range_}{self.path.suffix}")
subprocess.run([
executable, "-hide_banner",
"-loglevel", "panic",
"-i", original_path,
"-i", self.path,
"-codec", "copy",
"-bsf:v", f"{filter_key}=video_full_range_flag={range_}",
str(output_path)
str(changed_path)
], check=True)
self.path = output_path
original_path.unlink()
self.swap(changed_path)
def ccextractor(
self, track_id: Any, out_path: Union[Path, str], language: Language, original: bool = False
@ -214,19 +205,13 @@ class Video(Track):
if not executable:
raise EnvironmentError("ccextractor executable was not found.")
# ccextractor often fails in weird ways unless we repack
self.repackage()
out_path = Path(out_path)
try:
subprocess.run([
executable,
"-trim",
"-nobom",
"-noru", "-ru1",
"-o", out_path,
self.path
"-trim", "-noru", "-ru1",
self.path, "-o", out_path
], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
out_path.unlink(missing_ok=True)
@ -234,6 +219,11 @@ class Video(Track):
raise
if out_path.exists():
if out_path.stat().st_size <= 3:
# An empty UTF-8 file with BOM is 3 bytes.
# If the subtitle file is empty, mkvmerge will fail to mux.
out_path.unlink()
return None
cc_track = Subtitle(
id_=track_id,
url="", # doesn't need to be downloaded
@ -321,12 +311,11 @@ class Video(Track):
i = file.index(b"x264")
encoding_settings = file[i: i + file[i:].index(b"\x00")].replace(b":", br"\\:").replace(b",", br"\,").decode()
original_path = self.path
cleaned_path = original_path.with_suffix(f".cleaned{original_path.suffix}")
cleaned_path = self.path.with_suffix(f".cleaned{self.path.suffix}")
subprocess.run([
executable, "-hide_banner",
"-loglevel", "panic",
"-i", original_path,
"-i", self.path,
"-map_metadata", "-1",
"-fflags", "bitexact",
"-bsf:v", f"filter_units=remove_types=6,h264_metadata=sei_user_data={uuid}+{encoding_settings}",
@ -336,10 +325,9 @@ class Video(Track):
log.info(" + Removed")
self.path = cleaned_path
original_path.unlink()
self.swap(cleaned_path)
return True
__all__ = ("Video",)
__ALL__ = (Video,)
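A quick sanity check of the CICP mapping above, using common code points from ITU-T H.Sup19 (illustrative only):
# BT.2020 primaries (9) + PQ transfer (16) + BT.2020 matrix (9) should resolve to HDR10.
from devine.core.tracks import Video

assert Video.Range.from_cicp(primaries=9, transfer=16, matrix=9) == Video.Range.HDR10
assert Video.Range.from_cicp(primaries=1, transfer=1, matrix=1) == Video.Range.SDR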

View File

@ -1,23 +1,21 @@
import ast
import contextlib
import importlib.util
import os
import re
import shutil
import socket
import sys
import time
import unicodedata
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from types import ModuleType
from typing import Optional, Sequence, Union
from urllib.parse import ParseResult, urlparse
from typing import AsyncIterator, Optional, Sequence, Union
from urllib.parse import urlparse
import chardet
import pproxy
import requests
from construct import ValidationError
from langcodes import Language, closest_match
from pymp4.parser import Box
from unidecode import unidecode
@ -112,7 +110,7 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
replace("/", " & ").\
replace(";", " & ") # e.g. multi-episode filenames
filename = re.sub(r"[:; ]", spacer, filename) # structural chars to (spacer)
filename = re.sub(r"[\\*!?¿,'\"“”()<>|$#]", "", filename) # not filename safe chars
filename = re.sub(r"[\\*!?¿,'\"()<>|$#]", "", filename) # not filename safe chars
filename = re.sub(rf"[{spacer}]{{2,}}", spacer, filename) # remove extra neighbouring (spacer)s
return filename
@ -130,7 +128,7 @@ def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
"""Scan a byte array for a wanted box, then parse and yield each find."""
# using slicing to get to the wanted box is done because parsing the entire box and recursively
# scanning through each box and its children often wouldn't scan far enough to reach the wanted box.
# since it doesn't care what child box the wanted box is from, this works fine.
# since it doesnt care what child box the wanted box is from, this works fine.
if not isinstance(data, (bytes, bytearray)):
raise ValueError("data must be bytes")
while True:
@ -146,15 +144,8 @@ def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
try:
box = Box.parse(data)
except IOError:
# since get_init_segment might cut off unexpectedly, pymp4 may be unable to read
# the expected amounts of data and complain, so let's just end the function here
# TODO: Does this miss any data we may need?
break
except ValidationError as e:
if box_type == b"tenc":
# ignore this error on tenc boxes as the tenc definition isn't consistent,
# some services don't even put valid data and mix it up with avc1...
continue
raise e
if as_bytes:
box = Box.build(box)
yield box
@ -217,64 +208,35 @@ def time_elapsed_since(start: float) -> str:
return time_string
def try_ensure_utf8(data: bytes) -> bytes:
"""
Try to ensure that the given data is encoded in UTF-8.
@contextlib.asynccontextmanager
async def start_pproxy(proxy: str) -> AsyncIterator[str]:
proxy = urlparse(proxy)
Parameters:
data: Input data that may or may not yet be UTF-8 or another encoding.
scheme = {
"https": "http+ssl",
"socks5h": "socks"
}.get(proxy.scheme, proxy.scheme)
remote_server = f"{scheme}://{proxy.hostname}"
if proxy.port:
remote_server += f":{proxy.port}"
if proxy.username or proxy.password:
remote_server += "#"
if proxy.username:
remote_server += proxy.username
if proxy.password:
remote_server += f":{proxy.password}"
server = pproxy.Server("http://localhost:0") # random port
remote = pproxy.Connection(remote_server)
handler = await server.start_server({"rserver": [remote]})
Returns the input data encoded in UTF-8 if successful. If unable to detect the
encoding of the input data, then the original data is returned as-received.
"""
try:
data.decode("utf8")
return data
except UnicodeDecodeError:
try:
# CP-1252 is a superset of latin1
return data.decode("cp1252").encode("utf8")
except UnicodeDecodeError:
try:
# last ditch effort to detect encoding
detection_result = chardet.detect(data)
if not detection_result["encoding"]:
return data
return data.decode(detection_result["encoding"]).encode("utf8")
except UnicodeDecodeError:
return data
def get_free_port() -> int:
"""
Get an available local port to use.
The port is freed as soon as this has returned, therefore, it
is possible for the port to be taken before you try to use it.
"""
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1]
def get_extension(value: Union[str, Path, ParseResult]) -> Optional[str]:
"""
Get a URL or Path file extension/suffix.
Note: The returned value will begin with `.`.
"""
if isinstance(value, ParseResult):
value_parsed = value
elif isinstance(value, (str, Path)):
value_parsed = urlparse(str(value))
else:
raise TypeError(f"Expected {str}, {Path}, or {ParseResult}, got {type(value)}")
if value_parsed.path:
ext = os.path.splitext(value_parsed.path)[1]
if ext and ext != ".":
return ext
port = handler.sockets[0].getsockname()[1]
yield f"http://localhost:{port}"
finally:
handler.close()
await handler.wait_closed()
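A small illustrative use of the try_ensure_utf8 fallback chain described above, with CP-1252 input that is not valid UTF-8 on its own:
from devine.core.utilities import try_ensure_utf8

raw = "Café".encode("cp1252")    # b"Caf\xe9", not decodable as UTF-8
fixed = try_ensure_utf8(raw)     # falls back to the CP-1252 branch
print(fixed.decode("utf8"))      # "Café"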
class FPS(ast.NodeVisitor):

View File

@ -1,8 +1,7 @@
import re
from typing import Any, Optional, Union
from typing import Optional, Union
import click
from click.shell_completion import CompletionItem
from pywidevine.cdm import Cdm as WidevineCdm
@ -95,90 +94,22 @@ class LanguageRange(click.ParamType):
return re.split(r"\s*[,;]\s*", value)
class QualityList(click.ParamType):
name = "quality_list"
class Quality(click.ParamType):
name = "quality"
def convert(
self,
value: Union[str, list[str]],
param: Optional[click.Parameter] = None,
ctx: Optional[click.Context] = None
) -> list[int]:
if not value:
return []
if not isinstance(value, list):
value = value.split(",")
resolutions = []
for resolution in value:
try:
resolutions.append(int(resolution.lower().rstrip("p")))
except TypeError:
self.fail(
f"Expected string for int() conversion, got {resolution!r} of type {type(resolution).__name__}",
param,
ctx
)
except ValueError:
self.fail(f"{resolution!r} is not a valid integer", param, ctx)
return sorted(resolutions, reverse=True)
class MultipleChoice(click.Choice):
"""
The multiple choice type allows multiple values to be checked against
a fixed set of supported values.
It internally uses and is based on click.Choice.
"""
name = "multiple_choice"
def __repr__(self) -> str:
return f"MultipleChoice({list(self.choices)})"
def convert(
self,
value: Any,
param: Optional[click.Parameter] = None,
ctx: Optional[click.Context] = None
) -> list[Any]:
if not value:
return []
if isinstance(value, str):
values = value.split(",")
elif isinstance(value, list):
values = value
else:
def convert(self, value: str, param: Optional[click.Parameter] = None, ctx: Optional[click.Context] = None) -> int:
try:
return int(value.lower().rstrip("p"))
except TypeError:
self.fail(
f"{value!r} is not a supported value.",
f"expected string for int() conversion, got {value!r} of type {type(value).__name__}",
param,
ctx
)
chosen_values: list[Any] = []
for value in values:
chosen_values.append(super().convert(value, param, ctx))
return chosen_values
def shell_complete(
self,
ctx: click.Context,
param: click.Parameter,
incomplete: str
) -> list[CompletionItem]:
"""
Complete choices that start with the incomplete value.
Parameters:
ctx: Invocation context for this command.
param: The parameter that is requesting completion.
incomplete: Value being completed. May be empty.
"""
incomplete = incomplete.rsplit(",")[-1]
return super(self).shell_complete(ctx, param, incomplete)
except ValueError:
self.fail(f"{value!r} is not a valid integer", param, ctx)
SEASON_RANGE = SeasonRange()
LANGUAGE_RANGE = LanguageRange()
QUALITY_LIST = QualityList()
QUALITY = Quality()
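As a rough illustration, here is how the list-based parameter types above (QualityList and MultipleChoice) might be attached to a click command. This is not part of the diff; the command name, option names, choice values, and the module path devine.core.utils.click_types are assumptions.

import click

from devine.core.utils.click_types import QUALITY_LIST, MultipleChoice


@click.command()
@click.option("-q", "--quality", type=QUALITY_LIST, default=[],
              help="Comma-separated resolutions, e.g. 1080p,720p (sorted highest first).")
@click.option("-r", "--range", "range_", type=MultipleChoice(["SDR", "HDR10", "DV"]),
              default=["SDR"],
              help="One or more dynamic ranges, comma-separated.")
def download(quality: list[int], range_: list[str]) -> None:
    # QUALITY_LIST converts "1080p,720p" into [1080, 720]
    # MultipleChoice validates each comma-separated value against the fixed set
    click.echo(f"qualities={quality} ranges={range_}")


if __name__ == "__main__":
    download()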

View File

@ -45,4 +45,4 @@ class Vault(metaclass=ABCMeta):
"""Get a list of Service Tags from Vault."""
__all__ = ("Vault",)
__ALL__ = (Vault,)

View File

@ -74,4 +74,4 @@ class Vaults:
return success
__all__ = ("Vaults",)
__ALL__ = (Vaults,)

View File

@ -1,214 +0,0 @@
from typing import Iterator, Optional, Union
from uuid import UUID
from requests import Session
from devine.core import __version__
from devine.core.vault import Vault
class API(Vault):
"""Key Vault using a simple RESTful HTTP API call."""
def __init__(self, name: str, uri: str, token: str):
super().__init__(name)
self.uri = uri.rstrip("/")
self.session = Session()
self.session.headers.update({
"User-Agent": f"Devine v{__version__}"
})
self.session.headers.update({
"Authorization": f"Bearer {token}"
})
def get_key(self, kid: Union[UUID, str], service: str) -> Optional[str]:
if isinstance(kid, UUID):
kid = kid.hex
data = self.session.get(
url=f"{self.uri}/{service.lower()}/{kid}",
headers={
"Accept": "application/json"
}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
content_key = data.get("content_key")
if not content_key:
return None
if not isinstance(content_key, str):
raise ValueError(f"Expected {content_key} to be {str}, was {type(content_key)}")
return content_key
def get_keys(self, service: str) -> Iterator[tuple[str, str]]:
page = 1
while True:
data = self.session.get(
url=f"{self.uri}/{service.lower()}",
params={
"page": page,
"total": 10
},
headers={
"Accept": "application/json"
}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.PageInvalid,
4: Exceptions.ServiceTagInvalid,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
content_keys = data.get("content_keys")
if content_keys:
if not isinstance(content_keys, dict):
raise ValueError(f"Expected {content_keys} to be {dict}, was {type(content_keys)}")
for key_id, key in content_keys.items():
yield key_id, key
pages = int(data["pages"])
if pages <= page:
break
page += 1
def add_key(self, service: str, kid: Union[UUID, str], key: str) -> bool:
if isinstance(kid, UUID):
kid = kid.hex
data = self.session.post(
url=f"{self.uri}/{service.lower()}/{kid}",
json={
"content_key": key
},
headers={
"Accept": "application/json"
}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
5: Exceptions.ContentKeyInvalid
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
# the kid:key was new to the vault (optional)
added = bool(data.get("added"))
# the key for kid was changed/updated (optional)
updated = bool(data.get("updated"))
return added or updated
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
data = self.session.post(
url=f"{self.uri}/{service.lower()}",
json={
"content_keys": {
str(kid).replace("-", ""): key
for kid, key in kid_keys.items()
}
},
headers={
"Accept": "application/json"
}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
5: Exceptions.ContentKeyInvalid
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
# each kid:key that was new to the vault (optional)
added = int(data.get("added"))
# each key for a kid that was changed/updated (optional)
updated = int(data.get("updated"))
return added + updated
def get_services(self) -> Iterator[str]:
data = self.session.post(
url=self.uri,
headers={
"Accept": "application/json"
}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
service_list = data.get("service_list", [])
if not isinstance(service_list, list):
raise ValueError(f"Expected {service_list} to be {list}, was {type(service_list)}")
for service in service_list:
yield service
class Exceptions:
class AuthRejected(Exception):
"""Authentication Error Occurred, is your token valid? Do you have permission to make this call?"""
class TooManyRequests(Exception):
"""Rate Limited; Sent too many requests in a given amount of time."""
class PageInvalid(Exception):
"""Requested page does not exist."""
class ServiceTagInvalid(Exception):
"""The Service Tag is invalid."""
class KeyIdInvalid(Exception):
"""The Key ID is invalid."""
class ContentKeyInvalid(Exception):
"""The Content Key is invalid."""

poetry.lock (generated), 2281 lines changed. File diff suppressed because it is too large.

View File

@ -1,99 +1,76 @@
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
requires = ['poetry-core>=1.0.0']
build-backend = 'poetry.core.masonry.api'
[tool.poetry]
name = "devine"
version = "3.1.0"
description = "Modular Movie, TV, and Music Archival Software."
license = "GPL-3.0-only"
authors = ["rlaphoenix <rlaphoenix@pm.me>"]
readme = "README.md"
homepage = "https://github.com/devine-dl/devine"
repository = "https://github.com/devine-dl/devine"
keywords = ["python", "downloader", "drm", "widevine"]
name = 'devine'
version = '2.1.0'
description = 'Open-Source Movie, TV, and Music Downloading Solution'
license = 'GPL-3.0-only'
authors = ['rlaphoenix <rlaphoenix@pm.me>']
readme = 'README.md'
homepage = 'https://github.com/devine-dl/devine'
repository = 'https://github.com/devine-dl/devine'
keywords = ['widevine', 'drm', 'downloader']
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Intended Audience :: End Users/Desktop",
"Natural Language :: English",
"Operating System :: OS Independent",
"Topic :: Multimedia :: Video",
"Topic :: Security :: Cryptography",
'Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: End Users/Desktop',
'Natural Language :: English',
'Operating System :: OS Independent',
'Topic :: Multimedia :: Video',
'Topic :: Security :: Cryptography',
]
include = [
{ path = "CHANGELOG.md", format = "sdist" },
{ path = "README.md", format = "sdist" },
{ path = "LICENSE", format = "sdist" },
]
[tool.poetry.urls]
"Issues" = "https://github.com/devine-dl/devine/issues"
"Discussions" = "https://github.com/devine-dl/devine/discussions"
"Changelog" = "https://github.com/devine-dl/devine/blob/master/CHANGELOG.md"
[tool.poetry.dependencies]
python = ">=3.9,<4.0"
python = ">=3.9.0,<3.12"
appdirs = "^1.4.4"
Brotli = "^1.1.0"
click = "^8.1.7"
Brotli = "^1.0.9"
click = "^8.1.3"
construct = "^2.8.8"
crccheck = "^1.3.0"
jsonpickle = "^3.0.3"
jsonpickle = "^3.0.1"
langcodes = { extras = ["data"], version = "^3.3.0" }
lxml = "^5.1.0"
pproxy = "^2.7.9"
protobuf = "^4.25.3"
pycaption = "^2.2.4"
pycryptodomex = "^3.20.0"
pyjwt = "^2.8.0"
pymediainfo = "^6.1.0"
pymp4 = "^1.4.0"
pymysql = "^1.1.0"
pywidevine = { extras = ["serve"], version = "^1.8.0" }
PyYAML = "^6.0.1"
requests = { extras = ["socks"], version = "^2.31.0" }
rich = "^13.7.1"
lxml = "^4.9.2"
pproxy = "^2.7.8"
protobuf = "4.21.6"
pycaption = "^2.1.1"
pycryptodomex = "^3.17.0"
pyjwt = "^2.6.0"
pymediainfo = "^6.0.1"
pymp4 = "^1.2.0"
pymysql = "^1.0.2"
pywidevine = { extras = ["serve"], version = "^1.6.0" }
PyYAML = "^6.0"
requests = { extras = ["socks"], version = "^2.28.2" }
rich = "^13.3.1"
"rlaphoenix.m3u8" = "^3.4.0"
"ruamel.yaml" = "^0.18.6"
"ruamel.yaml" = "^0.17.21"
sortedcontainers = "^2.4.0"
subtitle-filter = "^1.4.8"
Unidecode = "^1.3.8"
urllib3 = "^2.2.1"
chardet = "^5.2.0"
curl-cffi = "^0.6.1"
# Temporary explicit versions of these langcodes dependencies as language-data v1.1
# uses marisa-trie v0.7.8 which doesn't have Python 3.12 wheels.
language-data = "^1.2.0.dev3"
marisa-trie = "^1.1.0"
subtitle-filter = "^1.4.4"
Unidecode = "^1.3.6"
urllib3 = "^1.26.14"
[tool.poetry.dev-dependencies]
pre-commit = "^3.6.2"
mypy = "^1.8.0"
mypy-protobuf = "^3.5.0"
types-protobuf = "^4.24.0.20240129"
types-PyMySQL = "^1.1.0.1"
types-requests = "^2.31.0.20240218"
isort = "^5.13.2"
ruff = "~0.3.0"
pre-commit = "^3.0.4"
mypy = "^0.991"
mypy-protobuf = "^3.3.0"
types-protobuf = "^3.19.22"
types-PyMySQL = "^1.0.19.2"
types-requests = "^2.28.11.8"
isort = "^5.12.0"
[tool.poetry.scripts]
devine = "devine.core.__main__:main"
[tool.ruff]
force-exclude = true
line-length = 120
[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "W"]
devine = 'devine.core.__main__:main'
[tool.isort]
line_length = 118
line_length = 120
[tool.mypy]
exclude = '_pb2\.pyi?$'
check_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_defs = true
follow_imports = "silent"
follow_imports = 'silent'
ignore_missing_imports = true
no_implicit_optional = true
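As a small aside on the mypy settings above, here is the kind of code they reject versus accept; this sketch is not from the repository.

# with disallow_untyped_defs = true, an unannotated function is reported:
def parse_quality(value):  # error: Function is missing a type annotation
    return int(value.rstrip("p"))


# the fully annotated equivalent passes the same check
def parse_quality_typed(value: str) -> int:
    return int(value.rstrip("p"))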