From c6c2e9ca510f29e190daffed38f8b572435171f1 Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Tue, 9 Jan 2024 22:38:06 +0000 Subject: [PATCH] Add Curl-Impersonate Downloader via curl_cffi project The browser to impersonate can be set in the config. For example: ```yaml curl_impersonate: browser: chrome110 ``` It will default to using chrome110 if no value is set in the config. The available browsers are listed here: https://github.com/yifeikong/curl_cffi#sessions --- CONFIG.md | 6 ++ devine/core/config.py | 1 + devine/core/downloaders/__init__.py | 4 +- devine/core/downloaders/curl_impersonate.py | 105 ++++++++++++++++++++ poetry.lock | 100 ++++++++++++++++++- pyproject.toml | 1 + 6 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 devine/core/downloaders/curl_impersonate.py diff --git a/CONFIG.md b/CONFIG.md index 7f732dc..e1c17de 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -88,6 +88,11 @@ NF: Credentials must be specified per-profile. You cannot specify a fallback or default credential. Please be aware that this information is sensitive and to keep it safe. Do not share your config. +## curl_impersonate (dict) + +- `browser` - The browser to impersonate. The available browsers and versions are listed here: + <https://github.com/yifeikong/curl_cffi#sessions> + ## directories (dict) Override the default directories used across devine. @@ -150,6 +155,7 @@ Options: - `aria2c` (default) - https://github.com/aria2/aria2 - `requests` - https://github.com/psf/requests +- `curl_impersonate` - https://github.com/yifeikong/curl-impersonate (via https://github.com/yifeikong/curl_cffi) Note that aria2c can reach the highest speeds as it utilizes threading and more connections than the other downloaders. However, aria2c can also be one of the more unstable downloaders. 
It will work one day, then diff --git a/devine/core/config.py b/devine/core/config.py index ca146b8..e4277a6 100644 --- a/devine/core/config.py +++ b/devine/core/config.py @@ -39,6 +39,7 @@ class Config: self.dl: dict = kwargs.get("dl") or {} self.aria2c: dict = kwargs.get("aria2c") or {} self.cdm: dict = kwargs.get("cdm") or {} + self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {} self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or [] self.credentials: dict = kwargs.get("credentials") or {} diff --git a/devine/core/downloaders/__init__.py b/devine/core/downloaders/__init__.py index 332c06f..6528ce8 100644 --- a/devine/core/downloaders/__init__.py +++ b/devine/core/downloaders/__init__.py @@ -2,12 +2,14 @@ import asyncio from ..config import config from .aria2c import aria2c +from .curl_impersonate import curl_impersonate from .requests import requests downloader = { "aria2c": lambda *args, **kwargs: asyncio.run(aria2c(*args, **kwargs)), + "curl_impersonate": curl_impersonate, "requests": requests }[config.downloader] -__all__ = ("downloader", "aria2c", "requests") +__all__ = ("downloader", "aria2c", "curl_impersonate", "requests") diff --git a/devine/core/downloaders/curl_impersonate.py b/devine/core/downloaders/curl_impersonate.py new file mode 100644 index 0000000..ba32e85 --- /dev/null +++ b/devine/core/downloaders/curl_impersonate.py @@ -0,0 +1,105 @@ +import time +from functools import partial +from pathlib import Path +from typing import Any, MutableMapping, Optional, Union + +from curl_cffi.requests import Session +from requests.cookies import RequestsCookieJar +from rich import filesize + +from devine.core.config import config +from devine.core.constants import DOWNLOAD_CANCELLED + +MAX_ATTEMPTS = 5 +RETRY_WAIT = 2 +BROWSER = config.curl_impersonate.get("browser", "chrome110") + + +def curl_impersonate( + uri: Union[str, list[str]], + out: Path, + headers: Optional[dict] = None, + cookies: Optional[Union[MutableMapping[str, str], 
RequestsCookieJar]] = None, + proxy: Optional[str] = None, + progress: Optional[partial] = None, + *_: Any, + **__: Any +) -> int: + """ + Download files using Curl Impersonate. + https://github.com/lwthiker/curl-impersonate + + If multiple URLs are provided they will be downloaded in the provided order + to the output directory. They will not be merged together. + """ + if isinstance(uri, list) and len(uri) == 1: + uri = uri[0] + + if isinstance(uri, list): + if out.is_file(): + raise ValueError("Expecting out to be a Directory path not a File as multiple URLs were provided") + uri = [ + (url, out / f"{i:08}.mp4") + for i, url in enumerate(uri) + ] + else: + uri = [(uri, out.parent / out.name)] + + session = Session(impersonate=BROWSER) + if headers: + headers = { + k: v + for k, v in headers.items() + if k.lower() != "accept-encoding" + } + session.headers.update(headers) + if cookies: + session.cookies.update(cookies) + if proxy: + session.proxies.update({ + "http": proxy, + "https": proxy + }) + + if progress: + progress(total=len(uri)) + + download_sizes = [] + last_speed_refresh = time.time() + + for url, out_path in uri: + out_path.parent.mkdir(parents=True, exist_ok=True) + attempts = 1 + try: + stream = session.get(url, stream=True) + stream.raise_for_status() + with open(out_path, "wb") as f: + written = 0 + for chunk in stream.iter_content(chunk_size=1024): + download_size = len(chunk) + f.write(chunk) + written += download_size + if progress: + progress(advance=1) + + now = time.time() + time_since = now - last_speed_refresh + + download_sizes.append(download_size) + if time_since > 5 or download_size < 1024: + data_size = sum(download_sizes) + download_speed = data_size / (time_since or 1) + progress(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_refresh = now + download_sizes.clear() + break + except Exception as e: + if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS: + raise e + time.sleep(RETRY_WAIT) + attempts += 1 + + 
return 0 + + +__all__ = ("curl_impersonate",) diff --git a/poetry.lock b/poetry.lock index b0d9d58..1158373 100644 --- a/poetry.lock +++ b/poetry.lock @@ -271,6 +271,70 @@ files = [ {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = 
"sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = 
"cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "cfgv" version = "3.4.0" @@ -453,6 +517,29 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["cssselect", "importlib-resources", "jaraco.test (>=5.1)", "lxml", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "curl-cffi" +version = "0.5.10" +description = "libcurl ffi bindings for Python, with impersonation support" +optional = false +python-versions = ">=3.7" +files = [ + {file = "curl_cffi-0.5.10-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:892603dab5e56fb72bfff7ae969136138971f63f63defe98232e1ec55cb0f1c6"}, + {file = "curl_cffi-0.5.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:9937b8e13b1a6963c63e155b6621ec74649965105efedb919bc226fe731861cc"}, + {file = "curl_cffi-0.5.10-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b537595b9610a4dd0927c09823925b4e32b1ce0fd04385bfc5bb72ab830720e6"}, + {file = "curl_cffi-0.5.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b2bc8822d23415f6533c8b750475e9bbc76764025fe1dcb5866dc033607fd7b"}, + {file = 
"curl_cffi-0.5.10-cp37-abi3-win_amd64.whl", hash = "sha256:f9a1874b860c4e8db49bdfd9b9d4dc39999a1397d271ec78624c35c838e9e92a"}, + {file = "curl_cffi-0.5.10.tar.gz", hash = "sha256:55bac4b73e2d80ceeaabea33270fc8ca6ace594128a46710242f2e688b4f8bfc"}, +] + +[package.dependencies] +cffi = ">=1.12.0" + +[package.extras] +build = ["cibuildwheel", "wheel"] +dev = ["autoflake (==1.4)", "black (==22.8.0)", "coverage (==6.4.1)", "cryptography (==38.0.3)", "flake8 (==6.0.0)", "flake8-bugbear (==22.7.1)", "flake8-pie (==0.15.0)", "httpx (==0.23.1)", "isort (==5.10.1)", "mypy (==0.971)", "pytest (==7.1.2)", "pytest-asyncio (==0.19.0)", "pytest-trio (==0.7.0)", "trio (==0.21.0)", "trio-typing (==0.7.0)", "trustme (==0.9.0)", "types-certifi (==2021.10.8.2)", "uvicorn (==0.18.3)"] +test = ["cryptography (==38.0.3)", "httpx (==0.23.1)", "pytest (==7.1.2)", "pytest-asyncio (==0.19.0)", "pytest-trio (==0.7.0)", "trio (==0.21.0)", "trio-typing (==0.7.0)", "trustme (==0.9.0)", "types-certifi (==2021.10.8.2)", "uvicorn (==0.18.3)"] + [[package]] name = "distlib" version = "0.3.7" @@ -1130,6 +1217,17 @@ lxml = ">=4.9.1" dev = ["pytest", "pytest-lazy-fixture"] transcript = ["nltk"] +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + [[package]] name = "pycryptodome" version = "3.19.0" @@ -1798,4 +1896,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "50a3deb09b0f45c897ed18a0995b4f5db3f8fb387f1a7941034635f7524d1f24" +content-hash = "1615a40a4f4c6a45d59df567b7dc024c3e5a9776acdb9700e1e8de1819786e67" diff --git a/pyproject.toml b/pyproject.toml index 
5e73133..6168a76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ subtitle-filter = "^1.4.8" Unidecode = "^1.3.7" urllib3 = "^2.1.0" chardet = "^5.2.0" +curl-cffi = "^0.5.10" [tool.poetry.dev-dependencies] pre-commit = "^3.5.0"