forked from DRMTalks/devine
Implement a Python-requests-based downloader
This commit is contained in:
parent
cb82febb7c
commit
be403bbff4
|
@ -135,8 +135,13 @@ Choose what software to use to download data throughout Devine where needed.
|
||||||
Options:
|
Options:
|
||||||
|
|
||||||
- `aria2c` (default) - https://github.com/aria2/aria2
|
- `aria2c` (default) - https://github.com/aria2/aria2
|
||||||
|
- `requests` - https://github.com/psf/requests
|
||||||
- `saldl` - https://github.com/saldl/saldl
|
- `saldl` - https://github.com/saldl/saldl
|
||||||
|
|
||||||
|
Note that aria2c can reach the highest speeds as it utilizes threading and more connections than the other
|
||||||
|
downloaders. However, aria2c can also be one of the less stable downloaders: it may work one day and
fail the next. It also does not support HTTP(S) proxies, while the other downloaders do.
|
||||||
|
|
||||||
## headers (dict)
|
## headers (dict)
|
||||||
|
|
||||||
Case-Insensitive dictionary of headers that all Services begin their Request Session state with.
|
Case-Insensitive dictionary of headers that all Services begin their Request Session state with.
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from .aria2c import aria2c
|
from .aria2c import aria2c
|
||||||
|
from .requests import requests
|
||||||
from .saldl import saldl
|
from .saldl import saldl
|
||||||
from .downloader import downloader
|
from .downloader import downloader
|
||||||
|
|
||||||
# Public export tuple for the downloaders package.
# NOTE(review): __ALL__ is not the special __all__ attribute, so this tuple
# does not affect star-imports; it appears to only document the intended API —
# confirm whether renaming to __all__ is desired project-wide.
__ALL__ = (downloader, aria2c, requests, saldl)
|
||||||
|
|
|
@ -2,10 +2,11 @@ import asyncio
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from devine.core.config import config
|
from devine.core.config import config
|
||||||
from devine.core.downloaders import aria2c, saldl
|
from devine.core.downloaders import aria2c, requests, saldl
|
||||||
|
|
||||||
|
|
||||||
# Select the configured downloader implementation by name (config.downloader).
# aria2c and saldl are async coroutines, so they are wrapped with
# partial(asyncio.run, ...) to present a synchronous call interface;
# the requests downloader is already a plain synchronous function.
# A KeyError here means config.downloader names an unknown downloader.
downloader = {
    "aria2c": partial(asyncio.run, aria2c),
    "requests": requests,
    "saldl": partial(asyncio.run, saldl)
}[config.downloader]
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
import time
|
||||||
|
from functools import partial
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Union, Any
|
||||||
|
|
||||||
|
from requests import Session
|
||||||
|
from rich import filesize
|
||||||
|
from rich.filesize import decimal
|
||||||
|
|
||||||
|
|
||||||
|
def requests(
    uri: Union[str, list[str]],
    out: Path,
    headers: Optional[dict] = None,
    proxy: Optional[str] = None,
    progress: Optional[partial] = None,
    *_: Any,
    **__: Any
) -> int:
    """
    Download files using Python Requests.
    https://requests.readthedocs.io

    If multiple URLs are provided they will be downloaded in the provided order
    to the output directory. They will not be merged together.

    Parameters:
        uri: A single URL, or a list of URLs to download sequentially.
        out: Output file path for a single URL, or an output directory when a
            list of URLs is given (each saved as a zero-padded ``NNNNNNNN.mp4``).
        headers: Headers applied to every request. ``Accept-Encoding`` is
            stripped so the transfer is not compressed and the byte counts
            match the server-reported Content-Length.
        proxy: Proxy URI applied to all requests (HTTP and HTTPS).
        progress: Optional callback accepting ``total=``, ``advance=`` and
            ``downloaded=`` keyword updates (e.g. a rich progress partial).

    Returns 0 on success, mirroring the exit-code convention of the other
    (subprocess-based) downloaders.

    Raises:
        ValueError: If ``out`` is an existing file while multiple URLs were
            provided, or if a download ends short of the server-reported
            Content-Length.
        requests.HTTPError: If the server responds with an error status.
    """
    # A one-element list is treated the same as a single URL.
    if isinstance(uri, list) and len(uri) == 1:
        uri = uri[0]

    # Normalize to a list of (url, output_path) pairs.
    if isinstance(uri, list):
        if out.is_file():
            raise ValueError("Expecting out to be a Directory path not a File as multiple URLs were provided")
        uri = [
            (url, out / f"{i:08}.mp4")
            for i, url in enumerate(uri)
        ]
    else:
        uri = [(uri, out.parent / out.name)]

    session = Session()
    try:
        if headers:
            # Strip Accept-Encoding: a compressed transfer would make the
            # written byte count disagree with Content-Length below.
            headers = {
                k: v
                for k, v in headers.items()
                if k.lower() != "accept-encoding"
            }
            session.headers.update(headers)
        if proxy:
            session.proxies.update({"all": proxy})

        # Pre-compute the combined size via HEAD requests so the progress bar
        # can show an overall total before any body bytes are transferred.
        # NOTE(review): assumes every server answers HEAD with Content-Length —
        # a missing header would raise KeyError here; confirm against services.
        total_size = sum(
            int(session.head(url).headers["Content-Length"])
            for url, _ in uri
        )

        if progress:
            progress(total=total_size)

        download_sizes = []
        last_speed_refresh = time.time()

        for url, out_path in uri:
            with session.get(url, stream=True) as stream:
                # Fail fast on HTTP errors instead of writing an error page
                # to disk as if it were the requested file.
                stream.raise_for_status()
                file_size = int(stream.headers["Content-Length"])
                written = 0
                with open(out_path, "wb") as f:
                    for chunk in stream.iter_content(chunk_size=1024):
                        download_size = len(chunk)
                        f.write(chunk)
                        written += download_size
                        if progress:
                            progress(advance=download_size)

                            # Refresh the displayed speed every ~5s, or on a
                            # short (likely final) chunk. Guarded by the same
                            # `if progress` as above — the original called
                            # progress(downloaded=...) unguarded and crashed
                            # when no progress callback was supplied.
                            now = time.time()
                            time_since = now - last_speed_refresh
                            download_sizes.append(download_size)
                            if time_since > 5 or download_size < 1024:
                                data_size = sum(download_sizes)
                                # `or 1` avoids division by zero on an
                                # instantaneous refresh.
                                download_speed = data_size / (time_since or 1)
                                progress(downloaded=f"{filesize.decimal(download_speed)}/s")
                                last_speed_refresh = now
                                download_sizes.clear()
            if written < file_size:
                raise ValueError(
                    f"{url} finished downloading unexpectedly, got {decimal(written)}/{decimal(file_size)}")
    finally:
        # Release pooled connections even on error.
        session.close()

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): __ALL__ is not the special __all__ name; star-imports are
# unaffected. This only documents the module's intended public API.
__ALL__ = (requests,)
|
Loading…
Reference in New Issue