fix(curl-impersonate): Set Cert-Authority Bundle for HTTPS Proxies

For some reason curl-impersonate (curl_cffi project) does not set the certificate-authority bundle for proxies, which, to be fair, is for some reason separated into two curl options.

Making this change, as well as removing the https->http scheme enforcement on proxies, fixes HTTPS proxies on the curl-impersonate downloaders. I also simplified the separate http and https proxy definitions to the `all` definition, which was not originally supported but does seem to be supported as of v0.6.2.

I tested this on NordVPN proxies, which are explicitly HTTPS-only, and it does work.
This commit is contained in:
rlaphoenix 2024-04-01 14:59:53 +01:00
parent 45ccc129ce
commit f25d2419cf
1 changed files with 7 additions and 7 deletions

View File

@ -6,6 +6,7 @@ from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from curl_cffi import CurlOpt
from curl_cffi.requests import Session
from rich import filesize
@ -23,7 +24,7 @@ BROWSER = config.curl_impersonate.get("browser", "chrome120")
def download(
url: str,
save_path: Path,
session: Optional[Session] = None,
session: Session,
**kwargs: Any
) -> Generator[dict[str, Any], None, None]:
"""
@ -52,8 +53,10 @@ def download(
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
if not session:
session = Session(impersonate=BROWSER)
# https://github.com/yifeikong/curl_cffi/issues/6#issuecomment-2028518677
# must be applied here since the `session.curl` is thread-localized
# noinspection PyProtectedMember
session.curl.setopt(CurlOpt.PROXY_CAINFO, session.curl._cacert)
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
@ -224,10 +227,7 @@ def curl_impersonate(
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({
"http": proxy.replace("https://", "http://"),
"https": proxy.replace("https://", "http://")
})
session.proxies.update({"all": proxy})
yield dict(total=len(urls))