From de7122a179f981aa14442865cc13d694f47b1bdf Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Tue, 23 Jan 2024 10:06:42 +0000 Subject: [PATCH] Add basic control file to Requests and Curl-Impersonate downloaders --- devine/core/downloaders/curl_impersonate.py | 71 ++++++++++-------- devine/core/downloaders/requests.py | 79 ++++++++++++--------- 2 files changed, 88 insertions(+), 62 deletions(-) diff --git a/devine/core/downloaders/curl_impersonate.py b/devine/core/downloaders/curl_impersonate.py index 2ec879f..cbc6aef 100644 --- a/devine/core/downloaders/curl_impersonate.py +++ b/devine/core/downloaders/curl_impersonate.py @@ -69,38 +69,51 @@ def curl_impersonate( for url, out_path in uri: out_path.parent.mkdir(parents=True, exist_ok=True) + + control_file = out_path.with_name(f"{out_path.name}.!dev") + if control_file.exists(): + # consider the file corrupt if the control file exists + # TODO: Design a control file format so we know how much of the file is missing + out_path.unlink(missing_ok=True) + control_file.unlink() + elif out_path.exists(): + continue + control_file.write_bytes(b"") + attempts = 1 + try: + while True: + try: + stream = session.get(url, stream=True) + stream.raise_for_status() + with open(out_path, "wb") as f: + written = 0 + for chunk in stream.iter_content(chunk_size=1024): + download_size = len(chunk) + f.write(chunk) + written += download_size + if progress: + progress(advance=1) - while True: - try: - stream = session.get(url, stream=True) - stream.raise_for_status() - with open(out_path, "wb") as f: - written = 0 - for chunk in stream.iter_content(chunk_size=1024): - download_size = len(chunk) - f.write(chunk) - written += download_size - if progress: - progress(advance=1) + now = time.time() + time_since = now - last_speed_refresh - now = time.time() - time_since = now - last_speed_refresh - - download_sizes.append(download_size) - if time_since > 5 or download_size < 1024: - data_size = sum(download_sizes) - download_speed = data_size / (time_since or 1) - progress(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() - break - except Exception as e: - out_path.unlink(missing_ok=True) - if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS: - raise e - time.sleep(RETRY_WAIT) - attempts += 1 + download_sizes.append(download_size) + if time_since > 5 or download_size < 1024: + data_size = sum(download_sizes) + download_speed = data_size / (time_since or 1) + progress(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_refresh = now + download_sizes.clear() + break + except Exception as e: + out_path.unlink(missing_ok=True) + if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS: + raise e + time.sleep(RETRY_WAIT) + attempts += 1 + finally: + control_file.unlink() return 0 diff --git a/devine/core/downloaders/requests.py b/devine/core/downloaders/requests.py index f7c4278..5b40b7a 100644 --- a/devine/core/downloaders/requests.py +++ b/devine/core/downloaders/requests.py @@ -65,44 +65,57 @@ def requests( for url, out_path in uri: out_path.parent.mkdir(parents=True, exist_ok=True) + + control_file = out_path.with_name(f"{out_path.name}.!dev") + if control_file.exists(): + # consider the file corrupt if the control file exists + # TODO: Design a control file format so we know how much of the file is missing + out_path.unlink(missing_ok=True) + control_file.unlink() + elif out_path.exists(): + continue + control_file.write_bytes(b"") + attempts = 1 + try: + while True: + try: + stream = session.get(url, stream=True) + stream.raise_for_status() - while True: - try: - stream = session.get(url, stream=True) - stream.raise_for_status() + if len(uri) == 1 and progress: + content_length = int(stream.headers.get("Content-Length", "0")) + if content_length > 0: + progress(total=math.ceil(content_length / 1024)) - if len(uri) == 1 and progress: - content_length = int(stream.headers.get("Content-Length", "0")) - if content_length > 0: - progress(total=math.ceil(content_length / 1024)) + with open(out_path, "wb") as f: + written = 0 + for chunk in stream.iter_content(chunk_size=1024): + download_size = len(chunk) + f.write(chunk) + written += download_size + if progress: + progress(advance=1) - with open(out_path, "wb") as f: - written = 0 - for chunk in stream.iter_content(chunk_size=1024): - download_size = len(chunk) - f.write(chunk) - written += download_size - if progress: - progress(advance=1) + now = time.time() + time_since = now - last_speed_refresh - now = time.time() - time_since = now - last_speed_refresh - - download_sizes.append(download_size) - if time_since > 5 or download_size < 1024: - data_size = sum(download_sizes) - download_speed = data_size / (time_since or 1) - progress(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() - break - except Exception as e: - out_path.unlink(missing_ok=True) - if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS: - raise e - time.sleep(RETRY_WAIT) - attempts += 1 + download_sizes.append(download_size) + if time_since > 5 or download_size < 1024: + data_size = sum(download_sizes) + download_speed = data_size / (time_since or 1) + progress(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_refresh = now + download_sizes.clear() + break + except Exception as e: + out_path.unlink(missing_ok=True) + if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS: + raise e + time.sleep(RETRY_WAIT) + attempts += 1 + finally: + control_file.unlink() return 0