Add basic control file to Requests and Curl-Impersonate downloaders

This commit is contained in:
rlaphoenix 2024-01-23 10:06:42 +00:00
parent c53330046c
commit de7122a179
2 changed files with 88 additions and 62 deletions

View File

@ -69,8 +69,19 @@ def curl_impersonate(
for url, out_path in uri:
out_path.parent.mkdir(parents=True, exist_ok=True)
attempts = 1
control_file = out_path.with_name(f"{out_path.name}.!dev")
if control_file.exists():
# consider the file corrupt if the control file exists
# TODO: Design a control file format so we know how much of the file is missing
out_path.unlink(missing_ok=True)
control_file.unlink()
elif out_path.exists():
continue
control_file.write_bytes(b"")
attempts = 1
try:
while True:
try:
stream = session.get(url, stream=True)
@ -101,6 +112,8 @@ def curl_impersonate(
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
return 0

View File

@ -65,8 +65,19 @@ def requests(
for url, out_path in uri:
out_path.parent.mkdir(parents=True, exist_ok=True)
attempts = 1
control_file = out_path.with_name(f"{out_path.name}.!dev")
if control_file.exists():
# consider the file corrupt if the control file exists
# TODO: Design a control file format so we know how much of the file is missing
out_path.unlink(missing_ok=True)
control_file.unlink()
elif out_path.exists():
continue
control_file.write_bytes(b"")
attempts = 1
try:
while True:
try:
stream = session.get(url, stream=True)
@ -103,6 +114,8 @@ def requests(
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
return 0