fix(rss): add thumbnail upload cooldown after Bluesky blob rate-limit exhaustion

This commit is contained in:
Guillem Hernandez Sola
2026-04-09 18:50:55 +02:00
parent e2051baffe
commit 0d41c48b7a

View File

@@ -41,6 +41,9 @@ BSKY_BLOB_TRANSIENT_ERROR_DELAY = 15
# Timeout handed to outbound HTTP GET requests (presumably seconds, per the
# HTTP client's convention - TODO confirm).
HTTP_TIMEOUT = 20
# Delay between post retries; seconds, going by the constant's name.
POST_RETRY_DELAY_SECONDS = 2
# Thumbnail upload cooldown state: Unix timestamp (whole seconds) before which
# external thumbnail uploads are skipped. 0 means no cooldown has been set.
# Reassigned at runtime by activate_thumb_upload_cooldown_from_error().
THUMB_UPLOAD_COOLDOWN_UNTIL = 0
# --- Logging ---
logging.basicConfig(
format="%(asctime)s %(message)s",
@@ -478,6 +481,18 @@ def get_rate_limit_wait_seconds(error_obj, default_delay):
return default_delay
def get_rate_limit_reset_timestamp(error_obj):
    """Return the rate-limit reset time carried by an error, if any.

    Looks for a ``ratelimit-reset`` header on ``error_obj.headers`` and
    converts it to an integer timestamp.

    Args:
        error_obj: Any object; only its optional ``headers`` attribute (a
            mapping-like object exposing ``get``) is inspected.

    Returns:
        The header value as an ``int``, or ``None`` when the error has no
        headers, the header is absent/empty, or it cannot be parsed.
        This function never raises: it is a best-effort probe.
    """
    try:
        headers = getattr(error_obj, "headers", None)
        if headers:
            reset_value = headers.get("ratelimit-reset") or headers.get("RateLimit-Reset")
            if not reset_value:
                # Plain dicts are case-sensitive, so a header delivered as
                # e.g. "Ratelimit-Reset" would be missed by the two exact
                # lookups above. Fall back to a case-insensitive scan.
                for header_name, header_value in headers.items():
                    if str(header_name).lower() == "ratelimit-reset" and header_value:
                        reset_value = header_value
                        break
            if reset_value:
                return int(reset_value)
    except Exception as exc:
        # Deliberately best-effort: callers fall back to a default cooldown.
        # Leave a trace instead of swallowing the failure silently.
        logging.debug(f"Could not read rate-limit reset header: {repr(exc)}")
    return None
def is_transient_blob_error(error_obj):
error_text = repr(error_obj)
transient_signals = [
@@ -494,7 +509,34 @@ def is_transient_blob_error(error_obj):
return any(signal in error_text for signal in transient_signals)
def upload_blob_with_retry(client, binary_data, media_label="media"):
def is_rate_limited_error(error_obj):
    """Tell whether an error's text looks like a rate-limit response.

    The check is purely textual: the error is converted with ``str()`` and
    searched for the substrings ``429`` or ``RateLimitExceeded``.

    Returns:
        ``True`` if either marker occurs in the error text, else ``False``.
    """
    rate_limit_markers = ("429", "RateLimitExceeded")
    rendered = str(error_obj)
    return any(marker in rendered for marker in rate_limit_markers)
def activate_thumb_upload_cooldown_from_error(error_obj):
    """Start or extend the thumbnail upload cooldown after a rate limit.

    The cooldown end is taken from the reset timestamp reported by
    ``get_rate_limit_reset_timestamp(error_obj)``; when none is available,
    one hour from now is used instead. The module-level
    ``THUMB_UPLOAD_COOLDOWN_UNTIL`` is only ever moved forward, and a
    warning is logged each time it moves.

    Args:
        error_obj: The error raised by the rate-limited upload.
    """
    global THUMB_UPLOAD_COOLDOWN_UNTIL

    reset_ts = get_rate_limit_reset_timestamp(error_obj)

    if not reset_ts:
        # No usable reset time on the error: fall back to a fixed hour.
        fallback_reset = int(time.time()) + 3600
        if fallback_reset <= THUMB_UPLOAD_COOLDOWN_UNTIL:
            return
        THUMB_UPLOAD_COOLDOWN_UNTIL = fallback_reset
        logging.warning("Thumbnail uploads disabled temporarily for 1 hour due to rate limiting.")
        return

    # A cooldown that already reaches this far (or further) stays as it is.
    if reset_ts <= THUMB_UPLOAD_COOLDOWN_UNTIL:
        return

    THUMB_UPLOAD_COOLDOWN_UNTIL = reset_ts
    logging.warning(
        f"Thumbnail uploads disabled until rate-limit reset at "
        f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(reset_ts))}."
    )
def is_thumb_upload_cooldown_active():
    """Report whether the thumbnail upload cooldown is still running.

    Returns:
        ``True`` while the current time (truncated to whole seconds) is
        before ``THUMB_UPLOAD_COOLDOWN_UNTIL``, otherwise ``False``.
    """
    current_second = int(time.time())
    return THUMB_UPLOAD_COOLDOWN_UNTIL > current_second
def upload_blob_with_retry(client, binary_data, media_label="media", optional=False, cooldown_on_rate_limit=False):
last_exception = None
transient_attempts = 0
@@ -505,16 +547,25 @@ def upload_blob_with_retry(client, binary_data, media_label="media"):
except Exception as e:
last_exception = e
error_text = str(e)
is_rate_limited = "429" in error_text or "RateLimitExceeded" in error_text
is_rate_limited = is_rate_limited_error(e)
if is_rate_limited:
if cooldown_on_rate_limit:
activate_thumb_upload_cooldown_from_error(e)
backoff_delay = min(
BSKY_BLOB_UPLOAD_BASE_DELAY * (2 ** (attempt - 1)),
BSKY_BLOB_UPLOAD_MAX_DELAY
)
wait_seconds = get_rate_limit_wait_seconds(e, backoff_delay)
if optional and cooldown_on_rate_limit:
logging.warning(
f"Optional blob upload rate-limited for {media_label}. "
f"Skipping remaining retries and omitting optional media."
)
return None
if attempt < BSKY_BLOB_UPLOAD_MAX_RETRIES:
logging.warning(
f"Blob upload rate-limited for {media_label}. "
@@ -590,6 +641,11 @@ def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_B
def get_external_thumb_blob_from_url(image_url, client, http_client):
if is_thumb_upload_cooldown_active():
reset_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(THUMB_UPLOAD_COOLDOWN_UNTIL))
logging.info(f"Skipping external thumbnail upload due to active cooldown until {reset_str}")
return None
try:
r = http_client.get(image_url, timeout=HTTP_TIMEOUT, follow_redirects=True)
if r.status_code != 200:
@@ -610,7 +666,13 @@ def get_external_thumb_blob_from_url(image_url, client, http_client):
logging.warning("Could not compress external thumb to fit limit. Omitting thumbnail.")
return None
return upload_blob_with_retry(client, upload_bytes, media_label=f"external-thumb:{image_url}")
return upload_blob_with_retry(
client,
upload_bytes,
media_label=f"external-thumb:{image_url}",
optional=True,
cooldown_on_rate_limit=True
)
except Exception as e:
logging.warning(f"Could not fetch/upload external thumb {image_url}: {repr(e)}")
@@ -777,7 +839,6 @@ def main():
post_lang = args.lang
state_path = args.state_path
# --- Login ---
client = Client(base_url=service_url)
backoff = 60
@@ -798,7 +859,6 @@ def main():
logging.info(f"Loaded {len(recent_bsky_posts)} recent Bluesky posts for duplicate detection.")
logging.info(f"Local state currently tracks {len(state.get('posted_entries', {}))} posted items.")
# --- Parse feed ---
response = httpx.get(feed_url, timeout=HTTP_TIMEOUT, follow_redirects=True)
response.raise_for_status()