"""Thumbnail-upload cooldown helpers for rss2bsky.py.

When a thumbnail blob upload is rate-limited, these helpers record a
process-wide deadline before which further thumbnail uploads are skipped.

How the rest of rss2bsky.py uses them (call sites are only partially
visible from here):

* ``upload_blob_with_retry(client, binary_data, media_label="media",
  optional=False, cooldown_on_rate_limit=False)`` classifies failures with
  ``is_rate_limited_error()``.  On a rate-limited upload it calls
  ``activate_thumb_upload_cooldown_from_error()`` when
  ``cooldown_on_rate_limit`` is set, and returns ``None`` without further
  retries when ``optional`` is set as well.
* ``get_external_thumb_blob_from_url()`` returns ``None`` up front while
  ``is_thumb_upload_cooldown_active()`` is true, and uploads thumbnails
  with ``optional=True, cooldown_on_rate_limit=True``.
"""

import logging
import re
import time

# Unix timestamp (seconds). Thumbnail uploads are skipped while the current
# time is earlier than this value; 0 means no cooldown has been activated.
THUMB_UPLOAD_COOLDOWN_UNTIL = 0

# Cooldown length used when an error carries no usable reset time. The
# fallback log message in activate_thumb_upload_cooldown_from_error() says
# "1 hour"; keep the two in sync.
THUMB_UPLOAD_FALLBACK_COOLDOWN_SECONDS = 3600

# "429" as a standalone number, so digits embedded in a longer number
# (for example a byte count such as 14290) are not taken for the status code.
_HTTP_429_PATTERN = re.compile(r"(?<!\d)429(?!\d)")


def _parse_epoch_seconds(raw_value):
    """Convert a header value to whole seconds, accepting decimal strings."""
    try:
        return int(raw_value)
    except (TypeError, ValueError):
        # e.g. "1700000000.5": int() rejects it, float() does not.
        return int(float(raw_value))


def _format_local_time(timestamp):
    """Format *timestamp* as local ``YYYY-MM-DD HH:MM:SS`` for log output."""
    try:
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
    except (OverflowError, OSError, ValueError):
        # Out-of-range timestamps must not break the error-handling path.
        return str(timestamp)


def get_rate_limit_reset_timestamp(error_obj):
    """Return the rate-limit reset time carried by *error_obj*, if any.

    Reads the ``ratelimit-reset`` header (name matched case-insensitively)
    from ``error_obj.headers`` and parses it as a whole number of seconds.
    Callers in this module treat the result as a Unix timestamp.

    Args:
        error_obj: Any object; typically the exception raised by a failed
            upload. It is only inspected for a ``headers`` mapping.

    Returns:
        The header value as an ``int``, or ``None`` when there are no
        headers, the header is missing or empty, or its value is not numeric.
    """
    headers = getattr(error_obj, "headers", None)
    if not headers:
        return None

    try:
        reset_value = headers.get("ratelimit-reset") or headers.get("RateLimit-Reset")
        if not reset_value:
            # Plain dicts are case-sensitive; scan for any other spelling.
            reset_value = next(
                (
                    value
                    for name, value in headers.items()
                    if value and str(name).lower() == "ratelimit-reset"
                ),
                None,
            )
        if not reset_value:
            return None
        return _parse_epoch_seconds(reset_value)
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Best effort: an unreadable header simply means "no reset time".
        return None


def is_rate_limited_error(error_obj):
    """Return True when *error_obj*'s text indicates an HTTP 429 rate limit.

    The check is textual: ``str(error_obj)`` must contain ``429`` as a
    standalone number or the token ``RateLimitExceeded``.
    """
    error_text = str(error_obj)
    if "RateLimitExceeded" in error_text:
        return True
    return _HTTP_429_PATTERN.search(error_text) is not None


def activate_thumb_upload_cooldown_from_error(error_obj):
    """Start (or extend) the thumbnail-upload cooldown after a rate limit.

    Uses the reset time reported by *error_obj* when it lies in the future;
    otherwise falls back to ``THUMB_UPLOAD_FALLBACK_COOLDOWN_SECONDS`` from
    now. An existing cooldown is never shortened, and after this call
    ``is_thumb_upload_cooldown_active()`` is always true.

    Args:
        error_obj: The rate-limit error; see
            ``get_rate_limit_reset_timestamp`` for what is read from it.
    """
    global THUMB_UPLOAD_COOLDOWN_UNTIL

    now = int(time.time())
    reset_ts = get_rate_limit_reset_timestamp(error_obj)

    # A reset time that has already passed cannot describe the limit that was
    # just hit (clock skew, or a header holding a delay rather than a
    # timestamp), so it is treated the same as a missing one.
    if reset_ts is not None and reset_ts > now:
        THUMB_UPLOAD_COOLDOWN_UNTIL = max(THUMB_UPLOAD_COOLDOWN_UNTIL, reset_ts)
        # Report the deadline actually in force, which may be a later one
        # set by an earlier error.
        until_text = _format_local_time(THUMB_UPLOAD_COOLDOWN_UNTIL)
        logging.warning(
            f"Thumbnail uploads disabled until rate-limit reset at {until_text}."
        )
        return

    fallback_reset = now + THUMB_UPLOAD_FALLBACK_COOLDOWN_SECONDS
    THUMB_UPLOAD_COOLDOWN_UNTIL = max(THUMB_UPLOAD_COOLDOWN_UNTIL, fallback_reset)
    logging.warning("Thumbnail uploads disabled temporarily for 1 hour due to rate limiting.")


def is_thumb_upload_cooldown_active():
    """Return True while the current time is before the cooldown deadline."""
    return int(time.time()) < THUMB_UPLOAD_COOLDOWN_UNTIL