From f989d0fe310e23aa1d9be9b9f6cb4b47a557bfc7 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Sun, 5 Apr 2026 12:25:00 +0000 Subject: [PATCH] Avoid image upload warning --- twitter2bsky_daemon.py | 127 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 13 deletions(-) diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py index 7edf4e1..41d342d 100644 --- a/twitter2bsky_daemon.py +++ b/twitter2bsky_daemon.py @@ -22,6 +22,11 @@ DEDUPE_BSKY_LIMIT = 30 TWEET_MAX_AGE_DAYS = 3 VIDEO_MAX_DURATION_SECONDS = 179 +BSKY_BLOB_UPLOAD_MAX_RETRIES = 5 +BSKY_BLOB_UPLOAD_BASE_DELAY = 10 +BSKY_BLOB_UPLOAD_MAX_DELAY = 300 +MEDIA_DOWNLOAD_TIMEOUT = 30 + # --- Logging Setup --- logging.basicConfig( format="%(asctime)s [%(levelname)s] %(message)s", @@ -141,20 +146,95 @@ def extract_urls_from_facets(record): return urls -def get_blob_from_url(media_url, client): +def get_rate_limit_wait_seconds(error_obj, default_delay): + """ + Try to extract a sensible wait time from atproto/http error objects. + """ try: - r = httpx.get(media_url, timeout=30, follow_redirects=True) - if r.status_code == 200: - return client.upload_blob(r.content).blob + headers = getattr(error_obj, "headers", None) + if headers: + reset_value = headers.get("ratelimit-reset") or headers.get("RateLimit-Reset") + if reset_value: + now_ts = int(time.time()) + reset_ts = int(reset_value) + wait_seconds = max(reset_ts - now_ts + 1, default_delay) + return wait_seconds + except Exception: + pass + + return default_delay + + +def upload_blob_with_retry(client, binary_data, media_label="media"): + """ + Retry Bluesky blob upload when rate-limited. + """ + last_exception = None + + for attempt in range(1, BSKY_BLOB_UPLOAD_MAX_RETRIES + 1): + try: + result = client.upload_blob(binary_data) + return result.blob + + except Exception as e: + last_exception = e + error_text = str(e) + is_rate_limited = "429" in error_text or "RateLimitExceeded" in error_text + + if not is_rate_limited: + logging.warning(f"Could not upload {media_label}: {e}") + return None + + backoff_delay = min( + BSKY_BLOB_UPLOAD_BASE_DELAY * (2 ** (attempt - 1)), + BSKY_BLOB_UPLOAD_MAX_DELAY + ) + wait_seconds = get_rate_limit_wait_seconds(e, backoff_delay) + + if attempt < BSKY_BLOB_UPLOAD_MAX_RETRIES: + logging.warning( + f"⏳ Bluesky blob upload rate-limited for {media_label}. " + f"Retry {attempt}/{BSKY_BLOB_UPLOAD_MAX_RETRIES} after {wait_seconds}s." + ) + time.sleep(wait_seconds) + else: + logging.warning( + f"❌ Exhausted blob upload retries for {media_label} after rate limiting: {e}" + ) + + logging.warning(f"Could not upload {media_label}: {last_exception}") + return None + + +def get_blob_from_url(media_url, client, http_client): + """ + Download media and upload to Bluesky with retry support for upload rate limits. + """ + try: + r = http_client.get(media_url, timeout=MEDIA_DOWNLOAD_TIMEOUT, follow_redirects=True) + if r.status_code != 200: + logging.warning(f"Could not fetch media {media_url}: HTTP {r.status_code}") + return None + + content = r.content + if not content: + logging.warning(f"Could not fetch media {media_url}: empty response body") + return None + + return upload_blob_with_retry(client, content, media_label=media_url) + except Exception as e: logging.warning(f"Could not fetch media {media_url}: {e}") - return None + return None def get_blob_from_file(file_path, client): try: with open(file_path, "rb") as f: - return client.upload_blob(f.read()).blob + binary_data = f.read() + + return upload_blob_with_retry(client, binary_data, media_label=file_path) + except Exception as e: logging.warning(f"Could not upload local file {file_path}: {e}") return None @@ -422,7 +502,7 @@ def get_recent_bsky_posts(client, handle, limit=30): if getattr(record, "reply", None) is not None: continue - text = getattr(record, "text", "") or "" + text = getattr(record, "text", "") or "" normalized_text = normalize_post_text(text) urls = [] @@ -1029,9 +1109,9 @@ def sync_feeds(args): return new_posts = 0 - state_file = "twitter_browser_state.json" + browser_state_file = "twitter_browser_state.json" - with sync_playwright() as p: + with sync_playwright() as p, httpx.Client() as media_http_client: browser = p.chromium.launch( headless=True, args=["--disable-blink-features=AutomationControlled"] @@ -1044,8 +1124,8 @@ def sync_feeds(args): ), "viewport": {"width": 1920, "height": 1080}, } - if os.path.exists(state_file): - context_kwargs["storage_state"] = state_file + if os.path.exists(browser_state_file): + context_kwargs["storage_state"] = browser_state_file context = browser.new_context(**context_kwargs) @@ -1061,11 +1141,12 @@ def sync_feeds(args): image_embeds = [] video_embed = None + media_upload_failures = [] if tweet.media: for media in tweet.media: if media.type == "photo": - blob = get_blob_from_url(media.media_url_https, bsky_client) + blob = get_blob_from_url(media.media_url_https, bsky_client, media_http_client) if blob: image_embeds.append( models.AppBskyEmbedImages.Image( @@ -1073,10 +1154,13 @@ def sync_feeds(args): image=blob ) ) + else: + media_upload_failures.append(f"photo:{media.media_url_https}") elif media.type == "video": if not tweet.tweet_url: logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.") + media_upload_failures.append("video:no_tweet_url") continue temp_video_path = "temp_video.mp4" @@ -1085,19 +1169,24 @@ def sync_feeds(args): real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url) if not real_video_url: logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}") + media_upload_failures.append(f"video:resolve_failed:{tweet.tweet_url}") continue cropped_video_path = download_and_crop_video(real_video_url, temp_video_path) if not cropped_video_path: logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}") + media_upload_failures.append(f"video:crop_failed:{tweet.tweet_url}") continue video_blob = get_blob_from_file(cropped_video_path, bsky_client) if not video_blob: logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}") + media_upload_failures.append(f"video:upload_failed:{tweet.tweet_url}") continue video_embed = build_video_embed(video_blob, dynamic_alt) + if not video_embed: + media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}") finally: if os.path.exists(temp_video_path): @@ -1105,14 +1194,18 @@ def sync_feeds(args): try: post_result = None + post_mode = "text" if video_embed: post_result = bsky_client.send_post(text=rich_text, embed=video_embed, langs=["ca"]) + post_mode = "video" elif image_embeds: embed = models.AppBskyEmbedImages.Main(images=image_embeds) post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"]) + post_mode = f"images:{len(image_embeds)}" else: post_result = bsky_client.send_post(text=rich_text, langs=["ca"]) + post_mode = "text_only" bsky_uri = getattr(post_result, "uri", None) @@ -1132,7 +1225,15 @@ def sync_feeds(args): recent_bsky_posts = recent_bsky_posts[:DEDUPE_BSKY_LIMIT] new_posts += 1 - logging.info(f"✅ Posted new tweet to Bluesky: {raw_text}") + + if media_upload_failures: + logging.warning( + f"✅ Posted tweet to Bluesky with degraded media mode ({post_mode}). " + f"Failed media items: {media_upload_failures}" + ) + else: + logging.info(f"✅ Posted new tweet to Bluesky with mode {post_mode}: {raw_text}") + time.sleep(5) except Exception as e: