From 2e38f048aec7cef2af11d9c0b1d3b8c29ba2077d Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Mon, 11 May 2026 12:48:13 +0200 Subject: [PATCH] fix: always upload photos independently of video embed outcome Previously, photo uploads were nested inside the `else` branch of `if not video_embed`, meaning photos were only processed when a video embed succeeded. This caused photo-only tweets to be posted as text-only because `has_video` was never set, `video_embed` remained None, and the photo loop was never reached. Decouple photo collection from video outcome: warn if video failed, then unconditionally iterate over media items to upload any photos present. External link card logic is unchanged and still only triggers when both video and image embeds are absent. --- twitter2bsky.py | 76 ++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/twitter2bsky.py b/twitter2bsky.py index 7c20751..0d71a4d 100644 --- a/twitter2bsky.py +++ b/twitter2bsky.py @@ -212,7 +212,7 @@ def strip_trailing_url_punctuation(url): if not url: return url url = re.sub(r"#[A-Za-z]\w*$", "", url.strip()) - return re.sub(r"[\s…\.,;:!?)\]\"']+$", "", url) + return re.sub(r"[\s…\.,;:!?)\]\"\']+$", "", url) def split_url_hashtag_suffix(text): @@ -263,14 +263,14 @@ def repair_broken_urls(text): while prev_text != text: prev_text = text text = re.sub( - r"((?:https?://|www\.)[^\s<>\"]*)[\r\n]+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", + r"((?:https?://|www\.)[^\s<>\"]*?)[\r\n]+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", r"\1\2", text, flags=re.IGNORECASE, ) text = re.sub( - r"((?:https?://|www\.)[^\s<>\"]*)\s+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", + r"((?:https?://|www\.)[^\s<>\"]*?)\s+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", r"\1\2", text, flags=re.IGNORECASE, @@ -564,7 +564,7 @@ def extract_urls_from_text(text): return [] repaired = repair_broken_urls(text) - pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+' + pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\'']+' return re.findall(pattern, repaired) @@ -916,7 +916,7 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False): return text, None working = clean_post_text(text) - url_pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+' + url_pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\'']+' urls = re.findall(url_pattern, working) if not urls: @@ -1314,7 +1314,7 @@ def build_media_fingerprint(tweet, resolved_video_hash=None): else: stable_value = tweet_key - parts.append(f"{media_type}:{stable_value}") + parts.append(f"{media_type}:{stable_value}") parts.sort() raw = "|".join(parts) @@ -1647,9 +1647,11 @@ def get_recent_bsky_posts(client, handle, limit=30): urls.extend(extract_non_x_urls_from_text(text)) urls.extend(extract_urls_from_facets(record)) - canonical = canonicalize_url(normalize_urlish_token(url) or url) - if canonical: - canonical_non_x_urls.add(canonical) + canonical_non_x_urls = set() + for url in urls: + canonical = canonicalize_url(normalize_urlish_token(url) or url) + if canonical: + canonical_non_x_urls.add(canonical) media_fingerprint = build_bsky_media_fingerprint(item.post) text_media_key = build_text_media_key(normalized_text, media_fingerprint) @@ -2819,6 +2821,7 @@ def candidate_matches_existing_bsky(candidate, recent_bsky_posts): return False, None + # --- Main Sync Logic --- def sync_feeds(args): logging.info("šŸ”„ Starting sync cycle...") @@ -3115,6 +3118,7 @@ def sync_feeds(args): has_video = candidate.get("has_video", False) + # --- Video processing --- if has_video: video_media = next( ( @@ -3195,32 +3199,36 @@ def sync_feeds(args): remove_file_quietly(f"{temp_video_base}_trimmed.mp4") remove_file_quietly(f"{temp_video_base}_compressed.mp4") - if not video_embed: + # ---------------------------------------------------------------- + # FIX: warn if video failed, but ALWAYS attempt photo uploads + # independently — this is the core fix for photo-only tweets. + # ---------------------------------------------------------------- + if has_video and not video_embed: logging.warning( - "āš ļø Tweet contains video, but video could not be " - "posted. Skipping photo fallback for this tweet." + "āš ļø Tweet contains video, but video could not be posted. " + "Skipping video — will still attempt photos if present." ) - else: - if tweet.media: - for media in tweet.media: - if media.type == "photo": - blob = get_blob_from_url( - media.media_url_https, - bsky_client, - media_http_client, + # Always collect photos regardless of video outcome + if tweet.media: + for media in tweet.media: + if media.type == "photo": + blob = get_blob_from_url( + media.media_url_https, + bsky_client, + media_http_client, + ) + if blob: + image_embeds.append( + models.AppBskyEmbedImages.Image( + alt=dynamic_alt, + image=blob, + ) + ) + else: + media_upload_failures.append( + f"photo:{media.media_url_https}" ) - if blob: - image_embeds.append( - models.AppBskyEmbedImages.Image( - alt=dynamic_alt, - image=blob, - ) - ) - else: - media_upload_failures.append( - f"photo:{media.media_url_https}" - ) # --- External link card logic --- if not video_embed and not image_embeds: @@ -3332,13 +3340,10 @@ def sync_feeds(args): except Exception as e: logging.error(f"āŒ Failed to post tweet to Bluesky: {e}") - # Closes Playwright browser (Indented 12 spaces, matches 'browser = ...') browser.close() - # Logs final count (Indented 8 spaces, inside the main 'try' block) logging.info(f"āœ… Sync complete. Posted {new_posts} new updates.") - # Catches fatal sync errors (Indented 4 spaces, matches the 'try:' at the top of sync_feeds) except Exception as e: logging.error(f"āŒ Error during sync cycle: {e}") @@ -3424,5 +3429,4 @@ def main(): if __name__ == "__main__": - main() - + main() \ No newline at end of file