diff --git a/twitter2bsky.py b/twitter2bsky.py index 7c20751..0d71a4d 100644 --- a/twitter2bsky.py +++ b/twitter2bsky.py @@ -212,7 +212,7 @@ def strip_trailing_url_punctuation(url): if not url: return url url = re.sub(r"#[A-Za-z]\w*$", "", url.strip()) - return re.sub(r"[\s…\.,;:!?)\]\"']+$", "", url) + return re.sub(r"[\s…\.,;:!?)\]\"\']+$", "", url) def split_url_hashtag_suffix(text): @@ -263,14 +263,14 @@ def repair_broken_urls(text): while prev_text != text: prev_text = text text = re.sub( - r"((?:https?://|www\.)[^\s<>\"]*)[\r\n]+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", + r"((?:https?://|www\.)[^\s<>\"]*?)[\r\n]+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", r"\1\2", text, flags=re.IGNORECASE, ) text = re.sub( - r"((?:https?://|www\.)[^\s<>\"]*)\s+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", + r"((?:https?://|www\.)[^\s<>\"]*?)\s+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)", r"\1\2", text, flags=re.IGNORECASE, @@ -564,7 +564,7 @@ def extract_urls_from_text(text): return [] repaired = repair_broken_urls(text) - pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+' + pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+' return re.findall(pattern, repaired) @@ -916,7 +916,7 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False): return text, None working = clean_post_text(text) - url_pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+' + url_pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+' urls = re.findall(url_pattern, working) if not urls: @@ -1314,7 +1314,7 @@ def build_media_fingerprint(tweet, resolved_video_hash=None): else: stable_value = tweet_key - parts.append(f"{media_type}:{stable_value}") + parts.append(f"{media_type}:{stable_value}") parts.sort() raw = "|".join(parts) @@ -1647,9 +1647,11 @@ def get_recent_bsky_posts(client, handle, limit=30): urls.extend(extract_non_x_urls_from_text(text)) urls.extend(extract_urls_from_facets(record)) - canonical = canonicalize_url(normalize_urlish_token(url) or url) - if canonical: - canonical_non_x_urls.add(canonical) + 
canonical_non_x_urls = set() + for url in urls: + canonical = canonicalize_url(normalize_urlish_token(url) or url) + if canonical: + canonical_non_x_urls.add(canonical) media_fingerprint = build_bsky_media_fingerprint(item.post) text_media_key = build_text_media_key(normalized_text, media_fingerprint) @@ -2819,6 +2821,7 @@ def candidate_matches_existing_bsky(candidate, recent_bsky_posts): return False, None + # --- Main Sync Logic --- def sync_feeds(args): logging.info("šŸ”„ Starting sync cycle...") @@ -3115,6 +3118,7 @@ def sync_feeds(args): has_video = candidate.get("has_video", False) + # --- Video processing --- if has_video: video_media = next( ( @@ -3195,32 +3199,36 @@ def sync_feeds(args): remove_file_quietly(f"{temp_video_base}_trimmed.mp4") remove_file_quietly(f"{temp_video_base}_compressed.mp4") - if not video_embed: + # ---------------------------------------------------------------- + # FIX: warn if video failed, but ALWAYS attempt photo uploads + # independently — this is the core fix for photo-only tweets. + # ---------------------------------------------------------------- + if has_video and not video_embed: logging.warning( - "āš ļø Tweet contains video, but video could not be " - "posted. Skipping photo fallback for this tweet." + "āš ļø Tweet contains video, but video could not be posted. " + "Skipping video — will still attempt photos if present." 
) - else: - if tweet.media: - for media in tweet.media: - if media.type == "photo": - blob = get_blob_from_url( - media.media_url_https, - bsky_client, - media_http_client, + # Always collect photos regardless of video outcome + if tweet.media: + for media in tweet.media: + if media.type == "photo": + blob = get_blob_from_url( + media.media_url_https, + bsky_client, + media_http_client, + ) + if blob: + image_embeds.append( + models.AppBskyEmbedImages.Image( + alt=dynamic_alt, + image=blob, + ) + ) + else: + media_upload_failures.append( + f"photo:{media.media_url_https}" ) - if blob: - image_embeds.append( - models.AppBskyEmbedImages.Image( - alt=dynamic_alt, - image=blob, - ) - ) - else: - media_upload_failures.append( - f"photo:{media.media_url_https}" - ) # --- External link card logic --- if not video_embed and not image_embeds: @@ -3332,13 +3340,10 @@ def sync_feeds(args): except Exception as e: logging.error(f"āŒ Failed to post tweet to Bluesky: {e}") - # Closes Playwright browser (Indented 12 spaces, matches 'browser = ...') browser.close() - # Logs final count (Indented 8 spaces, inside the main 'try' block) logging.info(f"āœ… Sync complete. Posted {new_posts} new updates.") - # Catches fatal sync errors (Indented 4 spaces, matches the 'try:' at the top of sync_feeds) except Exception as e: logging.error(f"āŒ Error during sync cycle: {e}") @@ -3424,5 +3429,4 @@ def main(): if __name__ == "__main__": - main() - + main() \ No newline at end of file