From c1a90657446be08b0a5f8d97668b6fb5a62b003c Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola
Date: Sun, 5 Apr 2026 21:32:01 +0200
Subject: [PATCH] Added snippets only in urls

---
Reviewer notes (text between "---" and the diff is not part of the commit):
- This patch had been flattened onto a few physical lines. Line breaks were
  restored so that every hunk matches its @@ line counts and the diffstat
  (79 insertions, 16 deletions).
- Runs of whitespace were already collapsed, so the leading indentation of
  context lines (notably inside sync_feeds and get_blob_from_file), the
  spacing inside comment/docstring lines, and the position of the blank line
  in the docstring removed from download_and_crop_video are best guesses.
  Check them against the target file; `git apply --ignore-whitespace` may
  help with context mismatches.
- fetch_link_metadata passes the og:image value on unchanged; a relative
  image URL would presumably fail in get_blob_from_url -- worth confirming.
- sync_feeds uses sorted(canonical_non_x_urls)[0], i.e. the lexicographically
  first URL, which is not necessarily the first URL in the tweet.

 twitter2bsky_daemon.py | 95 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 79 insertions(+), 16 deletions(-)

diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py
index eb563fa..9f7b8fb 100644
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -13,6 +13,7 @@ from dotenv import load_dotenv
 from atproto import Client, client_utils, models
 from playwright.sync_api import sync_playwright
 from moviepy import VideoFileClip
+from bs4 import BeautifulSoup
 
 # --- Configuration ---
 LOG_PATH = "twitter2bsky.log"
@@ -28,8 +29,7 @@ BSKY_TEXT_MAX_LENGTH = 275
 # server-side proxy/PDS body-size caps.
 # - Custom PDSes such as eurosky.social may accept images fine but fail on
 # larger video blob uploads.
-# - The public video limits discussed in Bluesky tooling references are useful,
-# but in practice the safest approach is to:
+# - The safest approach is to:
 # 1. cap duration
 # 2. compress aggressively
 # 3. log final file size
@@ -41,6 +41,7 @@ BSKY_BLOB_UPLOAD_MAX_RETRIES = 5
 BSKY_BLOB_UPLOAD_BASE_DELAY = 10
 BSKY_BLOB_UPLOAD_MAX_DELAY = 300
 MEDIA_DOWNLOAD_TIMEOUT = 30
+LINK_METADATA_TIMEOUT = 10
 DEFAULT_BSKY_BASE_URL = "https://bsky.social"
 
 # --- Logging Setup ---
@@ -302,6 +303,62 @@ def get_blob_from_file(file_path, client):
         return None
 
 
+def fetch_link_metadata(url, http_client):
+    """
+    Fetch metadata used to build a Bluesky external link card.
+    """
+    try:
+        r = http_client.get(url, timeout=LINK_METADATA_TIMEOUT, follow_redirects=True)
+        r.raise_for_status()
+        soup = BeautifulSoup(r.text, "html.parser")
+
+        title = (soup.find("meta", property="og:title") or soup.find("title"))
+        desc = (
+            soup.find("meta", property="og:description")
+            or soup.find("meta", attrs={"name": "description"})
+        )
+        image = (
+            soup.find("meta", property="og:image")
+            or soup.find("meta", attrs={"name": "twitter:image"})
+        )
+
+        return {
+            "title": title["content"] if title and title.has_attr("content") else (title.text.strip() if title and title.text else ""),
+            "description": desc["content"] if desc and desc.has_attr("content") else "",
+            "image": image["content"] if image and image.has_attr("content") else None,
+        }
+
+    except Exception as e:
+        logging.warning(f"Could not fetch link metadata for {url}: {repr(e)}")
+        return {}
+
+
+def build_external_link_embed(url, client, http_client, fallback_title="Link"):
+    """
+    Build a Bluesky external embed from a URL.
+
+    This should only be used when the post has no image/video embed, because
+    Bluesky posts can only carry one embed type.
+    """
+    link_metadata = fetch_link_metadata(url, http_client)
+
+    thumb_blob = None
+    if link_metadata.get("image"):
+        thumb_blob = get_blob_from_url(link_metadata["image"], client, http_client)
+
+    if link_metadata.get("title") or link_metadata.get("description") or thumb_blob:
+        return models.AppBskyEmbedExternal.Main(
+            external=models.AppBskyEmbedExternal.External(
+                uri=url,
+                title=link_metadata.get("title") or fallback_title,
+                description=link_metadata.get("description") or "",
+                thumb=thumb_blob,
+            )
+        )
+
+    return None
+
+
 def prepare_post_text(text):
     """
     Prepare the final public text exactly as it should be posted to Bluesky.
@@ -699,7 +756,7 @@ def build_dynamic_alt(raw_text):
     if len(dynamic_alt) > 150:
         dynamic_alt = dynamic_alt[:147] + "..."
     elif not dynamic_alt:
-        dynamic_alt = "Vídeo o imatge adjunta al tuit"
+        dynamic_alt = "Attached video or image from tweet"
 
     return dynamic_alt
 
@@ -986,18 +1043,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
 def download_and_crop_video(video_url, output_path):
     """
     Download, trim, and compress video before upload.
-
-    Practical comments based on Bluesky video limits and real-world custom PDS behavior:
-    - Duration alone is not enough; final file size matters a lot.
-    - A 90-second 1080x1920 video can still be too large for alternate services.
-    - We therefore:
-      1. download the source
-      2. trim to VIDEO_MAX_DURATION_SECONDS
-      3. re-encode with tighter settings
-      4. scale down to max width 720
-      5. log final file size
-    - This improves compatibility with services like eurosky.social that may have
-      stricter body-size or timeout limits than bsky.social.
     """
     temp_input = output_path.replace(".mp4", "_source.mp4")
     temp_trimmed = output_path.replace(".mp4", "_trimmed.mp4")
@@ -1279,6 +1324,7 @@ def sync_feeds(args):
 
                 image_embeds = []
                 video_embed = None
+                external_embed = None
                 media_upload_failures = []
 
                 if tweet.media:
@@ -1330,6 +1376,20 @@ def sync_feeds(args):
                                 if os.path.exists(temp_video_path):
                                     os.remove(temp_video_path)
 
+                # Only create an external link card if no image/video embed will be used.
+                if not video_embed and not image_embeds and candidate["canonical_non_x_urls"]:
+                    first_non_x_url = sorted(candidate["canonical_non_x_urls"])[0]
+                    external_embed = build_external_link_embed(
+                        first_non_x_url,
+                        bsky_client,
+                        media_http_client,
+                        fallback_title="Link"
+                    )
+                    if external_embed:
+                        logging.info(f"🔗 Built external link card for URL: {first_non_x_url}")
+                    else:
+                        logging.info(f"ℹ️ No external link card metadata available for URL: {first_non_x_url}")
+
                 try:
                     post_result = None
                     post_mode = "text"
@@ -1341,6 +1401,9 @@ def sync_feeds(args):
                         embed = models.AppBskyEmbedImages.Main(images=image_embeds)
                         post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"])
                         post_mode = f"images:{len(image_embeds)}"
+                    elif external_embed:
+                        post_result = bsky_client.send_post(text=rich_text, embed=external_embed, langs=["ca"])
+                        post_mode = "external_link_card"
                     else:
                         post_result = bsky_client.send_post(text=rich_text, langs=["ca"])
                         post_mode = "text_only"
@@ -1429,4 +1492,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file