Added snippets only in urls

This commit is contained in:
2026-04-05 21:32:01 +02:00
parent a76715064a
commit c1a9065744

View File

@@ -13,6 +13,7 @@ from dotenv import load_dotenv
from atproto import Client, client_utils, models from atproto import Client, client_utils, models
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from moviepy import VideoFileClip from moviepy import VideoFileClip
from bs4 import BeautifulSoup
# --- Configuration --- # --- Configuration ---
LOG_PATH = "twitter2bsky.log" LOG_PATH = "twitter2bsky.log"
@@ -28,8 +29,7 @@ BSKY_TEXT_MAX_LENGTH = 275
# server-side proxy/PDS body-size caps. # server-side proxy/PDS body-size caps.
# - Custom PDSes such as eurosky.social may accept images fine but fail on # - Custom PDSes such as eurosky.social may accept images fine but fail on
# larger video blob uploads. # larger video blob uploads.
# - The public video limits discussed in Bluesky tooling references are useful, # - The safest approach is to:
# but in practice the safest approach is to:
# 1. cap duration # 1. cap duration
# 2. compress aggressively # 2. compress aggressively
# 3. log final file size # 3. log final file size
@@ -41,6 +41,7 @@ BSKY_BLOB_UPLOAD_MAX_RETRIES = 5
BSKY_BLOB_UPLOAD_BASE_DELAY = 10 BSKY_BLOB_UPLOAD_BASE_DELAY = 10
BSKY_BLOB_UPLOAD_MAX_DELAY = 300 BSKY_BLOB_UPLOAD_MAX_DELAY = 300
MEDIA_DOWNLOAD_TIMEOUT = 30 MEDIA_DOWNLOAD_TIMEOUT = 30
LINK_METADATA_TIMEOUT = 10
DEFAULT_BSKY_BASE_URL = "https://bsky.social" DEFAULT_BSKY_BASE_URL = "https://bsky.social"
# --- Logging Setup --- # --- Logging Setup ---
@@ -302,6 +303,62 @@ def get_blob_from_file(file_path, client):
return None return None
def fetch_link_metadata(url, http_client):
    """
    Fetch metadata used to build a Bluesky external link card.

    The page is downloaded and parsed for Open Graph tags, falling back to the
    plain ``<title>``, ``<meta name="description">`` and
    ``<meta name="twitter:image">`` tags whenever the preferred tag is absent
    or carries no usable ``content``.

    Args:
        url: Page URL to inspect.
        http_client: HTTP client whose ``get`` accepts ``timeout`` and
            ``follow_redirects`` keyword arguments and returns a response
            exposing ``raise_for_status()`` and ``text``.

    Returns:
        A dict with keys ``"title"`` (str, possibly empty), ``"description"``
        (str, possibly empty) and ``"image"`` (absolute URL string or None).
        On any failure the error is logged and an empty dict is returned, so
        callers can treat the link card as optional.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    def _meta_content(tag):
        """Return the stripped ``content`` attribute of *tag*, or "" if unusable."""
        if tag is None or not tag.has_attr("content"):
            return ""
        return (tag["content"] or "").strip()

    try:
        r = http_client.get(url, timeout=LINK_METADATA_TIMEOUT, follow_redirects=True)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")

        # Prefer og:title, but do not let an empty og:title hide a real <title>.
        title = _meta_content(soup.find("meta", property="og:title"))
        if not title:
            title_tag = soup.find("title")
            title = title_tag.get_text().strip() if title_tag else ""

        description = (
            _meta_content(soup.find("meta", property="og:description"))
            or _meta_content(soup.find("meta", attrs={"name": "description"}))
        )

        image = (
            _meta_content(soup.find("meta", property="og:image"))
            or _meta_content(soup.find("meta", attrs={"name": "twitter:image"}))
        )
        if image:
            # Pages may publish relative or protocol-relative image paths;
            # resolve them against the final (post-redirect) URL when the
            # response exposes one, otherwise against the requested URL.
            base_url = str(getattr(r, "url", "") or url)
            image = urljoin(base_url, image)

        return {
            "title": title,
            "description": description,
            "image": image or None,
        }
    except Exception as e:
        # Deliberately broad: link cards are best-effort and must never abort
        # the surrounding sync run.
        logging.warning(f"Could not fetch link metadata for {url}: {repr(e)}")
        return {}
def build_external_link_embed(url, client, http_client, fallback_title="Link"):
    """
    Build a Bluesky external embed (link card) from a URL.

    Only use this when the post carries no image/video embed, because
    Bluesky posts can only carry one embed type.

    Args:
        url: Target URL of the card; also used as the embed ``uri``.
        client: Client forwarded to ``get_blob_from_url`` for the thumbnail.
        http_client: HTTP client used for the metadata fetch and forwarded to
            ``get_blob_from_url``.
        fallback_title: Title used when the page metadata has no title.

    Returns:
        A ``models.AppBskyEmbedExternal.Main`` instance, or None when neither
        a title, a description nor a thumbnail blob could be obtained.
    """
    meta = fetch_link_metadata(url, http_client)
    card_title = meta.get("title")
    card_description = meta.get("description")
    image_url = meta.get("image")

    # A thumbnail is only attempted when the page advertised an image.
    thumbnail = None
    if image_url:
        thumbnail = get_blob_from_url(image_url, client, http_client)

    # Without any usable metadata there is nothing to show in a card.
    if not (card_title or card_description or thumbnail):
        return None

    card = models.AppBskyEmbedExternal.External(
        uri=url,
        title=card_title or fallback_title,
        description=card_description or "",
        thumb=thumbnail,
    )
    return models.AppBskyEmbedExternal.Main(external=card)
def prepare_post_text(text): def prepare_post_text(text):
""" """
Prepare the final public text exactly as it should be posted to Bluesky. Prepare the final public text exactly as it should be posted to Bluesky.
@@ -699,7 +756,7 @@ def build_dynamic_alt(raw_text):
if len(dynamic_alt) > 150: if len(dynamic_alt) > 150:
dynamic_alt = dynamic_alt[:147] + "..." dynamic_alt = dynamic_alt[:147] + "..."
elif not dynamic_alt: elif not dynamic_alt:
dynamic_alt = "deo o imatge adjunta al tuit" dynamic_alt = "Attached video or image from tweet"
return dynamic_alt return dynamic_alt
@@ -986,18 +1043,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
def download_and_crop_video(video_url, output_path): def download_and_crop_video(video_url, output_path):
""" """
Download, trim, and compress video before upload. Download, trim, and compress video before upload.
Practical comments based on Bluesky video limits and real-world custom PDS behavior:
- Duration alone is not enough; final file size matters a lot.
- A 90-second 1080x1920 video can still be too large for alternate services.
- We therefore:
1. download the source
2. trim to VIDEO_MAX_DURATION_SECONDS
3. re-encode with tighter settings
4. scale down to max width 720
5. log final file size
- This improves compatibility with services like eurosky.social that may have
stricter body-size or timeout limits than bsky.social.
""" """
temp_input = output_path.replace(".mp4", "_source.mp4") temp_input = output_path.replace(".mp4", "_source.mp4")
temp_trimmed = output_path.replace(".mp4", "_trimmed.mp4") temp_trimmed = output_path.replace(".mp4", "_trimmed.mp4")
@@ -1279,6 +1324,7 @@ def sync_feeds(args):
image_embeds = [] image_embeds = []
video_embed = None video_embed = None
external_embed = None
media_upload_failures = [] media_upload_failures = []
if tweet.media: if tweet.media:
@@ -1330,6 +1376,20 @@ def sync_feeds(args):
if os.path.exists(temp_video_path): if os.path.exists(temp_video_path):
os.remove(temp_video_path) os.remove(temp_video_path)
# Only create an external link card if no image/video embed will be used.
if not video_embed and not image_embeds and candidate["canonical_non_x_urls"]:
first_non_x_url = sorted(candidate["canonical_non_x_urls"])[0]
external_embed = build_external_link_embed(
first_non_x_url,
bsky_client,
media_http_client,
fallback_title="Link"
)
if external_embed:
logging.info(f"🔗 Built external link card for URL: {first_non_x_url}")
else:
logging.info(f" No external link card metadata available for URL: {first_non_x_url}")
try: try:
post_result = None post_result = None
post_mode = "text" post_mode = "text"
@@ -1341,6 +1401,9 @@ def sync_feeds(args):
embed = models.AppBskyEmbedImages.Main(images=image_embeds) embed = models.AppBskyEmbedImages.Main(images=image_embeds)
post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"]) post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"])
post_mode = f"images:{len(image_embeds)}" post_mode = f"images:{len(image_embeds)}"
elif external_embed:
post_result = bsky_client.send_post(text=rich_text, embed=external_embed, langs=["ca"])
post_mode = "external_link_card"
else: else:
post_result = bsky_client.send_post(text=rich_text, langs=["ca"]) post_result = bsky_client.send_post(text=rich_text, langs=["ca"])
post_mode = "text_only" post_mode = "text_only"