Added snippets only in urls
This commit is contained in:
@@ -13,6 +13,7 @@ from dotenv import load_dotenv
|
||||
from atproto import Client, client_utils, models
|
||||
from playwright.sync_api import sync_playwright
|
||||
from moviepy import VideoFileClip
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# --- Configuration ---
|
||||
LOG_PATH = "twitter2bsky.log"
|
||||
@@ -28,8 +29,7 @@ BSKY_TEXT_MAX_LENGTH = 275
|
||||
# server-side proxy/PDS body-size caps.
|
||||
# - Custom PDSes such as eurosky.social may accept images fine but fail on
|
||||
# larger video blob uploads.
|
||||
# - The public video limits discussed in Bluesky tooling references are useful,
|
||||
# but in practice the safest approach is to:
|
||||
# - The safest approach is to:
|
||||
# 1. cap duration
|
||||
# 2. compress aggressively
|
||||
# 3. log final file size
|
||||
@@ -41,6 +41,7 @@ BSKY_BLOB_UPLOAD_MAX_RETRIES = 5
|
||||
BSKY_BLOB_UPLOAD_BASE_DELAY = 10
|
||||
BSKY_BLOB_UPLOAD_MAX_DELAY = 300
|
||||
MEDIA_DOWNLOAD_TIMEOUT = 30
|
||||
LINK_METADATA_TIMEOUT = 10
|
||||
DEFAULT_BSKY_BASE_URL = "https://bsky.social"
|
||||
|
||||
# --- Logging Setup ---
|
||||
@@ -302,6 +303,62 @@ def get_blob_from_file(file_path, client):
|
||||
return None
|
||||
|
||||
|
||||
def _first_meta_content(soup, *attr_filters):
    """Return the first non-blank, stripped ``content`` of the matching <meta> tags.

    Each entry of ``attr_filters`` is an attribute dict passed to
    ``soup.find("meta", attrs=...)``; filters are tried in order. Returns an
    empty string when no filter yields a usable value.
    """
    for attrs in attr_filters:
        tag = soup.find("meta", attrs=attrs)
        if tag is None:
            continue
        content = tag.get("content")
        if isinstance(content, str) and content.strip():
            return content.strip()
    return ""


def fetch_link_metadata(url, http_client):
    """
    Fetch metadata used to build a Bluesky external link card.

    Args:
        url: Page to download and inspect.
        http_client: Client whose ``get(url, timeout=..., follow_redirects=...)``
            returns a response exposing ``raise_for_status()`` and ``text``.

    Returns:
        A dict with keys ``"title"`` (str, possibly empty), ``"description"``
        (str, possibly empty) and ``"image"`` (absolute URL string or ``None``).
        On any failure a warning is logged and an empty dict is returned, so
        callers can treat the link card as optional.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    try:
        r = http_client.get(url, timeout=LINK_METADATA_TIMEOUT, follow_redirects=True)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")

        # Prefer Open Graph; a blank og:title must not hide a usable <title>.
        title = _first_meta_content(soup, {"property": "og:title"})
        if not title:
            title_tag = soup.find("title")
            title = title_tag.text.strip() if title_tag and title_tag.text else ""

        description = _first_meta_content(
            soup,
            {"property": "og:description"},
            {"name": "description"},
        )

        image = _first_meta_content(
            soup,
            {"property": "og:image"},
            {"name": "twitter:image"},
        )
        if image:
            # Sites often publish relative image paths; resolve them against
            # the final (post-redirect) URL when the response exposes one.
            base_url = str(getattr(r, "url", None) or url)
            image = urljoin(base_url, image)

        return {
            "title": title,
            "description": description,
            "image": image or None,
        }

    except Exception as e:
        # Deliberate best-effort boundary: a missing card must never abort a post.
        logging.warning(f"Could not fetch link metadata for {url}: {repr(e)}")
        return {}
|
||||
|
||||
|
||||
def build_external_link_embed(url, client, http_client, fallback_title="Link"):
    """
    Build a Bluesky external embed (link card) for ``url``.

    Only use this when the post carries no image/video embed, because
    Bluesky posts can only carry one embed type.

    Args:
        url: Link the card points to; also the page scraped for metadata.
        client: Passed through to ``get_blob_from_url`` for the thumbnail.
        http_client: Used for the metadata fetch and the thumbnail fetch.
        fallback_title: Title used when the page yields no title.

    Returns:
        A ``models.AppBskyEmbedExternal.Main`` instance, or ``None`` when
        neither a title, a description, nor a thumbnail could be obtained.
    """
    metadata = fetch_link_metadata(url, http_client)
    card_title = metadata.get("title")
    card_description = metadata.get("description")
    image_url = metadata.get("image")

    # A thumbnail is attempted only when the page advertised an image.
    thumbnail = get_blob_from_url(image_url, client, http_client) if image_url else None

    # With nothing scraped there is no card worth attaching.
    if not (card_title or card_description or thumbnail):
        return None

    external = models.AppBskyEmbedExternal.External(
        uri=url,
        title=card_title or fallback_title,
        description=card_description or "",
        thumb=thumbnail,
    )
    return models.AppBskyEmbedExternal.Main(external=external)
|
||||
|
||||
|
||||
def prepare_post_text(text):
|
||||
"""
|
||||
Prepare the final public text exactly as it should be posted to Bluesky.
|
||||
@@ -699,7 +756,7 @@ def build_dynamic_alt(raw_text):
|
||||
if len(dynamic_alt) > 150:
|
||||
dynamic_alt = dynamic_alt[:147] + "..."
|
||||
elif not dynamic_alt:
|
||||
dynamic_alt = "Vídeo o imatge adjunta al tuit"
|
||||
dynamic_alt = "Attached video or image from tweet"
|
||||
|
||||
return dynamic_alt
|
||||
|
||||
@@ -986,18 +1043,6 @@ def extract_video_url_from_tweet_page(context, tweet_url):
|
||||
def download_and_crop_video(video_url, output_path):
|
||||
"""
|
||||
Download, trim, and compress video before upload.
|
||||
|
||||
Practical comments based on Bluesky video limits and real-world custom PDS behavior:
|
||||
- Duration alone is not enough; final file size matters a lot.
|
||||
- A 90-second 1080x1920 video can still be too large for alternate services.
|
||||
- We therefore:
|
||||
1. download the source
|
||||
2. trim to VIDEO_MAX_DURATION_SECONDS
|
||||
3. re-encode with tighter settings
|
||||
4. scale down to max width 720
|
||||
5. log final file size
|
||||
- This improves compatibility with services like eurosky.social that may have
|
||||
stricter body-size or timeout limits than bsky.social.
|
||||
"""
|
||||
temp_input = output_path.replace(".mp4", "_source.mp4")
|
||||
temp_trimmed = output_path.replace(".mp4", "_trimmed.mp4")
|
||||
@@ -1279,6 +1324,7 @@ def sync_feeds(args):
|
||||
|
||||
image_embeds = []
|
||||
video_embed = None
|
||||
external_embed = None
|
||||
media_upload_failures = []
|
||||
|
||||
if tweet.media:
|
||||
@@ -1330,6 +1376,20 @@ def sync_feeds(args):
|
||||
if os.path.exists(temp_video_path):
|
||||
os.remove(temp_video_path)
|
||||
|
||||
# Only create an external link card if no image/video embed will be used.
|
||||
if not video_embed and not image_embeds and candidate["canonical_non_x_urls"]:
|
||||
first_non_x_url = sorted(candidate["canonical_non_x_urls"])[0]
|
||||
external_embed = build_external_link_embed(
|
||||
first_non_x_url,
|
||||
bsky_client,
|
||||
media_http_client,
|
||||
fallback_title="Link"
|
||||
)
|
||||
if external_embed:
|
||||
logging.info(f"🔗 Built external link card for URL: {first_non_x_url}")
|
||||
else:
|
||||
logging.info(f"ℹ️ No external link card metadata available for URL: {first_non_x_url}")
|
||||
|
||||
try:
|
||||
post_result = None
|
||||
post_mode = "text"
|
||||
@@ -1341,6 +1401,9 @@ def sync_feeds(args):
|
||||
embed = models.AppBskyEmbedImages.Main(images=image_embeds)
|
||||
post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"])
|
||||
post_mode = f"images:{len(image_embeds)}"
|
||||
elif external_embed:
|
||||
post_result = bsky_client.send_post(text=rich_text, embed=external_embed, langs=["ca"])
|
||||
post_mode = "external_link_card"
|
||||
else:
|
||||
post_result = bsky_client.send_post(text=rich_text, langs=["ca"])
|
||||
post_mode = "text_only"
|
||||
|
||||
Reference in New Issue
Block a user