Controlling snippet image on blobs
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import argparse
|
||||
import arrow
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
@@ -14,6 +15,7 @@ from atproto import Client, client_utils, models
|
||||
from playwright.sync_api import sync_playwright
|
||||
from moviepy import VideoFileClip
|
||||
from bs4 import BeautifulSoup
|
||||
from PIL import Image
|
||||
|
||||
# --- Configuration ---
|
||||
LOG_PATH = "twitter2bsky.log"
|
||||
@@ -26,6 +28,14 @@ BSKY_TEXT_MAX_LENGTH = 275
|
||||
VIDEO_MAX_DURATION_SECONDS = 179
|
||||
MAX_VIDEO_UPLOAD_SIZE_MB = 45
|
||||
|
||||
# External-card thumbnail constraints:
|
||||
# The user's PDS rejected an oversized thumbnail blob with:
|
||||
# BlobTooLarge: 1.15MB > 976.56KB
|
||||
# That limit is roughly 1,000,000 bytes, so we conservatively target a slightly smaller max size (950 KiB = 972,800 bytes) for safety.
|
||||
EXTERNAL_THUMB_MAX_BYTES = 950 * 1024
|
||||
EXTERNAL_THUMB_MAX_DIMENSION = 1200
|
||||
EXTERNAL_THUMB_MIN_JPEG_QUALITY = 40
|
||||
|
||||
BSKY_BLOB_UPLOAD_MAX_RETRIES = 5
|
||||
BSKY_BLOB_UPLOAD_BASE_DELAY = 10
|
||||
BSKY_BLOB_UPLOAD_MAX_DELAY = 300
|
||||
@@ -99,11 +109,8 @@ def repair_broken_urls(text):
|
||||
|
||||
original = text
|
||||
|
||||
# Join protocol line breaks: https://\nexample.com -> https://example.com
|
||||
text = re.sub(r"(https?://)\s*[\r\n]+\s*", r"\1", text, flags=re.IGNORECASE)
|
||||
|
||||
# Join URL-internal line breaks when the next chunk still looks like URL content.
|
||||
# This is intentionally conservative but effective for wrapped article URLs.
|
||||
prev_text = None
|
||||
while prev_text != text:
|
||||
prev_text = text
|
||||
@@ -114,7 +121,6 @@ def repair_broken_urls(text):
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Also fix accidental spaces inserted inside URLs after the protocol.
|
||||
text = re.sub(
|
||||
r"((?:https?://|www\.)[^\s<>\"]*)\s+([A-Za-z0-9/\-._~%!$&'()*+,;=:@?#]+)",
|
||||
r"\1\2",
|
||||
@@ -348,6 +354,116 @@ def get_blob_from_file(file_path, client):
|
||||
return None
|
||||
|
||||
|
||||
def _flatten_to_rgb(img):
    """Return an RGB version of ``img`` with transparency composited onto white.

    A plain ``convert("RGB")`` drops the alpha channel and exposes whatever
    colour was stored under transparent pixels (usually black), which ruins
    logos and illustrations once they are saved as JPEG.
    """
    has_alpha = img.mode in ("RGBA", "LA") or (
        img.mode == "P" and "transparency" in img.info
    )
    if not has_alpha:
        return img.convert("RGB")

    rgba = img.convert("RGBA")
    background = Image.new("RGB", rgba.size, (255, 255, 255))
    background.paste(rgba, mask=rgba.getchannel("A"))
    return background


def _scale_to_max_dimension(img, max_dimension):
    """Return ``img`` scaled so its longest side is at most ``max_dimension``.

    The same object is returned untouched when it already fits, so callers can
    use an identity check to detect whether a resize happened.
    """
    width, height = img.size
    longest = max(width, height)
    if longest <= max_dimension:
        return img

    scale = max_dimension / longest
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    return img.resize(new_size, Image.LANCZOS)


def _encode_jpeg_within_limit(img, qualities, max_bytes, log_prefix):
    """Return the first JPEG encoding of ``img`` that fits in ``max_bytes``.

    ``qualities`` are tried in the given order; each attempt is logged as
    ``"<log_prefix> <quality>: <size> KB"``. Returns None if none of them fit.
    """
    for quality in qualities:
        out = io.BytesIO()
        img.save(out, format="JPEG", quality=quality, optimize=True, progressive=True)
        data = out.getvalue()

        logging.info(f"{log_prefix} {quality}: {len(data) / 1024:.2f} KB")

        if len(data) <= max_bytes:
            return data

    return None


def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_BYTES):
    """
    Compress/resize an image to fit external thumbnail blob size limits.

    The image is rotated according to its EXIF orientation (the re-encoded JPEG
    carries no EXIF, so the rotation must be baked into the pixels), flattened
    onto a white background if it has transparency, and capped at
    EXTERNAL_THUMB_MAX_DIMENSION on its longest side. It is then encoded at
    progressively lower JPEG qualities and, if still too large, at
    progressively smaller dimensions.

    Args:
        image_bytes: Raw bytes of the source image in any format Pillow can open.
        max_bytes: Maximum allowed size of the returned JPEG, in bytes.

    Returns:
        JPEG bytes no larger than ``max_bytes``, or None if the image could not
        be decoded or could not be made small enough. Never raises; failures are
        logged as warnings.
    """
    try:
        # Function-scope import: ImageOps is only needed here.
        from PIL import ImageOps

        with Image.open(io.BytesIO(image_bytes)) as source:
            img = _flatten_to_rgb(ImageOps.exif_transpose(source))

            base = _scale_to_max_dimension(img, EXTERNAL_THUMB_MAX_DIMENSION)
            if base is not img:
                logging.info(f"🖼️ Resized external thumb to {base.size[0]}x{base.size[1]}")

            # Try progressively lower qualities.
            data = _encode_jpeg_within_limit(
                base,
                (85, 75, 65, 55, 45, EXTERNAL_THUMB_MIN_JPEG_QUALITY),
                max_bytes,
                "🖼️ External thumb candidate size at JPEG quality",
            )
            if data is not None:
                return data

            # If still too large, try a second resize pass.
            longest = max(base.size)
            for target_dim in (1000, 900, 800, 700, 600):
                if longest <= target_dim:
                    # No shrink would happen: these exact pixels already failed
                    # at equal or lower qualities in the first pass.
                    continue

                resized = _scale_to_max_dimension(base, target_dim)
                data = _encode_jpeg_within_limit(
                    resized,
                    (60, 50, 45, EXTERNAL_THUMB_MIN_JPEG_QUALITY),
                    max_bytes,
                    f"🖼️ External thumb resized to <= {target_dim}px at quality",
                )
                if data is not None:
                    return data

    except Exception as e:
        # Best-effort: a thumbnail that cannot be processed must not block the post.
        logging.warning(f"Could not compress external thumbnail: {repr(e)}")

    return None
|
||||
|
||||
|
||||
def get_external_thumb_blob_from_url(image_url, client, http_client):
    """
    Fetch an external-card thumbnail, shrink it if required, and upload it as a blob.

    The image is downloaded with ``http_client``. If its size exceeds
    EXTERNAL_THUMB_MAX_BYTES it is passed through
    compress_external_thumb_to_limit before being handed to
    upload_blob_with_retry.

    Returns the uploaded blob, or None when the download fails, the body is
    empty, the image cannot be compressed under the limit, the upload fails, or
    any exception is raised. A None result lets the caller post the link card
    without a thumbnail.
    """
    try:
        response = http_client.get(
            image_url,
            timeout=MEDIA_DOWNLOAD_TIMEOUT,
            follow_redirects=True,
        )
        if response.status_code != 200:
            logging.warning(f"Could not fetch external thumb {image_url}: HTTP {response.status_code}")
            return None

        payload = response.content
        if not payload:
            logging.warning(f"Could not fetch external thumb {image_url}: empty body")
            return None

        downloaded_kb = len(payload) / 1024
        logging.info(f"🖼️ Downloaded external thumb {image_url} ({downloaded_kb:.2f} KB)")

        if len(payload) <= EXTERNAL_THUMB_MAX_BYTES:
            logging.info("✅ External thumb already within safe size limit.")
        else:
            limit_kb = EXTERNAL_THUMB_MAX_BYTES / 1024
            logging.info(
                "🖼️ External thumb exceeds safe limit "
                f"({downloaded_kb:.2f} KB > {limit_kb:.2f} KB). Compressing..."
            )
            shrunk = compress_external_thumb_to_limit(payload, EXTERNAL_THUMB_MAX_BYTES)
            if not shrunk:
                logging.warning("⚠️ Could not compress external thumb to fit limit. Will omit thumbnail.")
                return None

            payload = shrunk
            logging.info(f"✅ External thumb compressed to {len(payload) / 1024:.2f} KB")

        uploaded = upload_blob_with_retry(
            client,
            payload,
            media_label=f"external-thumb:{image_url}",
        )
        if not uploaded:
            logging.warning("⚠️ External thumb upload failed. Will omit thumbnail.")
            return None

        return uploaded

    except Exception as e:
        logging.warning(f"Could not fetch/upload external thumb {image_url}: {repr(e)}")
        return None
|
||||
|
||||
|
||||
def fetch_link_metadata(url, http_client):
|
||||
try:
|
||||
r = http_client.get(url, timeout=LINK_METADATA_TIMEOUT, follow_redirects=True)
|
||||
@@ -376,11 +492,19 @@ def fetch_link_metadata(url, http_client):
|
||||
|
||||
|
||||
def build_external_link_embed(url, client, http_client, fallback_title="Link"):
|
||||
"""
|
||||
Build a Bluesky external embed from a URL.
|
||||
If the thumbnail image is too large, omit the thumbnail but still return the link card.
|
||||
"""
|
||||
link_metadata = fetch_link_metadata(url, http_client)
|
||||
|
||||
thumb_blob = None
|
||||
if link_metadata.get("image"):
|
||||
thumb_blob = get_blob_from_url(link_metadata["image"], client, http_client)
|
||||
thumb_blob = get_external_thumb_blob_from_url(link_metadata["image"], client, http_client)
|
||||
if thumb_blob:
|
||||
logging.info("✅ External link card thumbnail prepared successfully")
|
||||
else:
|
||||
logging.info("ℹ️ External link card will be posted without thumbnail")
|
||||
|
||||
if link_metadata.get("title") or link_metadata.get("description") or thumb_blob:
|
||||
return models.AppBskyEmbedExternal.Main(
|
||||
@@ -641,7 +765,7 @@ def get_recent_bsky_posts(client, handle, limit=30):
|
||||
if item.reason is not None:
|
||||
continue
|
||||
|
||||
record = item.post.record
|
||||
record = item.post.record
|
||||
if getattr(record, "reply", None) is not None:
|
||||
continue
|
||||
|
||||
|
||||
Reference in New Issue
Block a user