From 07bfd5e2d5b35df81034eaccfcc981f2617f0f12 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Mon, 13 Apr 2026 09:47:28 +0200 Subject: [PATCH] feat(rss): improve external card thumbnail uploads with stronger compression and shorter cooldowns --- rss2bsky.py | 88 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/rss2bsky.py b/rss2bsky.py index b5c2acb..25bf942 100644 --- a/rss2bsky.py +++ b/rss2bsky.py @@ -31,21 +31,23 @@ DEFAULT_COOLDOWN_STATE_PATH = "rss2bsky_cooldowns.json" DEDUPE_BSKY_LIMIT = 30 BSKY_TEXT_MAX_LENGTH = 275 -EXTERNAL_THUMB_MAX_BYTES = 950 * 1024 -EXTERNAL_THUMB_MAX_DIMENSION = 1200 -EXTERNAL_THUMB_MIN_JPEG_QUALITY = 40 +# External thumbnail tuning +EXTERNAL_THUMB_MAX_BYTES = 750 * 1024 +EXTERNAL_THUMB_TARGET_BYTES = 500 * 1024 +EXTERNAL_THUMB_MAX_DIMENSION = 1000 +EXTERNAL_THUMB_MIN_JPEG_QUALITY = 35 -BSKY_BLOB_UPLOAD_MAX_RETRIES = 5 -BSKY_BLOB_UPLOAD_BASE_DELAY = 10 -BSKY_BLOB_UPLOAD_MAX_DELAY = 300 -BSKY_BLOB_TRANSIENT_ERROR_RETRIES = 3 -BSKY_BLOB_TRANSIENT_ERROR_DELAY = 15 +BSKY_BLOB_UPLOAD_MAX_RETRIES = 3 +BSKY_BLOB_UPLOAD_BASE_DELAY = 8 +BSKY_BLOB_UPLOAD_MAX_DELAY = 120 +BSKY_BLOB_TRANSIENT_ERROR_RETRIES = 2 +BSKY_BLOB_TRANSIENT_ERROR_DELAY = 10 HTTP_TIMEOUT = 20 POST_RETRY_DELAY_SECONDS = 2 DEFAULT_POST_COOLDOWN_SECONDS = 3600 -DEFAULT_THUMB_COOLDOWN_SECONDS = 3600 +DEFAULT_THUMB_COOLDOWN_SECONDS = 1800 # --- Logging --- @@ -679,7 +681,7 @@ def upload_blob_with_retry(client, binary_data, media_label="media", optional=Fa return None -def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_BYTES): +def compress_external_thumb_to_limit(image_bytes, target_bytes=EXTERNAL_THUMB_TARGET_BYTES, hard_max_bytes=EXTERNAL_THUMB_MAX_BYTES): if not PIL_AVAILABLE: return None @@ -694,15 +696,30 @@ def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_B scale = EXTERNAL_THUMB_MAX_DIMENSION / max_dim new_size = (max(1, int(width * scale)), max(1, int(height * scale))) img = img.resize(new_size, Image.LANCZOS) + logging.info(f"🖼️ Resized external thumb to {new_size[0]}x{new_size[1]}") - for quality in [85, 75, 65, 55, 45, EXTERNAL_THUMB_MIN_JPEG_QUALITY]: + for quality in [78, 70, 62, 54, 46, 40, EXTERNAL_THUMB_MIN_JPEG_QUALITY]: out = io.BytesIO() img.save(out, format="JPEG", quality=quality, optimize=True, progressive=True) data = out.getvalue() - if len(data) <= max_bytes: + + logging.info( + f"🖼️ External thumb candidate size at JPEG quality {quality}: " + f"{len(data) / 1024:.2f} KB" + ) + + if len(data) <= target_bytes: return data - for target_dim in [1000, 900, 800, 700, 600]: + if len(data) <= hard_max_bytes: + best_so_far = data + + # Additional downscale passes + best_candidate = locals().get("best_so_far") + if best_candidate and len(best_candidate) <= hard_max_bytes: + return best_candidate + + for target_dim in [900, 800, 700, 600, 500]: resized = img.copy() width, height = resized.size max_dim = max(width, height) @@ -712,13 +729,25 @@ def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_B new_size = (max(1, int(width * scale)), max(1, int(height * scale))) resized = resized.resize(new_size, Image.LANCZOS) - for quality in [60, 50, 45, EXTERNAL_THUMB_MIN_JPEG_QUALITY]: + for quality in [54, 46, 40, EXTERNAL_THUMB_MIN_JPEG_QUALITY]: out = io.BytesIO() resized.save(out, format="JPEG", quality=quality, optimize=True, progressive=True) data = out.getvalue() - if len(data) <= max_bytes: + + logging.info( + f"🖼️ External thumb resized to <= {target_dim}px at quality {quality}: " + f"{len(data) / 1024:.2f} KB" + ) + + if len(data) <= target_bytes: return data + if len(data) <= hard_max_bytes: + best_candidate = data + + if best_candidate and len(best_candidate) <= hard_max_bytes: + return best_candidate + except Exception as e: logging.warning(f"⚠️ Could not compress external thumbnail: {repr(e)}") @@ -742,16 +771,16 @@ def get_external_thumb_blob_from_url(image_url, client, http_client, cooldown_pa logging.warning(f"⚠️ Could not fetch external thumb {image_url}: empty body") return None - upload_bytes = content - if len(upload_bytes) > EXTERNAL_THUMB_MAX_BYTES: - compressed = compress_external_thumb_to_limit(upload_bytes, EXTERNAL_THUMB_MAX_BYTES) - if compressed: - upload_bytes = compressed - else: - logging.warning("⚠️ Could not compress external thumb to fit limit. Omitting thumbnail.") - return None + logging.info(f"🖼️ Downloaded external thumb {image_url} ({len(content) / 1024:.2f} KB)") - return upload_blob_with_retry( + upload_bytes = compress_external_thumb_to_limit(content) + if not upload_bytes: + logging.warning("⚠️ Could not prepare compressed external thumbnail. Omitting thumbnail.") + return None + + logging.info(f"🖼️ Final external thumb upload size: {len(upload_bytes) / 1024:.2f} KB") + + blob = upload_blob_with_retry( client, upload_bytes, media_label=f"external-thumb:{image_url}", @@ -759,6 +788,12 @@ def get_external_thumb_blob_from_url(image_url, client, http_client, cooldown_pa cooldown_on_rate_limit=True, cooldown_path=cooldown_path ) + if blob: + logging.info("✅ External thumbnail uploaded successfully") + return blob + + logging.warning("⚠️ External thumbnail upload failed. Will omit thumbnail.") + return None except Exception as e: logging.warning(f"⚠️ Could not fetch/upload external thumb {image_url}: {repr(e)}") @@ -802,6 +837,8 @@ def build_external_link_embed(url, fallback_title, client, http_client, cooldown logging.info("✅ External link card thumbnail prepared successfully") else: logging.info("ℹ️ External link card will be posted without thumbnail") + else: + logging.info("ℹ️ No og:image found for external link card") if link_metadata.get("title") or link_metadata.get("description") or thumb_blob: return models.AppBskyEmbedExternal.Main( @@ -878,6 +915,7 @@ def is_probable_length_error(exc): "string too long", "maxLength", "length", + "grapheme too big", ] return any(signal.lower() in text.lower() for signal in signals) @@ -1099,4 +1137,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file