fix(sync): prefer video posts over image fallback and retry transient blob upload failures
This commit is contained in:
@@ -28,8 +28,6 @@ BSKY_TEXT_MAX_LENGTH = 275
|
||||
VIDEO_MAX_DURATION_SECONDS = 179
|
||||
MAX_VIDEO_UPLOAD_SIZE_MB = 45
|
||||
|
||||
# External-card thumbnail constraints:
|
||||
# Conservative safe target below the observed PDS max (~976.56 KB).
|
||||
EXTERNAL_THUMB_MAX_BYTES = 950 * 1024
|
||||
EXTERNAL_THUMB_MAX_DIMENSION = 1200
|
||||
EXTERNAL_THUMB_MIN_JPEG_QUALITY = 40
|
||||
@@ -41,6 +39,10 @@ MEDIA_DOWNLOAD_TIMEOUT = 30
|
||||
LINK_METADATA_TIMEOUT = 10
|
||||
DEFAULT_BSKY_BASE_URL = "https://bsky.social"
|
||||
|
||||
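# Retry tuning for transient blob upload failures (timeouts, connection errors, 502/503/504).
# The delay is in seconds and is multiplied by the transient attempt number.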
|
||||
BSKY_BLOB_TRANSIENT_ERROR_RETRIES = 3
|
||||
BSKY_BLOB_TRANSIENT_ERROR_DELAY = 15
|
||||
|
||||
# --- Logging Setup ---
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
@@ -88,20 +90,6 @@ def strip_trailing_url_punctuation(url):
|
||||
|
||||
|
||||
def repair_broken_urls(text):
|
||||
"""
|
||||
Repair URLs that were split by copied/scraped line breaks.
|
||||
|
||||
Examples:
|
||||
https://
|
||||
3cat.cat/path
|
||||
becomes:
|
||||
https://3cat.cat/path
|
||||
|
||||
https://3cat.cat/some-pa
|
||||
th/article
|
||||
becomes:
|
||||
https://3cat.cat/some-path/article
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
@@ -133,33 +121,6 @@ def repair_broken_urls(text):
|
||||
|
||||
|
||||
def repair_broken_mentions(text):
|
||||
"""
|
||||
Repair mention-related line wrapping in scraped text.
|
||||
|
||||
Handles cases like:
|
||||
Ho explica
|
||||
@martamartorell
|
||||
|
||||
La
|
||||
@sanfenerea
|
||||
tenia un repte
|
||||
|
||||
Hospital
|
||||
@parctauli
|
||||
.
|
||||
|
||||
conjunt
|
||||
@bomberscat
|
||||
-SEM.
|
||||
|
||||
becoming:
|
||||
Ho explica @martamartorell
|
||||
La @sanfenerea tenia un repte
|
||||
Hospital @parctauli .
|
||||
conjunt @bomberscat -SEM.
|
||||
|
||||
while preserving real paragraph breaks and standalone mention lines.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
@@ -183,7 +144,6 @@ def repair_broken_mentions(text):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# If current line is only a mention, try to attach it backward.
|
||||
if is_mention_only_line(current):
|
||||
if result and result[-1].strip():
|
||||
result[-1] = result[-1].rstrip() + " " + stripped
|
||||
@@ -193,8 +153,6 @@ def repair_broken_mentions(text):
|
||||
|
||||
i += 1
|
||||
|
||||
# Attach immediately following continuation lines if they are not blank
|
||||
# and not another standalone mention.
|
||||
while i < len(lines):
|
||||
next_line = lines[i]
|
||||
next_stripped = next_line.strip()
|
||||
@@ -214,7 +172,6 @@ def repair_broken_mentions(text):
|
||||
|
||||
continue
|
||||
|
||||
# If current line has text and next line is a mention, merge them.
|
||||
if i + 1 < len(lines) and is_mention_only_line(lines[i + 1]):
|
||||
merged = stripped + " " + lines[i + 1].strip()
|
||||
changed = True
|
||||
@@ -252,10 +209,6 @@ def repair_broken_mentions(text):
|
||||
|
||||
|
||||
def strip_line_edge_whitespace(text):
|
||||
"""
|
||||
Remove leading/trailing whitespace from each line while preserving
|
||||
the line structure and intentional blank lines.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
@@ -278,12 +231,6 @@ def strip_line_edge_whitespace(text):
|
||||
|
||||
|
||||
def remove_trailing_ellipsis_line(text):
|
||||
"""
|
||||
Remove trailing lines that are only ellipsis markers.
|
||||
Handles:
|
||||
- ...
|
||||
- …
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
@@ -415,8 +362,25 @@ def get_rate_limit_wait_seconds(error_obj, default_delay):
|
||||
return default_delay
|
||||
|
||||
|
||||
def is_transient_blob_error(error_obj):
    """
    Return True when a blob upload error looks transient and worth retrying.

    An error is treated as transient when either:
      - its repr() mentions one of the known timeout / connection exception
        names listed below, or
      - it signals an HTTP 502, 503 or 504 status.

    The status is read from ``error_obj.response.status_code`` when that
    attribute exists and is an integer. Otherwise the repr() text is searched
    for the codes as standalone tokens, so digit runs that merely contain
    "503" (epoch timestamps, byte counts, request ids) do not trigger a retry.

    Args:
        error_obj: The exception (or any object) raised by the upload call.

    Returns:
        bool: True if the error should be retried as a transient failure.
    """
    # Local import keeps this block self-contained regardless of the
    # module-level import list.
    import re

    # NOTE(review): these look like timeout/connection exception class names
    # from the HTTP client and Bluesky SDK in use; confirm against the
    # installed versions.
    transient_exception_markers = (
        "InvokeTimeoutError",
        "ReadTimeout",
        "WriteTimeout",
        "TimeoutException",
        "RemoteProtocolError",
        "ConnectError",
    )
    transient_status_codes = (502, 503, 504)

    error_text = repr(error_obj)

    if any(marker in error_text for marker in transient_exception_markers):
        return True

    # Prefer the structured status code over text matching: the repr of an
    # error may embed response headers or bodies with arbitrary digits.
    response = getattr(error_obj, "response", None)
    status_code = getattr(response, "status_code", None)
    if isinstance(status_code, int):
        return status_code in transient_status_codes

    # Fallback for errors without a structured response: match the status
    # codes only when they are not part of a longer alphanumeric token.
    return re.search(r"\b(?:502|503|504)\b", error_text) is not None
|
||||
|
||||
|
||||
def upload_blob_with_retry(client, binary_data, media_label="media"):
|
||||
last_exception = None
|
||||
transient_attempts = 0
|
||||
|
||||
for attempt in range(1, BSKY_BLOB_UPLOAD_MAX_RETRIES + 1):
|
||||
try:
|
||||
@@ -428,18 +392,7 @@ def upload_blob_with_retry(client, binary_data, media_label="media"):
|
||||
error_text = str(e)
|
||||
is_rate_limited = "429" in error_text or "RateLimitExceeded" in error_text
|
||||
|
||||
if not is_rate_limited:
|
||||
logging.warning(f"Could not upload {media_label}: {repr(e)}")
|
||||
|
||||
if hasattr(e, "response") and e.response is not None:
|
||||
try:
|
||||
logging.warning(f"Upload response status: {e.response.status_code}")
|
||||
logging.warning(f"Upload response body: {e.response.text}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
if is_rate_limited:
|
||||
backoff_delay = min(
|
||||
BSKY_BLOB_UPLOAD_BASE_DELAY * (2 ** (attempt - 1)),
|
||||
BSKY_BLOB_UPLOAD_MAX_DELAY
|
||||
@@ -452,10 +405,33 @@ def upload_blob_with_retry(client, binary_data, media_label="media"):
|
||||
f"Retry {attempt}/{BSKY_BLOB_UPLOAD_MAX_RETRIES} after {wait_seconds}s."
|
||||
)
|
||||
time.sleep(wait_seconds)
|
||||
continue
|
||||
else:
|
||||
logging.warning(
|
||||
f"❌ Exhausted blob upload retries for {media_label} after rate limiting: {repr(e)}"
|
||||
)
|
||||
break
|
||||
|
||||
if is_transient_blob_error(e) and transient_attempts < BSKY_BLOB_TRANSIENT_ERROR_RETRIES:
|
||||
transient_attempts += 1
|
||||
wait_seconds = BSKY_BLOB_TRANSIENT_ERROR_DELAY * transient_attempts
|
||||
logging.warning(
|
||||
f"⏳ Transient blob upload failure for {media_label}: {repr(e)}. "
|
||||
f"Transient retry {transient_attempts}/{BSKY_BLOB_TRANSIENT_ERROR_RETRIES} after {wait_seconds}s."
|
||||
)
|
||||
time.sleep(wait_seconds)
|
||||
continue
|
||||
|
||||
logging.warning(f"Could not upload {media_label}: {repr(e)}")
|
||||
|
||||
if hasattr(e, "response") and e.response is not None:
|
||||
try:
|
||||
logging.warning(f"Upload response status: {e.response.status_code}")
|
||||
logging.warning(f"Upload response body: {e.response.text}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
logging.warning(f"Could not upload {media_label}: {repr(last_exception)}")
|
||||
return None
|
||||
@@ -517,10 +493,6 @@ def get_blob_from_file(file_path, client):
|
||||
|
||||
|
||||
def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_BYTES):
|
||||
"""
|
||||
Compress/resize an image to fit external thumbnail blob size limits.
|
||||
Returns JPEG bytes or None.
|
||||
"""
|
||||
try:
|
||||
with Image.open(io.BytesIO(image_bytes)) as img:
|
||||
img = img.convert("RGB")
|
||||
@@ -577,11 +549,6 @@ def compress_external_thumb_to_limit(image_bytes, max_bytes=EXTERNAL_THUMB_MAX_B
|
||||
|
||||
|
||||
def get_external_thumb_blob_from_url(image_url, client, http_client):
|
||||
"""
|
||||
Download, size-check, compress if needed, and upload an external-card thumbnail blob.
|
||||
If the image cannot fit within the PDS blob limit, return None so the external card
|
||||
can still be posted without a thumbnail.
|
||||
"""
|
||||
try:
|
||||
r = http_client.get(image_url, timeout=MEDIA_DOWNLOAD_TIMEOUT, follow_redirects=True)
|
||||
if r.status_code != 200:
|
||||
@@ -652,10 +619,6 @@ def fetch_link_metadata(url, http_client):
|
||||
|
||||
|
||||
def build_external_link_embed(url, client, http_client, fallback_title="Link"):
|
||||
"""
|
||||
Build a Bluesky external embed from a URL.
|
||||
If the thumbnail image is too large, omit the thumbnail but still return the link card.
|
||||
"""
|
||||
link_metadata = fetch_link_metadata(url, http_client)
|
||||
|
||||
thumb_blob = None
|
||||
@@ -1519,6 +1482,9 @@ def sync_feeds(args):
|
||||
ordered_non_x_urls = extract_ordered_non_x_urls(prepared_text)
|
||||
canonical_non_x_urls = set(ordered_non_x_urls)
|
||||
|
||||
has_video = any(getattr(m, "type", None) == "video" for m in (tweet.media or []))
|
||||
has_photo = any(getattr(m, "type", None) == "photo" for m in (tweet.media or []))
|
||||
|
||||
candidate_tweets.append({
|
||||
"tweet": tweet,
|
||||
"tweet_time": tweet_time,
|
||||
@@ -1530,6 +1496,8 @@ def sync_feeds(args):
|
||||
"canonical_non_x_urls": canonical_non_x_urls,
|
||||
"ordered_non_x_urls": ordered_non_x_urls,
|
||||
"looks_like_title_plus_url": looks_like_title_plus_url_post(prepared_text),
|
||||
"has_video": has_video,
|
||||
"has_photo": has_photo,
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
@@ -1593,6 +1561,52 @@ def sync_feeds(args):
|
||||
external_embed = None
|
||||
media_upload_failures = []
|
||||
|
||||
has_video = candidate.get("has_video", False)
|
||||
|
||||
# --- VIDEO-FIRST POLICY ---
|
||||
# If the tweet contains video, try video first and do not degrade to photos
|
||||
# from the same tweet if video processing/upload fails.
|
||||
if has_video:
|
||||
video_media = next((m for m in (tweet.media or []) if getattr(m, "type", None) == "video"), None)
|
||||
|
||||
if video_media:
|
||||
if not tweet.tweet_url:
|
||||
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
|
||||
media_upload_failures.append("video:no_tweet_url")
|
||||
else:
|
||||
temp_video_path = "temp_video.mp4"
|
||||
try:
|
||||
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
||||
if not real_video_url:
|
||||
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
|
||||
media_upload_failures.append(f"video:resolve_failed:{tweet.tweet_url}")
|
||||
else:
|
||||
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
||||
if not cropped_video_path:
|
||||
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
|
||||
media_upload_failures.append(f"video:crop_failed:{tweet.tweet_url}")
|
||||
else:
|
||||
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
||||
if not video_blob:
|
||||
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
|
||||
media_upload_failures.append(f"video:upload_failed:{tweet.tweet_url}")
|
||||
else:
|
||||
video_embed = build_video_embed(video_blob, dynamic_alt)
|
||||
if not video_embed:
|
||||
media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}")
|
||||
finally:
|
||||
if os.path.exists(temp_video_path):
|
||||
os.remove(temp_video_path)
|
||||
|
||||
# Important: if tweet had video, do NOT upload photos as fallback.
|
||||
if not video_embed:
|
||||
logging.warning(
|
||||
"⚠️ Tweet contains video, but video could not be posted. "
|
||||
"Skipping photo fallback for this tweet."
|
||||
)
|
||||
|
||||
else:
|
||||
# Photo-only tweets can post images normally.
|
||||
if tweet.media:
|
||||
for media in tweet.media:
|
||||
if media.type == "photo":
|
||||
@@ -1607,41 +1621,7 @@ def sync_feeds(args):
|
||||
else:
|
||||
media_upload_failures.append(f"photo:{media.media_url_https}")
|
||||
|
||||
elif media.type == "video":
|
||||
if not tweet.tweet_url:
|
||||
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
|
||||
media_upload_failures.append("video:no_tweet_url")
|
||||
continue
|
||||
|
||||
temp_video_path = "temp_video.mp4"
|
||||
|
||||
try:
|
||||
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
||||
if not real_video_url:
|
||||
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
|
||||
media_upload_failures.append(f"video:resolve_failed:{tweet.tweet_url}")
|
||||
continue
|
||||
|
||||
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
||||
if not cropped_video_path:
|
||||
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
|
||||
media_upload_failures.append(f"video:crop_failed:{tweet.tweet_url}")
|
||||
continue
|
||||
|
||||
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
||||
if not video_blob:
|
||||
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
|
||||
media_upload_failures.append(f"video:upload_failed:{tweet.tweet_url}")
|
||||
continue
|
||||
|
||||
video_embed = build_video_embed(video_blob, dynamic_alt)
|
||||
if not video_embed:
|
||||
media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}")
|
||||
|
||||
finally:
|
||||
if os.path.exists(temp_video_path):
|
||||
os.remove(temp_video_path)
|
||||
|
||||
# If nothing media-based is available, optionally degrade to external card / text-only
|
||||
if not video_embed and not image_embeds:
|
||||
candidate_url = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user