Avoid image upload warning
This commit is contained in:
@@ -22,6 +22,11 @@ DEDUPE_BSKY_LIMIT = 30
|
|||||||
TWEET_MAX_AGE_DAYS = 3
|
TWEET_MAX_AGE_DAYS = 3
|
||||||
VIDEO_MAX_DURATION_SECONDS = 179
|
VIDEO_MAX_DURATION_SECONDS = 179
|
||||||
|
|
||||||
|
BSKY_BLOB_UPLOAD_MAX_RETRIES = 5  # upper bound of the attempt loop in upload_blob_with_retry (total attempts)
|
||||||
|
BSKY_BLOB_UPLOAD_BASE_DELAY = 10  # first backoff delay in seconds; doubled on each later attempt
|
||||||
|
BSKY_BLOB_UPLOAD_MAX_DELAY = 300  # cap applied to the exponential backoff delay, in seconds
|
||||||
|
MEDIA_DOWNLOAD_TIMEOUT = 30  # passed as timeout= to the media download request (presumably seconds)
|
||||||
|
|
||||||
# --- Logging Setup ---
|
# --- Logging Setup ---
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||||
@@ -141,20 +146,95 @@ def extract_urls_from_facets(record):
|
|||||||
return urls
|
return urls
|
||||||
|
|
||||||
|
|
||||||
def get_blob_from_url(media_url, client):
|
def get_rate_limit_wait_seconds(error_obj, default_delay):
    """
    Work out how long to wait before retrying a rate-limited request.

    A ``RateLimit-Reset`` header (Unix timestamp, in seconds) is looked up
    case-insensitively on ``error_obj.headers`` and, failing that, on
    ``error_obj.response.headers`` -- request errors commonly carry the HTTP
    response (and therefore its headers) on a ``response`` attribute rather
    than on the exception itself.

    Args:
        error_obj: The exception (or any object) raised by the failed request.
        default_delay: Fallback delay in seconds; also the minimum returned.

    Returns:
        ``max(reset - now + 1, default_delay)`` when a usable reset timestamp
        is found, otherwise ``default_delay``.
    """
    header_sources = (
        getattr(error_obj, "headers", None),
        getattr(getattr(error_obj, "response", None), "headers", None),
    )

    for headers in header_sources:
        if not headers:
            continue

        try:
            header_items = list(headers.items())
        except (AttributeError, TypeError):
            # Not a mapping-like object; nothing to read here.
            continue

        for name, value in header_items:
            if str(name).lower() != "ratelimit-reset" or not value:
                continue

            try:
                # Go through float() so values such as "1700000000.0" parse too.
                reset_ts = int(float(value))
            except (TypeError, ValueError, OverflowError):
                continue

            # +1 second of slack so we do not retry right on the boundary.
            return max(reset_ts - int(time.time()) + 1, default_delay)

    return default_delay
|
||||||
|
|
||||||
|
|
||||||
|
def upload_blob_with_retry(client, binary_data, media_label="media"):
    """
    Upload a blob to Bluesky, retrying with backoff when rate-limited.

    Args:
        client: Object exposing ``upload_blob(data)``; the call's result must
            have a ``blob`` attribute.
        binary_data: Raw bytes to upload.
        media_label: Identifier used only in log messages.

    Returns:
        The uploaded blob reference, or ``None`` when the upload fails for a
        reason other than rate limiting or when every attempt was rate-limited.
        Failures are logged as warnings; no exception is propagated.
    """
    last_exception = None

    for attempt in range(1, BSKY_BLOB_UPLOAD_MAX_RETRIES + 1):
        try:
            result = client.upload_blob(binary_data)
            return result.blob

        except Exception as e:
            last_exception = e
            error_text = str(e)
            # Prefer the HTTP status when the error carries a response; keep the
            # text match as a fallback for errors that only expose a message.
            status_code = getattr(getattr(e, "response", None), "status_code", None)
            is_rate_limited = (
                status_code == 429
                or "429" in error_text
                or "RateLimitExceeded" in error_text
            )

            if not is_rate_limited:
                logging.warning(f"Could not upload {media_label}: {e}")
                return None

            if attempt >= BSKY_BLOB_UPLOAD_MAX_RETRIES:
                # Final attempt: report once and stop, instead of logging the
                # same failure a second time after the loop.
                logging.warning(
                    f"❌ Exhausted blob upload retries for {media_label} after rate limiting: {e}"
                )
                return None

            # Exponential backoff, capped; a server-provided reset time may extend it.
            backoff_delay = min(
                BSKY_BLOB_UPLOAD_BASE_DELAY * (2 ** (attempt - 1)),
                BSKY_BLOB_UPLOAD_MAX_DELAY
            )
            wait_seconds = get_rate_limit_wait_seconds(e, backoff_delay)

            logging.warning(
                f"⏳ Bluesky blob upload rate-limited for {media_label}. "
                f"Retry {attempt}/{BSKY_BLOB_UPLOAD_MAX_RETRIES} after {wait_seconds}s."
            )
            time.sleep(wait_seconds)

    # Only reachable when the retry count is configured below 1 (loop never ran).
    logging.warning(f"Could not upload {media_label}: {last_exception}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_blob_from_url(media_url, client, http_client=None):
    """
    Download media and upload it to Bluesky with retry support for upload rate limits.

    Args:
        media_url: URL of the media to download.
        client: Bluesky client handed to ``upload_blob_with_retry``.
        http_client: Object with an httpx-style ``get(url, timeout=...,
            follow_redirects=...)`` method. When omitted, the module-level
            ``httpx`` API is used so two-argument callers keep working.

    Returns:
        The uploaded blob reference, or ``None`` if the download returns a
        non-200 status, an empty body, raises, or the upload fails.
    """
    fetcher = http_client if http_client is not None else httpx

    try:
        r = fetcher.get(media_url, timeout=MEDIA_DOWNLOAD_TIMEOUT, follow_redirects=True)
        if r.status_code != 200:
            logging.warning(f"Could not fetch media {media_url}: HTTP {r.status_code}")
            return None

        content = r.content
        if not content:
            # An empty body would only produce a useless upload attempt.
            logging.warning(f"Could not fetch media {media_url}: empty response body")
            return None

        return upload_blob_with_retry(client, content, media_label=media_url)

    except Exception as e:
        # Best-effort: a failed media item must not abort the surrounding sync.
        logging.warning(f"Could not fetch media {media_url}: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
def get_blob_from_file(file_path, client):
    """
    Read a local file and upload its bytes to Bluesky via upload_blob_with_retry.

    Returns the uploaded blob reference, or None when reading or uploading
    fails (the failure is logged as a warning).
    """
    try:
        with open(file_path, "rb") as handle:
            payload = handle.read()

        blob = upload_blob_with_retry(client, payload, media_label=file_path)
    except Exception as err:
        logging.warning(f"Could not upload local file {file_path}: {err}")
        blob = None

    return blob
|
||||||
@@ -422,7 +502,7 @@ def get_recent_bsky_posts(client, handle, limit=30):
|
|||||||
if getattr(record, "reply", None) is not None:
|
if getattr(record, "reply", None) is not None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
text = getattr(record, "text", "") or ""
|
text = getattr(record, "text", "") or ""
|
||||||
normalized_text = normalize_post_text(text)
|
normalized_text = normalize_post_text(text)
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
@@ -1029,9 +1109,9 @@ def sync_feeds(args):
|
|||||||
return
|
return
|
||||||
|
|
||||||
new_posts = 0
|
new_posts = 0
|
||||||
state_file = "twitter_browser_state.json"
|
browser_state_file = "twitter_browser_state.json"
|
||||||
|
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p, httpx.Client() as media_http_client:
|
||||||
browser = p.chromium.launch(
|
browser = p.chromium.launch(
|
||||||
headless=True,
|
headless=True,
|
||||||
args=["--disable-blink-features=AutomationControlled"]
|
args=["--disable-blink-features=AutomationControlled"]
|
||||||
@@ -1044,8 +1124,8 @@ def sync_feeds(args):
|
|||||||
),
|
),
|
||||||
"viewport": {"width": 1920, "height": 1080},
|
"viewport": {"width": 1920, "height": 1080},
|
||||||
}
|
}
|
||||||
if os.path.exists(state_file):
|
if os.path.exists(browser_state_file):
|
||||||
context_kwargs["storage_state"] = state_file
|
context_kwargs["storage_state"] = browser_state_file
|
||||||
|
|
||||||
context = browser.new_context(**context_kwargs)
|
context = browser.new_context(**context_kwargs)
|
||||||
|
|
||||||
@@ -1061,11 +1141,12 @@ def sync_feeds(args):
|
|||||||
|
|
||||||
image_embeds = []
|
image_embeds = []
|
||||||
video_embed = None
|
video_embed = None
|
||||||
|
media_upload_failures = []
|
||||||
|
|
||||||
if tweet.media:
|
if tweet.media:
|
||||||
for media in tweet.media:
|
for media in tweet.media:
|
||||||
if media.type == "photo":
|
if media.type == "photo":
|
||||||
blob = get_blob_from_url(media.media_url_https, bsky_client)
|
blob = get_blob_from_url(media.media_url_https, bsky_client, media_http_client)
|
||||||
if blob:
|
if blob:
|
||||||
image_embeds.append(
|
image_embeds.append(
|
||||||
models.AppBskyEmbedImages.Image(
|
models.AppBskyEmbedImages.Image(
|
||||||
@@ -1073,10 +1154,13 @@ def sync_feeds(args):
|
|||||||
image=blob
|
image=blob
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
media_upload_failures.append(f"photo:{media.media_url_https}")
|
||||||
|
|
||||||
elif media.type == "video":
|
elif media.type == "video":
|
||||||
if not tweet.tweet_url:
|
if not tweet.tweet_url:
|
||||||
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
|
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
|
||||||
|
media_upload_failures.append("video:no_tweet_url")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
temp_video_path = "temp_video.mp4"
|
temp_video_path = "temp_video.mp4"
|
||||||
@@ -1085,19 +1169,24 @@ def sync_feeds(args):
|
|||||||
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
||||||
if not real_video_url:
|
if not real_video_url:
|
||||||
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
|
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
|
||||||
|
media_upload_failures.append(f"video:resolve_failed:{tweet.tweet_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
|
||||||
if not cropped_video_path:
|
if not cropped_video_path:
|
||||||
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
|
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
|
||||||
|
media_upload_failures.append(f"video:crop_failed:{tweet.tweet_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
|
||||||
if not video_blob:
|
if not video_blob:
|
||||||
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
|
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
|
||||||
|
media_upload_failures.append(f"video:upload_failed:{tweet.tweet_url}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
video_embed = build_video_embed(video_blob, dynamic_alt)
|
video_embed = build_video_embed(video_blob, dynamic_alt)
|
||||||
|
if not video_embed:
|
||||||
|
media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(temp_video_path):
|
if os.path.exists(temp_video_path):
|
||||||
@@ -1105,14 +1194,18 @@ def sync_feeds(args):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
post_result = None
|
post_result = None
|
||||||
|
post_mode = "text"
|
||||||
|
|
||||||
if video_embed:
|
if video_embed:
|
||||||
post_result = bsky_client.send_post(text=rich_text, embed=video_embed, langs=["ca"])
|
post_result = bsky_client.send_post(text=rich_text, embed=video_embed, langs=["ca"])
|
||||||
|
post_mode = "video"
|
||||||
elif image_embeds:
|
elif image_embeds:
|
||||||
embed = models.AppBskyEmbedImages.Main(images=image_embeds)
|
embed = models.AppBskyEmbedImages.Main(images=image_embeds)
|
||||||
post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"])
|
post_result = bsky_client.send_post(text=rich_text, embed=embed, langs=["ca"])
|
||||||
|
post_mode = f"images:{len(image_embeds)}"
|
||||||
else:
|
else:
|
||||||
post_result = bsky_client.send_post(text=rich_text, langs=["ca"])
|
post_result = bsky_client.send_post(text=rich_text, langs=["ca"])
|
||||||
|
post_mode = "text_only"
|
||||||
|
|
||||||
bsky_uri = getattr(post_result, "uri", None)
|
bsky_uri = getattr(post_result, "uri", None)
|
||||||
|
|
||||||
@@ -1132,7 +1225,15 @@ def sync_feeds(args):
|
|||||||
recent_bsky_posts = recent_bsky_posts[:DEDUPE_BSKY_LIMIT]
|
recent_bsky_posts = recent_bsky_posts[:DEDUPE_BSKY_LIMIT]
|
||||||
|
|
||||||
new_posts += 1
|
new_posts += 1
|
||||||
logging.info(f"✅ Posted new tweet to Bluesky: {raw_text}")
|
|
||||||
|
if media_upload_failures:
|
||||||
|
logging.warning(
|
||||||
|
f"✅ Posted tweet to Bluesky with degraded media mode ({post_mode}). "
|
||||||
|
f"Failed media items: {media_upload_failures}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logging.info(f"✅ Posted new tweet to Bluesky with mode {post_mode}: {raw_text}")
|
||||||
|
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user