fix(video): use unique temporary filenames for per-tweet video processing and uploads

This commit is contained in:
Guillem Hernandez Sola
2026-04-13 18:30:08 +02:00
parent 50df25d86e
commit 455a4198a2

View File

@@ -10,6 +10,7 @@ import httpx
import time
import os
import subprocess
import uuid
from urllib.parse import urlparse
from dotenv import load_dotenv
from atproto import Client, client_utils, models
@@ -317,6 +318,33 @@ def canonicalize_tweet_url(url):
return f"https://x.com/{handle}/status/{tweet_id}"
def extract_tweet_id(tweet_url):
    """Return the digits that follow ``/status/`` in *tweet_url*.

    Returns ``None`` when *tweet_url* is falsy or contains no
    ``/status/<digits>`` segment.
    """
    found = re.search(r"/status/(\d+)", tweet_url) if tweet_url else None
    return found.group(1) if found else None
def make_unique_video_temp_base(tweet_url=None):
    """Build a per-call base name (no extension) for temporary video files.

    The name has the form ``temp_video_<id>_<ms>_<rand>`` where ``<id>`` is
    the result of ``extract_tweet_id(tweet_url)`` (or ``"unknown"`` when that
    is falsy), ``<ms>`` is the current wall-clock time in milliseconds and
    ``<rand>`` is the first 8 hex characters of a fresh UUID4.

    The chosen name is logged at INFO level and returned.
    """
    id_part = extract_tweet_id(tweet_url)
    if not id_part:
        # No usable tweet ID: fall back to a fixed placeholder.
        id_part = "unknown"

    millis = int(time.time() * 1000)
    suffix = uuid.uuid4().hex[:8]

    base = f"temp_video_{id_part}_{millis}_{suffix}"
    logging.info(f"🎞️ Using unique temp video base: {base}")
    return base
def remove_file_quietly(path):
    """Delete *path* on a best-effort basis without ever raising.

    Args:
        path: Filesystem path of the file to delete. Falsy values
            (``None``, ``""``) are ignored.

    Returns:
        None. A successful removal is logged at INFO level. A missing file
        is treated as already cleaned up and produces no log output. Any
        other failure is logged as a warning.
    """
    if not path:
        return

    try:
        # Attempt the removal directly instead of checking os.path.exists()
        # first: the file may disappear between the check and the delete,
        # and exists() reports False for dangling symlinks, leaving them
        # behind.
        os.remove(path)
    except FileNotFoundError:
        # Nothing to clean up.
        return
    except Exception as e:
        # Deliberately broad: this helper is called from ``finally`` blocks,
        # so a cleanup failure must not mask the original outcome.
        logging.warning(f"⚠️ Could not remove temp file {path}: {e}")
    else:
        logging.info(f"🧹 Removed temp file: {path}")
def is_x_or_twitter_domain(url):
try:
normalized = normalize_urlish_token(url) or url
@@ -346,7 +374,6 @@ def extract_urls_from_text(text):
return []
repaired = repair_broken_urls(text)
pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+'
return re.findall(pattern, repaired)
@@ -605,10 +632,6 @@ def extract_first_resolved_external_url(text, http_client):
def sanitize_visible_urls_in_text(text, http_client):
"""
Resolve visible t.co URLs in the text, remove x.com/twitter.com URLs from
visible text, normalize www. URLs, and deduplicate repeated external URLs.
"""
if not text:
return text, None
@@ -2055,12 +2078,9 @@ def download_and_crop_video(video_url, output_path):
return None
finally:
for path in [temp_input, temp_trimmed, temp_output]:
if os.path.exists(path):
try:
os.remove(path)
except Exception:
pass
remove_file_quietly(temp_input)
remove_file_quietly(temp_trimmed)
remove_file_quietly(temp_output)
def candidate_matches_existing_bsky(candidate, recent_bsky_posts):
@@ -2259,7 +2279,9 @@ def sync_feeds(args):
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
media_upload_failures.append("video:no_tweet_url")
else:
temp_video_path = "temp_video.mp4"
temp_video_base = make_unique_video_temp_base(tweet.tweet_url)
temp_video_path = f"{temp_video_base}.mp4"
try:
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
if not real_video_url:
@@ -2280,8 +2302,10 @@ def sync_feeds(args):
if not video_embed:
media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}")
finally:
if os.path.exists(temp_video_path):
os.remove(temp_video_path)
remove_file_quietly(temp_video_path)
remove_file_quietly(f"{temp_video_base}_source.mp4")
remove_file_quietly(f"{temp_video_base}_trimmed.mp4")
remove_file_quietly(f"{temp_video_base}_compressed.mp4")
if not video_embed:
logging.warning(