fix(video): use unique temporary filenames for per-tweet video processing and uploads
This commit is contained in:
@@ -10,6 +10,7 @@ import httpx
|
|||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import uuid
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from atproto import Client, client_utils, models
|
from atproto import Client, client_utils, models
|
||||||
@@ -317,6 +318,33 @@ def canonicalize_tweet_url(url):
|
|||||||
return f"https://x.com/{handle}/status/{tweet_id}"
|
return f"https://x.com/{handle}/status/{tweet_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def extract_tweet_id(tweet_url):
    """Return the numeric status ID embedded in a tweet URL, or None.

    Accepts any URL containing a ``/status/<digits>`` segment; falsy
    input (None, empty string) yields None.
    """
    if not tweet_url:
        return None
    found = re.search(r"/status/(\d+)", tweet_url)
    return found.group(1) if found else None
|
||||||
|
|
||||||
|
|
||||||
|
def make_unique_video_temp_base(tweet_url=None):
    """Build a collision-free base name for this tweet's temp video files.

    Combines the tweet ID (or "unknown"), a millisecond timestamp, and a
    short random hex token so concurrent/per-tweet downloads never share
    temp filenames.
    """
    pieces = (
        "temp_video",
        extract_tweet_id(tweet_url) or "unknown",
        str(int(time.time() * 1000)),
        uuid.uuid4().hex[:8],
    )
    base = "_".join(pieces)
    logging.info(f"🎞️ Using unique temp video base: {base}")
    return base
|
||||||
|
|
||||||
|
|
||||||
|
def remove_file_quietly(path):
    """Delete *path* if present; log the outcome and never raise.

    Fix: the original checked ``os.path.exists`` before ``os.remove``,
    a TOCTOU race — the file can disappear between the check and the
    removal. We now attempt removal directly and treat
    ``FileNotFoundError`` as the quiet no-op the exists-check intended.
    """
    if not path:
        return
    try:
        os.remove(path)
        logging.info(f"🧹 Removed temp file: {path}")
    except FileNotFoundError:
        # Already gone (or never created) — nothing to clean up.
        pass
    except Exception as e:
        logging.warning(f"⚠️ Could not remove temp file {path}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def is_x_or_twitter_domain(url):
|
def is_x_or_twitter_domain(url):
|
||||||
try:
|
try:
|
||||||
normalized = normalize_urlish_token(url) or url
|
normalized = normalize_urlish_token(url) or url
|
||||||
@@ -346,7 +374,6 @@ def extract_urls_from_text(text):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
repaired = repair_broken_urls(text)
|
repaired = repair_broken_urls(text)
|
||||||
|
|
||||||
pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+'
|
pattern = r'(?:(?:https?://)|(?:www\.))[^\s<>"\']+'
|
||||||
return re.findall(pattern, repaired)
|
return re.findall(pattern, repaired)
|
||||||
|
|
||||||
@@ -605,10 +632,6 @@ def extract_first_resolved_external_url(text, http_client):
|
|||||||
|
|
||||||
|
|
||||||
def sanitize_visible_urls_in_text(text, http_client):
|
def sanitize_visible_urls_in_text(text, http_client):
|
||||||
"""
|
|
||||||
Resolve visible t.co URLs in the text, remove x.com/twitter.com URLs from
|
|
||||||
visible text, normalize www. URLs, and deduplicate repeated external URLs.
|
|
||||||
"""
|
|
||||||
if not text:
|
if not text:
|
||||||
return text, None
|
return text, None
|
||||||
|
|
||||||
@@ -2055,12 +2078,9 @@ def download_and_crop_video(video_url, output_path):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
for path in [temp_input, temp_trimmed, temp_output]:
|
remove_file_quietly(temp_input)
|
||||||
if os.path.exists(path):
|
remove_file_quietly(temp_trimmed)
|
||||||
try:
|
remove_file_quietly(temp_output)
|
||||||
os.remove(path)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def candidate_matches_existing_bsky(candidate, recent_bsky_posts):
|
def candidate_matches_existing_bsky(candidate, recent_bsky_posts):
|
||||||
@@ -2259,7 +2279,9 @@ def sync_feeds(args):
|
|||||||
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
|
logging.warning("⚠️ Tweet has video marker but no tweet URL. Skipping video.")
|
||||||
media_upload_failures.append("video:no_tweet_url")
|
media_upload_failures.append("video:no_tweet_url")
|
||||||
else:
|
else:
|
||||||
temp_video_path = "temp_video.mp4"
|
temp_video_base = make_unique_video_temp_base(tweet.tweet_url)
|
||||||
|
temp_video_path = f"{temp_video_base}.mp4"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
|
||||||
if not real_video_url:
|
if not real_video_url:
|
||||||
@@ -2280,8 +2302,10 @@ def sync_feeds(args):
|
|||||||
if not video_embed:
|
if not video_embed:
|
||||||
media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}")
|
media_upload_failures.append(f"video:embed_failed:{tweet.tweet_url}")
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(temp_video_path):
|
remove_file_quietly(temp_video_path)
|
||||||
os.remove(temp_video_path)
|
remove_file_quietly(f"{temp_video_base}_source.mp4")
|
||||||
|
remove_file_quietly(f"{temp_video_base}_trimmed.mp4")
|
||||||
|
remove_file_quietly(f"{temp_video_base}_compressed.mp4")
|
||||||
|
|
||||||
if not video_embed:
|
if not video_embed:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
|
|||||||
Reference in New Issue
Block a user