Added new yml

This commit is contained in:
Guillem Hernandez Sola
2026-03-30 17:38:35 +02:00
parent 782d1bd149
commit f85457a045

View File

@@ -5,6 +5,7 @@ import re
import httpx
import time
import os
import subprocess
from dotenv import load_dotenv
from atproto import Client, client_utils, models
from playwright.sync_api import sync_playwright
@@ -167,6 +168,17 @@ def build_dynamic_alt(raw_text):
return dynamic_alt
def build_video_embed(video_blob, alt_text):
    """Build a video embed record from an uploaded blob and its alt text.

    Args:
        video_blob: Value passed through as the ``video`` field of the embed.
        alt_text: Value passed through as the ``alt`` field of the embed.

    Returns:
        The ``models.AppBskyEmbedVideo.Main`` instance, or ``None`` when an
        ``AttributeError`` is raised while resolving or constructing it.
    """
    try:
        embed = models.AppBskyEmbedVideo.Main(video=video_blob, alt=alt_text)
    except AttributeError:
        # The failure is reported as an outdated atproto install; the caller
        # is expected to treat the None result as "no embed available".
        logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
        embed = None
    return embed
# --- Playwright Scraping ---
def scrape_tweets_via_playwright(username, password, email, target_handle):
"""Logs in (or loads session) and scrapes tweets directly from the DOM."""
@@ -321,23 +333,37 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
def extract_video_url_from_tweet_page(context, tweet_url):
"""
Opens a tweet page and captures the first real MP4 video request.
Opens a tweet page and captures the best real video URL.
Preference order:
1. .mp4
2. .m3u8
Ignores .m4s fragment files.
"""
page = context.new_page()
found_video_url = None
best_mp4_url = None
best_m3u8_url = None
def handle_response(response):
nonlocal found_video_url
nonlocal best_mp4_url, best_m3u8_url
try:
url = response.url
content_type = response.headers.get("content-type", "")
url = response.url.lower()
content_type = response.headers.get("content-type", "").lower()
if found_video_url:
if ".m4s" in url:
return
if ".mp4" in url or "video/mp4" in content_type:
found_video_url = url
logging.info(f"🎥 Found video URL: {url}")
if best_mp4_url is None:
best_mp4_url = response.url
logging.info(f"🎥 Found MP4 video URL: {response.url}")
return
if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
if best_m3u8_url is None:
best_m3u8_url = response.url
logging.info(f"📺 Found HLS playlist URL: {response.url}")
return
except Exception:
pass
@@ -352,11 +378,11 @@ def extract_video_url_from_tweet_page(context, tweet_url):
if video_player.count() > 0:
try:
video_player.click(force=True, timeout=3000)
time.sleep(3)
time.sleep(5)
except Exception:
pass
return found_video_url
return best_mp4_url or best_m3u8_url
except Exception as e:
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
@@ -367,18 +393,35 @@ def extract_video_url_from_tweet_page(context, tweet_url):
# --- Video Processing ---
def download_and_crop_video(video_url, output_path):
"""Downloads the video and crops it to max 59 seconds."""
"""
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
Requires ffmpeg installed on the system.
"""
temp_input = output_path.replace(".mp4", "_source.mp4")
try:
response = httpx.get(video_url, timeout=60, follow_redirects=True)
if response.status_code != 200:
logging.error(f"❌ Failed to download video: {video_url} (status {response.status_code})")
logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
download_cmd = [
"ffmpeg",
"-y",
"-i", video_url,
"-c", "copy",
temp_input,
]
download_result = subprocess.run(
download_cmd,
capture_output=True,
text=True
)
if download_result.returncode != 0:
logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
return None
with open(output_path, "wb") as f:
f.write(response.content)
logging.info(f"✅ Video downloaded: {output_path}")
logging.info(f"✅ Video downloaded: {temp_input}")
video_clip = VideoFileClip(output_path)
video_clip = VideoFileClip(temp_input)
end_time = min(59, float(video_clip.duration))
if hasattr(video_clip, "subclipped"):
@@ -398,22 +441,17 @@ def download_and_crop_video(video_url, output_path):
cropped_clip.close()
os.replace(temp_output, output_path)
if os.path.exists(temp_input):
os.remove(temp_input)
logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
return output_path
except Exception as e:
logging.error(f"❌ Error processing video: {e}")
return None
def build_video_embed(video_blob, alt_text):
try:
return models.AppBskyEmbedVideo.Main(
video=video_blob,
alt=alt_text
)
except AttributeError:
logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
if os.path.exists(temp_input):
os.remove(temp_input)
return None
@@ -504,15 +542,17 @@ def sync_feeds(args):
try:
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
if not real_video_url:
logging.warning(f"⚠️ Could not resolve real video URL for {tweet.tweet_url}")
logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
continue
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
if not cropped_video_path:
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
continue
video_blob = get_blob_from_file(cropped_video_path, bsky_client)
if not video_blob:
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
continue
video_embed = build_video_embed(video_blob, dynamic_alt)