Added new yml

This commit is contained in:
Guillem Hernandez Sola
2026-03-30 17:38:35 +02:00
parent 782d1bd149
commit f85457a045

View File

@@ -5,6 +5,7 @@ import re
import httpx import httpx
import time import time
import os import os
import subprocess
from dotenv import load_dotenv from dotenv import load_dotenv
from atproto import Client, client_utils, models from atproto import Client, client_utils, models
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
@@ -167,6 +168,17 @@ def build_dynamic_alt(raw_text):
return dynamic_alt return dynamic_alt
def build_video_embed(video_blob, alt_text):
    """Build a Bluesky video embed from a video blob and its alt text.

    Args:
        video_blob: Value passed through as the embed's ``video`` field.
        alt_text: Value passed through as the embed's ``alt`` field.

    Returns:
        The ``models.AppBskyEmbedVideo.Main`` instance, or ``None`` when an
        ``AttributeError`` is raised while building it (an error is logged).
    """
    try:
        embed = models.AppBskyEmbedVideo.Main(video=video_blob, alt=alt_text)
    except AttributeError:
        # Treated as "installed atproto has no video embed model"; the
        # caller receives None instead of an exception.
        logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
        embed = None
    return embed
# --- Playwright Scraping --- # --- Playwright Scraping ---
def scrape_tweets_via_playwright(username, password, email, target_handle): def scrape_tweets_via_playwright(username, password, email, target_handle):
"""Logs in (or loads session) and scrapes tweets directly from the DOM.""" """Logs in (or loads session) and scrapes tweets directly from the DOM."""
@@ -321,23 +333,37 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
def extract_video_url_from_tweet_page(context, tweet_url): def extract_video_url_from_tweet_page(context, tweet_url):
""" """
Opens a tweet page and captures the first real MP4 video request. Opens a tweet page and captures the best real video URL.
Preference order:
1. .mp4
2. .m3u8
Ignores .m4s fragment files.
""" """
page = context.new_page() page = context.new_page()
found_video_url = None best_mp4_url = None
best_m3u8_url = None
def handle_response(response): def handle_response(response):
nonlocal found_video_url nonlocal best_mp4_url, best_m3u8_url
try: try:
url = response.url url = response.url.lower()
content_type = response.headers.get("content-type", "") content_type = response.headers.get("content-type", "").lower()
if found_video_url: if ".m4s" in url:
return return
if ".mp4" in url or "video/mp4" in content_type: if ".mp4" in url or "video/mp4" in content_type:
found_video_url = url if best_mp4_url is None:
logging.info(f"🎥 Found video URL: {url}") best_mp4_url = response.url
logging.info(f"🎥 Found MP4 video URL: {response.url}")
return
if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
if best_m3u8_url is None:
best_m3u8_url = response.url
logging.info(f"📺 Found HLS playlist URL: {response.url}")
return
except Exception: except Exception:
pass pass
@@ -352,11 +378,11 @@ def extract_video_url_from_tweet_page(context, tweet_url):
if video_player.count() > 0: if video_player.count() > 0:
try: try:
video_player.click(force=True, timeout=3000) video_player.click(force=True, timeout=3000)
time.sleep(3) time.sleep(5)
except Exception: except Exception:
pass pass
return found_video_url return best_mp4_url or best_m3u8_url
except Exception as e: except Exception as e:
logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}") logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
@@ -367,18 +393,35 @@ def extract_video_url_from_tweet_page(context, tweet_url):
# --- Video Processing --- # --- Video Processing ---
def download_and_crop_video(video_url, output_path): def download_and_crop_video(video_url, output_path):
"""Downloads the video and crops it to max 59 seconds.""" """
Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
Requires ffmpeg installed on the system.
"""
temp_input = output_path.replace(".mp4", "_source.mp4")
try: try:
response = httpx.get(video_url, timeout=60, follow_redirects=True) logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
if response.status_code != 200:
logging.error(f"❌ Failed to download video: {video_url} (status {response.status_code})") download_cmd = [
"ffmpeg",
"-y",
"-i", video_url,
"-c", "copy",
temp_input,
]
download_result = subprocess.run(
download_cmd,
capture_output=True,
text=True
)
if download_result.returncode != 0:
logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
return None return None
with open(output_path, "wb") as f: logging.info(f"✅ Video downloaded: {temp_input}")
f.write(response.content)
logging.info(f"✅ Video downloaded: {output_path}")
video_clip = VideoFileClip(output_path) video_clip = VideoFileClip(temp_input)
end_time = min(59, float(video_clip.duration)) end_time = min(59, float(video_clip.duration))
if hasattr(video_clip, "subclipped"): if hasattr(video_clip, "subclipped"):
@@ -398,22 +441,17 @@ def download_and_crop_video(video_url, output_path):
cropped_clip.close() cropped_clip.close()
os.replace(temp_output, output_path) os.replace(temp_output, output_path)
if os.path.exists(temp_input):
os.remove(temp_input)
logging.info(f"✅ Video cropped to 59 seconds: {output_path}") logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
return output_path return output_path
except Exception as e: except Exception as e:
logging.error(f"❌ Error processing video: {e}") logging.error(f"❌ Error processing video: {e}")
return None if os.path.exists(temp_input):
os.remove(temp_input)
def build_video_embed(video_blob, alt_text):
try:
return models.AppBskyEmbedVideo.Main(
video=video_blob,
alt=alt_text
)
except AttributeError:
logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
return None return None
@@ -504,15 +542,17 @@ def sync_feeds(args):
try: try:
real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url) real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
if not real_video_url: if not real_video_url:
logging.warning(f"⚠️ Could not resolve real video URL for {tweet.tweet_url}") logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
continue continue
cropped_video_path = download_and_crop_video(real_video_url, temp_video_path) cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
if not cropped_video_path: if not cropped_video_path:
logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
continue continue
video_blob = get_blob_from_file(cropped_video_path, bsky_client) video_blob = get_blob_from_file(cropped_video_path, bsky_client)
if not video_blob: if not video_blob:
logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
continue continue
video_embed = build_video_embed(video_blob, dynamic_alt) video_embed = build_video_embed(video_blob, dynamic_alt)