From f85457a0451af8b570c41ed9f0dc9a8588864814 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Mon, 30 Mar 2026 17:38:35 +0200 Subject: [PATCH] Added new yml --- twitter2bsky_daemon.py | 100 ++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 30 deletions(-) diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py index eec393b..0ecb6bd 100644 --- a/twitter2bsky_daemon.py +++ b/twitter2bsky_daemon.py @@ -5,6 +5,7 @@ import re import httpx import time import os +import subprocess from dotenv import load_dotenv from atproto import Client, client_utils, models from playwright.sync_api import sync_playwright @@ -167,6 +168,17 @@ def build_dynamic_alt(raw_text): return dynamic_alt +def build_video_embed(video_blob, alt_text): + try: + return models.AppBskyEmbedVideo.Main( + video=video_blob, + alt=alt_text + ) + except AttributeError: + logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.") + return None + + # --- Playwright Scraping --- def scrape_tweets_via_playwright(username, password, email, target_handle): """Logs in (or loads session) and scrapes tweets directly from the DOM.""" @@ -321,23 +333,37 @@ def scrape_tweets_via_playwright(username, password, email, target_handle): def extract_video_url_from_tweet_page(context, tweet_url): """ - Opens a tweet page and captures the first real MP4 video request. + Opens a tweet page and captures the best real video URL. + Preference order: + 1. .mp4 + 2. .m3u8 + Ignores .m4s fragment files. """ page = context.new_page() - found_video_url = None + best_mp4_url = None + best_m3u8_url = None def handle_response(response): - nonlocal found_video_url + nonlocal best_mp4_url, best_m3u8_url try: - url = response.url - content_type = response.headers.get("content-type", "") + url = response.url.lower() + content_type = response.headers.get("content-type", "").lower() - if found_video_url: + if ".m4s" in url: return if ".mp4" in url or "video/mp4" in content_type: - found_video_url = url - logging.info(f"🎥 Found video URL: {url}") + if best_mp4_url is None: + best_mp4_url = response.url + logging.info(f"🎥 Found MP4 video URL: {response.url}") + return + + if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type: + if best_m3u8_url is None: + best_m3u8_url = response.url + logging.info(f"📺 Found HLS playlist URL: {response.url}") + return + except Exception: pass @@ -352,11 +378,11 @@ def extract_video_url_from_tweet_page(context, tweet_url): if video_player.count() > 0: try: video_player.click(force=True, timeout=3000) - time.sleep(3) + time.sleep(5) except Exception: pass - return found_video_url + return best_mp4_url or best_m3u8_url except Exception as e: logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}") @@ -367,18 +393,35 @@ def extract_video_url_from_tweet_page(context, tweet_url): # --- Video Processing --- def download_and_crop_video(video_url, output_path): - """Downloads the video and crops it to max 59 seconds.""" + """ + Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds. + Requires ffmpeg installed on the system. + """ + temp_input = output_path.replace(".mp4", "_source.mp4") + try: - response = httpx.get(video_url, timeout=60, follow_redirects=True) - if response.status_code != 200: - logging.error(f"❌ Failed to download video: {video_url} (status {response.status_code})") + logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}") + + download_cmd = [ + "ffmpeg", + "-y", + "-i", video_url, + "-c", "copy", + temp_input, + ] + download_result = subprocess.run( + download_cmd, + capture_output=True, + text=True + ) + + if download_result.returncode != 0: + logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}") return None - with open(output_path, "wb") as f: - f.write(response.content) - logging.info(f"✅ Video downloaded: {output_path}") + logging.info(f"✅ Video downloaded: {temp_input}") - video_clip = VideoFileClip(output_path) + video_clip = VideoFileClip(temp_input) end_time = min(59, float(video_clip.duration)) if hasattr(video_clip, "subclipped"): @@ -398,22 +441,17 @@ def download_and_crop_video(video_url, output_path): cropped_clip.close() os.replace(temp_output, output_path) + + if os.path.exists(temp_input): + os.remove(temp_input) + logging.info(f"✅ Video cropped to 59 seconds: {output_path}") return output_path except Exception as e: logging.error(f"❌ Error processing video: {e}") - return None - - -def build_video_embed(video_blob, alt_text): - try: - return models.AppBskyEmbedVideo.Main( - video=video_blob, - alt=alt_text - ) - except AttributeError: - logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.") + if os.path.exists(temp_input): + os.remove(temp_input) return None @@ -504,15 +542,17 @@ def sync_feeds(args): try: real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url) if not real_video_url: - logging.warning(f"⚠️ Could not resolve real video URL for {tweet.tweet_url}") + logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}") continue cropped_video_path = download_and_crop_video(real_video_url, temp_video_path) if not cropped_video_path: + logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}") continue video_blob = get_blob_from_file(cropped_video_path, bsky_client) if not video_blob: + logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}") continue video_embed = build_video_embed(video_blob, dynamic_alt)