From f85457a0451af8b570c41ed9f0dc9a8588864814 Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola <guillem.hernandez.sola@gmail.com>
Date: Mon, 30 Mar 2026 17:38:35 +0200
Subject: [PATCH] Add HLS (.m3u8) video support and download videos via ffmpeg

---
 twitter2bsky_daemon.py | 100 ++++++++++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 30 deletions(-)

diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py
index eec393b..0ecb6bd 100644
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -5,6 +5,7 @@ import re
 import httpx
 import time
 import os
+import subprocess
 from dotenv import load_dotenv
 from atproto import Client, client_utils, models
 from playwright.sync_api import sync_playwright
@@ -167,6 +168,17 @@ def build_dynamic_alt(raw_text):
     return dynamic_alt
 
 
+def build_video_embed(video_blob, alt_text):
+    try:
+        return models.AppBskyEmbedVideo.Main(
+            video=video_blob,
+            alt=alt_text
+        )
+    except AttributeError:
+        logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
+        return None
+
+
 # --- Playwright Scraping ---
 def scrape_tweets_via_playwright(username, password, email, target_handle):
     """Logs in (or loads session) and scrapes tweets directly from the DOM."""
@@ -321,23 +333,37 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
 
 def extract_video_url_from_tweet_page(context, tweet_url):
     """
-    Opens a tweet page and captures the first real MP4 video request.
+    Opens a tweet page and captures the best real video URL.
+    Preference order:
+    1. .mp4
+    2. .m3u8
+    Ignores .m4s fragment files.
     """
     page = context.new_page()
-    found_video_url = None
+    best_mp4_url = None
+    best_m3u8_url = None
 
     def handle_response(response):
-        nonlocal found_video_url
+        nonlocal best_mp4_url, best_m3u8_url
         try:
-            url = response.url
-            content_type = response.headers.get("content-type", "")
+            url = response.url.lower()
+            content_type = response.headers.get("content-type", "").lower()
 
-            if found_video_url:
+            if ".m4s" in url:
                 return
 
             if ".mp4" in url or "video/mp4" in content_type:
-                found_video_url = url
-                logging.info(f"🎥 Found video URL: {url}")
+                if best_mp4_url is None:
+                    best_mp4_url = response.url
+                    logging.info(f"🎥 Found MP4 video URL: {response.url}")
+                return
+
+            if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
+                if best_m3u8_url is None:
+                    best_m3u8_url = response.url
+                    logging.info(f"📺 Found HLS playlist URL: {response.url}")
+                return
+
         except Exception:
             pass
 
@@ -352,11 +378,11 @@ def extract_video_url_from_tweet_page(context, tweet_url):
         if video_player.count() > 0:
             try:
                 video_player.click(force=True, timeout=3000)
-                time.sleep(3)
+                time.sleep(5)
             except Exception:
                 pass
 
-        return found_video_url
+        return best_mp4_url or best_m3u8_url
 
     except Exception as e:
         logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
@@ -367,18 +393,35 @@ def extract_video_url_from_tweet_page(context, tweet_url):
 
 # --- Video Processing ---
 def download_and_crop_video(video_url, output_path):
-    """Downloads the video and crops it to max 59 seconds."""
+    """
+    Downloads a video from MP4 or HLS (.m3u8), then trims it to max 59 seconds.
+    Requires ffmpeg installed on the system.
+    """
+    temp_input = output_path.replace(".mp4", "_source.mp4")
+
     try:
-        response = httpx.get(video_url, timeout=60, follow_redirects=True)
-        if response.status_code != 200:
-            logging.error(f"❌ Failed to download video: {video_url} (status {response.status_code})")
+        logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
+
+        download_cmd = [
+            "ffmpeg",
+            "-y",
+            "-i", video_url,
+            "-c", "copy",
+            temp_input,
+        ]
+        download_result = subprocess.run(
+            download_cmd,
+            capture_output=True,
+            text=True
+        )
+
+        if download_result.returncode != 0:
+            logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
             return None
 
-        with open(output_path, "wb") as f:
-            f.write(response.content)
-        logging.info(f"✅ Video downloaded: {output_path}")
+        logging.info(f"✅ Video downloaded: {temp_input}")
 
-        video_clip = VideoFileClip(output_path)
+        video_clip = VideoFileClip(temp_input)
         end_time = min(59, float(video_clip.duration))
 
         if hasattr(video_clip, "subclipped"):
@@ -398,22 +441,17 @@ def download_and_crop_video(video_url, output_path):
         cropped_clip.close()
 
         os.replace(temp_output, output_path)
+
+        if os.path.exists(temp_input):
+            os.remove(temp_input)
+
         logging.info(f"✅ Video cropped to 59 seconds: {output_path}")
         return output_path
 
     except Exception as e:
         logging.error(f"❌ Error processing video: {e}")
-        return None
-
-
-def build_video_embed(video_blob, alt_text):
-    try:
-        return models.AppBskyEmbedVideo.Main(
-            video=video_blob,
-            alt=alt_text
-        )
-    except AttributeError:
-        logging.error("❌ Your atproto version does not support AppBskyEmbedVideo. Upgrade atproto.")
+        if os.path.exists(temp_input):
+            os.remove(temp_input)
         return None
 
 
@@ -504,15 +542,17 @@ def sync_feeds(args):
                             try:
                                 real_video_url = extract_video_url_from_tweet_page(context, tweet.tweet_url)
                                 if not real_video_url:
-                                    logging.warning(f"⚠️ Could not resolve real video URL for {tweet.tweet_url}")
+                                    logging.warning(f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}")
                                     continue
 
                                 cropped_video_path = download_and_crop_video(real_video_url, temp_video_path)
                                 if not cropped_video_path:
+                                    logging.warning(f"⚠️ Video download/crop failed for {tweet.tweet_url}")
                                     continue
 
                                 video_blob = get_blob_from_file(cropped_video_path, bsky_client)
                                 if not video_blob:
+                                    logging.warning(f"⚠️ Video upload blob failed for {tweet.tweet_url}")
                                     continue
 
                                 video_embed = build_video_embed(video_blob, dynamic_alt)