From 17f66117bd639844e9c00fadcbcf81ee1f9a1f63 Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola <guillem.hernandez.sola@gmail.com>
Date: Mon, 30 Mar 2026 17:46:19 +0200
Subject: [PATCH] Skip audio-only MP4 streams and harden video crop cleanup

---
 twitter2bsky_daemon.py | 87 ++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py
index 2272fcc..2301f01 100644
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -334,34 +334,58 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
 def extract_video_url_from_tweet_page(context, tweet_url):
     """
     Opens a tweet page and captures the best real video URL.
+
     Preference order:
-    1. .mp4
-    2. .m3u8
-    Ignores .m4s fragment files.
+    1. real video .mp4
+    2. .m3u8 playlist
+
+    Ignores:
+    - .m4s fragments
+    - audio-only mp4 URLs
     """
     page = context.new_page()
-    best_mp4_url = None
+    best_video_mp4_url = None
     best_m3u8_url = None
 
+    def is_audio_only_mp4(url, content_type):
+        url_l = url.lower()
+        content_type_l = content_type.lower()
+        return (
+            "/aud/" in url_l or
+            "/audio/" in url_l or
+            "mp4a" in url_l or
+            ("audio/" in content_type_l and "video/" not in content_type_l)
+        )
+
     def handle_response(response):
-        nonlocal best_mp4_url, best_m3u8_url
+        nonlocal best_video_mp4_url, best_m3u8_url
         try:
-            url = response.url.lower()
-            content_type = response.headers.get("content-type", "").lower()
+            url = response.url
+            url_l = url.lower()
+            content_type = response.headers.get("content-type", "")
+            content_type_l = content_type.lower()
 
-            if ".m4s" in url:
+            if ".m4s" in url_l:
                 return
 
-            if ".mp4" in url or "video/mp4" in content_type:
-                if best_mp4_url is None:
-                    best_mp4_url = response.url
-                    logging.info(f"🎥 Found MP4 video URL: {response.url}")
+            if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type):
+                logging.info(f"🔇 Ignoring audio-only MP4: {url}")
                 return
 
-            if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type:
+            if ".mp4" in url_l or "video/mp4" in content_type_l:
+                if best_video_mp4_url is None:
+                    best_video_mp4_url = url
+                    logging.info(f"🎥 Found VIDEO MP4 URL: {url}")
+                return
+
+            if (
+                ".m3u8" in url_l or
+                "application/vnd.apple.mpegurl" in content_type_l or
+                "application/x-mpegurl" in content_type_l
+            ):
                 if best_m3u8_url is None:
-                    best_m3u8_url = response.url
-                    logging.info(f"📺 Found HLS playlist URL: {response.url}")
+                    best_m3u8_url = url
+                    logging.info(f"📺 Found HLS playlist URL: {url}")
                 return
 
         except Exception:
@@ -382,7 +406,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
             except Exception:
                 pass
 
-        return best_mp4_url or best_m3u8_url
+        return best_video_mp4_url or best_m3u8_url
 
     except Exception as e:
         logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}")
@@ -398,6 +422,7 @@ def download_and_crop_video(video_url, output_path):
     Requires ffmpeg installed on the system.
     """
     temp_input = output_path.replace(".mp4", "_source.mp4")
+    temp_output = output_path.replace(".mp4", "_cropped.mp4")
 
     try:
         logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}")
@@ -419,17 +444,27 @@ def download_and_crop_video(video_url, output_path):
             logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}")
             return None
 
+        if not os.path.exists(temp_input) or os.path.getsize(temp_input) == 0:
+            logging.error("❌ Downloaded video source file is missing or empty.")
+            return None
+
         logging.info(f"✅ Video downloaded: {temp_input}")
 
         video_clip = VideoFileClip(temp_input)
-        end_time = min(59, float(video_clip.duration))
+        duration = float(video_clip.duration) if video_clip.duration else 0
+
+        if duration <= 0:
+            video_clip.close()
+            logging.error("❌ Downloaded video has invalid or unknown duration.")
+            return None
+
+        end_time = min(59, duration)
 
         if hasattr(video_clip, "subclipped"):
             cropped_clip = video_clip.subclipped(0, end_time)
         else:
             cropped_clip = video_clip.subclip(0, end_time)
 
-        temp_output = output_path.replace(".mp4", "_cropped.mp4")
         cropped_clip.write_videofile(
             temp_output,
             codec="libx264",
@@ -440,6 +475,10 @@ def download_and_crop_video(video_url, output_path):
         video_clip.close()
         cropped_clip.close()
 
+        if not os.path.exists(temp_output) or os.path.getsize(temp_output) == 0:
+            logging.error("❌ Cropped video output is missing or empty.")
+            return None
+
         os.replace(temp_output, output_path)
 
         if os.path.exists(temp_input):
@@ -450,10 +489,16 @@ def download_and_crop_video(video_url, output_path):
 
     except Exception as e:
         logging.error(f"❌ Error processing video: {e}")
-        if os.path.exists(temp_input):
-            os.remove(temp_input)
         return None
 
+    finally:
+        for path in [temp_input, temp_output]:
+            if os.path.exists(path):
+                try:
+                    os.remove(path)
+                except Exception:
+                    pass
+
 
 # --- Main Sync Function ---
 def sync_feeds(args):
@@ -498,7 +543,7 @@ def sync_feeds(args):
             for tweet in reversed(tweets):
                 tweet_time = arrow.get(tweet.created_on)
 
-                #if tweet_time <= last_bsky_time
+                #if tweet_time <= last_bsky_time:
                 if False:
                     continue