From 17f66117bd639844e9c00fadcbcf81ee1f9a1f63 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Mon, 30 Mar 2026 17:46:19 +0200 Subject: [PATCH] Added new yml --- twitter2bsky_daemon.py | 87 ++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 21 deletions(-) diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py index 2272fcc..2301f01 100644 --- a/twitter2bsky_daemon.py +++ b/twitter2bsky_daemon.py @@ -334,34 +334,58 @@ def scrape_tweets_via_playwright(username, password, email, target_handle): def extract_video_url_from_tweet_page(context, tweet_url): """ Opens a tweet page and captures the best real video URL. + Preference order: - 1. .mp4 - 2. .m3u8 - Ignores .m4s fragment files. + 1. real video .mp4 + 2. .m3u8 playlist + + Ignores: + - .m4s fragments + - audio-only mp4 URLs """ page = context.new_page() - best_mp4_url = None + best_video_mp4_url = None best_m3u8_url = None + def is_audio_only_mp4(url, content_type): + url_l = url.lower() + content_type_l = content_type.lower() + return ( + "/aud/" in url_l or + "/audio/" in url_l or + "mp4a" in url_l or + ("audio/" in content_type_l and "video/" not in content_type_l) + ) + def handle_response(response): - nonlocal best_mp4_url, best_m3u8_url + nonlocal best_video_mp4_url, best_m3u8_url try: - url = response.url.lower() - content_type = response.headers.get("content-type", "").lower() + url = response.url + url_l = url.lower() + content_type = response.headers.get("content-type", "") + content_type_l = content_type.lower() - if ".m4s" in url: + if ".m4s" in url_l: return - if ".mp4" in url or "video/mp4" in content_type: - if best_mp4_url is None: - best_mp4_url = response.url - logging.info(f"🎥 Found MP4 video URL: {response.url}") + if (".mp4" in url_l or "video/mp4" in content_type_l or "audio/mp4" in content_type_l) and is_audio_only_mp4(url, content_type): + logging.info(f"🔇 Ignoring audio-only MP4: {url}") return - if ".m3u8" in url or "application/vnd.apple.mpegurl" in content_type or "application/x-mpegurl" in content_type: + if ".mp4" in url_l or "video/mp4" in content_type_l: + if best_video_mp4_url is None: + best_video_mp4_url = url + logging.info(f"🎥 Found VIDEO MP4 URL: {url}") + return + + if ( + ".m3u8" in url_l or + "application/vnd.apple.mpegurl" in content_type_l or + "application/x-mpegurl" in content_type_l + ): if best_m3u8_url is None: - best_m3u8_url = response.url - logging.info(f"📺 Found HLS playlist URL: {response.url}") + best_m3u8_url = url + logging.info(f"📺 Found HLS playlist URL: {url}") return except Exception: @@ -382,7 +406,7 @@ def extract_video_url_from_tweet_page(context, tweet_url): except Exception: pass - return best_mp4_url or best_m3u8_url + return best_video_mp4_url or best_m3u8_url except Exception as e: logging.warning(f"⚠️ Could not extract video URL from tweet page {tweet_url}: {e}") @@ -398,6 +422,7 @@ def download_and_crop_video(video_url, output_path): Requires ffmpeg installed on the system. """ temp_input = output_path.replace(".mp4", "_source.mp4") + temp_output = output_path.replace(".mp4", "_cropped.mp4") try: logging.info(f"⬇️ Downloading video source with ffmpeg: {video_url}") @@ -419,17 +444,27 @@ def download_and_crop_video(video_url, output_path): logging.error(f"❌ ffmpeg download failed:\n{download_result.stderr}") return None + if not os.path.exists(temp_input) or os.path.getsize(temp_input) == 0: + logging.error("❌ Downloaded video source file is missing or empty.") + return None + logging.info(f"✅ Video downloaded: {temp_input}") video_clip = VideoFileClip(temp_input) - end_time = min(59, float(video_clip.duration)) + duration = float(video_clip.duration) if video_clip.duration else 0 + + if duration <= 0: + video_clip.close() + logging.error("❌ Downloaded video has invalid or unknown duration.") + return None + + end_time = min(59, duration) if hasattr(video_clip, "subclipped"): cropped_clip = video_clip.subclipped(0, end_time) else: cropped_clip = video_clip.subclip(0, end_time) - temp_output = output_path.replace(".mp4", "_cropped.mp4") cropped_clip.write_videofile( temp_output, codec="libx264", @@ -440,6 +475,10 @@ def download_and_crop_video(video_url, output_path): video_clip.close() cropped_clip.close() + if not os.path.exists(temp_output) or os.path.getsize(temp_output) == 0: + logging.error("❌ Cropped video output is missing or empty.") + return None + os.replace(temp_output, output_path) if os.path.exists(temp_input): @@ -450,10 +489,16 @@ def download_and_crop_video(video_url, output_path): except Exception as e: logging.error(f"❌ Error processing video: {e}") - if os.path.exists(temp_input): - os.remove(temp_input) return None + finally: + for path in [temp_input, temp_output]: + if os.path.exists(path): + try: + os.remove(path) + except Exception: + pass + # --- Main Sync Function --- def sync_feeds(args): @@ -498,7 +543,7 @@ def sync_feeds(args): for tweet in reversed(tweets): tweet_time = arrow.get(tweet.created_on) - #if tweet_time <= last_bsky_time + #if tweet_time <= last_bsky_time: if False: continue