From 09b9d1679178e0ad1b4ce712cb782994f174ffc5 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Tue, 5 May 2026 13:39:22 +0200 Subject: [PATCH] fix: prevent video URL bleed-through and stale listener across tweets - Add `page.remove_listener("response", handle_response)` in `extract_video_url_from_tweet_page` finally block to detach the network listener before page close, preventing ghost callbacks from leaking captured URLs across tweet iterations. - Confirmed `build_media_fingerprint` already uses `canonicalize_tweet_url` as stable video identifier instead of unreliable `media_url_https`, avoiding false fingerprint collisions between different video tweets. - Confirmed `sync_feeds` already guards against `None` video URL after extraction, ensuring no silent fallthrough to stale captures. --- twitter2bsky_daemon.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py index 53411bf..061f83d 100644 --- a/twitter2bsky_daemon.py +++ b/twitter2bsky_daemon.py @@ -2667,7 +2667,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url): page = browser_context.new_page() best_m3u8_url = None best_video_mp4_url = None - seen_urls = set() + seen_urls = set() # ← scoped per call, so already reset per tweet ✅ def is_audio_only_mp4(url, content_type): url_l = url.lower() @@ -2727,9 +2727,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url): return best_m3u8_url or best_video_mp4_url try: - logging.info( - f"🎬 Opening tweet page to capture video URL: {tweet_url}" - ) + logging.info(f"🎬 Opening tweet page to capture video URL: {tweet_url}") page.goto(tweet_url, wait_until="domcontentloaded", timeout=40000) time.sleep(2) @@ -2755,9 +2753,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url): time.sleep(1) if not current_best() and player.count() > 0: - logging.info( - "🔁 No media URL found yet, retrying player interaction..." - ) + logging.info("🔁 No media URL found yet, retrying player interaction...") try: player.click(force=True, timeout=5000) time.sleep(PLAYWRIGHT_RETRY_SLEEP_S) @@ -2791,9 +2787,9 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url): ) return None finally: + page.remove_listener("response", handle_response) # ← FIX 1: detach before close page.close() - def _probe_video_duration(file_path): probe_cmd = [ "ffprobe",