fix: prevent video URL bleed-through and stale listener across tweets
- Add `page.remove_listener("response", handle_response)` to the `finally`
block of `extract_video_url_from_tweet_page` to detach the network
listener before the page is closed, preventing ghost callbacks
from leaking captured URLs across tweet iterations.
- Confirmed that `build_media_fingerprint` already uses `canonicalize_tweet_url`
as the stable video identifier instead of the unreliable `media_url_https`,
avoiding false fingerprint collisions between different video tweets.
- Confirmed that `sync_feeds` already guards against a `None` video URL after
extraction, ensuring no silent fallthrough to stale captures.
This commit is contained in:
@@ -2667,7 +2667,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
||||
page = browser_context.new_page()
|
||||
best_m3u8_url = None
|
||||
best_video_mp4_url = None
|
||||
seen_urls = set()
|
||||
seen_urls = set() # ← scoped per call, so already reset per tweet ✅
|
||||
|
||||
def is_audio_only_mp4(url, content_type):
|
||||
url_l = url.lower()
|
||||
@@ -2727,9 +2727,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
||||
return best_m3u8_url or best_video_mp4_url
|
||||
|
||||
try:
|
||||
logging.info(
|
||||
f"🎬 Opening tweet page to capture video URL: {tweet_url}"
|
||||
)
|
||||
logging.info(f"🎬 Opening tweet page to capture video URL: {tweet_url}")
|
||||
page.goto(tweet_url, wait_until="domcontentloaded", timeout=40000)
|
||||
time.sleep(2)
|
||||
|
||||
@@ -2755,9 +2753,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
||||
time.sleep(1)
|
||||
|
||||
if not current_best() and player.count() > 0:
|
||||
logging.info(
|
||||
"🔁 No media URL found yet, retrying player interaction..."
|
||||
)
|
||||
logging.info("🔁 No media URL found yet, retrying player interaction...")
|
||||
try:
|
||||
player.click(force=True, timeout=5000)
|
||||
time.sleep(PLAYWRIGHT_RETRY_SLEEP_S)
|
||||
@@ -2791,9 +2787,9 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
||||
)
|
||||
return None
|
||||
finally:
|
||||
page.remove_listener("response", handle_response) # ← FIX 1: detach before close
|
||||
page.close()
|
||||
|
||||
|
||||
def _probe_video_duration(file_path):
|
||||
probe_cmd = [
|
||||
"ffprobe",
|
||||
|
||||
Reference in New Issue
Block a user