fix: prevent video URL bleed-through and stale listener across tweets
- Add `page.remove_listener("response", handle_response)` to the
`finally` block of `extract_video_url_from_tweet_page` so the
network listener is detached before the page is closed, preventing
ghost callbacks from leaking captured URLs across tweet iterations.
- Confirmed that `build_media_fingerprint` already uses `canonicalize_tweet_url`
as the stable video identifier instead of the unreliable `media_url_https`,
avoiding false fingerprint collisions between different video tweets.
- Confirmed that `sync_feeds` already guards against a `None` video URL after
extraction, ensuring no silent fallthrough to stale captures.
This commit is contained in:
@@ -2667,7 +2667,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
|||||||
page = browser_context.new_page()
|
page = browser_context.new_page()
|
||||||
best_m3u8_url = None
|
best_m3u8_url = None
|
||||||
best_video_mp4_url = None
|
best_video_mp4_url = None
|
||||||
seen_urls = set()
|
seen_urls = set() # ← scoped per call, so already reset per tweet ✅
|
||||||
|
|
||||||
def is_audio_only_mp4(url, content_type):
|
def is_audio_only_mp4(url, content_type):
|
||||||
url_l = url.lower()
|
url_l = url.lower()
|
||||||
@@ -2727,9 +2727,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
|||||||
return best_m3u8_url or best_video_mp4_url
|
return best_m3u8_url or best_video_mp4_url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.info(
|
logging.info(f"🎬 Opening tweet page to capture video URL: {tweet_url}")
|
||||||
f"🎬 Opening tweet page to capture video URL: {tweet_url}"
|
|
||||||
)
|
|
||||||
page.goto(tweet_url, wait_until="domcontentloaded", timeout=40000)
|
page.goto(tweet_url, wait_until="domcontentloaded", timeout=40000)
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
@@ -2755,9 +2753,7 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
if not current_best() and player.count() > 0:
|
if not current_best() and player.count() > 0:
|
||||||
logging.info(
|
logging.info("🔁 No media URL found yet, retrying player interaction...")
|
||||||
"🔁 No media URL found yet, retrying player interaction..."
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
player.click(force=True, timeout=5000)
|
player.click(force=True, timeout=5000)
|
||||||
time.sleep(PLAYWRIGHT_RETRY_SLEEP_S)
|
time.sleep(PLAYWRIGHT_RETRY_SLEEP_S)
|
||||||
@@ -2791,9 +2787,9 @@ def extract_video_url_from_tweet_page(browser_context, tweet_url):
|
|||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
finally:
|
finally:
|
||||||
|
page.remove_listener("response", handle_response) # ← FIX 1: detach before close
|
||||||
page.close()
|
page.close()
|
||||||
|
|
||||||
|
|
||||||
def _probe_video_duration(file_path):
|
def _probe_video_duration(file_path):
|
||||||
probe_cmd = [
|
probe_cmd = [
|
||||||
"ffprobe",
|
"ffprobe",
|
||||||
|
|||||||
Reference in New Issue
Block a user