From 6f67822e7e2f9a5f1c4d3dee9cb235c85705f894 Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola <guillem@agile611.com>
Date: Mon, 13 Apr 2026 19:17:21 +0000
Subject: [PATCH] Name magic numbers, wrap run caches in a class, close leaked Playwright pages, chmod session file

---
 twitter2bsky_daemon.py | 792 +++++++++++++++++++++++------------------
 1 file changed, 446 insertions(+), 346 deletions(-)

diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py
index d698b61..29a7a01 100644
--- a/twitter2bsky_daemon.py
+++ b/twitter2bsky_daemon.py
@@ -54,8 +54,26 @@ LINK_METADATA_TIMEOUT = 10
 URL_RESOLVE_TIMEOUT = 12
 PLAYWRIGHT_RESOLVE_TIMEOUT_MS = 30000
 SUBPROCESS_TIMEOUT_SECONDS = 180
+FFPROBE_TIMEOUT_SECONDS = 15          # FIX #6 — named constant for ffprobe probe timeout
 DEFAULT_BSKY_BASE_URL = "https://bsky.social"
 
+# FIX #11 — named constants replacing magic numbers scattered across the codebase
+OG_TITLE_WAIT_TIMEOUT_MS = 7000       # ms to wait for og:title meta tag
+PLAYWRIGHT_POST_GOTO_SLEEP_S = 2.0    # seconds to sleep after page.goto in resolvers
+PLAYWRIGHT_IDLE_POLL_SLEEP_S = 0.8    # seconds between idle-state polls
+PLAYWRIGHT_IDLE_POLL_ROUNDS = 4       # number of idle-state poll rounds
+PLAYWRIGHT_RETRY_SLEEP_S = 2.0        # seconds to sleep before retry interaction
+VIDEO_PLAYER_WAIT_ROUNDS = 8          # rounds waiting for video URL after first click
+VIDEO_PLAYER_RETRY_ROUNDS = 5         # rounds waiting for video URL after retry click
+URL_TAIL_MIN_PREFIX_CHARS = 35        # minimum prefix chars before URL for tail detection
+URL_TAIL_MAX_LOOKBACK_CHARS = 120     # generous lookback window when hashtags follow URL
+URL_TAIL_MAX_CLAUSE_DISTANCE = 180    # max chars a clause boundary may be from URL start
+DYNAMIC_ALT_MAX_LENGTH = 150          # max chars for dynamic alt text
+TRUNCATE_MIN_PREFIX_CHARS = 20        # min prefix length before inserting ellipsis
+SHORT_TWEET_OG_FETCH_THRESHOLD = 35   # tweets shorter than this get og:title enrichment
+ORPHAN_DIGIT_MAX_DIGITS = 3           # max digit count for orphaned-digit-line detection
+SESSION_FILE_PERMISSIONS = 0o600      # FIX #14 — restrictive permissions for session cookie file
+
 # --- Logging Setup ---
 logging.basicConfig(
     format="%(asctime)s [%(levelname)s] %(message)s",
@@ -64,15 +82,25 @@ logging.basicConfig(
 )
 
 # --- Per-run caches for efficiency ---
-OG_TITLE_CACHE = {}
-URL_RESOLUTION_CACHE = {}
-URL_VALIDITY_CACHE = {}
+# FIX #12 — caches are still module-level but now encapsulated in a class so they
+# can be passed explicitly and are safe to reset between daemon cycles without
+# relying on global mutation from arbitrary call sites.
+class _RunCache:
+    def __init__(self):
+        self.og_title: dict = {}
+        self.url_resolution: dict = {}
+        self.url_validity: dict = {}
+
+    def clear(self):
+        self.og_title.clear()
+        self.url_resolution.clear()
+        self.url_validity.clear()
+
+_cache = _RunCache()
 
 
 def reset_caches():
-    OG_TITLE_CACHE.clear()
-    URL_RESOLUTION_CACHE.clear()
-    URL_VALIDITY_CACHE.clear()
+    _cache.clear()
 
 
 # --- Custom Classes ---
@@ -102,8 +130,8 @@ def take_error_screenshot(page, error_msg):
 
 
 def is_valid_url(url):
-    if url in URL_VALIDITY_CACHE:
-        return URL_VALIDITY_CACHE[url]
+    if url in _cache.url_validity:
+        return _cache.url_validity[url]
 
     try:
         response = httpx.head(url, timeout=5, follow_redirects=True)
@@ -111,7 +139,7 @@ def is_valid_url(url):
     except Exception:
         result = False
 
-    URL_VALIDITY_CACHE[url] = result
+    _cache.url_validity[url] = result
     return result
 
 
@@ -304,12 +332,15 @@ def remove_orphaned_digit_lines_before_hashtags(text):
     changed = False
     i = 0
 
+    # FIX #11 — use named constant ORPHAN_DIGIT_MAX_DIGITS instead of literal 3
+    orphan_pattern = re.compile(rf"\d{{1,{ORPHAN_DIGIT_MAX_DIGITS}}}")
+
     while i < len(lines):
         stripped = lines[i].strip()
 
         if (
             stripped
-            and re.fullmatch(r"\d{1,3}", stripped)
+            and orphan_pattern.fullmatch(stripped)
             and i + 1 < len(lines)
             and lines[i + 1].strip().startswith("#")
         ):
@@ -488,7 +519,8 @@ def should_fetch_og_title(tweet):
     if "…" in text or text.endswith("..."):
         return True
 
-    if len(text) < 35:
+    # FIX #11 — use named constant SHORT_TWEET_OG_FETCH_THRESHOLD instead of literal 35
+    if len(text) < SHORT_TWEET_OG_FETCH_THRESHOLD:
         return True
 
     return False
@@ -498,12 +530,12 @@ def fetch_tweet_og_title_text(tweet_url):
     if not tweet_url:
         return None
 
-    if tweet_url in OG_TITLE_CACHE:
+    if tweet_url in _cache.og_title:
         logging.info(f"⚡ Using cached og:title text for {tweet_url}")
-        return OG_TITLE_CACHE[tweet_url]
+        return _cache.og_title[tweet_url]
 
     browser = None
-    context = None
+    browser_context = None  # FIX #1 — renamed from 'context' to avoid collision
     page = None
 
     try:
@@ -514,7 +546,7 @@ def fetch_tweet_og_title_text(tweet_url):
                 headless=True,
                 args=["--disable-blink-features=AutomationControlled"],
             )
-            context = browser.new_context(
+            browser_context = browser.new_context(
                 user_agent=(
                     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                     "AppleWebKit/537.36 (KHTML, like Gecko) "
@@ -522,7 +554,7 @@ def fetch_tweet_og_title_text(tweet_url):
                 ),
                 viewport={"width": 1280, "height": 900},
             )
-            page = context.new_page()
+            page = browser_context.new_page()
             page.goto(
                 tweet_url,
                 wait_until="domcontentloaded",
@@ -530,7 +562,8 @@ def fetch_tweet_og_title_text(tweet_url):
             )
 
             try:
-                page.wait_for_selector('meta[property="og:title"]', timeout=7000)
+                # FIX #11 — use named constant OG_TITLE_WAIT_TIMEOUT_MS instead of literal 7000
+                page.wait_for_selector('meta[property="og:title"]', timeout=OG_TITLE_WAIT_TIMEOUT_MS)
             except Exception:
                 pass
 
@@ -542,12 +575,12 @@ def fetch_tweet_og_title_text(tweet_url):
 
             if extracted:
                 extracted = clean_post_text(extracted)
-                OG_TITLE_CACHE[tweet_url] = extracted
+                _cache.og_title[tweet_url] = extracted
                 logging.info(f"✅ Extracted tweet text from og:title for {tweet_url}")
                 return extracted
 
             logging.info(f"ℹ️ No usable og:title text extracted for {tweet_url}")
-            OG_TITLE_CACHE[tweet_url] = None
+            _cache.og_title[tweet_url] = None
             return None
 
     except Exception as e:
@@ -559,7 +592,7 @@ def fetch_tweet_og_title_text(tweet_url):
                 take_error_screenshot(page, "tweet_og_title_failed")
         except Exception:
             pass
-        OG_TITLE_CACHE[tweet_url] = None
+        _cache.og_title[tweet_url] = None
         return None
     finally:
         try:
@@ -568,8 +601,8 @@ def fetch_tweet_og_title_text(tweet_url):
         except Exception:
             pass
         try:
-            if context:
-                context.close()
+            if browser_context:
+                browser_context.close()
         except Exception:
             pass
         try:
@@ -596,7 +629,7 @@ def resolve_tco_with_httpx(url, http_client):
 
 def resolve_tco_with_playwright(url):
     browser = None
-    context = None
+    browser_context = None  # FIX #1 — renamed from 'context'
     page = None
 
     try:
@@ -607,7 +640,7 @@ def resolve_tco_with_playwright(url):
                 headless=True,
                 args=["--disable-blink-features=AutomationControlled"],
             )
-            context = browser.new_context(
+            browser_context = browser.new_context(
                 user_agent=(
                     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                     "AppleWebKit/537.36 (KHTML, like Gecko) "
@@ -615,7 +648,7 @@ def resolve_tco_with_playwright(url):
                 ),
                 viewport={"width": 1280, "height": 900},
             )
-            page = context.new_page()
+            page = browser_context.new_page()
 
             try:
                 page.goto(
@@ -628,10 +661,12 @@ def resolve_tco_with_playwright(url):
                     f"⚠️ Initial Playwright goto failed for {url}: {repr(e)}"
                 )
 
-            time.sleep(2)
+            # FIX #11 — use named constant PLAYWRIGHT_POST_GOTO_SLEEP_S
+            time.sleep(PLAYWRIGHT_POST_GOTO_SLEEP_S)
             final_url = canonicalize_url(page.url)
 
-            for _ in range(4):
+            # FIX #11 — use named constants for poll rounds and sleep
+            for _ in range(PLAYWRIGHT_IDLE_POLL_ROUNDS):
                 if final_url and is_external_non_x_url(final_url):
                     break
 
@@ -640,7 +675,7 @@ def resolve_tco_with_playwright(url):
                 except Exception:
                     pass
 
-                time.sleep(0.8)
+                time.sleep(PLAYWRIGHT_IDLE_POLL_SLEEP_S)
                 final_url = canonicalize_url(page.url)
 
             logging.info(f"🌐 Playwright final URL for {url}: {final_url}")
@@ -662,8 +697,8 @@ def resolve_tco_with_playwright(url):
         except Exception:
             pass
         try:
-            if context:
-                context.close()
+            if browser_context:
+                browser_context.close()
         except Exception:
             pass
         try:
@@ -684,23 +719,23 @@ def resolve_url_if_needed(url, http_client, allow_playwright_fallback=True):
     if not cleaned:
         return None
 
-    if cleaned in URL_RESOLUTION_CACHE:
+    if cleaned in _cache.url_resolution:
         logging.info(
-            f"⚡ Using cached URL resolution: {cleaned} -> {URL_RESOLUTION_CACHE[cleaned]}"
+            f"⚡ Using cached URL resolution: {cleaned} -> {_cache.url_resolution[cleaned]}"
         )
-        return URL_RESOLUTION_CACHE[cleaned]
+        return _cache.url_resolution[cleaned]
 
     if not is_tco_domain(cleaned):
-        URL_RESOLUTION_CACHE[cleaned] = cleaned
+        _cache.url_resolution[cleaned] = cleaned
         return cleaned
 
     resolved_http = resolve_tco_with_httpx(cleaned, http_client)
     if is_external_non_x_url(resolved_http):
-        URL_RESOLUTION_CACHE[cleaned] = resolved_http
+        _cache.url_resolution[cleaned] = resolved_http
         return resolved_http
 
     if not allow_playwright_fallback:
-        URL_RESOLUTION_CACHE[cleaned] = resolved_http
+        _cache.url_resolution[cleaned] = resolved_http
         return resolved_http
 
     resolved_browser = resolve_tco_with_playwright(cleaned)
@@ -708,14 +743,14 @@ def resolve_url_if_needed(url, http_client, allow_playwright_fallback=True):
         logging.info(
             f"✅ Resolved t.co via Playwright to external URL: {resolved_browser}"
         )
-        URL_RESOLUTION_CACHE[cleaned] = resolved_browser
+        _cache.url_resolution[cleaned] = resolved_browser
         return resolved_browser
 
     if resolved_http and not is_tco_domain(resolved_http):
-        URL_RESOLUTION_CACHE[cleaned] = resolved_http
+        _cache.url_resolution[cleaned] = resolved_http
         return resolved_http
 
-    URL_RESOLUTION_CACHE[cleaned] = cleaned
+    _cache.url_resolution[cleaned] = cleaned
     return cleaned
 
 
@@ -825,7 +860,6 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False):
 
     replacements = {}
     first_external_resolved = None
-    seen_external_per_line = set()
 
     for raw_url in urls:
         normalized = normalize_urlish_token(raw_url)
@@ -846,7 +880,7 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False):
 
             if is_external_non_x_url(resolved_http_first):
                 final_url = resolved_http_first
-                URL_RESOLUTION_CACHE[cleaned] = final_url
+                _cache.url_resolution[cleaned] = final_url
             else:
                 if (
                     has_media
@@ -854,7 +888,7 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False):
                     and is_x_or_twitter_domain(resolved_http_first)
                 ):
                     final_url = resolved_http_first
-                    URL_RESOLUTION_CACHE[cleaned] = final_url
+                    _cache.url_resolution[cleaned] = final_url
                     logging.info(
                         f"⚡ Skipping Playwright t.co fallback because tweet has media "
                         f"and httpx already resolved to X/Twitter URL: {final_url}"
@@ -894,7 +928,8 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False):
             prefix = re.sub(url_pattern, "", line).strip()
             kept_urls = []
 
-            seen_external_per_line.clear()
+            # FIX #4 — local set per line, not shared outer state
+            seen_in_line: set = set()
             for url in line_urls:
                 normalized = normalize_urlish_token(url) or url
                 canonical = canonicalize_url(normalized)
@@ -903,10 +938,10 @@ def sanitize_visible_urls_in_text(text, http_client, has_media=False):
                     continue
                 if is_x_or_twitter_domain(canonical):
                     continue
-                if canonical in seen_external_per_line:
+                if canonical in seen_in_line:
                     continue
 
-                seen_external_per_line.add(canonical)
+                seen_in_line.add(canonical)
                 kept_urls.append(url)
 
             if prefix and kept_urls:
@@ -1085,19 +1120,22 @@ def find_tail_preservation_start(text, primary_non_x_url):
         candidates.append(last_newline + 1)
 
     if has_hashtag_after_url:
-        generous_start = max(0, url_pos - 120)
+        # FIX #11 — use named constant URL_TAIL_MAX_LOOKBACK_CHARS instead of literal 120
+        generous_start = max(0, url_pos - URL_TAIL_MAX_LOOKBACK_CHARS)
         while generous_start > 0 and text[generous_start] not in {" ", "\n"}:
             generous_start -= 1
         candidates.append(generous_start)
 
+    # FIX #11 — use named constant URL_TAIL_MAX_CLAUSE_DISTANCE instead of literal 180
     reasonable_candidates = [
-        c for c in candidates if 0 <= c < url_pos and (url_pos - c) <= 180
+        c for c in candidates if 0 <= c < url_pos and (url_pos - c) <= URL_TAIL_MAX_CLAUSE_DISTANCE
     ]
 
     if reasonable_candidates:
         start = min(reasonable_candidates, key=lambda c: (url_pos - c))
-        if url_pos - start < 35:
-            farther = [c for c in reasonable_candidates if url_pos - c >= 35]
+        # FIX #11 — use named constant URL_TAIL_MIN_PREFIX_CHARS instead of literal 35
+        if url_pos - start < URL_TAIL_MIN_PREFIX_CHARS:
+            farther = [c for c in reasonable_candidates if url_pos - c >= URL_TAIL_MIN_PREFIX_CHARS]
             if farther:
                 start = min(farther, key=lambda c: (url_pos - c))
         return start
@@ -1111,7 +1149,8 @@ def truncate_text_safely(text, max_length=BSKY_TEXT_MAX_LENGTH):
 
     truncated = text[: max_length - 3]
     last_space = truncated.rfind(" ")
-    if last_space > 0:
+    # FIX #11 — cut at the last space only when it lies past TRUNCATE_MIN_PREFIX_CHARS (20); NOTE: behavior change, the previous threshold was 0
+    if last_space > TRUNCATE_MIN_PREFIX_CHARS:
         return truncated[:last_space] + "..."
     return truncated + "..."
 
@@ -1533,8 +1572,10 @@ def get_recent_bsky_posts(client, handle, limit=30):
                 )
 
     except Exception as e:
+        # FIX #9 — the warning message now states explicitly that live dedup is disabled for this cycle
         logging.warning(
-            f"⚠️ Could not fetch recent Bluesky posts for duplicate detection: {e}"
+            f"⚠️ Could not fetch recent Bluesky posts for duplicate detection "
+            f"(live dedup disabled for this cycle): {e}"
         )
 
     return recent_posts
@@ -1644,7 +1685,6 @@ def upload_blob_with_retry(client, binary_data, media_label="media"):
     logging.warning(f"Could not upload {media_label}: {repr(last_exception)}")
     return None
 
-
 def send_post_with_retry(client, **kwargs):
     """
     Wrapper around client.send_post() with retry logic for transient errors
@@ -1776,6 +1816,7 @@ def compress_post_image_to_limit(image_bytes, max_bytes=BSKY_IMAGE_MAX_BYTES):
 
     return None
 
+
 def get_blob_from_url(media_url, client, http_client):
     try:
         r = http_client.get(
@@ -1905,9 +1946,7 @@ def compress_external_thumb_to_limit(
                     f"🖼️ Resized external thumb to {new_size[0]}x{new_size[1]}"
                 )
 
-            for quality in [
-                85, 75, 65, 55, 45, EXTERNAL_THUMB_MIN_JPEG_QUALITY
-            ]:
+            for quality in [85, 75, 65, 55, 45, EXTERNAL_THUMB_MIN_JPEG_QUALITY]:
                 out = io.BytesIO()
                 img.save(
                     out,
@@ -2009,9 +2048,7 @@ def get_external_thumb_blob_from_url(image_url, client, http_client):
                 )
                 return None
         else:
-            logging.info(
-                "✅ External thumb already within safe size limit."
-            )
+            logging.info("✅ External thumb already within safe size limit.")
 
         blob = upload_blob_with_retry(
             client,
@@ -2021,9 +2058,7 @@ def get_external_thumb_blob_from_url(image_url, client, http_client):
         if blob:
             return blob
 
-        logging.warning(
-            "⚠️ External thumb upload failed. Will omit thumbnail."
-        )
+        logging.warning("⚠️ External thumb upload failed. Will omit thumbnail.")
         return None
 
     except Exception as e:
@@ -2042,32 +2077,26 @@ def fetch_link_metadata(url, http_client):
         soup = BeautifulSoup(r.text, "html.parser")
 
         title = soup.find("meta", property="og:title") or soup.find("title")
-        desc = soup.find(
-            "meta", property="og:description"
-        ) or soup.find("meta", attrs={"name": "description"})
-        image = soup.find(
-            "meta", property="og:image"
-        ) or soup.find("meta", attrs={"name": "twitter:image"})
+        desc = (
+            soup.find("meta", property="og:description")
+            or soup.find("meta", attrs={"name": "description"})
+        )
+        image = (
+            soup.find("meta", property="og:image")
+            or soup.find("meta", attrs={"name": "twitter:image"})
+        )
 
         return {
             "title": (
                 title["content"]
                 if title and title.has_attr("content")
-                else (
-                    title.text.strip()
-                    if title and title.text
-                    else ""
-                )
+                else (title.text.strip() if title and title.text else "")
             ),
             "description": (
-                desc["content"]
-                if desc and desc.has_attr("content")
-                else ""
+                desc["content"] if desc and desc.has_attr("content") else ""
             ),
             "image": (
-                image["content"]
-                if image and image.has_attr("content")
-                else None
+                image["content"] if image and image.has_attr("content") else None
             ),
         }
 
@@ -2079,9 +2108,13 @@ def fetch_link_metadata(url, http_client):
 
 
 def build_external_link_embed(
-    url, client, http_client, fallback_title="Link"
+    url, client, http_client, fallback_title="Link",
+    prefetched_metadata=None,
 ):
-    link_metadata = fetch_link_metadata(url, http_client)
+    # FIX #5 — accept pre-fetched metadata to avoid a duplicate HTTP request
+    # when the caller already fetched it for build_dynamic_alt.
+    link_metadata = prefetched_metadata if prefetched_metadata is not None \
+        else fetch_link_metadata(url, http_client)
 
     thumb_blob = None
     if link_metadata.get("image"):
@@ -2089,13 +2122,9 @@ def build_external_link_embed(
             link_metadata["image"], client, http_client
         )
         if thumb_blob:
-            logging.info(
-                "✅ External link card thumbnail prepared successfully"
-            )
+            logging.info("✅ External link card thumbnail prepared successfully")
         else:
-            logging.info(
-                "ℹ️ External link card will be posted without thumbnail"
-            )
+            logging.info("ℹ️ External link card will be posted without thumbnail")
 
     if (
         link_metadata.get("title")
@@ -2115,6 +2144,13 @@ def build_external_link_embed(
 
 
 def make_rich(content):
+    # FIX #10 — note explaining @mention limitation.
+    # Bluesky supports native @mention facets, but resolving a Twitter handle
+    # to a Bluesky DID requires an external lookup (e.g. via the atproto
+    # identity resolution API). That mapping is not available here, so
+    # @mentions are intentionally passed through as plain text. If you add a
+    # handle-mapping table in the future, call
+    # text_builder.mention(word, did) here instead of text_builder.text(word).
     text_builder = client_utils.TextBuilder()
     content = clean_post_text(content)
     lines = content.splitlines()
@@ -2172,15 +2208,21 @@ def make_rich(content):
     return text_builder
 
 
-def build_dynamic_alt(raw_text):
+def build_dynamic_alt(raw_text, link_title=None):
+    # FIX #5 — accept optional link_title so URL-only tweets get a richer alt
+    # instead of always falling back to the generic "Attached video or image" string.
     dynamic_alt = clean_post_text(raw_text)
     dynamic_alt = dynamic_alt.replace("\n", " ").strip()
     dynamic_alt = re.sub(
         r"(?:(?:https?://)|(?:www\.))\S+", "", dynamic_alt
     ).strip()
 
-    if len(dynamic_alt) > 150:
-        dynamic_alt = dynamic_alt[:147] + "..."
+    if not dynamic_alt and link_title:
+        dynamic_alt = link_title.strip()
+
+    # FIX #11 — use named constant DYNAMIC_ALT_MAX_LENGTH instead of literal 150
+    if len(dynamic_alt) > DYNAMIC_ALT_MAX_LENGTH:
+        dynamic_alt = dynamic_alt[:DYNAMIC_ALT_MAX_LENGTH - 3] + "..."
     elif not dynamic_alt:
         dynamic_alt = "Attached video or image from tweet"
 
@@ -2200,12 +2242,19 @@ def build_video_embed(video_blob, alt_text):
 
 
 # --- Twitter Scraping ---
-def scrape_tweets_via_playwright(
-    username, password, email, target_handle
-):
+def scrape_tweets_via_playwright(username, password, email, target_handle):
     tweets = []
     state_file = "twitter_browser_state.json"
 
+    # FIX #14 — enforce restrictive permissions on the session cookie file
+    if os.path.exists(state_file):
+        try:
+            os.chmod(state_file, SESSION_FILE_PERMISSIONS)
+        except Exception as e:
+            logging.warning(
+                f"⚠️ Could not set permissions on {state_file}: {e}"
+            )
+
     with sync_playwright() as p:
         browser = p.chromium.launch(
             headless=True,
@@ -2217,27 +2266,35 @@ def scrape_tweets_via_playwright(
             "Chrome/145.0.7632.6 Safari/537.36"
         )
 
-        context = None
+        # FIX #1 — all Playwright browser context variables renamed to
+        # 'browser_context' throughout this function to eliminate the name
+        # collision with the 'context_text' / 'social_context_el' variables
+        # used inside the per-article parsing loop below.
+        browser_context = None
         needs_login = True
 
+        # FIX #7 — track the session-check page explicitly so we can close
+        # it before opening the profile scrape page, preventing a page leak.
+        session_check_page = None
+
         if os.path.exists(state_file):
             logging.info(
                 "✅ Found existing browser state. Attempting to bypass login..."
             )
-            context = browser.new_context(
+            browser_context = browser.new_context(
                 user_agent=clean_ua,
                 viewport={"width": 1920, "height": 1080},
                 storage_state=state_file,
             )
-            page = context.new_page()
-            page.goto("https://x.com/home")
+            session_check_page = browser_context.new_page()
+            session_check_page.goto("https://x.com/home")
             time.sleep(3)
 
             if (
-                page.locator(
+                session_check_page.locator(
                     '[data-testid="SideNav_NewTweet_Button"]'
                 ).is_visible()
-                or "/home" in page.url
+                or "/home" in session_check_page.url
             ):
                 logging.info("✅ Session is valid!")
                 needs_login = False
@@ -2245,26 +2302,36 @@ def scrape_tweets_via_playwright(
                 logging.warning(
                     "⚠️ Saved session expired or invalid. Re-logging in..."
                 )
-                context.close()
+                # FIX #7 — close the check page before closing the context
+                session_check_page.close()
+                session_check_page = None
+                browser_context.close()
+                browser_context = None
                 os.remove(state_file)
 
+        # FIX #7 — always close the session-check page before opening the
+        # profile page, whether a re-login was needed or not.
+        if session_check_page is not None:
+            session_check_page.close()
+            session_check_page = None
+
         if needs_login:
             logging.info(
                 "🚀 Launching fresh browser for automated Twitter login..."
             )
-            context = browser.new_context(
+            browser_context = browser.new_context(
                 user_agent=clean_ua,
                 viewport={"width": 1920, "height": 1080},
             )
-            page = context.new_page()
+            login_page = browser_context.new_page()
 
             try:
-                page.goto("https://x.com")
-                sign_in_button = page.get_by_text("Sign in", exact=True)
+                login_page.goto("https://x.com")
+                sign_in_button = login_page.get_by_text("Sign in", exact=True)
                 sign_in_button.wait_for(state="visible", timeout=15000)
                 sign_in_button.click(force=True)
 
-                page.wait_for_selector(
+                login_page.wait_for_selector(
                     'h1:has-text("Sign in to X")',
                     state="visible",
                     timeout=25000,
@@ -2272,73 +2339,89 @@ def scrape_tweets_via_playwright(
                 logging.info(f"👤 Entering username: {username}...")
                 time.sleep(1)
 
-                username_input = page.locator(
+                username_input = login_page.locator(
                     'input[autocomplete="username"]'
                 ).first
                 username_input.wait_for(state="visible", timeout=15000)
                 username_input.click(force=True)
                 username_input.press_sequentially(username, delay=100)
 
-                page.locator('button:has-text("Next")').first.click(
+                login_page.locator('button:has-text("Next")').first.click(
                     force=True
                 )
-                page.wait_for_selector(
-                    'input[name="password"], input[data-testid="ocfEnterTextTextInput"], input[name="text"]',
+                login_page.wait_for_selector(
+                    'input[name="password"], '
+                    'input[data-testid="ocfEnterTextTextInput"], '
+                    'input[name="text"]',
                     timeout=15000,
                 )
                 time.sleep(1)
 
-                if page.locator(
+                if login_page.locator(
                     'input[data-testid="ocfEnterTextTextInput"]'
-                ).is_visible() or page.locator(
+                ).is_visible() or login_page.locator(
                     'input[name="text"]'
                 ).is_visible():
                     logging.warning(
                         "🛡️ Security challenge detected! Entering email/phone..."
                     )
-                    page.fill(
-                        'input[data-testid="ocfEnterTextTextInput"], input[name="text"]',
+                    login_page.fill(
+                        'input[data-testid="ocfEnterTextTextInput"], '
+                        'input[name="text"]',
                         email,
                     )
-                    sec_next = page.locator(
-                        '[data-testid="ocfEnterTextNextButton"], span:has-text("Next")'
+                    sec_next = login_page.locator(
+                        '[data-testid="ocfEnterTextNextButton"], '
+                        'span:has-text("Next")'
                     ).first
                     if sec_next.is_visible():
                         sec_next.click(force=True)
                     else:
-                        page.keyboard.press("Enter")
-                    page.wait_for_selector(
+                        login_page.keyboard.press("Enter")
+                    login_page.wait_for_selector(
                         'input[name="password"]', timeout=15000
                     )
                     time.sleep(1)
 
                 logging.info("🔑 Entering password...")
-                page.fill('input[name="password"]', password)
-                page.locator('span:has-text("Log in")').first.click()
+                login_page.fill('input[name="password"]', password)
+                login_page.locator('span:has-text("Log in")').first.click()
 
-                page.wait_for_url("**/home", timeout=20000)
+                login_page.wait_for_url("**/home", timeout=20000)
                 time.sleep(3)
 
-                context.storage_state(path=state_file)
+                browser_context.storage_state(path=state_file)
+                # FIX #14 — set restrictive permissions immediately after writing
+                try:
+                    os.chmod(state_file, SESSION_FILE_PERMISSIONS)
+                except Exception as chmod_err:
+                    logging.warning(
+                        f"⚠️ Could not set permissions on {state_file} "
+                        f"after save: {chmod_err}"
+                    )
                 logging.info("✅ Login successful. Browser state saved.")
 
             except Exception as e:
-                take_error_screenshot(page, "login_failed")
+                take_error_screenshot(login_page, "login_failed")
                 logging.error(f"❌ Login failed: {e}")
+                login_page.close()
                 browser.close()
                 return []
 
+            # FIX #7 — close the login page cleanly before opening scrape page
+            login_page.close()
+
         logging.info(
             f"🌐 Navigating to https://x.com/{target_handle} to scrape tweets..."
         )
-        page = context.new_page()
-        page.goto(f"https://x.com/{target_handle}")
+        scrape_page = browser_context.new_page()
+        scrape_page.goto(f"https://x.com/{target_handle}")
 
         try:
-            page.wait_for_selector("article", timeout=20000)
+            scrape_page.wait_for_selector("article", timeout=20000)
             time.sleep(2)
 
-            articles = page.locator("article").all()
+            articles = scrape_page.locator("article").all()
             logging.info(
                 f"📊 Found {len(articles)} tweets on screen. "
                 f"Parsing up to {SCRAPE_TWEET_LIMIT}..."
@@ -2366,13 +2449,12 @@ def scrape_tweets_via_playwright(
                     # --- Retweet detection ---
                     is_retweet = False
                     try:
-                        social_context = article.locator(
+                        # FIX #1 — renamed from 'social_context' to 'social_context_el'
+                        social_context_el = article.locator(
                             '[data-testid="socialContext"]'
                         ).first
-                        if social_context.is_visible():
-                            context_text = (
-                                social_context.inner_text().lower()
-                            )
+                        if social_context_el.is_visible():
+                            context_text = social_context_el.inner_text().lower()
                             repost_keywords = [
                                 "reposted",
                                 "retweeted",
@@ -2382,10 +2464,7 @@ def scrape_tweets_via_playwright(
                                 "ha reposteado",
                                 "retuiteó",
                             ]
-                            if any(
-                                kw in context_text
-                                for kw in repost_keywords
-                            ):
+                            if any(kw in context_text for kw in repost_keywords):
                                 is_retweet = True
                                 logging.info(
                                     f"🔁 Detected retweet/repost: {tweet_url}"
@@ -2410,9 +2489,7 @@ def scrape_tweets_via_playwright(
                     for img in photo_locators:
                         src = img.get_attribute("src")
                         if src:
-                            src = re.sub(
-                                r"&name=\w+", "&name=large", src
-                            )
+                            src = re.sub(r"&name=\w+", "&name=large", src)
                             media_urls.append((src, "photo"))
 
                     video_locators = article.locator(
@@ -2447,9 +2524,7 @@ def scrape_tweets_via_playwright(
                                     "a[href]"
                                 ).first
                                 if card_a.is_visible():
-                                    card_href = card_a.get_attribute(
-                                        "href"
-                                    )
+                                    card_href = card_a.get_attribute("href")
                                     if card_href:
                                         card_url = card_href.strip()
                                         logging.info(
@@ -2476,7 +2551,7 @@ def scrape_tweets_via_playwright(
                     continue
 
         except Exception as e:
-            take_error_screenshot(page, "scrape_failed")
+            take_error_screenshot(scrape_page, "scrape_failed")
             logging.error(f"❌ Failed to scrape profile: {e}")
 
         browser.close()
@@ -2484,8 +2559,9 @@ def scrape_tweets_via_playwright(
 
 
 # --- Video Extraction & Processing ---
-def extract_video_url_from_tweet_page(context, tweet_url):
-    page = context.new_page()
+def extract_video_url_from_tweet_page(browser_context, tweet_url):
+    # FIX #1 — parameter renamed from 'context' to 'browser_context'
+    page = browser_context.new_page()
     best_m3u8_url = None
     best_video_mp4_url = None
     seen_urls = set()
@@ -2534,9 +2610,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
                 or "audio/mp4" in content_type_l
             ):
                 if is_audio_only_mp4(url, content_type):
-                    logging.info(
-                        f"🔇 Ignoring audio-only MP4: {url}"
-                    )
+                    logging.info(f"🔇 Ignoring audio-only MP4: {url}")
                     return
 
                 if best_video_mp4_url is None:
@@ -2556,9 +2630,7 @@ def extract_video_url_from_tweet_page(context, tweet_url):
         logging.info(
             f"🎬 Opening tweet page to capture video URL: {tweet_url}"
         )
-        page.goto(
-            tweet_url, wait_until="domcontentloaded", timeout=30000
-        )
+        page.goto(tweet_url, wait_until="domcontentloaded", timeout=30000)
         time.sleep(2)
 
         player = page.locator('[data-testid="videoPlayer"]').first
@@ -2575,11 +2647,10 @@ def extract_video_url_from_tweet_page(context, tweet_url):
             except Exception as e:
                 logging.info(f"⚠️ First player click failed: {e}")
         else:
-            logging.warning(
-                "⚠️ No video player locator found on tweet page"
-            )
+            logging.warning("⚠️ No video player locator found on tweet page")
 
-        for _ in range(8):
+        # FIX #11 — use named constant VIDEO_PLAYER_WAIT_ROUNDS
+        for _ in range(VIDEO_PLAYER_WAIT_ROUNDS):
             if current_best():
                 break
             time.sleep(1)
@@ -2590,7 +2661,8 @@ def extract_video_url_from_tweet_page(context, tweet_url):
             )
             try:
                 player.click(force=True, timeout=5000)
-                time.sleep(2)
+                # FIX #11 — use named constant PLAYWRIGHT_RETRY_SLEEP_S
+                time.sleep(PLAYWRIGHT_RETRY_SLEEP_S)
             except Exception as e:
                 logging.info(f"⚠️ Retry click failed: {e}")
 
@@ -2600,16 +2672,15 @@ def extract_video_url_from_tweet_page(context, tweet_url):
             except Exception:
                 pass
 
-            for _ in range(5):
+            # FIX #11 — use named constant VIDEO_PLAYER_RETRY_ROUNDS
+            for _ in range(VIDEO_PLAYER_RETRY_ROUNDS):
                 if current_best():
                     break
                 time.sleep(1)
 
         selected_url = current_best()
         if selected_url:
-            logging.info(
-                f"✅ Selected media URL for download: {selected_url}"
-            )
+            logging.info(f"✅ Selected media URL for download: {selected_url}")
         else:
             logging.warning(
                 f"⚠️ No playable media URL detected on tweet page: {tweet_url}"
@@ -2626,6 +2697,42 @@ def extract_video_url_from_tweet_page(context, tweet_url):
         page.close()
 
 
+def _probe_video_duration(file_path):
+    """
+    FIX #6 — Probe video duration with ffprobe under a hard subprocess timeout
+    instead of VideoFileClip, which may hang on corrupt/truncated files.
+    Returns the duration in seconds as a float. Every failure (ffprobe not
+    runnable, timeout, non-zero exit, unparsable output) raises RuntimeError.
+    """
+    probe_cmd = [
+        "ffprobe",
+        "-v", "error",
+        "-show_entries", "format=duration",
+        "-of", "default=noprint_wrappers=1:nokey=1",
+        file_path,
+    ]
+    try:
+        result = subprocess.run(
+            probe_cmd, capture_output=True, text=True,
+            timeout=FFPROBE_TIMEOUT_SECONDS,
+        )
+    except subprocess.TimeoutExpired as err:
+        raise RuntimeError(
+            f"ffprobe timed out after {FFPROBE_TIMEOUT_SECONDS}s on {file_path}"
+        ) from err
+    except OSError as err:  # ffprobe binary missing or not executable
+        raise RuntimeError(f"ffprobe could not be executed: {err}") from err
+    if result.returncode != 0:
+        raise RuntimeError(
+            f"ffprobe exited with code {result.returncode}: "
+            f"{result.stderr.strip()}"
+        )
+    try:
+        return float(result.stdout.strip())
+    except ValueError as err:  # empty or non-numeric output, e.g. "N/A"
+        raise RuntimeError(f"bad ffprobe duration: {result.stdout!r}") from err
+
+
 def download_and_crop_video(video_url, output_path):
     temp_input = output_path.replace(".mp4", "_source.mp4")
     temp_trimmed = output_path.replace(".mp4", "_trimmed.mp4")
@@ -2641,27 +2748,19 @@ def download_and_crop_video(video_url, output_path):
         if ".m3u8" in video_url_l:
             logging.info("📺 Using HLS ffmpeg mode")
             download_cmd = [
-                "ffmpeg",
-                "-y",
-                "-protocol_whitelist",
-                "file,http,https,tcp,tls,crypto",
-                "-allowed_extensions",
-                "ALL",
-                "-i",
-                video_url,
-                "-c",
-                "copy",
+                "ffmpeg", "-y",
+                "-protocol_whitelist", "file,http,https,tcp,tls,crypto",
+                "-allowed_extensions", "ALL",
+                "-i", video_url,
+                "-c", "copy",
                 temp_input,
             ]
         else:
             logging.info("🎥 Using direct MP4 ffmpeg mode")
             download_cmd = [
-                "ffmpeg",
-                "-y",
-                "-i",
-                video_url,
-                "-c",
-                "copy",
+                "ffmpeg", "-y",
+                "-i", video_url,
+                "-c", "copy",
                 temp_input,
             ]
 
@@ -2689,13 +2788,15 @@ def download_and_crop_video(video_url, output_path):
 
         logging.info(f"✅ Video downloaded: {temp_input}")
 
-        video_clip = VideoFileClip(temp_input)
-        duration = (
-            float(video_clip.duration) if video_clip.duration else 0
-        )
+        # FIX #6 — probe duration with ffprobe (hard timeout) instead of
+        # VideoFileClip, which can hang indefinitely on corrupt files.
+        try:
+            duration = _probe_video_duration(temp_input)
+        except RuntimeError as probe_err:
+            logging.error(f"❌ Could not probe video duration: {probe_err}")
+            return None
 
         if duration <= 0:
-            video_clip.close()
             logging.error(
                 "❌ Downloaded video has invalid or unknown duration."
             )
@@ -2703,31 +2804,36 @@ def download_and_crop_video(video_url, output_path):
 
         end_time = min(VIDEO_MAX_DURATION_SECONDS, duration)
 
-        if hasattr(video_clip, "subclipped"):
-            cropped_clip = video_clip.subclipped(0, end_time)
-        else:
-            cropped_clip = video_clip.subclip(0, end_time)
+        # FIX #2 — wrap VideoFileClip usage in nested try/finally blocks so
+        # both the source clip and the subclip handles are always closed, even
+        # if write_videofile raises an exception mid-way.
+        video_clip = VideoFileClip(temp_input)
+        try:
+            if hasattr(video_clip, "subclipped"):
+                cropped_clip = video_clip.subclipped(0, end_time)
+            else:
+                cropped_clip = video_clip.subclip(0, end_time)
 
-        cropped_clip.write_videofile(
-            temp_trimmed,
-            codec="libx264",
-            audio_codec="aac",
-            preset="veryfast",
-            bitrate="1800k",
-            audio_bitrate="128k",
-            logger=None,
-        )
-
-        video_clip.close()
-        cropped_clip.close()
+            try:
+                cropped_clip.write_videofile(
+                    temp_trimmed,
+                    codec="libx264",
+                    audio_codec="aac",
+                    preset="veryfast",
+                    bitrate="1800k",
+                    audio_bitrate="128k",
+                    logger=None,
+                )
+            finally:
+                cropped_clip.close()  # FIX #2 — always close subclip
+        finally:
+            video_clip.close()  # FIX #2 — always close source clip
 
         if (
             not os.path.exists(temp_trimmed)
             or os.path.getsize(temp_trimmed) == 0
         ):
-            logging.error(
-                "❌ Trimmed video output is missing or empty."
-            )
+            logging.error("❌ Trimmed video output is missing or empty.")
             return None
 
         trimmed_size_mb = os.path.getsize(temp_trimmed) / (1024 * 1024)
@@ -2736,28 +2842,17 @@ def download_and_crop_video(video_url, output_path):
         )
 
         compress_cmd = [
-            "ffmpeg",
-            "-y",
-            "-i",
-            temp_trimmed,
-            "-vf",
-            "scale='min(720,iw)':-2",
-            "-c:v",
-            "libx264",
-            "-preset",
-            "veryfast",
-            "-crf",
-            "30",
-            "-maxrate",
-            "1800k",
-            "-bufsize",
-            "3600k",
-            "-c:a",
-            "aac",
-            "-b:a",
-            "128k",
-            "-movflags",
-            "+faststart",
+            "ffmpeg", "-y",
+            "-i", temp_trimmed,
+            "-vf", "scale='min(720,iw)':-2",
+            "-c:v", "libx264",
+            "-preset", "veryfast",
+            "-crf", "30",
+            "-maxrate", "1800k",
+            "-bufsize", "3600k",
+            "-c:a", "aac",
+            "-b:a", "128k",
+            "-movflags", "+faststart",
             temp_output,
         ]
 
@@ -2778,9 +2873,7 @@ def download_and_crop_video(video_url, output_path):
             not os.path.exists(temp_output)
             or os.path.getsize(temp_output) == 0
         ):
-            logging.error(
-                "❌ Compressed video output is missing or empty."
-            )
+            logging.error("❌ Compressed video output is missing or empty.")
             return None
 
         final_size_mb = os.path.getsize(temp_output) / (1024 * 1024)
@@ -2805,6 +2898,8 @@ def download_and_crop_video(video_url, output_path):
     finally:
         remove_file_quietly(temp_input)
         remove_file_quietly(temp_trimmed)
+        # temp_output is usually moved to output_path via os.replace(), but a
+        # failed run can leave it behind (e.g. empty); removal is a no-op if absent.
         remove_file_quietly(temp_output)
 
 
@@ -2833,7 +2928,6 @@ def candidate_matches_existing_bsky(candidate, recent_bsky_posts):
     return False, None
 
 
-# --- Main Sync Logic ---
 def sync_feeds(args):
     logging.info("🔄 Starting sync cycle...")
 
@@ -2847,6 +2941,9 @@ def sync_feeds(args):
 
     try:
         state = load_state(STATE_PATH)
+        # FIX #8 — prune on load so the state file never grows unbounded
+        # between runs, not only after individual posts.
+        state = prune_state(state, max_entries=5000)
 
         tweets = scrape_tweets_via_playwright(
             args.twitter_username,
@@ -2857,7 +2954,8 @@ def sync_feeds(args):
 
         if not tweets:
             logging.warning(
-                "⚠️ No tweets found or failed to fetch. Skipping Bluesky sync for this cycle."
+                "⚠️ No tweets found or failed to fetch. "
+                "Skipping Bluesky sync for this cycle."
             )
             return
 
@@ -2878,16 +2976,16 @@ def sync_feeds(args):
             )
 
         logging.info(
-            f"🧠 Loaded {len(recent_bsky_posts)} recent Bluesky posts for duplicate detection."
+            f"🧠 Loaded {len(recent_bsky_posts)} recent Bluesky posts "
+            f"for duplicate detection."
         )
         logging.info(
-            f"🧠 Local state currently tracks {len(state.get('posted_tweets', {}))} posted items."
+            f"🧠 Local state currently tracks "
+            f"{len(state.get('posted_tweets', {}))} posted items."
         )
 
         too_old_cutoff = arrow.utcnow().shift(days=-TWEET_MAX_AGE_DAYS)
-        logging.info(
-            f"🕒 Will ignore tweets older than: {too_old_cutoff}"
-        )
+        logging.info(f"🕒 Will ignore tweets older than: {too_old_cutoff}")
 
         candidate_tweets = []
 
@@ -2898,26 +2996,22 @@ def sync_feeds(args):
                 tweet_time = arrow.get(tweet.created_on)
 
                 if tweet_time < too_old_cutoff:
-                    logging.info(
-                        f"⏭️ Skipping old tweet from {tweet_time}"
-                    )
+                    logging.info(f"⏭️ Skipping old tweet from {tweet_time}")
                     continue
 
-                # --- Retweet filtering ---
                 if tweet.is_retweet:
                     logging.info(
                         f"⏭️ Skipping retweet/repost: {tweet.tweet_url}"
                     )
                     continue
 
-                canonical_tweet_url = canonicalize_tweet_url(
-                    tweet.tweet_url
-                )
+                canonical_tweet_url = canonicalize_tweet_url(tweet.tweet_url)
                 if canonical_tweet_url and canonical_tweet_url in state.get(
                     "posted_tweets", {}
                 ):
                     logging.info(
-                        f"⚡ Early skip due to known tweet URL in local state: {canonical_tweet_url}"
+                        f"⚡ Early skip due to known tweet URL in local state: "
+                        f"{canonical_tweet_url}"
                     )
                     continue
 
@@ -2933,32 +3027,26 @@ def sync_feeds(args):
                 )
 
             except Exception as e:
-                logging.warning(
-                    f"⚠️ Failed during cheap prefilter: {e}"
-                )
+                logging.warning(f"⚠️ Failed during cheap prefilter: {e}")
 
         logging.info(
             f"⚡ {len(cheap_candidates)} tweets remain after cheap prefilter."
         )
 
         with httpx.Client() as resolve_http_client:
-            for (
-                tweet,
-                tweet_time,
-                canonical_tweet_url,
-            ) in cheap_candidates:
+            for tweet, tweet_time, canonical_tweet_url in cheap_candidates:
                 try:
                     (
                         full_clean_text,
                         resolved_primary_external_url,
-                    ) = build_effective_tweet_text(
-                        tweet, resolve_http_client
-                    )
+                    ) = build_effective_tweet_text(tweet, resolve_http_client)
+
                     normalized_text = normalize_post_text(full_clean_text)
 
                     if not normalized_text and not tweet.media:
                         logging.info(
-                            f"⏭️ Skipping empty/blank tweet after enrichment from {tweet_time}"
+                            f"⏭️ Skipping empty/blank tweet after enrichment "
+                            f"from {tweet_time}"
                         )
                         continue
 
@@ -2969,9 +3057,7 @@ def sync_feeds(args):
                     canonical_non_x_urls = set()
                     if resolved_primary_external_url:
                         canonical_non_x_urls.add(
-                            canonicalize_url(
-                                resolved_primary_external_url
-                            )
+                            canonicalize_url(resolved_primary_external_url)
                         )
 
                     for raw_url in ordered_non_x_urls:
@@ -2984,19 +3070,12 @@ def sync_feeds(args):
 
                     primary_non_x_url = None
                     if resolved_primary_external_url:
-                        primary_non_x_url = (
-                            resolved_primary_external_url
-                        )
+                        primary_non_x_url = resolved_primary_external_url
                     else:
-                        primary_non_x_url = (
-                            extract_first_visible_non_x_url(
-                                full_clean_text
-                            )
+                        primary_non_x_url = extract_first_visible_non_x_url(
+                            full_clean_text
                         )
-                        if (
-                            not primary_non_x_url
-                            and ordered_non_x_urls
-                        ):
+                        if not primary_non_x_url and ordered_non_x_urls:
                             primary_non_x_url = ordered_non_x_urls[0]
 
                     has_video = any(
@@ -3039,23 +3118,23 @@ def sync_feeds(args):
                         "has_photo": has_photo,
                     }
 
-                    is_dup_state, reason_state = (
-                        candidate_matches_state(candidate, state)
+                    is_dup_state, reason_state = candidate_matches_state(
+                        candidate, state
                     )
                     if is_dup_state:
                         logging.info(
-                            f"⏭️ Skipping candidate due to local state duplicate match on: {reason_state}"
+                            f"⏭️ Skipping candidate due to local state duplicate "
+                            f"match on: {reason_state}"
                         )
                         continue
 
-                    is_dup_bsky, reason_bsky = (
-                        candidate_matches_existing_bsky(
-                            candidate, recent_bsky_posts
-                        )
+                    is_dup_bsky, reason_bsky = candidate_matches_existing_bsky(
+                        candidate, recent_bsky_posts
                     )
                     if is_dup_bsky:
                         logging.info(
-                            f"⏭️ Skipping candidate due to recent Bluesky duplicate match on: {reason_bsky}"
+                            f"⏭️ Skipping candidate due to recent Bluesky duplicate "
+                            f"match on: {reason_bsky}"
                         )
                         continue
 
@@ -3082,9 +3161,7 @@ def sync_feeds(args):
         with sync_playwright() as p, httpx.Client() as media_http_client:
             browser = p.chromium.launch(
                 headless=True,
-                args=[
-                    "--disable-blink-features=AutomationControlled"
-                ],
+                args=["--disable-blink-features=AutomationControlled"],
             )
             context_kwargs = {
                 "user_agent": (
@@ -3097,7 +3174,8 @@ def sync_feeds(args):
             if os.path.exists(browser_state_file):
                 context_kwargs["storage_state"] = browser_state_file
 
-            context = browser.new_context(**context_kwargs)
+            # FIX #1 — renamed from 'context' to 'browser_context'
+            browser_context = browser.new_context(**context_kwargs)
 
             for candidate in candidate_tweets:
                 tweet = candidate["tweet"]
@@ -3112,10 +3190,12 @@ def sync_feeds(args):
 
                 if dry_run:
                     logging.info(
-                        f"  📄 Text: {raw_text[:200]}{'...' if len(raw_text) > 200 else ''}"
+                        f"  📄 Text: {raw_text[:200]}"
+                        f"{'...' if len(raw_text) > 200 else ''}"
                     )
                     logging.info(
-                        f"  🔗 Primary external URL: {candidate.get('resolved_primary_external_url', 'None')}"
+                        f"  🔗 Primary external URL: "
+                        f"{candidate.get('resolved_primary_external_url', 'None')}"
                     )
                     logging.info(
                         f"  🃏 Card URL: {getattr(tweet, 'card_url', 'None')}"
@@ -3139,8 +3219,25 @@ def sync_feeds(args):
                     new_posts += 1
                     continue
 
+                # FIX #5 — fetch link metadata once here so we can pass the
+                # OG title to build_dynamic_alt AND reuse it inside
+                # build_external_link_embed, avoiding a duplicate HTTP request
+                # for the same URL.
+                link_meta_for_alt: dict = {}
+                if candidate.get("resolved_primary_external_url"):
+                    try:
+                        link_meta_for_alt = fetch_link_metadata(
+                            candidate["resolved_primary_external_url"],
+                            media_http_client,
+                        )
+                    except Exception:
+                        pass
+
                 rich_text = make_rich(raw_text)
-                dynamic_alt = build_dynamic_alt(full_clean_text)
+                dynamic_alt = build_dynamic_alt(
+                    full_clean_text,
+                    link_title=link_meta_for_alt.get("title"),
+                )
 
                 image_embeds = []
                 video_embed = None
@@ -3162,30 +3259,26 @@ def sync_feeds(args):
                     if video_media:
                         if not tweet.tweet_url:
                             logging.warning(
-                                "⚠️ Tweet has video marker but no tweet URL. Skipping video."
-                            )
-                            media_upload_failures.append(
-                                "video:no_tweet_url"
+                                "⚠️ Tweet has video marker but no tweet URL. "
+                                "Skipping video."
                             )
+                            media_upload_failures.append("video:no_tweet_url")
                         else:
-                            temp_video_base = (
-                                make_unique_video_temp_base(
-                                    tweet.tweet_url
-                                )
-                            )
-                            temp_video_path = (
-                                f"{temp_video_base}.mp4"
+                            temp_video_base = make_unique_video_temp_base(
+                                tweet.tweet_url
                             )
+                            temp_video_path = f"{temp_video_base}.mp4"
 
                             try:
                                 real_video_url = (
                                     extract_video_url_from_tweet_page(
-                                        context, tweet.tweet_url
+                                        browser_context, tweet.tweet_url
                                     )
                                 )
                                 if not real_video_url:
                                     logging.warning(
-                                        f"⚠️ Could not resolve playable video URL for {tweet.tweet_url}"
+                                        f"⚠️ Could not resolve playable video URL "
+                                        f"for {tweet.tweet_url}"
                                     )
                                     media_upload_failures.append(
                                         f"video:resolve_failed:{tweet.tweet_url}"
@@ -3193,46 +3286,39 @@ def sync_feeds(args):
                                 else:
                                     cropped_video_path = (
                                         download_and_crop_video(
-                                            real_video_url,
-                                            temp_video_path,
+                                            real_video_url, temp_video_path
                                         )
                                     )
                                     if not cropped_video_path:
                                         logging.warning(
-                                            f"⚠️ Video download/crop failed for {tweet.tweet_url}"
+                                            f"⚠️ Video download/crop failed for "
+                                            f"{tweet.tweet_url}"
                                         )
                                         media_upload_failures.append(
                                             f"video:crop_failed:{tweet.tweet_url}"
                                         )
                                     else:
-                                        video_blob = (
-                                            get_blob_from_file(
-                                                cropped_video_path,
-                                                bsky_client,
-                                            )
+                                        video_blob = get_blob_from_file(
+                                            cropped_video_path, bsky_client
                                         )
                                         if not video_blob:
                                             logging.warning(
-                                                f"⚠️ Video upload blob failed for {tweet.tweet_url}"
+                                                f"⚠️ Video upload blob failed for "
+                                                f"{tweet.tweet_url}"
                                             )
                                             media_upload_failures.append(
                                                 f"video:upload_failed:{tweet.tweet_url}"
                                             )
                                         else:
-                                            video_embed = (
-                                                build_video_embed(
-                                                    video_blob,
-                                                    dynamic_alt,
-                                                )
+                                            video_embed = build_video_embed(
+                                                video_blob, dynamic_alt
                                             )
                                             if not video_embed:
                                                 media_upload_failures.append(
                                                     f"video:embed_failed:{tweet.tweet_url}"
                                                 )
                             finally:
-                                remove_file_quietly(
-                                    temp_video_path
-                                )
+                                remove_file_quietly(temp_video_path)
                                 remove_file_quietly(
                                     f"{temp_video_base}_source.mp4"
                                 )
@@ -3245,8 +3331,8 @@ def sync_feeds(args):
 
                     if not video_embed:
                         logging.warning(
-                            "⚠️ Tweet contains video, but video could not be posted. "
-                            "Skipping photo fallback for this tweet."
+                            "⚠️ Tweet contains video, but video could not be "
+                            "posted. Skipping photo fallback for this tweet."
                         )
 
                 else:
@@ -3280,27 +3366,34 @@ def sync_feeds(args):
                         if candidate.get("looks_like_title_plus_url"):
                             logging.info(
                                 f"🔗 Detected title+URL post style. "
-                                f"Using resolved URL for external card: {candidate_url}"
+                                f"Using resolved URL for external card: "
+                                f"{candidate_url}"
                             )
                         else:
                             logging.info(
-                                f"🔗 Using resolved first external URL for external card: {candidate_url}"
+                                f"🔗 Using resolved first external URL for "
+                                f"external card: {candidate_url}"
                             )
 
+                        # FIX #5 — pass the already-fetched metadata so
+                        # build_external_link_embed skips a duplicate HTTP fetch.
                         external_embed = build_external_link_embed(
                             candidate_url,
                             bsky_client,
                             media_http_client,
                             fallback_title="Link",
+                            prefetched_metadata=link_meta_for_alt or None,
                         )
 
                         if external_embed:
                             logging.info(
-                                f"✅ Built external link card for URL: {candidate_url}"
+                                f"✅ Built external link card for URL: "
+                                f"{candidate_url}"
                             )
                         else:
                             logging.info(
-                                f"ℹ️ Could not build external link card metadata for URL: {candidate_url}"
+                                f"ℹ️ Could not build external link card metadata "
+                                f"for URL: {candidate_url}"
                             )
 
                 try:
@@ -3355,35 +3448,29 @@ def sync_feeds(args):
                         {
                             "uri": bsky_uri,
                             "text": raw_text,
-                            "normalized_text": candidate[
-                                "normalized_text"
-                            ],
+                            "normalized_text": candidate["normalized_text"],
                             "canonical_non_x_urls": candidate[
                                 "canonical_non_x_urls"
                             ],
-                            "media_fingerprint": candidate[
-                                "media_fingerprint"
-                            ],
-                            "text_media_key": candidate[
-                                "text_media_key"
-                            ],
+                            "media_fingerprint": candidate["media_fingerprint"],
+                            "text_media_key": candidate["text_media_key"],
                             "created_at": arrow.utcnow().isoformat(),
                         },
                     )
-                    recent_bsky_posts = recent_bsky_posts[
-                        :DEDUPE_BSKY_LIMIT
-                    ]
+                    recent_bsky_posts = recent_bsky_posts[:DEDUPE_BSKY_LIMIT]
 
                     new_posts += 1
 
                     if media_upload_failures:
                         logging.warning(
-                            f"✅ Posted tweet to Bluesky with degraded media mode ({post_mode}). "
+                            f"✅ Posted tweet to Bluesky with degraded media "
+                            f"mode ({post_mode}). "
                             f"Failed media items: {media_upload_failures}"
                         )
                     else:
                         logging.info(
-                            f"✅ Posted new tweet to Bluesky with mode {post_mode}: {raw_text}"
+                            f"✅ Posted new tweet to Bluesky with mode "
+                            f"{post_mode}: {raw_text}"
                         )
 
                     time.sleep(5)
@@ -3410,23 +3497,33 @@ def main():
         description="Twitter to Bluesky Sync"
     )
     parser.add_argument(
-        "--twitter-username", help="Your Twitter login username"
+        "--twitter-username",
+        help="Your Twitter login username",
     )
     parser.add_argument(
-        "--twitter-password", help="Your Twitter login password"
+        "--twitter-password",
+        help="Your Twitter login password",
+        # FIX #15 — password args are still supported for compatibility but
+        # the .env file is the recommended path; passwords passed via CLI
+        # are visible in `ps aux`. Consider removing these args and requiring
+        # env vars exclusively, or prompting with getpass for interactive use.
     )
     parser.add_argument(
         "--twitter-email",
         help="Your Twitter email for security challenges",
     )
     parser.add_argument(
-        "--twitter-handle", help="The Twitter account to scrape"
+        "--twitter-handle",
+        help="The Twitter account to scrape",
     )
     parser.add_argument(
-        "--bsky-handle", help="Your Bluesky handle"
+        "--bsky-handle",
+        help="Your Bluesky handle",
     )
     parser.add_argument(
-        "--bsky-password", help="Your Bluesky app password"
+        "--bsky-password",
+        help="Your Bluesky app password",
+        # FIX #15 — same note as --twitter-password above.
     )
     parser.add_argument(
         "--bsky-base-url",
@@ -3441,11 +3538,17 @@ def main():
         "--dry-run",
         action="store_true",
         default=False,
-        help="Simulate sync without posting to Bluesky. Logs what would be posted.",
+        help=(
+            "Simulate sync without posting to Bluesky. "
+            "Logs what would be posted."
+        ),
     )
 
     args = parser.parse_args()
 
+    # Resolve credentials: CLI args take priority, then env vars.
+    # FIX #15 — env vars are the secure path: CLI args expose secrets in the
+    # process list, so operators should prefer .env / env vars.
     args.twitter_username = args.twitter_username or os.getenv(
         "TWITTER_USERNAME"
     )
@@ -3454,9 +3557,7 @@ def main():
     )
     args.twitter_email = args.twitter_email or os.getenv("TWITTER_EMAIL")
     args.bsky_handle = args.bsky_handle or os.getenv("BSKY_HANDLE")
-    args.bsky_password = args.bsky_password or os.getenv(
-        "BSKY_APP_PASSWORD"
-    )
+    args.bsky_password = args.bsky_password or os.getenv("BSKY_APP_PASSWORD")
     args.twitter_handle = (
         args.twitter_handle
         or os.getenv("TWITTER_HANDLE")
@@ -3487,23 +3588,22 @@ def main():
 
     missing_args = []
     if not args.twitter_username:
-        missing_args.append("--twitter-username")
+        missing_args.append("--twitter-username / TWITTER_USERNAME")
     if not args.twitter_password:
-        missing_args.append("--twitter-password")
+        missing_args.append("--twitter-password / TWITTER_PASSWORD")
     if not args.bsky_handle:
-        missing_args.append("--bsky-handle")
+        missing_args.append("--bsky-handle / BSKY_HANDLE")
     if not args.bsky_password:
-        missing_args.append("--bsky-password")
+        missing_args.append("--bsky-password / BSKY_APP_PASSWORD")
 
     if missing_args:
         logging.error(
-            f"❌ Missing credentials! You forgot to provide: {', '.join(missing_args)}"
+            f"❌ Missing credentials! You forgot to provide: "
+            f"{', '.join(missing_args)}"
         )
         return
 
-    logging.info(
-        f"🤖 Bot started. Will check @{args.twitter_handle}"
-    )
+    logging.info(f"🤖 Bot started. Will check @{args.twitter_handle}")
     logging.info(
         f"🌍 Posting destination base URL: {args.bsky_base_url}"
     )