From 6d4cfbd4b5b7c889adbf60cfb97d825edb2266e2 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Wed, 20 May 2026 07:16:07 +0200 Subject: [PATCH] Changes --- tiktok2bsky.py | 470 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 422 insertions(+), 48 deletions(-) diff --git a/tiktok2bsky.py b/tiktok2bsky.py index 4ee5cff..2a9a43d 100644 --- a/tiktok2bsky.py +++ b/tiktok2bsky.py @@ -78,24 +78,30 @@ VIDEO_MAX_AGE_DAYS = 3 VIDEO_MAX_DURATION_S = 179 # Bluesky hard limit is 180s -# Bluesky login retry config -BSKY_LOGIN_MAX_RETRIES = 4 -BSKY_LOGIN_BASE_DELAY = 15.0 -BSKY_LOGIN_MAX_DELAY = 120.0 -BSKY_LOGIN_JITTER_MAX = 10.0 +# ── Bluesky login retry config (ported from twitter2bsky.py) ───────────────── +BSKY_LOGIN_MAX_RETRIES = 6 +BSKY_LOGIN_BASE_DELAY = 15.0 +BSKY_LOGIN_MAX_DELAY = 600.0 +BSKY_LOGIN_JITTER_MAX = 5.0 +BSKY_LOGIN_RATE_LIMIT_DELAY = 90.0 # minimum wait on 429 +BSKY_LOGIN_RATE_LIMIT_MAX_DELAY = 600.0 # maximum wait on 429 -# Bluesky upload retry config +# ── Bluesky upload retry config ─────────────────────────────────────────────── BSKY_UPLOAD_MAX_RETRIES = 5 BSKY_UPLOAD_BASE_DELAY = 10.0 BSKY_UPLOAD_MAX_DELAY = 120.0 BSKY_UPLOAD_JITTER_MAX = 5.0 -# Playwright scraping config +# ── Playwright scraping config ──────────────────────────────────────────────── PLAYWRIGHT_TIMEOUT_MS = 30_000 PLAYWRIGHT_SLOW_MO = 50 PLAYWRIGHT_MAX_RELOADS = 3 +<<<<<<< HEAD # TikTok selectors +======= +# ── TikTok selectors ────────────────────────────────────────────────────────── +>>>>>>> 7cddbd0 (Fixes for today) TIKTOK_VIDEO_GRID_SEL = '[data-e2e="user-post-item-list"]' TIKTOK_VIDEO_ITEM_SEL = '[data-e2e="user-post-item"]' TIKTOK_BANNER_SELS = [ @@ -254,6 +260,7 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None: for c in cookies: domain = c.get("domain", ".tiktok.com") +<<<<<<< HEAD # Netscape format requires domain to start with a dot for # include_subdomains=TRUE to work correctly include_sub = "TRUE" if domain.startswith(".") else "FALSE" @@ -262,6 +269,12 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None: expiry = int( c.get("expirationDate") or c.get("expires") or 0 ) +======= + include_sub = "TRUE" if domain.startswith(".") else "FALSE" + path = c.get("path", "/") + secure = "TRUE" if c.get("secure", False) else "FALSE" + expiry = int(c.get("expirationDate") or c.get("expires") or 0) +>>>>>>> 7cddbd0 (Fixes for today) name = c.get("name", "") value = c.get("value", "") @@ -285,110 +298,168 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None: # ───────────────────────────────────────────────────────────────────────────── +<<<<<<< HEAD # Bluesky error classification helpers +======= +# Bluesky error classification (ported from twitter2bsky.py) +>>>>>>> 7cddbd0 (Fixes for today) # ───────────────────────────────────────────────────────────────────────────── +def _bsky_error_text(error_obj) -> str: + """Normalised lowercase repr for pattern matching.""" + return repr(error_obj).lower() + + def is_rate_limited_error(error_obj) -> bool: - text = repr(error_obj).lower() + text = _bsky_error_text(error_obj) return ( - "429" in text + "429" in text or "ratelimitexceeded" in text or "too many requests" in text - or "rate limit" in text + or "rate limit" in text + or "ratelimit" in text ) def is_auth_error(error_obj) -> bool: - text = repr(error_obj).lower() + text = _bsky_error_text(error_obj) return ( - "401" in text - or "403" in text + "401" in text + or "403" in text + or "invalid identifier" in text + or "invalid password" in text + or "authenticationrequired" in text + or "invalidtoken" in text + or "expiredtoken" in text + or "accounttakedown" in text or "invalid identifier or password" in text - or "authenticationrequired" in text - or "invalidtoken" in text ) def is_network_error(error_obj) -> bool: text = repr(error_obj) signals = [ - "ConnectError", - "RemoteProtocolError", - "ReadTimeout", - "WriteTimeout", - "TimeoutException", - "503", - "502", - "504", - "ConnectionResetError", + "ConnectError", "RemoteProtocolError", "ReadTimeout", + "WriteTimeout", "TimeoutException", "ConnectionResetError", + "503", "502", "504", ] - return any(sig in text for sig in signals) + return any(s in text for s in signals) def is_transient_error(error_obj) -> bool: text = repr(error_obj) signals = [ - "InvokeTimeoutError", - "ReadTimeout", - "WriteTimeout", - "TimeoutException", - "RemoteProtocolError", - "ConnectError", - "503", - "502", - "504", + "InvokeTimeoutError", "ReadTimeout", "WriteTimeout", + "TimeoutException", "RemoteProtocolError", "ConnectError", + "503", "502", "504", ] - return any(sig in text for sig in signals) + return any(s in text for s in signals) def get_rate_limit_wait_seconds(error_obj, default_delay: float) -> float: """ +<<<<<<< HEAD Parse rate-limit response headers and return a bounded wait time in seconds. +======= + Extract the server-requested wait time from rate-limit error headers. + + Checks (in order): + 1. error_obj.headers dict — Retry-After, X-RateLimit-After, RateLimit-Reset + 2. repr(error_obj) text — same keys embedded as strings + 3. Falls back to default_delay + + Ported from twitter2bsky.py. +>>>>>>> 7cddbd0 (Fixes for today) """ + now_ts = int(time.time()) + + # ── 1. Live headers object ──────────────────────────────────────────── try: - now_ts = int(time.time()) headers = getattr(error_obj, "headers", None) or {} for key in ("retry-after", "Retry-After"): - if headers.get(key): - return min(max(int(headers[key]), 1), BSKY_LOGIN_MAX_DELAY) + val = headers.get(key) + if val: + return min(max(int(val), 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) for key in ("x-ratelimit-after", "X-RateLimit-After"): - if headers.get(key): - return min(max(int(headers[key]), 1), BSKY_LOGIN_MAX_DELAY) + val = headers.get(key) + if val: + return min(max(int(val), 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) for key in ("ratelimit-reset", "RateLimit-Reset"): - if headers.get(key): - wait = max(int(headers[key]) - now_ts + 1, default_delay) - return min(wait, BSKY_LOGIN_MAX_DELAY) + val = headers.get(key) + if val: + wait = max(int(val) - now_ts + 2, default_delay) + return min(wait, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) + except Exception: pass +<<<<<<< HEAD +======= + # ── 2. repr() string fallback ───────────────────────────────────────── +>>>>>>> 7cddbd0 (Fixes for today) text = repr(error_obj) - for pattern, is_timestamp in [ - (r"'retry-after':\s*'(\d+)'", False), - (r"'x-ratelimit-after':\s*'(\d+)'", False), - (r"'ratelimit-reset':\s*'(\d+)'", True), + for pattern, is_ts in [ + (r"['\"]retry-after['\"]\s*:\s*['\"](\d+)['\"]", False), + (r"['\"]x-ratelimit-after['\"]\s*:\s*['\"](\d+)['\"]", False), + (r"['\"]ratelimit-reset['\"]\s*:\s*['\"](\d+)['\"]", True), + (r"retry.?after[=:\s]+(\d+)", False), ]: m = re.search(pattern, text, re.IGNORECASE) if m: val = int(m.group(1)) +<<<<<<< HEAD if is_timestamp: wait = max(val - int(time.time()) + 1, default_delay) return min(wait, BSKY_LOGIN_MAX_DELAY) return min(max(val, 1), BSKY_LOGIN_MAX_DELAY) +======= + if is_ts: + wait = max(val - now_ts + 2, default_delay) + return min(wait, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) + return min(max(val, 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) +>>>>>>> 7cddbd0 (Fixes for today) return default_delay # ───────────────────────────────────────────────────────────────────────────── +<<<<<<< HEAD # Bluesky client # ───────────────────────────────────────────────────────────────────────────── def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client: logging.info(f"🔐 Connecting Bluesky client via base URL: {base_url}") client = Client(base_url=base_url) +======= +# Bluesky client — improved login (ported from twitter2bsky.py) +# ───────────────────────────────────────────────────────────────────────────── +def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client: + """ + Authenticate with Bluesky with full retry logic ported from twitter2bsky.py: + + • 429 / rate-limit → honour Retry-After header; wait up to 600s + • auth errors → fail immediately (retrying won't help) + • network/transient → exponential backoff with jitter + • other errors → exponential backoff with jitter + • exhausted retries → raise so Jenkins marks the build FAILURE + """ + logging.info(f"🔐 Connecting Bluesky client → {base_url}") + client = Client(base_url=base_url) + + attempt = 0 + last_error = None + + while attempt < BSKY_LOGIN_MAX_RETRIES: + attempt += 1 + logging.info( + f"🔐 Bluesky login attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES} " + f"for {handle}" + ) +>>>>>>> 7cddbd0 (Fixes for today) - for attempt in range(1, BSKY_LOGIN_MAX_RETRIES + 1): try: +<<<<<<< HEAD logging.info( f"🔐 Bluesky login attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES} for {handle}" ) @@ -423,6 +494,79 @@ def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client: raise raise RuntimeError("Bluesky login failed: exhausted all retries.") +======= + client.login(handle, app_password) + # Fetch profile to confirm the session is fully live + client.me = client.get_profile(handle) + logging.info(f"✅ Bluesky login successful as {handle}") + return client + + except Exception as e: + last_error = e + err_detail = f"{type(e).__name__}: {e}" + + # ── Auth errors: no point retrying ─────────────────────────── + if is_auth_error(e): + logging.error( + f"❌ Bluesky login auth error (will not retry): {err_detail}" + ) + raise + + # ── Rate-limited (429) ──────────────────────────────────────── + if is_rate_limited_error(e): + raw_wait = get_rate_limit_wait_seconds(e, BSKY_LOGIN_RATE_LIMIT_DELAY) + jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX) + wait = min(raw_wait + jitter, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) + logging.warning( + f"⏳ Bluesky login rate-limited (attempt {attempt}/" + f"{BSKY_LOGIN_MAX_RETRIES}). " + f"Waiting {wait:.1f}s (server requested {raw_wait:.0f}s)." + ) + if attempt < BSKY_LOGIN_MAX_RETRIES: + time.sleep(wait) + continue + + # ── Network / transient errors ──────────────────────────────── + if is_network_error(e) or is_transient_error(e): + delay = min( + BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)), + BSKY_LOGIN_MAX_DELAY, + ) + jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX) + wait = delay + jitter + logging.warning( + f"⚠️ Bluesky login network/transient error " + f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}): " + f"{err_detail}. Retrying in {wait:.1f}s." + ) + if attempt < BSKY_LOGIN_MAX_RETRIES: + time.sleep(wait) + continue + + # ── Unknown errors ──────────────────────────────────────────── + delay = min( + BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)), + BSKY_LOGIN_MAX_DELAY, + ) + jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX) + wait = delay + jitter + logging.warning( + f"⚠️ Bluesky login failed " + f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}): " + f"{err_detail}. Retrying in {wait:.1f}s." + ) + if attempt < BSKY_LOGIN_MAX_RETRIES: + time.sleep(wait) + + logging.error( + f"❌ Bluesky login failed after {BSKY_LOGIN_MAX_RETRIES} attempts. " + f"Last error: {type(last_error).__name__}: {last_error}" + ) + raise RuntimeError( + f"Bluesky login failed after {BSKY_LOGIN_MAX_RETRIES} attempts: " + f"{last_error}" + ) +>>>>>>> 7cddbd0 (Fixes for today) # ───────────────────────────────────────────────────────────────────────────── @@ -463,7 +607,11 @@ def compress_video( • post-encode size guard — rejects file if still over limit """ if max_size_bytes is None: +<<<<<<< HEAD max_size_bytes = 20 * 1024 * 1024 # fallback +======= + max_size_bytes = 20 * 1024 * 1024 +>>>>>>> 7cddbd0 (Fixes for today) try: duration = get_video_duration(input_path) @@ -477,7 +625,10 @@ def compress_video( trim_to = min(duration, max_duration) +<<<<<<< HEAD # Target 85% of the size budget to leave headroom for container overhead +======= +>>>>>>> 7cddbd0 (Fixes for today) target_bits = max_size_bytes * 8 * 0.85 total_kbps = int(target_bits / trim_to / 1000) audio_kbps = 96 @@ -493,10 +644,13 @@ def compress_video( "ffmpeg", "-y", "-i", input_path, "-t", str(trim_to), +<<<<<<< HEAD # Scale to 720p max, then pad to even dimensions. # The pad filter is required because libx264 needs width/height # divisible by 2. Portrait TikTok videos (9:16) would otherwise # produce odd widths like 405px and crash the encoder. +======= +>>>>>>> 7cddbd0 (Fixes for today) "-vf", ( "scale='min(1280,iw)':'min(720,ih)'" ":force_original_aspect_ratio=decrease," @@ -504,7 +658,11 @@ def compress_video( ), "-c:v", "libx264", "-b:v", f"{video_kbps}k", +<<<<<<< HEAD "-maxrate", f"{video_kbps}k", # hard ceiling — no burst above target +======= + "-maxrate", f"{video_kbps}k", +>>>>>>> 7cddbd0 (Fixes for today) "-bufsize", f"{video_kbps * 2}k", "-c:a", "aac", "-b:a", f"{audio_kbps}k", @@ -520,7 +678,10 @@ def compress_video( final_size = os.path.getsize(output_path) +<<<<<<< HEAD # Reject if still over the hard limit +======= +>>>>>>> 7cddbd0 (Fixes for today) if final_size > max_size_bytes: logging.error( f"❌ Compressed file still too large: " @@ -572,7 +733,10 @@ def download_video_ytdlp( """ Download a TikTok video using yt-dlp with browser impersonation. Accepts a Netscape-format cookie file path (not JSON). +<<<<<<< HEAD Returns True on success, False on failure. +======= +>>>>>>> 7cddbd0 (Fixes for today) """ impersonate = get_best_impersonation_target() @@ -633,7 +797,11 @@ def upload_video_to_bluesky( ) -> object | None: """ Upload a video file to Bluesky as a blob. +<<<<<<< HEAD Exception is always logged as type(e).__name__: e for full visibility. +======= + All exceptions logged as type(e).__name__: e for full visibility. +>>>>>>> 7cddbd0 (Fixes for today) """ size_mb = os.path.getsize(video_path) / 1024 / 1024 logging.info(f"⬆️ Uploading to Bluesky ({size_mb:.1f} MB)...") @@ -717,6 +885,90 @@ def dismiss_overlays(page) -> None: pass +<<<<<<< HEAD +======= +def _run_playwright_scrape_loop(page, profile_url: str, limit: int) -> list[dict]: + """ + Inner scraping loop shared by both the stealth and no-stealth paths. + Returns a list of video dicts. + """ + videos = [] + + for attempt in range(1, PLAYWRIGHT_MAX_RELOADS + 1): + try: + logging.info( + f"🌐 Loading profile (attempt {attempt}/{PLAYWRIGHT_MAX_RELOADS})..." + ) + page.goto( + profile_url, + wait_until="domcontentloaded", + timeout=PLAYWRIGHT_TIMEOUT_MS, + ) + time.sleep(3) + dismiss_overlays(page) + + try: + page.wait_for_selector( + TIKTOK_VIDEO_GRID_SEL, + timeout=PLAYWRIGHT_TIMEOUT_MS, + ) + except Exception: + pass + + grid = page.locator(TIKTOK_VIDEO_GRID_SEL).first + if not grid.is_visible(timeout=5000): + logging.warning(f"⚠️ Video grid not found on attempt {attempt}.") + ts = int(time.time()) + try: + page.screenshot(path=f"screenshot_no_grid_{attempt}_{ts}.png") + logging.info( + f"📸 Screenshot saved: screenshot_no_grid_{attempt}_{ts}.png" + ) + except Exception: + pass + time.sleep(3) + continue + + items = page.locator(TIKTOK_VIDEO_ITEM_SEL).all() + for item in items[:limit]: + try: + link = item.locator("a").first.get_attribute("href") + if link and "/video/" in link: + vid_match = re.search(r"/video/(\d+)", link) + if vid_match: + video_id = vid_match.group(1) + full_url = ( + link if link.startswith("http") + else f"https://www.tiktok.com{link}" + ) + videos.append({ + "video_id": video_id, + "url": full_url, + "timestamp": None, + }) + except Exception: + pass + + if videos: + logging.info(f"✅ Playwright scraped {len(videos)} videos.") + break + + except Exception as e: + logging.warning( + f"⚠️ Playwright attempt {attempt} error: " + f"{type(e).__name__}: {e}" + ) + ts = int(time.time()) + try: + page.screenshot(path=f"screenshot_error_{attempt}_{ts}.png") + except Exception: + pass + time.sleep(3) + + return videos + + +>>>>>>> 7cddbd0 (Fixes for today) def scrape_tiktok_profile_playwright( handle: str, cookies: list, @@ -724,10 +976,18 @@ def scrape_tiktok_profile_playwright( ) -> list[dict]: """ Scrape the most recent video URLs from a TikTok profile page using Playwright. +<<<<<<< HEAD Returns a list of dicts with keys: video_id, url, timestamp. Stealth fix: playwright-stealth v2.x must wrap the page via a context manager on new_page(), not via .apply() or .use_sync() after the fact. +======= + + Stealth handling: + v1.x → stealth_sync(page) after new_page() + v2.x → Stealth() used as context manager; page created inside it + none → plain page, no stealth +>>>>>>> 7cddbd0 (Fixes for today) """ profile_url = f"https://www.tiktok.com/@{handle}" logging.info(f"🕷️ Scraping TikTok profile: {profile_url}") @@ -756,6 +1016,7 @@ def scrape_tiktok_profile_playwright( inject_cookies_into_context(context, cookies) +<<<<<<< HEAD # ── Stealth application ─────────────────────────────────────────── # v1.x: stealth_sync(page) — called after new_page() # v2.x: context manager on new_page — page must be created inside @@ -771,11 +1032,16 @@ def scrape_tiktok_profile_playwright( elif _STEALTH_V2: # v2.x — use as context manager so the page is created inside it +======= + # ── Stealth v2.x — page must be created inside the context manager ── + if _STEALTH_V2 is True: +>>>>>>> 7cddbd0 (Fixes for today) try: stealth_instance = Stealth() with stealth_instance(context) as stealthy_context: page = stealthy_context.new_page() logging.info("🥷 playwright-stealth v2.x applied (context manager).") +<<<<<<< HEAD # Run the scraping loop inside the context manager scope for attempt in range(1, PLAYWRIGHT_MAX_RELOADS + 1): try: @@ -864,12 +1130,27 @@ def scrape_tiktok_profile_playwright( else: # v1.x — create page then apply stealth +======= + videos = _run_playwright_scrape_loop(page, profile_url, limit) + except Exception as e: + logging.warning( + f"⚠️ playwright-stealth v2.x failed: {type(e).__name__}: {e}. " + f"Retrying without stealth." + ) + # Fall through to no-stealth path below + page = context.new_page() + videos = _run_playwright_scrape_loop(page, profile_url, limit) + + # ── Stealth v1.x ────────────────────────────────────────────────── + elif _STEALTH_V2 is False: +>>>>>>> 7cddbd0 (Fixes for today) page = context.new_page() try: stealth_sync(page) logging.info("🥷 playwright-stealth v1.x applied (stealth_sync).") except Exception as e: logging.warning( +<<<<<<< HEAD f"⚠️ playwright-stealth v1.x failed: " f"{type(e).__name__}: {e}. Continuing without stealth." ) @@ -971,6 +1252,35 @@ def scrape_tiktok_profile_playwright( pass # ── Cleanup ─────────────────────────────────────────────────────── +======= + f"⚠️ playwright-stealth v1.x failed: {type(e).__name__}: {e}. " + f"Continuing without stealth." + ) + videos = _run_playwright_scrape_loop(page, profile_url, limit) + + # ── No stealth available ────────────────────────────────────────── + else: + logging.warning("⚠️ playwright-stealth not installed. Skipping stealth.") + page = context.new_page() + videos = _run_playwright_scrape_loop(page, profile_url, limit) + + if not videos: + logging.warning( + f"⚠️ Video grid not found after {PLAYWRIGHT_MAX_RELOADS} attempts." + ) + ts = int(time.time()) + try: + page.screenshot( + path=f"screenshot_no_grid_{PLAYWRIGHT_MAX_RELOADS}_{ts}.png" + ) + logging.info( + f"📸 Screenshot saved: " + f"screenshot_no_grid_{PLAYWRIGHT_MAX_RELOADS}_{ts}.png" + ) + except Exception: + pass + +>>>>>>> 7cddbd0 (Fixes for today) for obj in (page, context, browser): try: if obj: @@ -992,7 +1302,10 @@ def scrape_tiktok_profile_ytdlp( """ Fallback: use yt-dlp to extract the video list from a TikTok profile. Accepts a Netscape-format cookie file path (not JSON). +<<<<<<< HEAD Returns a list of dicts with keys: video_id, url, timestamp. +======= +>>>>>>> 7cddbd0 (Fixes for today) """ import yt_dlp @@ -1060,7 +1373,11 @@ def build_caption(video_info: dict, tiktok_handle: str, max_len: int = 290) -> s url = video_info.get("url", "") if desc: +<<<<<<< HEAD url_len = len(url) + 1 # +1 for newline +======= + url_len = len(url) + 1 +>>>>>>> 7cddbd0 (Fixes for today) max_desc = max_len - url_len if len(desc) > max_desc: desc = desc[: max_desc - 1] + "…" @@ -1097,7 +1414,10 @@ def process_videos( logging.info(f"⏭️ Already posted: {video_id}") continue +<<<<<<< HEAD # Age filter (only when timestamp is available) +======= +>>>>>>> 7cddbd0 (Fixes for today) ts = video.get("timestamp") if ts: try: @@ -1150,7 +1470,10 @@ def process_videos( if ok: mark_as_posted(video_id, state, meta={"url": video_url}) posted_count += 1 +<<<<<<< HEAD # Brief pause between posts to avoid rate limiting +======= +>>>>>>> 7cddbd0 (Fixes for today) time.sleep(random.uniform(2.0, 5.0)) return posted_count @@ -1162,6 +1485,7 @@ def process_videos( def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Cross-post TikTok videos to Bluesky." +<<<<<<< HEAD ) parser.add_argument( "--tiktok-handle", @@ -1200,6 +1524,34 @@ def parse_args() -> argparse.Namespace: default=VIDEO_MAX_AGE_DAYS, help=f"Skip videos older than N days (default: {VIDEO_MAX_AGE_DAYS})", ) +======= + ) + parser.add_argument("--tiktok-handle", required=True) + parser.add_argument("--bsky-handle", required=True) + parser.add_argument("--bsky-app-password", required=True) + parser.add_argument( + "--bsky-base-url", + default=DEFAULT_BSKY_BASE_URL, + help=f"Bluesky PDS base URL (default: {DEFAULT_BSKY_BASE_URL})", + ) + parser.add_argument( + "--bsky-langs", + nargs="+", + default=DEFAULT_BSKY_LANGS, + help="BCP-47 language tags for posts (default: es)", + ) + parser.add_argument( + "--cookies-path", + default=TIKTOK_COOKIES_PATH, + help=f"Path to TikTok cookies JSON (default: {TIKTOK_COOKIES_PATH})", + ) + parser.add_argument( + "--max-age-days", + type=int, + default=VIDEO_MAX_AGE_DAYS, + help=f"Skip videos older than N days (default: {VIDEO_MAX_AGE_DAYS})", + ) +>>>>>>> 7cddbd0 (Fixes for today) return parser.parse_args() @@ -1207,7 +1559,10 @@ def main(): load_dotenv() args = parse_args() +<<<<<<< HEAD # Fix 2 — resolve video size limit based on PDS +======= +>>>>>>> 7cddbd0 (Fixes for today) video_max_size_bytes = get_video_size_limit(args.bsky_base_url) logging.info("=" * 60) @@ -1230,17 +1585,30 @@ def main(): args.bsky_base_url, ) +<<<<<<< HEAD # Convert JSON cookies → Netscape format for yt-dlp # Playwright uses the JSON cookies directly via inject_cookies_into_context() # yt-dlp requires Netscape .txt format — convert once and reuse +======= + # Convert JSON cookies → Netscape format once for all yt-dlp calls +>>>>>>> 7cddbd0 (Fixes for today) netscape_cookies_path = convert_json_cookies_to_netscape(args.cookies_path) if netscape_cookies_path: logging.info(f"🍪 Netscape cookie file ready: {netscape_cookies_path}") else: +<<<<<<< HEAD logging.warning("⚠️ Could not create Netscape cookie file. yt-dlp will run without cookies.") try: # Scrape TikTok profile +======= + logging.warning( + "⚠️ Could not create Netscape cookie file. " + "yt-dlp will run without cookies." + ) + + try: +>>>>>>> 7cddbd0 (Fixes for today) logging.info(f"🔄 Scraping @{args.tiktok_handle}...") cookies = load_cookies_from_file(args.cookies_path) @@ -1290,7 +1658,13 @@ def main(): if netscape_cookies_path and os.path.exists(netscape_cookies_path): try: os.remove(netscape_cookies_path) +<<<<<<< HEAD logging.info(f"🧹 Removed temporary Netscape cookie file: {netscape_cookies_path}") +======= + logging.info( + f"🧹 Removed temporary Netscape cookie file: {netscape_cookies_path}" + ) +>>>>>>> 7cddbd0 (Fixes for today) except Exception as e: logging.warning(f"⚠️ Could not remove Netscape cookie file: {e}")