From 6f3fe078337af2c4879b3c52477f8c4cbe93b0d3 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Tue, 19 May 2026 15:53:51 +0200 Subject: [PATCH] Fixes --- jenkins/comedygoldbcnTiktok | 20 +++++++---- tiktok2bsky.py | 68 ++++++++++++++++--------------------- 2 files changed, 42 insertions(+), 46 deletions(-) diff --git a/jenkins/comedygoldbcnTiktok b/jenkins/comedygoldbcnTiktok index 63d886d..8107b5b 100644 --- a/jenkins/comedygoldbcnTiktok +++ b/jenkins/comedygoldbcnTiktok @@ -57,6 +57,12 @@ pipeline { pip install --upgrade yt-dlp pip show yt-dlp | grep -E "^(Name|Version)" + # ── curl_cffi: TikTok impersonation (REQUIRED) ───── + # Without this yt-dlp cannot bypass TikTok bot detection + pip install --upgrade curl-cffi + pip show curl-cffi | grep -E "^(Name|Version)" + python3 -c "import curl_cffi; print('curl_cffi OK')" + # ── playwright-stealth version check ─────────────── pip show playwright-stealth | grep -E "^(Name|Version)" python3 -c " @@ -69,12 +75,12 @@ except ImportError: " # ── Sanity checks ────────────────────────────────── - python3 -c "import atproto; print('atproto OK')" - python3 -c "import playwright; print('playwright OK')" - python3 -c "import yt_dlp; print('yt_dlp OK')" - python3 -c "import httpx; print('httpx OK')" - python3 -c "import arrow; print('arrow OK')" - python3 -c "import moviepy; print('moviepy OK')" + python3 -c "import atproto; print('atproto OK')" + python3 -c "import playwright; print('playwright OK')" + python3 -c "import yt_dlp; print('yt_dlp OK')" + python3 -c "import httpx; print('httpx OK')" + python3 -c "import arrow; print('arrow OK')" + python3 -c "import moviepy; print('moviepy OK')" # ── System tools ─────────────────────────────────── ffmpeg -version | head -1 @@ -156,4 +162,4 @@ except ImportError: echo '⚠️ TikTok→Bluesky sync finished with warnings.' } } -} +} \ No newline at end of file diff --git a/tiktok2bsky.py b/tiktok2bsky.py index f545eb4..a1de196 100644 --- a/tiktok2bsky.py +++ b/tiktok2bsky.py @@ -596,51 +596,20 @@ def compress_video(input_path: str, output_path: str, logging.error(f"❌ compress_video error: {e}") return False - def download_video(url: str, output_path: str, cookies: list = None) -> bool: """ - Download a video from a URL (MP4 or M3U8) using httpx or yt-dlp. - Falls back to yt-dlp for HLS streams or when direct download fails. + Download a TikTok video using yt-dlp with impersonation. + Direct HTTP download is skipped — TikTok always returns HTML + for video page URLs, never a raw MP4. """ - # ── Try direct HTTP download first ──────────────────────────────── - if not url.endswith(".m3u8"): - try: - headers = { - "User-Agent": ( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/124.0.0.0 Safari/537.36" - ), - "Referer": "https://www.tiktok.com/", - } - with httpx.stream("GET", url, headers=headers, - follow_redirects=True, timeout=60) as r: - r.raise_for_status() - with open(output_path, "wb") as f: - for chunk in r.iter_bytes(chunk_size=1024 * 64): - f.write(chunk) - size = os.path.getsize(output_path) - if size > 10_000: - logging.info( - f"✅ Direct download OK: {size / 1024 / 1024:.1f} MB" - ) - return True - logging.warning( - f"⚠️ Direct download too small ({size} bytes), trying yt-dlp..." - ) - except Exception as e: - logging.warning(f"⚠️ Direct download failed: {e}. Trying yt-dlp...") - - # ── Fall back to yt-dlp ──────────────────────────────────────────── return download_video_ytdlp(url, output_path, cookies=cookies) - def download_video_ytdlp(url: str, output_path: str, cookies: list = None) -> bool: """ - Download a video using yt-dlp with TikTok impersonation support. - curl_cffi must be installed for impersonation to work. + Download a video using yt-dlp with TikTok impersonation. + Requires curl-cffi: pip install curl-cffi """ cookie_file = None try: @@ -652,11 +621,31 @@ def download_video_ytdlp(url: str, output_path: str, "quiet": True, "no_warnings": False, "merge_output_format": "mp4", - # ── TikTok impersonation ─────────────────────────────────── - # Requires curl_cffi: pip install curl-cffi - "impersonate": "chrome", } + # ── Impersonation: try targets in order of preference ────────── + # curl_cffi must be installed: pip install curl-cffi + impersonate_targets = ["chrome126", "chrome124", "chrome", "safari"] + impersonate_set = False + + try: + import yt_dlp.networking.impersonate as _imp + available = {str(t) for t in _imp.ImpersonateTarget.supported_targets()} + for target in impersonate_targets: + if any(target in a for a in available): + ydl_opts["impersonate"] = target + logging.info(f"🎭 yt-dlp impersonation target: {target}") + impersonate_set = True + break + if not impersonate_set: + logging.warning( + f"⚠️ No impersonation target available. " + f"Available: {available}. " + f"Install curl-cffi: pip install curl-cffi" + ) + except Exception as e: + logging.warning(f"⚠️ Could not check impersonation targets: {e}") + if cookies: cookie_file = _write_netscape_cookies(cookies) if cookie_file: @@ -689,6 +678,7 @@ def download_video_ytdlp(url: str, output_path: str, if cookie_file and os.path.exists(cookie_file): os.unlink(cookie_file) + def _write_netscape_cookies(cookies: list) -> str | None: """Write cookies list to a Netscape-format temp file for yt-dlp.""" try: