diff --git a/jenkins/jijantesFCTikTok b/jenkins/jijantesFCTikTok index 20c88ba..a58f91d 100644 --- a/jenkins/jijantesFCTikTok +++ b/jenkins/jijantesFCTikTok @@ -37,19 +37,25 @@ pipeline { sh ''' set -euxo pipefail - # Create venv + # ── Playwright system dependencies (required in CI) ─ + # Installs libglib, libnss, libatk, libdrm, etc. + # Safe to run even if already installed — exits 0. + "${VENV_DIR}/bin/python" -m playwright install-deps chromium || \ + sudo playwright install-deps chromium || \ + echo "⚠️ playwright install-deps skipped (no sudo) — continuing" + + # ── Create venv ──────────────────────────────────── python3 -m venv "${VENV_DIR}" - # Upgrade pip toolchain + # ── Upgrade pip toolchain ────────────────────────── "${VENV_DIR}/bin/python" -m pip install --upgrade pip wheel setuptools - # Install all required packages + # ── Install all required packages ────────────────── "${VENV_DIR}/bin/pip" install \ --cache-dir "${PIP_CACHE_DIR}" \ -U \ atproto \ playwright \ - yt-dlp \ httpx \ arrow \ python-dotenv \ @@ -59,8 +65,20 @@ pipeline { Pillow \ grapheme - # ── Install playwright-stealth and detect version ── - "${VENV_DIR}/bin/pip" install --cache-dir "${PIP_CACHE_DIR}" -U playwright-stealth + # ── yt-dlp: always upgrade to latest ────────────── + # TikTok extractor breaks frequently — latest is required + "${VENV_DIR}/bin/pip" install \ + --cache-dir "${PIP_CACHE_DIR}" \ + --upgrade \ + "yt-dlp" + + # Print installed yt-dlp version for traceability + "${VENV_DIR}/bin/pip" show yt-dlp | grep -E "^(Name|Version)" + + # ── playwright-stealth ───────────────────────────── + "${VENV_DIR}/bin/pip" install \ + --cache-dir "${PIP_CACHE_DIR}" \ + -U playwright-stealth # Print which version was installed for traceability "${VENV_DIR}/bin/pip" show playwright-stealth | grep -E "^(Name|Version)" @@ -87,7 +105,7 @@ except ImportError: ffmpeg -version | head -1 ffprobe -version | head -1 - # ── Playwright browser binaries (no sudo needed) ─── + # ── Playwright browser binaries ──────────────────── "${VENV_DIR}/bin/python" -m playwright install chromium ''' } @@ -141,7 +159,7 @@ except ImportError: post { always { - // Archive logs, state, and any CAPTCHA/debug screenshots + // Archive logs, state, and any debug screenshots archiveArtifacts( artifacts: '*.log, *.json, screenshot_*.png', allowEmptyArchive: true diff --git a/tiktok2bsky.py b/tiktok2bsky.py index ca6a9c5..72bc2f3 100644 --- a/tiktok2bsky.py +++ b/tiktok2bsky.py @@ -774,11 +774,11 @@ def _try_refresh_grid(page, max_attempts: int = 4) -> bool: def _scrape_via_api(handle: str, cookies: list) -> list: """ - Fallback scraper using yt-dlp to extract the video list from a - TikTok profile. yt-dlp handles TikTok's signing tokens internally. + Fallback scraper using yt-dlp to list videos from a TikTok profile. + yt-dlp handles TikTok's request signing internally — no raw API needed. Returns same list-of-dicts format as the Playwright scraper. """ - logging.info(f"📦 Trying yt-dlp profile scrape fallback for @{handle}...") + logging.info(f"📦 yt-dlp profile scrape fallback for @{handle}...") cookie_file = None videos = [] @@ -789,16 +789,17 @@ def _scrape_via_api(handle: str, cookies: list) -> list: cookie_file = _write_netscape_cookies(cookies) ydl_opts = { - "quiet": True, - "no_warnings": False, - "extract_flat": True, # metadata only — no download - "playlistend": SCRAPE_VIDEO_LIMIT, - "ignoreerrors": True, + "quiet": True, + "no_warnings": False, + "extract_flat": True, # metadata only — no video download yet + "playlistend": SCRAPE_VIDEO_LIMIT, + "ignoreerrors": True, } if cookie_file: ydl_opts["cookiefile"] = cookie_file profile_url = f"https://www.tiktok.com/@{handle}" + logging.info(f"🌐 yt-dlp extracting: {profile_url}") with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(profile_url, download=False) @@ -808,25 +809,40 @@ def _scrape_via_api(handle: str, cookies: list) -> list: return [] entries = info.get("entries") or [] - logging.info(f"✅ yt-dlp profile scrape returned {len(entries)} entries.") + logging.info( + f"✅ yt-dlp returned {len(entries)} entries " + f"(playlist: {info.get('title', '?')})" + ) for entry in entries[:SCRAPE_VIDEO_LIMIT]: try: if not entry: continue - vid_id = str(entry.get("id") or "") - url = entry.get("url") or entry.get("webpage_url") or "" - desc = entry.get("title") or entry.get("description") or "" + vid_id = str(entry.get("id") or "") + url = ( + entry.get("webpage_url") + or entry.get("url") + or "" + ) + desc = ( + entry.get("title") + or entry.get("description") + or "" + ) # Normalise URL if vid_id and not url: url = f"https://www.tiktok.com/@{handle}/video/{vid_id}" - if not vid_id: + + # Extract ID from URL if missing + if not vid_id and url: m = re.search(r"/video/(\d+)", url) if m: vid_id = m.group(1) + if not vid_id: + logging.debug(f"⏭️ Skipping entry with no ID: {entry}") continue videos.append({ @@ -836,11 +852,12 @@ def _scrape_via_api(handle: str, cookies: list) -> list: "timestamp": arrow.utcnow().isoformat(), "video_url": url, }) + logging.debug(f" 📹 {vid_id}: {desc[:60]}") except Exception as e: logging.warning(f"⚠️ yt-dlp entry parse error: {e}") - logging.info(f"✅ yt-dlp fallback produced {len(videos)} videos.") + logging.info(f"✅ yt-dlp fallback produced {len(videos)} usable videos.") except Exception as e: logging.error(f"❌ yt-dlp profile scrape failed: {e}") @@ -850,7 +867,6 @@ def _scrape_via_api(handle: str, cookies: list) -> list: os.unlink(cookie_file) return videos - def _resolve_tiktok_ids(handle: str, headers: dict) -> tuple[str | None, str | None]: """ Extract both the numeric user ID and secUid from the profile page HTML.