Some fixes
This commit is contained in:
@@ -774,11 +774,11 @@ def _try_refresh_grid(page, max_attempts: int = 4) -> bool:
|
||||
|
||||
def _scrape_via_api(handle: str, cookies: list) -> list:
|
||||
"""
|
||||
Fallback scraper using yt-dlp to extract the video list from a
|
||||
TikTok profile. yt-dlp handles TikTok's signing tokens internally.
|
||||
Fallback scraper using yt-dlp to list videos from a TikTok profile.
|
||||
yt-dlp handles TikTok's request signing internally — no raw API needed.
|
||||
Returns same list-of-dicts format as the Playwright scraper.
|
||||
"""
|
||||
logging.info(f"📦 Trying yt-dlp profile scrape fallback for @{handle}...")
|
||||
logging.info(f"📦 yt-dlp profile scrape fallback for @{handle}...")
|
||||
|
||||
cookie_file = None
|
||||
videos = []
|
||||
@@ -789,16 +789,17 @@ def _scrape_via_api(handle: str, cookies: list) -> list:
|
||||
cookie_file = _write_netscape_cookies(cookies)
|
||||
|
||||
ydl_opts = {
|
||||
"quiet": True,
|
||||
"no_warnings": False,
|
||||
"extract_flat": True, # metadata only — no download
|
||||
"playlistend": SCRAPE_VIDEO_LIMIT,
|
||||
"ignoreerrors": True,
|
||||
"quiet": True,
|
||||
"no_warnings": False,
|
||||
"extract_flat": True, # metadata only — no video download yet
|
||||
"playlistend": SCRAPE_VIDEO_LIMIT,
|
||||
"ignoreerrors": True,
|
||||
}
|
||||
if cookie_file:
|
||||
ydl_opts["cookiefile"] = cookie_file
|
||||
|
||||
profile_url = f"https://www.tiktok.com/@{handle}"
|
||||
logging.info(f"🌐 yt-dlp extracting: {profile_url}")
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(profile_url, download=False)
|
||||
@@ -808,25 +809,40 @@ def _scrape_via_api(handle: str, cookies: list) -> list:
|
||||
return []
|
||||
|
||||
entries = info.get("entries") or []
|
||||
logging.info(f"✅ yt-dlp profile scrape returned {len(entries)} entries.")
|
||||
logging.info(
|
||||
f"✅ yt-dlp returned {len(entries)} entries "
|
||||
f"(playlist: {info.get('title', '?')})"
|
||||
)
|
||||
|
||||
for entry in entries[:SCRAPE_VIDEO_LIMIT]:
|
||||
try:
|
||||
if not entry:
|
||||
continue
|
||||
|
||||
vid_id = str(entry.get("id") or "")
|
||||
url = entry.get("url") or entry.get("webpage_url") or ""
|
||||
desc = entry.get("title") or entry.get("description") or ""
|
||||
vid_id = str(entry.get("id") or "")
|
||||
url = (
|
||||
entry.get("webpage_url")
|
||||
or entry.get("url")
|
||||
or ""
|
||||
)
|
||||
desc = (
|
||||
entry.get("title")
|
||||
or entry.get("description")
|
||||
or ""
|
||||
)
|
||||
|
||||
# Normalise URL
|
||||
if vid_id and not url:
|
||||
url = f"https://www.tiktok.com/@{handle}/video/{vid_id}"
|
||||
if not vid_id:
|
||||
|
||||
# Extract ID from URL if missing
|
||||
if not vid_id and url:
|
||||
m = re.search(r"/video/(\d+)", url)
|
||||
if m:
|
||||
vid_id = m.group(1)
|
||||
|
||||
if not vid_id:
|
||||
logging.debug(f"⏭️ Skipping entry with no ID: {entry}")
|
||||
continue
|
||||
|
||||
videos.append({
|
||||
@@ -836,11 +852,12 @@ def _scrape_via_api(handle: str, cookies: list) -> list:
|
||||
"timestamp": arrow.utcnow().isoformat(),
|
||||
"video_url": url,
|
||||
})
|
||||
logging.debug(f" 📹 {vid_id}: {desc[:60]}")
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ yt-dlp entry parse error: {e}")
|
||||
|
||||
logging.info(f"✅ yt-dlp fallback produced {len(videos)} videos.")
|
||||
logging.info(f"✅ yt-dlp fallback produced {len(videos)} usable videos.")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"❌ yt-dlp profile scrape failed: {e}")
|
||||
@@ -850,7 +867,6 @@ def _scrape_via_api(handle: str, cookies: list) -> list:
|
||||
os.unlink(cookie_file)
|
||||
|
||||
return videos
|
||||
|
||||
def _resolve_tiktok_ids(handle: str, headers: dict) -> tuple[str | None, str | None]:
|
||||
"""
|
||||
Extract both the numeric user ID and secUid from the profile page HTML.
|
||||
|
||||
Reference in New Issue
Block a user