TikTok
This commit is contained in:
317
tiktok2bsky.py
317
tiktok2bsky.py
@@ -25,7 +25,6 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import arrow
|
import arrow
|
||||||
@@ -37,18 +36,19 @@ from playwright.sync_api import sync_playwright
|
|||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
# playwright-stealth: detect installed version
|
# playwright-stealth: detect installed version
|
||||||
|
# v2.x (2.0.x) has a completely unstable API — we skip stealth for it and
|
||||||
|
# rely on browser launch args instead. v1.x stealth_sync works fine.
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
_STEALTH_V2 = None # None = not available at all
|
_STEALTH_SYNC = None # will hold the stealth_sync callable if v1.x is present
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from playwright_stealth import stealth_sync
|
from playwright_stealth import stealth_sync as _stealth_sync_import
|
||||||
_STEALTH_V2 = False
|
_STEALTH_SYNC = _stealth_sync_import
|
||||||
|
logging.getLogger(__name__).debug("playwright-stealth v1.x detected (stealth_sync)")
|
||||||
except ImportError:
|
except ImportError:
|
||||||
try:
|
# v2.x is installed but its API is too unstable to use reliably —
|
||||||
from playwright_stealth import Stealth
|
# browser launch args provide equivalent protection for our use case
|
||||||
_STEALTH_V2 = True
|
pass
|
||||||
except ImportError:
|
|
||||||
pass # stealth disabled — warning emitted at runtime
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
@@ -120,12 +120,10 @@ TIKTOK_COOKIE_MODAL_SELS = [
|
|||||||
'[class*="cookie"] button',
|
'[class*="cookie"] button',
|
||||||
'[id*="cookie"] button',
|
'[id*="cookie"] button',
|
||||||
]
|
]
|
||||||
TIKTOK_GRID_ERROR_SEL = '[data-e2e="user-post-item-list-error"]'
|
|
||||||
TIKTOK_REFRESH_BTN_SEL = 'button:has-text("Actualizar"), button:has-text("Refresh")'
|
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
# Fix 2 — Dynamic video size limit based on PDS
|
# Dynamic video size limit based on PDS
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def get_video_size_limit(bsky_base_url: str) -> int:
|
def get_video_size_limit(bsky_base_url: str) -> int:
|
||||||
"""
|
"""
|
||||||
@@ -165,7 +163,6 @@ def save_state(state: dict):
|
|||||||
for old_key in sorted_keys[: len(posted) - STATE_MAX_ENTRIES]:
|
for old_key in sorted_keys[: len(posted) - STATE_MAX_ENTRIES]:
|
||||||
del posted[old_key]
|
del posted[old_key]
|
||||||
state["posted"] = posted
|
state["posted"] = posted
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(STATE_FILE, "w", encoding="utf-8") as f:
|
with open(STATE_FILE, "w", encoding="utf-8") as f:
|
||||||
json.dump(state, f, indent=2, ensure_ascii=False)
|
json.dump(state, f, indent=2, ensure_ascii=False)
|
||||||
@@ -189,7 +186,6 @@ def mark_as_posted(video_id: str, state: dict, meta: dict = None):
|
|||||||
# Cookie helpers
|
# Cookie helpers
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def load_cookies_from_file(path: str) -> list:
|
def load_cookies_from_file(path: str) -> list:
|
||||||
"""Load cookies from a JSON file."""
|
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
logging.warning(f"⚠️ Cookie file not found: {path}")
|
logging.warning(f"⚠️ Cookie file not found: {path}")
|
||||||
return []
|
return []
|
||||||
@@ -204,7 +200,6 @@ def load_cookies_from_file(path: str) -> list:
|
|||||||
|
|
||||||
|
|
||||||
def inject_cookies_into_context(context, cookies: list):
|
def inject_cookies_into_context(context, cookies: list):
|
||||||
"""Inject a list of cookie dicts into a Playwright browser context."""
|
|
||||||
if not cookies:
|
if not cookies:
|
||||||
return
|
return
|
||||||
playwright_cookies = []
|
playwright_cookies = []
|
||||||
@@ -224,7 +219,9 @@ def inject_cookies_into_context(context, cookies: list):
|
|||||||
playwright_cookies.append(entry)
|
playwright_cookies.append(entry)
|
||||||
try:
|
try:
|
||||||
context.add_cookies(playwright_cookies)
|
context.add_cookies(playwright_cookies)
|
||||||
logging.info(f"🍪 Injected {len(playwright_cookies)} cookies into browser context.")
|
logging.info(
|
||||||
|
f"🍪 Injected {len(playwright_cookies)} cookies into browser context."
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"⚠️ Could not inject cookies: {e}")
|
logging.warning(f"⚠️ Could not inject cookies: {e}")
|
||||||
|
|
||||||
@@ -232,25 +229,16 @@ def inject_cookies_into_context(context, cookies: list):
|
|||||||
def convert_json_cookies_to_netscape(json_path: str) -> str | None:
|
def convert_json_cookies_to_netscape(json_path: str) -> str | None:
|
||||||
"""
|
"""
|
||||||
Convert a JSON cookie file (browser extension format) to a Netscape
|
Convert a JSON cookie file (browser extension format) to a Netscape
|
||||||
cookie file that yt-dlp can consume.
|
cookie file that yt-dlp can consume. Returns temp file path or None.
|
||||||
|
Caller must delete the file when done.
|
||||||
Returns the path to a temporary Netscape file, or None on failure.
|
|
||||||
The caller is responsible for deleting the file when done.
|
|
||||||
|
|
||||||
Netscape format columns (tab-separated):
|
|
||||||
domain include_subdomains path secure expiry name value
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
with open(json_path, "r", encoding="utf-8") as f:
|
with open(json_path, "r", encoding="utf-8") as f:
|
||||||
cookies = json.load(f)
|
cookies = json.load(f)
|
||||||
|
|
||||||
tmp = tempfile.NamedTemporaryFile(
|
tmp = tempfile.NamedTemporaryFile(
|
||||||
mode="w",
|
mode="w", suffix=".txt", delete=False, encoding="utf-8"
|
||||||
suffix=".txt",
|
|
||||||
delete=False,
|
|
||||||
encoding="utf-8",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
tmp.write("# Netscape HTTP Cookie File\n")
|
tmp.write("# Netscape HTTP Cookie File\n")
|
||||||
tmp.write("# Generated by tiktok2bsky.py\n\n")
|
tmp.write("# Generated by tiktok2bsky.py\n\n")
|
||||||
|
|
||||||
@@ -262,7 +250,6 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None:
|
|||||||
expiry = int(c.get("expirationDate") or c.get("expires") or 0)
|
expiry = int(c.get("expirationDate") or c.get("expires") or 0)
|
||||||
name = c.get("name", "")
|
name = c.get("name", "")
|
||||||
value = c.get("value", "")
|
value = c.get("value", "")
|
||||||
|
|
||||||
tmp.write(
|
tmp.write(
|
||||||
f"{domain}\t{include_sub}\t{path}\t"
|
f"{domain}\t{include_sub}\t{path}\t"
|
||||||
f"{secure}\t{expiry}\t{name}\t{value}\n"
|
f"{secure}\t{expiry}\t{name}\t{value}\n"
|
||||||
@@ -286,7 +273,6 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None:
|
|||||||
# Bluesky error classification (ported from twitter2bsky.py)
|
# Bluesky error classification (ported from twitter2bsky.py)
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def _bsky_error_text(error_obj) -> str:
|
def _bsky_error_text(error_obj) -> str:
|
||||||
"""Normalised lowercase repr for pattern matching."""
|
|
||||||
return repr(error_obj).lower()
|
return repr(error_obj).lower()
|
||||||
|
|
||||||
|
|
||||||
@@ -318,61 +304,47 @@ def is_auth_error(error_obj) -> bool:
|
|||||||
|
|
||||||
def is_network_error(error_obj) -> bool:
|
def is_network_error(error_obj) -> bool:
|
||||||
text = repr(error_obj)
|
text = repr(error_obj)
|
||||||
signals = [
|
return any(s in text for s in [
|
||||||
"ConnectError", "RemoteProtocolError", "ReadTimeout",
|
"ConnectError", "RemoteProtocolError", "ReadTimeout",
|
||||||
"WriteTimeout", "TimeoutException", "ConnectionResetError",
|
"WriteTimeout", "TimeoutException", "ConnectionResetError",
|
||||||
"503", "502", "504",
|
"503", "502", "504",
|
||||||
]
|
])
|
||||||
return any(s in text for s in signals)
|
|
||||||
|
|
||||||
|
|
||||||
def is_transient_error(error_obj) -> bool:
|
def is_transient_error(error_obj) -> bool:
|
||||||
text = repr(error_obj)
|
text = repr(error_obj)
|
||||||
signals = [
|
return any(s in text for s in [
|
||||||
"InvokeTimeoutError", "ReadTimeout", "WriteTimeout",
|
"InvokeTimeoutError", "ReadTimeout", "WriteTimeout",
|
||||||
"TimeoutException", "RemoteProtocolError", "ConnectError",
|
"TimeoutException", "RemoteProtocolError", "ConnectError",
|
||||||
"503", "502", "504",
|
"503", "502", "504",
|
||||||
]
|
])
|
||||||
return any(s in text for s in signals)
|
|
||||||
|
|
||||||
|
|
||||||
def get_rate_limit_wait_seconds(error_obj, default_delay: float) -> float:
|
def get_rate_limit_wait_seconds(error_obj, default_delay: float) -> float:
|
||||||
"""
|
"""
|
||||||
Extract the server-requested wait time from rate-limit error headers.
|
Extract the server-requested wait time from rate-limit error headers.
|
||||||
|
|
||||||
Checks (in order):
|
|
||||||
1. error_obj.headers dict — Retry-After, X-RateLimit-After, RateLimit-Reset
|
|
||||||
2. repr(error_obj) text — same keys embedded as strings
|
|
||||||
3. Falls back to default_delay
|
|
||||||
|
|
||||||
Ported from twitter2bsky.py.
|
Ported from twitter2bsky.py.
|
||||||
"""
|
"""
|
||||||
now_ts = int(time.time())
|
now_ts = int(time.time())
|
||||||
|
|
||||||
# ── 1. Live headers object ────────────────────────────────────────────
|
|
||||||
try:
|
try:
|
||||||
headers = getattr(error_obj, "headers", None) or {}
|
headers = getattr(error_obj, "headers", None) or {}
|
||||||
|
|
||||||
for key in ("retry-after", "Retry-After"):
|
for key in ("retry-after", "Retry-After"):
|
||||||
val = headers.get(key)
|
val = headers.get(key)
|
||||||
if val:
|
if val:
|
||||||
return min(max(int(val), 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
|
return min(max(int(val), 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
|
||||||
|
|
||||||
for key in ("x-ratelimit-after", "X-RateLimit-After"):
|
for key in ("x-ratelimit-after", "X-RateLimit-After"):
|
||||||
val = headers.get(key)
|
val = headers.get(key)
|
||||||
if val:
|
if val:
|
||||||
return min(max(int(val), 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
|
return min(max(int(val), 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
|
||||||
|
|
||||||
for key in ("ratelimit-reset", "RateLimit-Reset"):
|
for key in ("ratelimit-reset", "RateLimit-Reset"):
|
||||||
val = headers.get(key)
|
val = headers.get(key)
|
||||||
if val:
|
if val:
|
||||||
wait = max(int(val) - now_ts + 2, default_delay)
|
wait = max(int(val) - now_ts + 2, default_delay)
|
||||||
return min(wait, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
|
return min(wait, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# ── 2. repr() string fallback ─────────────────────────────────────────
|
|
||||||
text = repr(error_obj)
|
text = repr(error_obj)
|
||||||
for pattern, is_ts in [
|
for pattern, is_ts in [
|
||||||
(r"['\"]retry-after['\"]\s*:\s*['\"](\d+)['\"]", False),
|
(r"['\"]retry-after['\"]\s*:\s*['\"](\d+)['\"]", False),
|
||||||
@@ -392,34 +364,29 @@ def get_rate_limit_wait_seconds(error_obj, default_delay: float) -> float:
|
|||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
# Bluesky client — improved login (ported from twitter2bsky.py)
|
# Bluesky client — robust login (ported from twitter2bsky.py)
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
||||||
"""
|
"""
|
||||||
Authenticate with Bluesky with full retry logic ported from twitter2bsky.py:
|
Authenticate with Bluesky with full retry logic:
|
||||||
|
|
||||||
• 429 / rate-limit → honour Retry-After header; wait up to 600s
|
• 429 / rate-limit → honour Retry-After header; wait up to 600s
|
||||||
• auth errors → fail immediately (retrying won't help)
|
• auth errors → fail immediately (retrying won't help)
|
||||||
• network/transient → exponential backoff with jitter
|
• network/transient → exponential backoff with jitter
|
||||||
• other errors → exponential backoff with jitter
|
• other errors → exponential backoff with jitter
|
||||||
• exhausted retries → raise so Jenkins marks the build FAILURE
|
|
||||||
"""
|
"""
|
||||||
logging.info(f"🔐 Connecting Bluesky client → {base_url}")
|
logging.info(f"🔐 Connecting Bluesky client → {base_url}")
|
||||||
client = Client(base_url=base_url)
|
client = Client(base_url=base_url)
|
||||||
|
|
||||||
attempt = 0
|
attempt = 0
|
||||||
last_error = None
|
last_error = None
|
||||||
|
|
||||||
while attempt < BSKY_LOGIN_MAX_RETRIES:
|
while attempt < BSKY_LOGIN_MAX_RETRIES:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
logging.info(
|
logging.info(
|
||||||
f"🔐 Bluesky login attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES} "
|
f"🔐 Bluesky login attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES} for {handle}"
|
||||||
f"for {handle}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client.login(handle, app_password)
|
client.login(handle, app_password)
|
||||||
# Fetch profile to confirm the session is fully live
|
|
||||||
client.me = client.get_profile(handle)
|
client.me = client.get_profile(handle)
|
||||||
logging.info(f"✅ Bluesky login successful as {handle}")
|
logging.info(f"✅ Bluesky login successful as {handle}")
|
||||||
return client
|
return client
|
||||||
@@ -428,14 +395,14 @@ def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
|||||||
last_error = e
|
last_error = e
|
||||||
err_detail = f"{type(e).__name__}: {e}"
|
err_detail = f"{type(e).__name__}: {e}"
|
||||||
|
|
||||||
# ── Auth errors: no point retrying ───────────────────────────
|
# Auth errors — no point retrying
|
||||||
if is_auth_error(e):
|
if is_auth_error(e):
|
||||||
logging.error(
|
logging.error(
|
||||||
f"❌ Bluesky login auth error (will not retry): {err_detail}"
|
f"❌ Bluesky login auth error (will not retry): {err_detail}"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# ── Rate-limited (429) ────────────────────────────────────────
|
# Rate-limited (429)
|
||||||
if is_rate_limited_error(e):
|
if is_rate_limited_error(e):
|
||||||
raw_wait = get_rate_limit_wait_seconds(e, BSKY_LOGIN_RATE_LIMIT_DELAY)
|
raw_wait = get_rate_limit_wait_seconds(e, BSKY_LOGIN_RATE_LIMIT_DELAY)
|
||||||
jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX)
|
jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX)
|
||||||
@@ -449,7 +416,7 @@ def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
|||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# ── Network / transient errors ────────────────────────────────
|
# Network / transient errors
|
||||||
if is_network_error(e) or is_transient_error(e):
|
if is_network_error(e) or is_transient_error(e):
|
||||||
delay = min(
|
delay = min(
|
||||||
BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)),
|
BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)),
|
||||||
@@ -466,7 +433,7 @@ def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
|||||||
time.sleep(wait)
|
time.sleep(wait)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# ── Unknown errors ────────────────────────────────────────────
|
# Unknown errors
|
||||||
delay = min(
|
delay = min(
|
||||||
BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)),
|
BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)),
|
||||||
BSKY_LOGIN_MAX_DELAY,
|
BSKY_LOGIN_MAX_DELAY,
|
||||||
@@ -486,8 +453,7 @@ def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
|||||||
f"Last error: {type(last_error).__name__}: {last_error}"
|
f"Last error: {type(last_error).__name__}: {last_error}"
|
||||||
)
|
)
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Bluesky login failed after {BSKY_LOGIN_MAX_RETRIES} attempts: "
|
f"Bluesky login failed after {BSKY_LOGIN_MAX_RETRIES} attempts: {last_error}"
|
||||||
f"{last_error}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -495,7 +461,6 @@ def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
|
|||||||
# Video helpers
|
# Video helpers
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def get_video_duration(path: str) -> float:
|
def get_video_duration(path: str) -> float:
|
||||||
"""Return video duration in seconds via ffprobe, or 0.0 on failure."""
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[
|
[
|
||||||
@@ -504,9 +469,7 @@ def get_video_duration(path: str) -> float:
|
|||||||
"-of", "default=noprint_wrappers=1:nokey=1",
|
"-of", "default=noprint_wrappers=1:nokey=1",
|
||||||
path,
|
path,
|
||||||
],
|
],
|
||||||
capture_output=True,
|
capture_output=True, text=True, timeout=15,
|
||||||
text=True,
|
|
||||||
timeout=15,
|
|
||||||
)
|
)
|
||||||
return float(result.stdout.strip())
|
return float(result.stdout.strip())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -520,29 +483,18 @@ def compress_video(
|
|||||||
max_duration: int = VIDEO_MAX_DURATION_S,
|
max_duration: int = VIDEO_MAX_DURATION_S,
|
||||||
max_size_bytes: int = None,
|
max_size_bytes: int = None,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""
|
|
||||||
Re-encode input_path → output_path using libx264, targeting max_size_bytes.
|
|
||||||
|
|
||||||
Fixes applied:
|
|
||||||
• pad=ceil(iw/2)*2:ceil(ih/2)*2 — ensures even dimensions (libx264 requirement)
|
|
||||||
• -maxrate == -b:v — hard ceiling, no burst above target
|
|
||||||
• post-encode size guard — rejects file if still over limit
|
|
||||||
"""
|
|
||||||
if max_size_bytes is None:
|
if max_size_bytes is None:
|
||||||
max_size_bytes = 20 * 1024 * 1024
|
max_size_bytes = 20 * 1024 * 1024
|
||||||
|
|
||||||
try:
|
try:
|
||||||
duration = get_video_duration(input_path)
|
duration = get_video_duration(input_path)
|
||||||
|
|
||||||
if duration <= 0:
|
if duration <= 0:
|
||||||
logging.error(
|
logging.error(
|
||||||
f"❌ compress_video: invalid duration={duration} "
|
f"❌ compress_video: invalid duration={duration} for {input_path}"
|
||||||
f"for {input_path} ({os.path.getsize(input_path)} bytes)"
|
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
trim_to = min(duration, max_duration)
|
trim_to = min(duration, max_duration)
|
||||||
|
|
||||||
target_bits = max_size_bytes * 8 * 0.85
|
target_bits = max_size_bytes * 8 * 0.85
|
||||||
total_kbps = int(target_bits / trim_to / 1000)
|
total_kbps = int(target_bits / trim_to / 1000)
|
||||||
audio_kbps = 96
|
audio_kbps = 96
|
||||||
@@ -580,12 +532,11 @@ def compress_video(
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
final_size = os.path.getsize(output_path)
|
final_size = os.path.getsize(output_path)
|
||||||
|
|
||||||
if final_size > max_size_bytes:
|
if final_size > max_size_bytes:
|
||||||
logging.error(
|
logging.error(
|
||||||
f"❌ Compressed file still too large: "
|
f"❌ Compressed file still too large: "
|
||||||
f"{final_size / 1024 / 1024:.1f} MB > "
|
f"{final_size / 1024 / 1024:.1f} MB > "
|
||||||
f"{max_size_bytes / 1024 / 1024:.0f} MB limit. Skipping."
|
f"{max_size_bytes / 1024 / 1024:.0f} MB. Skipping."
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -604,23 +555,65 @@ def compress_video(
|
|||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def get_best_impersonation_target() -> str | None:
|
def get_best_impersonation_target() -> str | None:
|
||||||
"""
|
"""
|
||||||
Dynamically select the best available curl_cffi impersonation target.
|
Ask yt-dlp directly which impersonation targets are actually available
|
||||||
Returns None if curl_cffi is not installed or no target is available.
|
in the current environment. This is the only reliable method —
|
||||||
|
curl_cffi's BrowserType enum values change between versions and do not
|
||||||
|
map 1:1 to yt-dlp's target names.
|
||||||
|
|
||||||
|
Returns the best available target string, or None if none are available.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from curl_cffi.requests import BrowserType
|
import yt_dlp
|
||||||
preferred = ["chrome126", "chrome124", "chrome", "safari"]
|
# yt-dlp exposes available impersonation targets via
|
||||||
available = {t.value if hasattr(t, "value") else str(t) for t in BrowserType}
|
# ImpersonateTarget.supported_targets() in newer builds,
|
||||||
for target in preferred:
|
# or via YoutubeDL._impersonate_target_key in older ones.
|
||||||
if target in available:
|
# The safest cross-version approach is to instantiate a YoutubeDL
|
||||||
logging.info(f"🎭 yt-dlp impersonation target: {target}")
|
# object with quiet=True and inspect _impersonate_targets.
|
||||||
return target
|
with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as ydl:
|
||||||
if available:
|
# _impersonate_targets is a dict of {ImpersonateTarget: handler}
|
||||||
target = sorted(available)[0]
|
targets = getattr(ydl, "_impersonate_targets", None)
|
||||||
logging.info(f"🎭 yt-dlp impersonation target (fallback): {target}")
|
if not targets:
|
||||||
return target
|
logging.warning(
|
||||||
|
"⚠️ yt-dlp: no impersonation targets available in this environment."
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Convert to string representations and pick the best one
|
||||||
|
preferred = ["chrome", "safari", "firefox", "edge"]
|
||||||
|
available_strs = []
|
||||||
|
for t in targets.keys():
|
||||||
|
# ImpersonateTarget has .client and optionally .version
|
||||||
|
client = getattr(t, "client", None) or str(t)
|
||||||
|
version = getattr(t, "version", None)
|
||||||
|
label = f"{client}-{version}" if version else str(client)
|
||||||
|
available_strs.append((label.lower(), t))
|
||||||
|
|
||||||
|
logging.info(
|
||||||
|
f"🎭 yt-dlp available impersonation targets: "
|
||||||
|
f"{[s for s, _ in available_strs]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pick highest-versioned chrome first, then others
|
||||||
|
chrome_targets = sorted(
|
||||||
|
[(s, t) for s, t in available_strs if "chrome" in s],
|
||||||
|
key=lambda x: x[0],
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
if chrome_targets:
|
||||||
|
best_label, best_target = chrome_targets[0]
|
||||||
|
logging.info(f"🎭 Selected impersonation target: {best_label}")
|
||||||
|
return best_target # return the actual ImpersonateTarget object
|
||||||
|
|
||||||
|
# Fallback to any available target
|
||||||
|
best_label, best_target = available_strs[0]
|
||||||
|
logging.info(f"🎭 Selected impersonation target (fallback): {best_label}")
|
||||||
|
return best_target
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"⚠️ Could not check impersonation targets: {e}")
|
logging.warning(
|
||||||
|
f"⚠️ Could not determine yt-dlp impersonation targets: "
|
||||||
|
f"{type(e).__name__}: {e}"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -629,10 +622,6 @@ def download_video_ytdlp(
|
|||||||
output_path: str,
|
output_path: str,
|
||||||
netscape_cookies_path: str = None,
|
netscape_cookies_path: str = None,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""
|
|
||||||
Download a TikTok video using yt-dlp with browser impersonation.
|
|
||||||
Accepts a Netscape-format cookie file path (not JSON).
|
|
||||||
"""
|
|
||||||
impersonate = get_best_impersonation_target()
|
impersonate = get_best_impersonation_target()
|
||||||
|
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
@@ -646,7 +635,7 @@ def download_video_ytdlp(
|
|||||||
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
|
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
|
||||||
ydl_opts["cookiefile"] = netscape_cookies_path
|
ydl_opts["cookiefile"] = netscape_cookies_path
|
||||||
|
|
||||||
if impersonate:
|
if impersonate is not None:
|
||||||
ydl_opts["impersonate"] = impersonate
|
ydl_opts["impersonate"] = impersonate
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -658,7 +647,7 @@ def download_video_ytdlp(
|
|||||||
size_mb = os.path.getsize(output_path) / 1024 / 1024
|
size_mb = os.path.getsize(output_path) / 1024 / 1024
|
||||||
logging.info(f"✅ yt-dlp download OK: {size_mb:.1f} MB")
|
logging.info(f"✅ yt-dlp download OK: {size_mb:.1f} MB")
|
||||||
return True
|
return True
|
||||||
else:
|
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"⚠️ yt-dlp output too small or missing: {output_path} "
|
f"⚠️ yt-dlp output too small or missing: {output_path} "
|
||||||
f"({os.path.getsize(output_path) if os.path.exists(output_path) else 0} bytes)"
|
f"({os.path.getsize(output_path) if os.path.exists(output_path) else 0} bytes)"
|
||||||
@@ -666,9 +655,7 @@ def download_video_ytdlp(
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(
|
logging.error(f"❌ yt-dlp download failed for {url}: {type(e).__name__}: {e}")
|
||||||
f"❌ yt-dlp download failed for {url}: {type(e).__name__}: {e}"
|
|
||||||
)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
@@ -677,7 +664,6 @@ def download_video(
|
|||||||
output_path: str,
|
output_path: str,
|
||||||
netscape_cookies_path: str = None,
|
netscape_cookies_path: str = None,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Download a TikTok video via yt-dlp with browser impersonation."""
|
|
||||||
logging.info(f"⬇️ Downloading: {url}")
|
logging.info(f"⬇️ Downloading: {url}")
|
||||||
return download_video_ytdlp(url, output_path, netscape_cookies_path=netscape_cookies_path)
|
return download_video_ytdlp(url, output_path, netscape_cookies_path=netscape_cookies_path)
|
||||||
|
|
||||||
@@ -690,10 +676,6 @@ def upload_video_to_bluesky(
|
|||||||
video_path: str,
|
video_path: str,
|
||||||
video_id: str,
|
video_id: str,
|
||||||
) -> object | None:
|
) -> object | None:
|
||||||
"""
|
|
||||||
Upload a video file to Bluesky as a blob.
|
|
||||||
All exceptions logged as type(e).__name__: e for full visibility.
|
|
||||||
"""
|
|
||||||
size_mb = os.path.getsize(video_path) / 1024 / 1024
|
size_mb = os.path.getsize(video_path) / 1024 / 1024
|
||||||
logging.info(f"⬆️ Uploading to Bluesky ({size_mb:.1f} MB)...")
|
logging.info(f"⬆️ Uploading to Bluesky ({size_mb:.1f} MB)...")
|
||||||
|
|
||||||
@@ -738,19 +720,12 @@ def post_video_to_bluesky(
|
|||||||
langs: list[str],
|
langs: list[str],
|
||||||
video_id: str,
|
video_id: str,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Create a Bluesky post embedding the uploaded video blob."""
|
|
||||||
from atproto import models
|
from atproto import models
|
||||||
|
|
||||||
try:
|
try:
|
||||||
video_embed = models.AppBskyEmbedVideo.Main(video=blob)
|
video_embed = models.AppBskyEmbedVideo.Main(video=blob)
|
||||||
client.send_post(
|
client.send_post(text=caption, embed=video_embed, langs=langs)
|
||||||
text=caption,
|
|
||||||
embed=video_embed,
|
|
||||||
langs=langs,
|
|
||||||
)
|
|
||||||
logging.info(f"✅ Posted video {video_id} to Bluesky.")
|
logging.info(f"✅ Posted video {video_id} to Bluesky.")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(
|
logging.error(
|
||||||
f"❌ Failed to post video {video_id} to Bluesky: "
|
f"❌ Failed to post video {video_id} to Bluesky: "
|
||||||
@@ -763,7 +738,6 @@ def post_video_to_bluesky(
|
|||||||
# TikTok scraping — Playwright
|
# TikTok scraping — Playwright
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def dismiss_overlays(page) -> None:
|
def dismiss_overlays(page) -> None:
|
||||||
"""Try to dismiss cookie banners and modal overlays."""
|
|
||||||
all_sels = TIKTOK_COOKIE_MODAL_SELS + TIKTOK_BANNER_SELS
|
all_sels = TIKTOK_COOKIE_MODAL_SELS + TIKTOK_BANNER_SELS
|
||||||
for sel in all_sels:
|
for sel in all_sels:
|
||||||
try:
|
try:
|
||||||
@@ -777,10 +751,7 @@ def dismiss_overlays(page) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def _run_playwright_scrape_loop(page, profile_url: str, limit: int) -> list[dict]:
|
def _run_playwright_scrape_loop(page, profile_url: str, limit: int) -> list[dict]:
|
||||||
"""
|
"""Inner scraping loop — shared by stealth and no-stealth paths."""
|
||||||
Inner scraping loop shared by both the stealth and no-stealth paths.
|
|
||||||
Returns a list of video dicts.
|
|
||||||
"""
|
|
||||||
videos = []
|
videos = []
|
||||||
|
|
||||||
for attempt in range(1, PLAYWRIGHT_MAX_RELOADS + 1):
|
for attempt in range(1, PLAYWRIGHT_MAX_RELOADS + 1):
|
||||||
@@ -798,8 +769,7 @@ def _run_playwright_scrape_loop(page, profile_url: str, limit: int) -> list[dict
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
page.wait_for_selector(
|
page.wait_for_selector(
|
||||||
TIKTOK_VIDEO_GRID_SEL,
|
TIKTOK_VIDEO_GRID_SEL, timeout=PLAYWRIGHT_TIMEOUT_MS
|
||||||
timeout=PLAYWRIGHT_TIMEOUT_MS,
|
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
@@ -844,8 +814,7 @@ def _run_playwright_scrape_loop(page, profile_url: str, limit: int) -> list[dict
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"⚠️ Playwright attempt {attempt} error: "
|
f"⚠️ Playwright attempt {attempt} error: {type(e).__name__}: {e}"
|
||||||
f"{type(e).__name__}: {e}"
|
|
||||||
)
|
)
|
||||||
ts = int(time.time())
|
ts = int(time.time())
|
||||||
try:
|
try:
|
||||||
@@ -865,10 +834,10 @@ def scrape_tiktok_profile_playwright(
|
|||||||
"""
|
"""
|
||||||
Scrape the most recent video URLs from a TikTok profile page using Playwright.
|
Scrape the most recent video URLs from a TikTok profile page using Playwright.
|
||||||
|
|
||||||
Stealth handling:
|
Stealth strategy:
|
||||||
v1.x → stealth_sync(page) after new_page()
|
v1.x → stealth_sync(page) after new_page() — works reliably
|
||||||
v2.x → Stealth() used as context manager; page created inside it
|
v2.x → skipped entirely; v2.0.x API is unstable across patch versions.
|
||||||
none → plain page, no stealth
|
Browser launch args provide equivalent bot-detection evasion.
|
||||||
"""
|
"""
|
||||||
profile_url = f"https://www.tiktok.com/@{handle}"
|
profile_url = f"https://www.tiktok.com/@{handle}"
|
||||||
logging.info(f"🕷️ Scraping TikTok profile: {profile_url}")
|
logging.info(f"🕷️ Scraping TikTok profile: {profile_url}")
|
||||||
@@ -883,6 +852,8 @@ def scrape_tiktok_profile_playwright(
|
|||||||
"--disable-blink-features=AutomationControlled",
|
"--disable-blink-features=AutomationControlled",
|
||||||
"--no-sandbox",
|
"--no-sandbox",
|
||||||
"--disable-setuid-sandbox",
|
"--disable-setuid-sandbox",
|
||||||
|
"--disable-web-security",
|
||||||
|
"--disable-features=IsolateOrigins,site-per-process",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
context = browser.new_context(
|
context = browser.new_context(
|
||||||
@@ -893,44 +864,31 @@ def scrape_tiktok_profile_playwright(
|
|||||||
),
|
),
|
||||||
viewport={"width": 1280, "height": 900},
|
viewport={"width": 1280, "height": 900},
|
||||||
locale="es-ES",
|
locale="es-ES",
|
||||||
|
# Mask automation signals at the context level
|
||||||
|
extra_http_headers={
|
||||||
|
"Accept-Language": "es-ES,es;q=0.9,en;q=0.8",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
inject_cookies_into_context(context, cookies)
|
inject_cookies_into_context(context, cookies)
|
||||||
|
|
||||||
# ── Stealth v2.x — page must be created inside the context manager ──
|
|
||||||
if _STEALTH_V2 is True:
|
|
||||||
try:
|
|
||||||
stealth_instance = Stealth()
|
|
||||||
with stealth_instance(context) as stealthy_context:
|
|
||||||
page = stealthy_context.new_page()
|
|
||||||
logging.info("🥷 playwright-stealth v2.x applied (context manager).")
|
|
||||||
videos = _run_playwright_scrape_loop(page, profile_url, limit)
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(
|
|
||||||
f"⚠️ playwright-stealth v2.x failed: {type(e).__name__}: {e}. "
|
|
||||||
f"Retrying without stealth."
|
|
||||||
)
|
|
||||||
# Fall through to no-stealth path below
|
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
videos = _run_playwright_scrape_loop(page, profile_url, limit)
|
|
||||||
|
|
||||||
# ── Stealth v1.x ──────────────────────────────────────────────────
|
# Apply stealth v1.x if available; skip v2.x entirely
|
||||||
elif _STEALTH_V2 is False:
|
if _STEALTH_SYNC is not None:
|
||||||
page = context.new_page()
|
|
||||||
try:
|
try:
|
||||||
stealth_sync(page)
|
_STEALTH_SYNC(page)
|
||||||
logging.info("🥷 playwright-stealth v1.x applied (stealth_sync).")
|
logging.info("🥷 playwright-stealth v1.x applied.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"⚠️ playwright-stealth v1.x failed: {type(e).__name__}: {e}. "
|
f"⚠️ playwright-stealth v1.x failed: {type(e).__name__}: {e}. "
|
||||||
f"Continuing without stealth."
|
f"Continuing without stealth."
|
||||||
)
|
)
|
||||||
videos = _run_playwright_scrape_loop(page, profile_url, limit)
|
|
||||||
|
|
||||||
# ── No stealth available ──────────────────────────────────────────
|
|
||||||
else:
|
else:
|
||||||
logging.warning("⚠️ playwright-stealth not installed. Skipping stealth.")
|
logging.info(
|
||||||
page = context.new_page()
|
"ℹ️ playwright-stealth v2.x detected — skipping (unstable API). "
|
||||||
|
"Using browser launch args for bot-detection evasion."
|
||||||
|
)
|
||||||
|
|
||||||
videos = _run_playwright_scrape_loop(page, profile_url, limit)
|
videos = _run_playwright_scrape_loop(page, profile_url, limit)
|
||||||
|
|
||||||
if not videos:
|
if not videos:
|
||||||
@@ -986,7 +944,7 @@ def scrape_tiktok_profile_ytdlp(
|
|||||||
}
|
}
|
||||||
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
|
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
|
||||||
ydl_opts["cookiefile"] = netscape_cookies_path
|
ydl_opts["cookiefile"] = netscape_cookies_path
|
||||||
if impersonate:
|
if impersonate is not None:
|
||||||
ydl_opts["impersonate"] = impersonate
|
ydl_opts["impersonate"] = impersonate
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -1022,9 +980,7 @@ def scrape_tiktok_profile_ytdlp(
|
|||||||
return videos[:limit]
|
return videos[:limit]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(
|
logging.error(f"❌ yt-dlp profile scrape failed: {type(e).__name__}: {e}")
|
||||||
f"❌ yt-dlp profile scrape failed: {type(e).__name__}: {e}"
|
|
||||||
)
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
@@ -1032,17 +988,14 @@ def scrape_tiktok_profile_ytdlp(
|
|||||||
# Caption builder
|
# Caption builder
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
def build_caption(video_info: dict, tiktok_handle: str, max_len: int = 290) -> str:
|
def build_caption(video_info: dict, tiktok_handle: str, max_len: int = 290) -> str:
|
||||||
"""Build a Bluesky post caption from video metadata."""
|
|
||||||
desc = (video_info.get("description") or "").strip()
|
desc = (video_info.get("description") or "").strip()
|
||||||
url = video_info.get("url", "")
|
url = video_info.get("url", "")
|
||||||
|
|
||||||
if desc:
|
if desc:
|
||||||
url_len = len(url) + 1
|
url_len = len(url) + 1
|
||||||
max_desc = max_len - url_len
|
max_desc = max_len - url_len
|
||||||
if len(desc) > max_desc:
|
if len(desc) > max_desc:
|
||||||
desc = desc[: max_desc - 1] + "…"
|
desc = desc[: max_desc - 1] + "…"
|
||||||
return f"{desc}\n{url}"
|
return f"{desc}\n{url}"
|
||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
@@ -1059,10 +1012,6 @@ def process_videos(
|
|||||||
max_age_days: int,
|
max_age_days: int,
|
||||||
video_max_size_bytes: int,
|
video_max_size_bytes: int,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""
|
|
||||||
Download, compress, upload and post each new video.
|
|
||||||
Returns the count of successfully posted videos.
|
|
||||||
"""
|
|
||||||
posted_count = 0
|
posted_count = 0
|
||||||
now = arrow.utcnow()
|
now = arrow.utcnow()
|
||||||
|
|
||||||
@@ -1096,8 +1045,7 @@ def process_videos(
|
|||||||
|
|
||||||
# 1. Download
|
# 1. Download
|
||||||
ok = download_video(
|
ok = download_video(
|
||||||
video_url,
|
video_url, raw_path,
|
||||||
raw_path,
|
|
||||||
netscape_cookies_path=netscape_cookies_path,
|
netscape_cookies_path=netscape_cookies_path,
|
||||||
)
|
)
|
||||||
if not ok:
|
if not ok:
|
||||||
@@ -1105,11 +1053,7 @@ def process_videos(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# 2. Compress
|
# 2. Compress
|
||||||
ok = compress_video(
|
ok = compress_video(raw_path, comp_path, max_size_bytes=video_max_size_bytes)
|
||||||
raw_path,
|
|
||||||
comp_path,
|
|
||||||
max_size_bytes=video_max_size_bytes,
|
|
||||||
)
|
|
||||||
if not ok:
|
if not ok:
|
||||||
logging.error(f"❌ Compression failed for {video_id}. Skipping.")
|
logging.error(f"❌ Compression failed for {video_id}. Skipping.")
|
||||||
continue
|
continue
|
||||||
@@ -1142,25 +1086,19 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser.add_argument("--bsky-handle", required=True)
|
parser.add_argument("--bsky-handle", required=True)
|
||||||
parser.add_argument("--bsky-app-password", required=True)
|
parser.add_argument("--bsky-app-password", required=True)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--bsky-base-url",
|
"--bsky-base-url", default=DEFAULT_BSKY_BASE_URL,
|
||||||
default=DEFAULT_BSKY_BASE_URL,
|
|
||||||
help=f"Bluesky PDS base URL (default: {DEFAULT_BSKY_BASE_URL})",
|
help=f"Bluesky PDS base URL (default: {DEFAULT_BSKY_BASE_URL})",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--bsky-langs",
|
"--bsky-langs", nargs="+", default=DEFAULT_BSKY_LANGS,
|
||||||
nargs="+",
|
|
||||||
default=DEFAULT_BSKY_LANGS,
|
|
||||||
help="BCP-47 language tags for posts (default: es)",
|
help="BCP-47 language tags for posts (default: es)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--cookies-path",
|
"--cookies-path", default=TIKTOK_COOKIES_PATH,
|
||||||
default=TIKTOK_COOKIES_PATH,
|
|
||||||
help=f"Path to TikTok cookies JSON (default: {TIKTOK_COOKIES_PATH})",
|
help=f"Path to TikTok cookies JSON (default: {TIKTOK_COOKIES_PATH})",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--max-age-days",
|
"--max-age-days", type=int, default=VIDEO_MAX_AGE_DAYS,
|
||||||
type=int,
|
|
||||||
default=VIDEO_MAX_AGE_DAYS,
|
|
||||||
help=f"Skip videos older than N days (default: {VIDEO_MAX_AGE_DAYS})",
|
help=f"Skip videos older than N days (default: {VIDEO_MAX_AGE_DAYS})",
|
||||||
)
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
@@ -1184,13 +1122,7 @@ def main():
|
|||||||
logging.info("=" * 60)
|
logging.info("=" * 60)
|
||||||
|
|
||||||
state = load_state()
|
state = load_state()
|
||||||
|
client = connect_bluesky(args.bsky_handle, args.bsky_app_password, args.bsky_base_url)
|
||||||
# Connect to Bluesky
|
|
||||||
client = connect_bluesky(
|
|
||||||
args.bsky_handle,
|
|
||||||
args.bsky_app_password,
|
|
||||||
args.bsky_base_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert JSON cookies → Netscape format once for all yt-dlp calls
|
# Convert JSON cookies → Netscape format once for all yt-dlp calls
|
||||||
netscape_cookies_path = convert_json_cookies_to_netscape(args.cookies_path)
|
netscape_cookies_path = convert_json_cookies_to_netscape(args.cookies_path)
|
||||||
@@ -1207,9 +1139,7 @@ def main():
|
|||||||
cookies = load_cookies_from_file(args.cookies_path)
|
cookies = load_cookies_from_file(args.cookies_path)
|
||||||
|
|
||||||
videos = scrape_tiktok_profile_playwright(
|
videos = scrape_tiktok_profile_playwright(
|
||||||
args.tiktok_handle,
|
args.tiktok_handle, cookies, limit=SCRAPE_VIDEO_LIMIT,
|
||||||
cookies,
|
|
||||||
limit=SCRAPE_VIDEO_LIMIT,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not videos:
|
if not videos:
|
||||||
@@ -1248,7 +1178,6 @@ def main():
|
|||||||
logging.info("=" * 60)
|
logging.info("=" * 60)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Always clean up the temporary Netscape cookie file
|
|
||||||
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
|
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
|
||||||
try:
|
try:
|
||||||
os.remove(netscape_cookies_path)
|
os.remove(netscape_cookies_path)
|
||||||
|
|||||||
Reference in New Issue
Block a user