This commit is contained in:
Guillem Hernandez Sola
2026-05-20 07:16:07 +02:00
parent c613ab3603
commit 6d4cfbd4b5

View File

@@ -78,24 +78,30 @@ VIDEO_MAX_AGE_DAYS = 3
VIDEO_MAX_DURATION_S = 179  # Bluesky hard limit is 180s

# ── Bluesky login retry config (ported from twitter2bsky.py) ─────────────────
# Delays are in seconds (they are passed to time.sleep by the login loop).
BSKY_LOGIN_MAX_RETRIES = 6
BSKY_LOGIN_BASE_DELAY = 15.0
BSKY_LOGIN_MAX_DELAY = 600.0
BSKY_LOGIN_JITTER_MAX = 5.0
BSKY_LOGIN_RATE_LIMIT_DELAY = 90.0  # minimum wait on 429
BSKY_LOGIN_RATE_LIMIT_MAX_DELAY = 600.0  # maximum wait on 429

# ── Bluesky upload retry config ───────────────────────────────────────────────
BSKY_UPLOAD_MAX_RETRIES = 5
BSKY_UPLOAD_BASE_DELAY = 10.0
BSKY_UPLOAD_MAX_DELAY = 120.0
BSKY_UPLOAD_JITTER_MAX = 5.0

# ── Playwright scraping config ────────────────────────────────────────────────
PLAYWRIGHT_TIMEOUT_MS = 30_000
PLAYWRIGHT_SLOW_MO = 50
PLAYWRIGHT_MAX_RELOADS = 3

# ── TikTok selectors ──────────────────────────────────────────────────────────
TIKTOK_VIDEO_GRID_SEL = '[data-e2e="user-post-item-list"]'
TIKTOK_VIDEO_ITEM_SEL = '[data-e2e="user-post-item"]'
TIKTOK_BANNER_SELS = [
@@ -254,6 +260,7 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None:
for c in cookies:
domain = c.get("domain", ".tiktok.com")
<<<<<<< HEAD
# Netscape format requires domain to start with a dot for
# include_subdomains=TRUE to work correctly
include_sub = "TRUE" if domain.startswith(".") else "FALSE"
@@ -262,6 +269,12 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None:
expiry = int(
c.get("expirationDate") or c.get("expires") or 0
)
=======
include_sub = "TRUE" if domain.startswith(".") else "FALSE"
path = c.get("path", "/")
secure = "TRUE" if c.get("secure", False) else "FALSE"
expiry = int(c.get("expirationDate") or c.get("expires") or 0)
>>>>>>> 7cddbd0 (Fixes for today)
name = c.get("name", "")
value = c.get("value", "")
@@ -285,110 +298,168 @@ def convert_json_cookies_to_netscape(json_path: str) -> str | None:
# ─────────────────────────────────────────────────────────────────────────────
<<<<<<< HEAD
# Bluesky error classification helpers
=======
# Bluesky error classification (ported from twitter2bsky.py)
>>>>>>> 7cddbd0 (Fixes for today)
# ─────────────────────────────────────────────────────────────────────────────
def _bsky_error_text(error_obj) -> str:
"""Normalised lowercase repr for pattern matching."""
return repr(error_obj).lower()
def is_rate_limited_error(error_obj) -> bool:
text = repr(error_obj).lower()
text = _bsky_error_text(error_obj)
return (
"429" in text
"429" in text
or "ratelimitexceeded" in text
or "too many requests" in text
or "rate limit" in text
or "rate limit" in text
or "ratelimit" in text
)
def is_auth_error(error_obj) -> bool:
text = repr(error_obj).lower()
text = _bsky_error_text(error_obj)
return (
"401" in text
or "403" in text
"401" in text
or "403" in text
or "invalid identifier" in text
or "invalid password" in text
or "authenticationrequired" in text
or "invalidtoken" in text
or "expiredtoken" in text
or "accounttakedown" in text
or "invalid identifier or password" in text
or "authenticationrequired" in text
or "invalidtoken" in text
)
def is_network_error(error_obj) -> bool:
text = repr(error_obj)
signals = [
"ConnectError",
"RemoteProtocolError",
"ReadTimeout",
"WriteTimeout",
"TimeoutException",
"503",
"502",
"504",
"ConnectionResetError",
"ConnectError", "RemoteProtocolError", "ReadTimeout",
"WriteTimeout", "TimeoutException", "ConnectionResetError",
"503", "502", "504",
]
return any(sig in text for sig in signals)
return any(s in text for s in signals)
def is_transient_error(error_obj) -> bool:
text = repr(error_obj)
signals = [
"InvokeTimeoutError",
"ReadTimeout",
"WriteTimeout",
"TimeoutException",
"RemoteProtocolError",
"ConnectError",
"503",
"502",
"504",
"InvokeTimeoutError", "ReadTimeout", "WriteTimeout",
"TimeoutException", "RemoteProtocolError", "ConnectError",
"503", "502", "504",
]
return any(sig in text for sig in signals)
return any(s in text for s in signals)
def get_rate_limit_wait_seconds(error_obj, default_delay: float) -> float:
    """
    Extract the server-requested wait time from a rate-limit error.

    Checks (in order):
      1. Header mappings on the error — Retry-After, X-RateLimit-After,
         RateLimit-Reset
      2. repr(error_obj) text — same keys embedded as strings
      3. Falls back to default_delay

    Retry-After / X-RateLimit-After values are treated as a number of seconds
    and clamped to [1, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY]. RateLimit-Reset is
    treated as a Unix timestamp; the wait is the time until that timestamp
    plus 2 seconds, never less than default_delay and never more than
    BSKY_LOGIN_RATE_LIMIT_MAX_DELAY.

    Args:
        error_obj: The exception (or any object) describing the failure.
        default_delay: Seconds to wait when no hint can be extracted.

    Returns:
        The number of seconds to wait, as a float.
    """
    now_ts = int(time.time())

    def _bounded(seconds: int) -> float:
        return float(min(max(seconds, 1), BSKY_LOGIN_RATE_LIMIT_MAX_DELAY))

    def _until_reset(reset_ts: int) -> float:
        wait = max(reset_ts - now_ts + 2, default_delay)
        return float(min(wait, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY))

    # ── 1. Live headers object ────────────────────────────────────────────
    # NOTE(review): besides error_obj.headers, also look at
    # error_obj.response.headers — presumably where the Bluesky client keeps
    # response headers on its request errors; confirm against the SDK.
    header_sources = (
        getattr(error_obj, "headers", None),
        getattr(getattr(error_obj, "response", None), "headers", None),
    )
    header_specs = (
        (("retry-after", "Retry-After"), False),
        (("x-ratelimit-after", "X-RateLimit-After"), False),
        (("ratelimit-reset", "RateLimit-Reset"), True),
    )
    for headers in header_sources:
        if not headers:
            continue
        for keys, is_ts in header_specs:
            for key in keys:
                # Best effort per header: a value that cannot be parsed as an
                # integer (or a headers object without .get) must not stop the
                # remaining headers from being checked.
                try:
                    raw = headers.get(key)
                    if not raw:
                        continue
                    val = int(raw)
                except Exception:
                    continue
                return _until_reset(val) if is_ts else _bounded(val)

    # ── 2. repr() string fallback ─────────────────────────────────────────
    text = repr(error_obj)
    for pattern, is_ts in [
        (r"['\"]retry-after['\"]\s*:\s*['\"](\d+)['\"]", False),
        (r"['\"]x-ratelimit-after['\"]\s*:\s*['\"](\d+)['\"]", False),
        (r"['\"]ratelimit-reset['\"]\s*:\s*['\"](\d+)['\"]", True),
        (r"retry.?after[=:\s]+(\d+)", False),
    ]:
        m = re.search(pattern, text, re.IGNORECASE)
        if m:
            val = int(m.group(1))
            return _until_reset(val) if is_ts else _bounded(val)

    # ── 3. Nothing usable found ───────────────────────────────────────────
    return default_delay
# ─────────────────────────────────────────────────────────────────────────────
# Bluesky client — improved login (ported from twitter2bsky.py)
# ─────────────────────────────────────────────────────────────────────────────
def connect_bluesky(handle: str, app_password: str, base_url: str) -> Client:
    """
    Authenticate with Bluesky, retrying up to BSKY_LOGIN_MAX_RETRIES times.

    Retry policy:
      • 429 / rate-limit  → wait the server-requested time (at least
                            BSKY_LOGIN_RATE_LIMIT_DELAY when none is given,
                            at most BSKY_LOGIN_RATE_LIMIT_MAX_DELAY) plus jitter
      • auth errors       → fail immediately (retrying won't help)
      • everything else   → exponential backoff with jitter, capped at
                            BSKY_LOGIN_MAX_DELAY

    Args:
        handle: Bluesky handle to log in as.
        app_password: App password for that handle.
        base_url: Base URL the client is created with.

    Returns:
        The logged-in client.

    Raises:
        Exception: The original exception, re-raised unchanged, when it is
            classified as an authentication error.
        RuntimeError: When every attempt failed; chained to the last error.
    """
    logging.info(f"🔐 Connecting Bluesky client → {base_url}")
    client = Client(base_url=base_url)
    last_error = None

    for attempt in range(1, BSKY_LOGIN_MAX_RETRIES + 1):
        logging.info(
            f"🔐 Bluesky login attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES} "
            f"for {handle}"
        )
        try:
            client.login(handle, app_password)
            # Fetch profile to confirm the session is fully live
            client.me = client.get_profile(handle)
            logging.info(f"✅ Bluesky login successful as {handle}")
            return client
        except Exception as e:
            last_error = e
            err_detail = f"{type(e).__name__}: {e}"

            # ── Auth errors: no point retrying ───────────────────────────
            if is_auth_error(e):
                logging.error(
                    f"❌ Bluesky login auth error (will not retry): {err_detail}"
                )
                raise

            if is_rate_limited_error(e):
                # ── Rate-limited (429) ────────────────────────────────────
                raw_wait = get_rate_limit_wait_seconds(e, BSKY_LOGIN_RATE_LIMIT_DELAY)
                jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX)
                wait = min(raw_wait + jitter, BSKY_LOGIN_RATE_LIMIT_MAX_DELAY)
                logging.warning(
                    f"⏳ Bluesky login rate-limited (attempt {attempt}/"
                    f"{BSKY_LOGIN_MAX_RETRIES}). "
                    f"Waiting {wait:.1f}s (server requested {raw_wait:.0f}s)."
                )
            else:
                # ── Network / transient / unknown errors ──────────────────
                # All three share the same backoff; only the log text differs.
                delay = min(
                    BSKY_LOGIN_BASE_DELAY * (2 ** (attempt - 1)),
                    BSKY_LOGIN_MAX_DELAY,
                )
                jitter = random.uniform(0.0, BSKY_LOGIN_JITTER_MAX)
                wait = delay + jitter
                if is_network_error(e) or is_transient_error(e):
                    logging.warning(
                        f"⚠️ Bluesky login network/transient error "
                        f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}): "
                        f"{err_detail}. Retrying in {wait:.1f}s."
                    )
                else:
                    logging.warning(
                        f"⚠️ Bluesky login failed "
                        f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}): "
                        f"{err_detail}. Retrying in {wait:.1f}s."
                    )

            # No point sleeping after the final attempt.
            if attempt < BSKY_LOGIN_MAX_RETRIES:
                time.sleep(wait)

    logging.error(
        f"❌ Bluesky login failed after {BSKY_LOGIN_MAX_RETRIES} attempts. "
        f"Last error: {type(last_error).__name__}: {last_error}"
    )
    raise RuntimeError(
        f"Bluesky login failed after {BSKY_LOGIN_MAX_RETRIES} attempts: "
        f"{last_error}"
    ) from last_error
# ─────────────────────────────────────────────────────────────────────────────
@@ -463,7 +607,11 @@ def compress_video(
• post-encode size guard — rejects file if still over limit
"""
if max_size_bytes is None:
<<<<<<< HEAD
max_size_bytes = 20 * 1024 * 1024 # fallback
=======
max_size_bytes = 20 * 1024 * 1024
>>>>>>> 7cddbd0 (Fixes for today)
try:
duration = get_video_duration(input_path)
@@ -477,7 +625,10 @@ def compress_video(
trim_to = min(duration, max_duration)
# Target 85% of the size budget to leave headroom for container overhead
target_bits = max_size_bytes * 8 * 0.85
total_kbps = int(target_bits / trim_to / 1000)
audio_kbps = 96
@@ -493,10 +644,13 @@ def compress_video(
"ffmpeg", "-y",
"-i", input_path,
"-t", str(trim_to),
# Scale to 720p max, then pad to even dimensions.
# The pad filter is required because libx264 needs width/height
# divisible by 2. Portrait TikTok videos (9:16) would otherwise
# produce odd widths like 405px and crash the encoder.
"-vf", (
"scale='min(1280,iw)':'min(720,ih)'"
":force_original_aspect_ratio=decrease,"
@@ -504,7 +658,11 @@ def compress_video(
),
"-c:v", "libx264",
"-b:v", f"{video_kbps}k",
<<<<<<< HEAD
"-maxrate", f"{video_kbps}k", # hard ceiling — no burst above target
=======
"-maxrate", f"{video_kbps}k",
>>>>>>> 7cddbd0 (Fixes for today)
"-bufsize", f"{video_kbps * 2}k",
"-c:a", "aac",
"-b:a", f"{audio_kbps}k",
@@ -520,7 +678,10 @@ def compress_video(
final_size = os.path.getsize(output_path)
# Reject if still over the hard limit
if final_size > max_size_bytes:
logging.error(
f"❌ Compressed file still too large: "
@@ -572,7 +733,10 @@ def download_video_ytdlp(
"""
Download a TikTok video using yt-dlp with browser impersonation.
Accepts a Netscape-format cookie file path (not JSON).
Returns True on success, False on failure.
"""
impersonate = get_best_impersonation_target()
@@ -633,7 +797,11 @@ def upload_video_to_bluesky(
) -> object | None:
"""
Upload a video file to Bluesky as a blob.
All exceptions are logged as type(e).__name__: e for full visibility.
"""
size_mb = os.path.getsize(video_path) / 1024 / 1024
logging.info(f"⬆️ Uploading to Bluesky ({size_mb:.1f} MB)...")
@@ -717,6 +885,90 @@ def dismiss_overlays(page) -> None:
pass
def _video_entry_from_link(link) -> dict | None:
    """Build a video dict from a grid item's href, or None if it is not a video link."""
    if not link or "/video/" not in link:
        return None
    vid_match = re.search(r"/video/(\d+)", link)
    if not vid_match:
        return None
    full_url = link if link.startswith("http") else f"https://www.tiktok.com{link}"
    return {
        "video_id": vid_match.group(1),
        "url": full_url,
        "timestamp": None,
    }


def _run_playwright_scrape_loop(page, profile_url: str, limit: int) -> list[dict]:
    """
    Inner scraping loop shared by both the stealth and no-stealth paths.

    Loads the profile page up to PLAYWRIGHT_MAX_RELOADS times and stops at
    the first attempt that yields at least one video link.

    Args:
        page: Playwright page to drive.
        profile_url: URL of the TikTok profile to load.
        limit: Maximum number of grid items to inspect per attempt.

    Returns:
        A list of dicts with keys video_id, url and timestamp (always None
        here); empty if no attempt found any video.
    """
    videos: list[dict] = []

    for attempt in range(1, PLAYWRIGHT_MAX_RELOADS + 1):
        is_last_attempt = attempt == PLAYWRIGHT_MAX_RELOADS
        try:
            logging.info(
                f"🌐 Loading profile (attempt {attempt}/{PLAYWRIGHT_MAX_RELOADS})..."
            )
            page.goto(
                profile_url,
                wait_until="domcontentloaded",
                timeout=PLAYWRIGHT_TIMEOUT_MS,
            )
            time.sleep(3)
            dismiss_overlays(page)

            try:
                page.wait_for_selector(
                    TIKTOK_VIDEO_GRID_SEL,
                    timeout=PLAYWRIGHT_TIMEOUT_MS,
                )
            except Exception:
                # Deliberately ignored: the visibility check below decides
                # whether this attempt can proceed.
                pass

            grid = page.locator(TIKTOK_VIDEO_GRID_SEL).first
            if not grid.is_visible(timeout=5000):
                logging.warning(f"⚠️ Video grid not found on attempt {attempt}.")
                ts = int(time.time())
                try:
                    page.screenshot(path=f"screenshot_no_grid_{attempt}_{ts}.png")
                    logging.info(
                        f"📸 Screenshot saved: screenshot_no_grid_{attempt}_{ts}.png"
                    )
                except Exception:
                    # Screenshots are a debugging aid only.
                    pass
                if not is_last_attempt:
                    time.sleep(3)
                continue

            items = page.locator(TIKTOK_VIDEO_ITEM_SEL).all()
            for item in items[:limit]:
                try:
                    link = item.locator("a").first.get_attribute("href")
                except Exception as e:
                    # One unreadable grid item must not abort the whole scrape.
                    logging.debug(
                        f"Skipping grid item: {type(e).__name__}: {e}"
                    )
                    continue
                entry = _video_entry_from_link(link)
                if entry is not None:
                    videos.append(entry)

            if videos:
                logging.info(f"✅ Playwright scraped {len(videos)} videos.")
                break

        except Exception as e:
            logging.warning(
                f"⚠️ Playwright attempt {attempt} error: "
                f"{type(e).__name__}: {e}"
            )
            ts = int(time.time())
            try:
                page.screenshot(path=f"screenshot_error_{attempt}_{ts}.png")
            except Exception:
                # Screenshots are a debugging aid only.
                pass
            if not is_last_attempt:
                time.sleep(3)

    return videos
def scrape_tiktok_profile_playwright(
handle: str,
cookies: list,
@@ -724,10 +976,18 @@ def scrape_tiktok_profile_playwright(
) -> list[dict]:
"""
Scrape the most recent video URLs from a TikTok profile page using Playwright.
Returns a list of dicts with keys: video_id, url, timestamp.
Stealth handling:
v1.x → stealth_sync(page) after new_page()
v2.x → Stealth() used as a context manager; the page must be created inside it
(not via .apply() or .use_sync() after the fact)
none → plain page, no stealth
"""
profile_url = f"https://www.tiktok.com/@{handle}"
logging.info(f"🕷️ Scraping TikTok profile: {profile_url}")
@@ -756,6 +1016,7 @@ def scrape_tiktok_profile_playwright(
inject_cookies_into_context(context, cookies)
<<<<<<< HEAD
# ── Stealth application ───────────────────────────────────────────
# v1.x: stealth_sync(page) — called after new_page()
# v2.x: context manager on new_page — page must be created inside
@@ -771,11 +1032,16 @@ def scrape_tiktok_profile_playwright(
elif _STEALTH_V2:
# v2.x — use as context manager so the page is created inside it
=======
# ── Stealth v2.x — page must be created inside the context manager ──
if _STEALTH_V2 is True:
>>>>>>> 7cddbd0 (Fixes for today)
try:
stealth_instance = Stealth()
with stealth_instance(context) as stealthy_context:
page = stealthy_context.new_page()
logging.info("🥷 playwright-stealth v2.x applied (context manager).")
<<<<<<< HEAD
# Run the scraping loop inside the context manager scope
for attempt in range(1, PLAYWRIGHT_MAX_RELOADS + 1):
try:
@@ -864,12 +1130,27 @@ def scrape_tiktok_profile_playwright(
else:
# v1.x — create page then apply stealth
=======
videos = _run_playwright_scrape_loop(page, profile_url, limit)
except Exception as e:
logging.warning(
f"⚠️ playwright-stealth v2.x failed: {type(e).__name__}: {e}. "
f"Retrying without stealth."
)
# Fall through to no-stealth path below
page = context.new_page()
videos = _run_playwright_scrape_loop(page, profile_url, limit)
# ── Stealth v1.x ──────────────────────────────────────────────────
elif _STEALTH_V2 is False:
>>>>>>> 7cddbd0 (Fixes for today)
page = context.new_page()
try:
stealth_sync(page)
logging.info("🥷 playwright-stealth v1.x applied (stealth_sync).")
except Exception as e:
logging.warning(
<<<<<<< HEAD
f"⚠️ playwright-stealth v1.x failed: "
f"{type(e).__name__}: {e}. Continuing without stealth."
)
@@ -971,6 +1252,35 @@ def scrape_tiktok_profile_playwright(
pass
# ── Cleanup ───────────────────────────────────────────────────────
=======
f"⚠️ playwright-stealth v1.x failed: {type(e).__name__}: {e}. "
f"Continuing without stealth."
)
videos = _run_playwright_scrape_loop(page, profile_url, limit)
# ── No stealth available ──────────────────────────────────────────
else:
logging.warning("⚠️ playwright-stealth not installed. Skipping stealth.")
page = context.new_page()
videos = _run_playwright_scrape_loop(page, profile_url, limit)
if not videos:
logging.warning(
f"⚠️ Video grid not found after {PLAYWRIGHT_MAX_RELOADS} attempts."
)
ts = int(time.time())
try:
page.screenshot(
path=f"screenshot_no_grid_{PLAYWRIGHT_MAX_RELOADS}_{ts}.png"
)
logging.info(
f"📸 Screenshot saved: "
f"screenshot_no_grid_{PLAYWRIGHT_MAX_RELOADS}_{ts}.png"
)
except Exception:
pass
>>>>>>> 7cddbd0 (Fixes for today)
for obj in (page, context, browser):
try:
if obj:
@@ -992,7 +1302,10 @@ def scrape_tiktok_profile_ytdlp(
"""
Fallback: use yt-dlp to extract the video list from a TikTok profile.
Accepts a Netscape-format cookie file path (not JSON).
Returns a list of dicts with keys: video_id, url, timestamp.
"""
import yt_dlp
@@ -1060,7 +1373,11 @@ def build_caption(video_info: dict, tiktok_handle: str, max_len: int = 290) -> s
url = video_info.get("url", "")
if desc:
<<<<<<< HEAD
url_len = len(url) + 1 # +1 for newline
=======
url_len = len(url) + 1
>>>>>>> 7cddbd0 (Fixes for today)
max_desc = max_len - url_len
if len(desc) > max_desc:
desc = desc[: max_desc - 1] + ""
@@ -1097,7 +1414,10 @@ def process_videos(
logging.info(f"⏭️ Already posted: {video_id}")
continue
# Age filter (only when timestamp is available)
ts = video.get("timestamp")
if ts:
try:
@@ -1150,7 +1470,10 @@ def process_videos(
if ok:
mark_as_posted(video_id, state, meta={"url": video_url})
posted_count += 1
# Brief pause between posts to avoid rate limiting
time.sleep(random.uniform(2.0, 5.0))
return posted_count
@@ -1162,6 +1485,7 @@ def process_videos(
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Cross-post TikTok videos to Bluesky."
<<<<<<< HEAD
)
parser.add_argument(
"--tiktok-handle",
@@ -1200,6 +1524,34 @@ def parse_args() -> argparse.Namespace:
default=VIDEO_MAX_AGE_DAYS,
help=f"Skip videos older than N days (default: {VIDEO_MAX_AGE_DAYS})",
)
=======
)
parser.add_argument("--tiktok-handle", required=True)
parser.add_argument("--bsky-handle", required=True)
parser.add_argument("--bsky-app-password", required=True)
parser.add_argument(
"--bsky-base-url",
default=DEFAULT_BSKY_BASE_URL,
help=f"Bluesky PDS base URL (default: {DEFAULT_BSKY_BASE_URL})",
)
parser.add_argument(
"--bsky-langs",
nargs="+",
default=DEFAULT_BSKY_LANGS,
help="BCP-47 language tags for posts (default: es)",
)
parser.add_argument(
"--cookies-path",
default=TIKTOK_COOKIES_PATH,
help=f"Path to TikTok cookies JSON (default: {TIKTOK_COOKIES_PATH})",
)
parser.add_argument(
"--max-age-days",
type=int,
default=VIDEO_MAX_AGE_DAYS,
help=f"Skip videos older than N days (default: {VIDEO_MAX_AGE_DAYS})",
)
>>>>>>> 7cddbd0 (Fixes for today)
return parser.parse_args()
@@ -1207,7 +1559,10 @@ def main():
load_dotenv()
args = parse_args()
# Resolve the video size limit based on the PDS
video_max_size_bytes = get_video_size_limit(args.bsky_base_url)
logging.info("=" * 60)
@@ -1230,17 +1585,30 @@ def main():
args.bsky_base_url,
)
# Convert JSON cookies → Netscape format once for all yt-dlp calls.
# Playwright uses the JSON cookies directly via inject_cookies_into_context();
# yt-dlp requires the Netscape .txt format.
netscape_cookies_path = convert_json_cookies_to_netscape(args.cookies_path)
if netscape_cookies_path:
logging.info(f"🍪 Netscape cookie file ready: {netscape_cookies_path}")
else:
<<<<<<< HEAD
logging.warning("⚠️ Could not create Netscape cookie file. yt-dlp will run without cookies.")
try:
# Scrape TikTok profile
=======
logging.warning(
"⚠️ Could not create Netscape cookie file. "
"yt-dlp will run without cookies."
)
try:
>>>>>>> 7cddbd0 (Fixes for today)
logging.info(f"🔄 Scraping @{args.tiktok_handle}...")
cookies = load_cookies_from_file(args.cookies_path)
@@ -1290,7 +1658,13 @@ def main():
if netscape_cookies_path and os.path.exists(netscape_cookies_path):
try:
os.remove(netscape_cookies_path)
<<<<<<< HEAD
logging.info(f"🧹 Removed temporary Netscape cookie file: {netscape_cookies_path}")
=======
logging.info(
f"🧹 Removed temporary Netscape cookie file: {netscape_cookies_path}"
)
>>>>>>> 7cddbd0 (Fixes for today)
except Exception as e:
logging.warning(f"⚠️ Could not remove Netscape cookie file: {e}")