Added all
This commit is contained in:
320
tiktok2bsky.py
320
tiktok2bsky.py
@@ -8,9 +8,9 @@ them to a Bluesky account.
|
||||
Usage:
|
||||
python tiktok2bsky.py \
|
||||
--tiktok-handle jijantesfc \
|
||||
--bsky-handle jijantesfc.eurosky.social \
|
||||
--bsky-handle jijantesfc.bsky.social \
|
||||
--bsky-app-password xxxx-xxxx-xxxx-xxxx \
|
||||
--bsky-base-url https://eurosky.social \
|
||||
--bsky-base-url https://bsky.social \
|
||||
--bsky-langs es \
|
||||
--cookies-path tiktok_cookies.json
|
||||
"""
|
||||
@@ -33,6 +33,7 @@ import httpx
|
||||
from atproto import Client
|
||||
from dotenv import load_dotenv
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# playwright-stealth 1.x uses stealth_sync, 2.x uses Stealth class
|
||||
try:
|
||||
from playwright_stealth import stealth_sync
|
||||
@@ -113,6 +114,7 @@ TIKTOK_COOKIE_MODAL_SELS = [
|
||||
TIKTOK_GRID_ERROR_SEL = '[data-e2e="user-post-item-list-error"]'
|
||||
TIKTOK_REFRESH_BTN_SEL = 'button:has-text("Actualizar"), button:has-text("Refresh")'
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# State management
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -160,6 +162,7 @@ def mark_as_posted(video_id: str, state: dict, meta: dict = None):
|
||||
}
|
||||
save_state(state)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Cookie helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -203,64 +206,215 @@ def inject_cookies_into_context(context, cookies: list):
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ Could not inject cookies: {e}")
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Bluesky error classification helpers (ported from twitter2bsky.py)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
def is_rate_limited_error(error_obj) -> bool:
    """Return True when *error_obj* looks like an HTTP 429 / rate-limit failure.

    The decision is a heuristic over the lower-cased ``repr(error_obj)``.

    Args:
        error_obj: Any exception (or other object) whose repr should be
            inspected.

    Returns:
        True if the repr contains a standalone ``429`` or one of the
        rate-limit phrases, otherwise False.
    """
    import re  # local import keeps this helper self-contained

    text = repr(error_obj).lower()

    # Match 429 only when it is not part of a longer number; a plain
    # substring test also fires on timestamps, content lengths or ids that
    # merely contain the digits "429".
    if re.search(r"(?<!\d)429(?!\d)", text):
        return True

    phrases = ("ratelimitexceeded", "too many requests", "rate limit")
    return any(phrase in text for phrase in phrases)
|
||||
|
||||
|
||||
def is_auth_error(error_obj) -> bool:
    """Return True when *error_obj* looks like an authentication failure.

    The decision is a heuristic over the lower-cased ``repr(error_obj)``.

    Args:
        error_obj: Any exception (or other object) whose repr should be
            inspected.

    Returns:
        True if the repr contains a standalone ``401`` / ``403`` or one of
        the known authentication-failure markers, otherwise False.
    """
    import re  # local import keeps this helper self-contained

    text = repr(error_obj).lower()

    # Status codes must stand alone: a plain substring test would also match
    # digits buried in header values (e.g. a reset timestamp such as
    # 1740312345 contains "403"), turning a retryable failure into a
    # "bad credentials" verdict.
    if re.search(r"(?<!\d)(?:401|403)(?!\d)", text):
        return True

    markers = (
        "invalid identifier or password",
        "authenticationrequired",
        "invalidtoken",
    )
    return any(marker in text for marker in markers)
|
||||
|
||||
|
||||
def is_network_error(error_obj) -> bool:
    """Return True when *error_obj* looks like a network-level failure.

    The decision is a heuristic over ``repr(error_obj)`` (case-sensitive for
    the exception class names).

    Args:
        error_obj: Any exception (or other object) whose repr should be
            inspected.

    Returns:
        True if the repr names one of the known connection/timeout exception
        classes or contains a standalone 502/503/504 status code.
    """
    import re  # local import keeps this helper self-contained

    text = repr(error_obj)

    class_names = (
        "ConnectError",
        "RemoteProtocolError",
        "ReadTimeout",
        "WriteTimeout",
        "TimeoutException",
        "ConnectionResetError",
    )
    if any(name in text for name in class_names):
        return True

    # Gateway status codes must not be part of a longer number, otherwise
    # ids or timestamps containing those digits would be misclassified.
    return re.search(r"(?<!\d)(?:502|503|504)(?!\d)", text) is not None
|
||||
|
||||
|
||||
def is_transient_error(error_obj) -> bool:
    """Return True when *error_obj* looks like a temporary, retryable failure.

    The decision is a heuristic over ``repr(error_obj)`` (case-sensitive for
    the exception class names).

    Args:
        error_obj: Any exception (or other object) whose repr should be
            inspected.

    Returns:
        True if the repr names one of the known timeout/protocol exception
        classes or contains a standalone 502/503/504 status code.
    """
    import re  # local import keeps this helper self-contained

    text = repr(error_obj)

    class_names = (
        "InvokeTimeoutError",
        "ReadTimeout",
        "WriteTimeout",
        "TimeoutException",
        "RemoteProtocolError",
        "ConnectError",
    )
    if any(name in text for name in class_names):
        return True

    # Gateway status codes must not be part of a longer number, otherwise
    # ids or timestamps containing those digits would be misclassified.
    return re.search(r"(?<!\d)(?:502|503|504)(?!\d)", text) is not None
|
||||
|
||||
|
||||
def get_rate_limit_wait_seconds(error_obj, default_delay: float) -> float:
    """Derive a bounded wait time (seconds) from rate-limit response headers.

    Headers are consulted in priority order: ``retry-after``,
    ``x-ratelimit-after`` (both plain second counts) and ``ratelimit-reset``
    (a unix timestamp). They are looked up case-insensitively on
    ``error_obj.headers`` and, failing that, on ``error_obj.response.headers``.
    If no usable header object is available, the same headers are searched
    for inside ``repr(error_obj)``.

    Args:
        error_obj: The exception raised by the failed request.
        default_delay: Value returned when no header can be parsed; also the
            lower bound applied to a ``ratelimit-reset`` based wait.

    Returns:
        A wait time capped at ``BSKY_LOGIN_MAX_DELAY``, or ``default_delay``
        unchanged when nothing could be parsed.
    """
    # (header name, value is a unix timestamp) in priority order.
    header_specs = (
        ("retry-after", False),
        ("x-ratelimit-after", False),
        ("ratelimit-reset", True),
    )
    now_ts = int(time.time())

    def _bounded_wait(raw_value, is_timestamp: bool) -> float:
        """Convert one raw header value into a capped wait time."""
        value = int(raw_value)
        if is_timestamp:
            wait = max(value - now_ts + 1, default_delay)
        else:
            wait = max(value, 1)
        return min(wait, BSKY_LOGIN_MAX_DELAY)

    raw_headers = getattr(error_obj, "headers", None)
    if not raw_headers:
        # NOTE(review): presumably SDK exceptions expose the HTTP response
        # (and its headers) as `.response` — confirm against the SDK in use.
        raw_headers = getattr(
            getattr(error_obj, "response", None), "headers", None
        )

    # Normalise header names so any casing sent by the server is accepted.
    try:
        headers = {
            str(name).lower(): value
            for name, value in (raw_headers or {}).items()
        }
    except (AttributeError, TypeError):
        headers = {}

    for name, is_timestamp in header_specs:
        raw_value = headers.get(name)
        if not raw_value:
            continue
        try:
            return _bounded_wait(raw_value, is_timestamp)
        except (TypeError, ValueError):
            # An unparsable value must not prevent the remaining headers
            # from being tried.
            continue

    # repr() fallback — parse headers embedded in the exception string.
    text = repr(error_obj)
    for name, is_timestamp in header_specs:
        match = re.search(
            rf"'{re.escape(name)}':\s*'(\d+)'", text, re.IGNORECASE
        )
        if match:
            return _bounded_wait(match.group(1), is_timestamp)

    return default_delay
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Bluesky helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
def bsky_login(client: Client, handle: str, password: str,
               base_url: str = DEFAULT_BSKY_BASE_URL) -> bool:
    """
    Authenticate *client* against the AT Protocol PDS at *base_url*.

    The passed-in client is re-initialised with the normalised base URL
    before logging in, so callers keep using the same object afterwards.
    Login is attempted up to ``BSKY_LOGIN_MAX_RETRIES`` times:

    * authentication errors abort immediately (retrying cannot help);
    * rate-limit errors wait for the server-advertised delay plus jitter;
    * network, transient and unexpected errors use a linear backoff
      plus jitter.

    Args:
        client: The atproto client instance to (re)configure and log in.
        handle: Bluesky handle used as the login identifier.
        password: Bluesky app password.
        base_url: PDS base URL; falls back to ``DEFAULT_BSKY_BASE_URL`` when
            empty.

    Returns:
        True on successful login, False once login is given up.
    """
    normalized_base_url = (base_url or DEFAULT_BSKY_BASE_URL).strip().rstrip("/")
    logging.info(f"🔐 Connecting Bluesky client via base URL: {normalized_base_url}")

    # Re-initialise the client so the base URL is set at construction time.
    # NOTE(review): per the original author, assigning client.base_url after
    # construction does not reliably take effect — confirm against the SDK.
    client.__init__(base_url=normalized_base_url)

    def _linear_backoff(attempt_no: int) -> float:
        """Linear backoff capped at the max delay, plus random jitter."""
        return min(
            BSKY_LOGIN_BASE_DELAY * attempt_no,
            BSKY_LOGIN_MAX_DELAY,
        ) + random.uniform(0, BSKY_LOGIN_JITTER_MAX)

    for attempt in range(1, BSKY_LOGIN_MAX_RETRIES + 1):
        try:
            logging.info(
                f"🔐 Bluesky login attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES} "
                f"for {handle}"
            )
            client.login(handle, password)
            logging.info(f"✅ Bluesky login successful as {handle}")
            return True

        except Exception as e:
            retries_left = attempt < BSKY_LOGIN_MAX_RETRIES

            # ── 401 / auth errors — no point retrying ─────────────────
            if is_auth_error(e):
                logging.error(
                    f"❌ Bluesky login failed: invalid handle or app password.\n"
                    f" Handle : {handle}\n"
                    f" PDS : {normalized_base_url}\n"
                    f" Fix : regenerate app password at "
                    f"https://bsky.app/settings/app-passwords\n"
                    f" Detail : {repr(e)}"
                )
                return False

            # ── Rate limit ─────────────────────────────────────────────
            if is_rate_limited_error(e):
                if retries_left:
                    wait = get_rate_limit_wait_seconds(
                        e, default_delay=BSKY_LOGIN_BASE_DELAY
                    )
                    wait += random.uniform(0, BSKY_LOGIN_JITTER_MAX)
                    logging.warning(
                        f"⏳ Bluesky login rate-limited "
                        f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}). "
                        f"Retrying in {wait:.1f}s."
                    )
                    time.sleep(wait)
                    continue
                logging.error(
                    "❌ Exhausted Bluesky login retries due to rate limiting."
                )
                return False

            # ── Transient / network errors ─────────────────────────────
            if is_network_error(e) or is_transient_error(e):
                if retries_left:
                    wait = _linear_backoff(attempt)
                    logging.warning(
                        f"⏳ Transient login failure "
                        f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}). "
                        f"Retrying in {wait:.1f}s."
                    )
                    time.sleep(wait)
                    continue
                logging.error(
                    "❌ Exhausted Bluesky login retries after "
                    "transient/network errors."
                )
                return False

            # ── Unexpected error — retry with backoff ──────────────────
            if retries_left:
                wait = _linear_backoff(attempt)
                logging.warning(
                    f"⏳ Unexpected login error "
                    f"(attempt {attempt}/{BSKY_LOGIN_MAX_RETRIES}): "
                    f"{repr(e)}. Retrying in {wait:.1f}s."
                )
                time.sleep(wait)
                continue

            logging.error(
                f"❌ All Bluesky login attempts failed. Last error: {repr(e)}"
            )
            return False

    # Only reachable when BSKY_LOGIN_MAX_RETRIES is less than 1.
    return False
|
||||
|
||||
|
||||
def bsky_get_recent_post_urls(client: Client, handle: str,
|
||||
limit: int = 50) -> set:
|
||||
"""Return a set of URLs recently posted to Bluesky (to avoid duplicates)."""
|
||||
@@ -294,12 +448,12 @@ def bsky_upload_blob_with_retry(client: Client, data: bytes,
|
||||
)
|
||||
return resp.blob
|
||||
except Exception as e:
|
||||
err = str(e)
|
||||
is_rate_limit = "429" in err or "RateLimitExceeded" in err
|
||||
is_rate_limit = is_rate_limited_error(e)
|
||||
|
||||
if attempt == BSKY_UPLOAD_MAX_RETRIES:
|
||||
logging.error(
|
||||
f"❌ Blob upload failed after {BSKY_UPLOAD_MAX_RETRIES} attempts: {e}"
|
||||
f"❌ Blob upload failed after "
|
||||
f"{BSKY_UPLOAD_MAX_RETRIES} attempts: {e}"
|
||||
)
|
||||
raise
|
||||
|
||||
@@ -309,7 +463,10 @@ def bsky_upload_blob_with_retry(client: Client, data: bytes,
|
||||
BSKY_UPLOAD_MAX_DELAY,
|
||||
)
|
||||
if is_rate_limit:
|
||||
delay = max(delay, 60.0)
|
||||
delay = max(
|
||||
get_rate_limit_wait_seconds(e, default_delay=delay),
|
||||
60.0,
|
||||
)
|
||||
|
||||
logging.warning(
|
||||
f"⚠️ Blob upload attempt {attempt} failed: {e}. "
|
||||
@@ -332,12 +489,12 @@ def bsky_create_post_with_retry(client: Client, text: str,
|
||||
logging.info(f"✅ Post created on attempt {attempt}.")
|
||||
return True
|
||||
except Exception as e:
|
||||
err = str(e)
|
||||
is_rate_limit = "429" in err or "RateLimitExceeded" in err
|
||||
is_rate_limit = is_rate_limited_error(e)
|
||||
|
||||
if attempt == BSKY_UPLOAD_MAX_RETRIES:
|
||||
logging.error(
|
||||
f"❌ Post creation failed after {BSKY_UPLOAD_MAX_RETRIES} attempts: {e}"
|
||||
f"❌ Post creation failed after "
|
||||
f"{BSKY_UPLOAD_MAX_RETRIES} attempts: {e}"
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -347,7 +504,10 @@ def bsky_create_post_with_retry(client: Client, text: str,
|
||||
BSKY_UPLOAD_MAX_DELAY,
|
||||
)
|
||||
if is_rate_limit:
|
||||
delay = max(delay, 60.0)
|
||||
delay = max(
|
||||
get_rate_limit_wait_seconds(e, default_delay=delay),
|
||||
60.0,
|
||||
)
|
||||
|
||||
logging.warning(
|
||||
f"⚠️ Post creation attempt {attempt} failed: {e}. "
|
||||
@@ -357,6 +517,7 @@ def bsky_create_post_with_retry(client: Client, text: str,
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Video processing helpers
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -523,15 +684,21 @@ def _write_netscape_cookies(cookies: list) -> str | None:
|
||||
flag = "TRUE" if domain.startswith(".") else "FALSE"
|
||||
path_val = c.get("path", "/")
|
||||
secure = "TRUE" if c.get("secure") else "FALSE"
|
||||
exp = int(c.get("expirationDate", 0) or c.get("expires", 0) or 0)
|
||||
exp = int(
|
||||
c.get("expirationDate", 0) or c.get("expires", 0) or 0
|
||||
)
|
||||
name = c.get("name", "")
|
||||
value = c.get("value", "")
|
||||
f.write(f"{domain}\t{flag}\t{path_val}\t{secure}\t{exp}\t{name}\t{value}\n")
|
||||
f.write(
|
||||
f"{domain}\t{flag}\t{path_val}\t{secure}\t"
|
||||
f"{exp}\t{name}\t{value}\n"
|
||||
)
|
||||
return path
|
||||
except Exception as e:
|
||||
logging.warning(f"⚠️ Could not write Netscape cookie file: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# TikTok scraping via Playwright
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -599,7 +766,6 @@ def scrape_tiktoks_via_playwright(handle: str) -> list:
|
||||
|
||||
page = context.new_page()
|
||||
|
||||
# Stealth mode
|
||||
# Stealth mode — compatible with both v1.x and v2.x
|
||||
if _STEALTH_V2:
|
||||
Stealth().apply_stealth_sync(page)
|
||||
@@ -680,8 +846,6 @@ def scrape_tiktoks_via_playwright(handle: str) -> list:
|
||||
items = page.locator(TIKTOK_VIDEO_ITEM_SEL).all()
|
||||
logging.info(f"📋 Found {len(items)} video items in grid.")
|
||||
|
||||
cutoff = arrow.utcnow().shift(days=-VIDEO_MAX_AGE_DAYS)
|
||||
|
||||
for item in items[:SCRAPE_VIDEO_LIMIT]:
|
||||
try:
|
||||
# Get the link
|
||||
@@ -705,7 +869,9 @@ def scrape_tiktoks_via_playwright(handle: str) -> list:
|
||||
try:
|
||||
desc = item.get_attribute("aria-label") or ""
|
||||
if not desc:
|
||||
desc_el = item.locator('[class*="desc"], [class*="title"]').first
|
||||
desc_el = item.locator(
|
||||
'[class*="desc"], [class*="title"]'
|
||||
).first
|
||||
desc = desc_el.inner_text(timeout=1000).strip()
|
||||
except Exception:
|
||||
pass
|
||||
@@ -727,6 +893,7 @@ def scrape_tiktoks_via_playwright(handle: str) -> list:
|
||||
logging.info(f"✅ Scraped {len(videos)} videos from @{handle}.")
|
||||
return videos
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Core: process a single TikTok video → post to Bluesky
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -825,6 +992,7 @@ def process_tiktok(video: dict, client: Client,
|
||||
logging.error(f"❌ Failed to post video {video_id} to Bluesky.")
|
||||
return False
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Entry point
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -836,25 +1004,42 @@ def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="TikTok → Bluesky cross-poster"
|
||||
)
|
||||
parser.add_argument("--tiktok-handle", required=True,
|
||||
help="TikTok handle to scrape (without @)")
|
||||
parser.add_argument("--bsky-handle", required=True,
|
||||
help="Bluesky handle (e.g. user.eurosky.social)")
|
||||
parser.add_argument("--bsky-app-password", required=True,
|
||||
help="Bluesky app password (not account password)")
|
||||
parser.add_argument("--bsky-base-url", default=DEFAULT_BSKY_BASE_URL,
|
||||
help=f"Bluesky PDS base URL (default: {DEFAULT_BSKY_BASE_URL})")
|
||||
parser.add_argument("--bsky-langs", nargs="+", default=DEFAULT_BSKY_LANGS,
|
||||
help="Post language codes (default: es)")
|
||||
parser.add_argument("--cookies-path", default=TIKTOK_COOKIES_PATH,
|
||||
help="Path to TikTok cookies JSON file")
|
||||
parser.add_argument(
|
||||
"--tiktok-handle", required=True,
|
||||
help="TikTok handle to scrape (without @)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bsky-handle", required=True,
|
||||
help="Bluesky handle (e.g. user.bsky.social)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bsky-app-password", required=True,
|
||||
help="Bluesky app password (not account password)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bsky-base-url", default=DEFAULT_BSKY_BASE_URL,
|
||||
help=(
|
||||
"Bluesky AT Protocol PDS base URL. "
|
||||
"Always https://bsky.social even for custom-domain users "
|
||||
"(e.g. eurosky.social handles still authenticate via bsky.social). "
|
||||
f"Default: {DEFAULT_BSKY_BASE_URL}"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bsky-langs", nargs="+", default=DEFAULT_BSKY_LANGS,
|
||||
help="Post language codes (default: es)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cookies-path", default=TIKTOK_COOKIES_PATH,
|
||||
help="Path to TikTok cookies JSON file",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Override global cookie path from CLI
|
||||
TIKTOK_COOKIES_PATH = args.cookies_path
|
||||
|
||||
logging.info("=" * 60)
|
||||
logging.info(f"🤖 TikTok→Bluesky bot started")
|
||||
logging.info("🤖 TikTok→Bluesky bot started")
|
||||
logging.info(f" TikTok handle : @{args.tiktok_handle}")
|
||||
logging.info(f" Bluesky handle: {args.bsky_handle}")
|
||||
logging.info(f" Bluesky PDS : {args.bsky_base_url}")
|
||||
@@ -866,12 +1051,17 @@ def main():
|
||||
logging.info("=" * 60)
|
||||
|
||||
state = load_state()
|
||||
|
||||
# Instantiate client — base URL is baked in via bsky_login()
|
||||
client = Client()
|
||||
|
||||
# ── Bluesky login ──────────────────────────────────────────────────
|
||||
if not bsky_login(client, args.bsky_handle,
|
||||
if not bsky_login(
|
||||
client,
|
||||
args.bsky_handle,
|
||||
args.bsky_app_password,
|
||||
args.bsky_base_url):
|
||||
args.bsky_base_url,
|
||||
):
|
||||
logging.error("❌ Cannot proceed without Bluesky login. Exiting.")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user