Add --bsky-langs es to Jenkins pipeline for specific language tagging
This commit is contained in:
@@ -82,7 +82,8 @@ pipeline {
|
||||
--twitter-handle "$TWITTER_3CAT_HANDLE" \
|
||||
--bsky-handle "$BSKY_3CAT_HANDLE" \
|
||||
--bsky-password "$BSKY_3CAT_APP_PASSWORD" \
|
||||
--bsky-base-url https://eurosky.social
|
||||
--bsky-base-url https://eurosky.social \
|
||||
--bsky-langs ca
|
||||
'''
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,6 +97,7 @@ class _RunCache:
|
||||
self.og_title: dict = {}
|
||||
self.url_resolution: dict = {}
|
||||
self.url_validity: dict = {}
|
||||
self.locale: str = "en-US" # ← ADDED locale cache here
|
||||
|
||||
def clear(self):
|
||||
self.og_title.clear()
|
||||
@@ -113,6 +114,30 @@ def grapheme_len(text):
|
||||
"""Return the grapheme cluster count, matching Bluesky's character counting."""
|
||||
return grapheme.length(text)
|
||||
|
||||
# BCP-47 language tag → sensible locale for Playwright.
# Only bare language codes are listed; tags that already carry a region
# (e.g. "pt-BR") are normalised and passed through by the function below.
_LANG_TO_LOCALE = {
    "ca": "ca-ES",
    "es": "es-ES",
    "en": "en-US",
    "fr": "fr-FR",
    "de": "de-DE",
    "pt": "pt-PT",
    "it": "it-IT",
    "nl": "nl-NL",
    "eu": "eu-ES",
    "gl": "gl-ES",
}


def bsky_langs_to_playwright_locale(bsky_langs):
    """
    Convert the first configured Bluesky language tag to a Playwright locale
    string (e.g. ['ca'] → 'ca-ES').

    Args:
        bsky_langs: Sequence of BCP-47 language tags, or a single
            comma-separated string of tags. Only the first tag is used.

    Returns:
        A locale string:
        - 'en-US' when nothing is configured or the first tag is blank;
        - the mapped locale for a known bare language code ('es' → 'es-ES');
        - a best-effort 'xx-XX' guess for an unknown bare language code
          ('pl' → 'pl-PL');
        - the tag itself with canonical casing when it already carries
          extra subtags ('pt-br' → 'pt-BR', 'zh-hant-tw' → 'zh-Hant-TW').
    """
    if not bsky_langs:
        return "en-US"

    if isinstance(bsky_langs, str):
        # A bare string would otherwise be indexed per character
        # ("ca"[0] == "c"), so treat it as a comma-separated list of tags.
        bsky_langs = bsky_langs.split(",")

    # Accept the POSIX-style "pt_BR" spelling as well as "pt-BR".
    first_tag = bsky_langs[0].strip().replace("_", "-")
    subtags = [part for part in first_tag.split("-") if part]
    if not subtags:
        # Blank or separator-only tag: nothing usable was configured.
        return "en-US"

    language = subtags[0].lower()
    if len(subtags) == 1:
        # Bare language code: use the table, otherwise guess by doubling.
        return _LANG_TO_LOCALE.get(language, f"{language}-{language.upper()}")

    # The tag already names a script and/or region; keep it and only fix the
    # casing (region upper-case, script title-case) instead of appending a
    # second, bogus region.
    normalized = [language]
    for subtag in subtags[1:]:
        if len(subtag) == 2 and subtag.isalpha():
            normalized.append(subtag.upper())
        elif len(subtag) == 4 and subtag.isalpha():
            normalized.append(subtag.title())
        else:
            normalized.append(subtag.lower())
    return "-".join(normalized)
|
||||
|
||||
# --- Custom Classes ---
|
||||
class ScrapedMedia:
|
||||
def __init__(self, url, media_type="photo"):
|
||||
@@ -562,7 +587,7 @@ def should_fetch_og_title(tweet):
|
||||
return False
|
||||
|
||||
|
||||
def fetch_tweet_og_title_text(tweet_url):
|
||||
def fetch_tweet_og_title_text(tweet_url, locale="en-US"):
|
||||
if not tweet_url:
|
||||
return None
|
||||
|
||||
@@ -589,6 +614,7 @@ def fetch_tweet_og_title_text(tweet_url):
|
||||
"Chrome/145.0.7632.6 Safari/537.36"
|
||||
),
|
||||
viewport={"width": 1280, "height": 900},
|
||||
locale=_cache.locale, # ← USE CACHE
|
||||
)
|
||||
page = browser_context.new_page()
|
||||
page.goto(
|
||||
@@ -665,7 +691,7 @@ def resolve_tco_with_httpx(url, http_client):
|
||||
return canonicalize_url(url)
|
||||
|
||||
|
||||
def resolve_tco_with_playwright(url):
|
||||
def resolve_tco_with_playwright(url, locale="en-US"):
|
||||
browser = None
|
||||
browser_context = None
|
||||
page = None
|
||||
@@ -685,6 +711,7 @@ def resolve_tco_with_playwright(url):
|
||||
"Chrome/145.0.7632.6 Safari/537.36"
|
||||
),
|
||||
viewport={"width": 1280, "height": 900},
|
||||
locale=locale,
|
||||
)
|
||||
page = browser_context.new_page()
|
||||
|
||||
@@ -2337,7 +2364,7 @@ def build_video_embed(video_blob, alt_text):
|
||||
|
||||
|
||||
# --- Twitter Scraping ---
|
||||
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
def scrape_tweets_via_playwright(username, password, email, target_handle, locale="en-US"):
|
||||
tweets = []
|
||||
state_file = "twitter_browser_state.json"
|
||||
|
||||
@@ -2372,6 +2399,7 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
user_agent=clean_ua,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
storage_state=state_file,
|
||||
locale=locale,
|
||||
)
|
||||
session_check_page = browser_context.new_page()
|
||||
session_check_page.goto("https://x.com/home")
|
||||
@@ -2406,6 +2434,7 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
||||
browser_context = browser.new_context(
|
||||
user_agent=clean_ua,
|
||||
viewport={"width": 1920, "height": 1080},
|
||||
locale=locale, # ✅ add this
|
||||
)
|
||||
login_page = browser_context.new_page()
|
||||
|
||||
@@ -2980,9 +3009,10 @@ def candidate_matches_existing_bsky(candidate, recent_bsky_posts):
|
||||
# --- Main Sync Logic ---
|
||||
def sync_feeds(args):
|
||||
logging.info("🔄 Starting sync cycle...")
|
||||
|
||||
dry_run = getattr(args, "dry_run", False)
|
||||
bsky_langs = getattr(args, "bsky_langs", None) or DEFAULT_BSKY_LANGS
|
||||
bot_locale = bsky_langs_to_playwright_locale(bsky_langs) # ✅ now defined
|
||||
_cache.locale = bot_locale
|
||||
|
||||
if dry_run:
|
||||
logging.info("🧪 DRY RUN MODE — no posts will be created on Bluesky.")
|
||||
@@ -2996,6 +3026,7 @@ def sync_feeds(args):
|
||||
args.twitter_password,
|
||||
args.twitter_email,
|
||||
args.twitter_handle,
|
||||
locale=bot_locale,
|
||||
)
|
||||
|
||||
if not tweets:
|
||||
@@ -3213,6 +3244,7 @@ def sync_feeds(args):
|
||||
"Chrome/145.0.7632.6 Safari/537.36"
|
||||
),
|
||||
"viewport": {"width": 1920, "height": 1080},
|
||||
"locale": bot_locale,
|
||||
}
|
||||
if os.path.exists(browser_state_file):
|
||||
context_kwargs["storage_state"] = browser_state_file
|
||||
|
||||
Reference in New Issue
Block a user