Add --bsky-langs ca to Jenkins pipeline for specific language tagging
This commit is contained in:
@@ -82,7 +82,8 @@ pipeline {
|
|||||||
--twitter-handle "$TWITTER_3CAT_HANDLE" \
|
--twitter-handle "$TWITTER_3CAT_HANDLE" \
|
||||||
--bsky-handle "$BSKY_3CAT_HANDLE" \
|
--bsky-handle "$BSKY_3CAT_HANDLE" \
|
||||||
--bsky-password "$BSKY_3CAT_APP_PASSWORD" \
|
--bsky-password "$BSKY_3CAT_APP_PASSWORD" \
|
||||||
--bsky-base-url https://eurosky.social
|
--bsky-base-url https://eurosky.social \
|
||||||
|
--bsky-langs ca
|
||||||
'''
|
'''
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ class _RunCache:
|
|||||||
self.og_title: dict = {}
|
self.og_title: dict = {}
|
||||||
self.url_resolution: dict = {}
|
self.url_resolution: dict = {}
|
||||||
self.url_validity: dict = {}
|
self.url_validity: dict = {}
|
||||||
|
self.locale: str = "en-US" # ← ADDED locale cache here
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
self.og_title.clear()
|
self.og_title.clear()
|
||||||
@@ -113,6 +114,30 @@ def grapheme_len(text):
|
|||||||
"""Return the grapheme cluster count, matching Bluesky's character counting."""
|
"""Return the grapheme cluster count, matching Bluesky's character counting."""
|
||||||
return grapheme.length(text)
|
return grapheme.length(text)
|
||||||
|
|
||||||
|
# BCP-47 language tag → sensible locale for Playwright
|
||||||
|
_LANG_TO_LOCALE = {
|
||||||
|
"ca": "ca-ES",
|
||||||
|
"es": "es-ES",
|
||||||
|
"en": "en-US",
|
||||||
|
"fr": "fr-FR",
|
||||||
|
"de": "de-DE",
|
||||||
|
"pt": "pt-PT",
|
||||||
|
"it": "it-IT",
|
||||||
|
"nl": "nl-NL",
|
||||||
|
"eu": "eu-ES",
|
||||||
|
"gl": "gl-ES",
|
||||||
|
}
|
||||||
|
|
||||||
|
def bsky_langs_to_playwright_locale(bsky_langs):
|
||||||
|
"""
|
||||||
|
Convert the first configured Bluesky language tag to a Playwright locale
|
||||||
|
string (e.g. ['ca'] → 'ca-ES'). Falls back to 'en-US' if unknown.
|
||||||
|
"""
|
||||||
|
if not bsky_langs:
|
||||||
|
return "en-US"
|
||||||
|
primary = bsky_langs[0].strip().lower()
|
||||||
|
return _LANG_TO_LOCALE.get(primary, f"{primary}-{primary.upper()}")
|
||||||
|
|
||||||
# --- Custom Classes ---
|
# --- Custom Classes ---
|
||||||
class ScrapedMedia:
|
class ScrapedMedia:
|
||||||
def __init__(self, url, media_type="photo"):
|
def __init__(self, url, media_type="photo"):
|
||||||
@@ -562,7 +587,7 @@ def should_fetch_og_title(tweet):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def fetch_tweet_og_title_text(tweet_url):
|
def fetch_tweet_og_title_text(tweet_url, locale="en-US"):
|
||||||
if not tweet_url:
|
if not tweet_url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -589,6 +614,7 @@ def fetch_tweet_og_title_text(tweet_url):
|
|||||||
"Chrome/145.0.7632.6 Safari/537.36"
|
"Chrome/145.0.7632.6 Safari/537.36"
|
||||||
),
|
),
|
||||||
viewport={"width": 1280, "height": 900},
|
viewport={"width": 1280, "height": 900},
|
||||||
|
locale=_cache.locale, # ← USE CACHE
|
||||||
)
|
)
|
||||||
page = browser_context.new_page()
|
page = browser_context.new_page()
|
||||||
page.goto(
|
page.goto(
|
||||||
@@ -665,7 +691,7 @@ def resolve_tco_with_httpx(url, http_client):
|
|||||||
return canonicalize_url(url)
|
return canonicalize_url(url)
|
||||||
|
|
||||||
|
|
||||||
def resolve_tco_with_playwright(url):
|
def resolve_tco_with_playwright(url, locale="en-US"):
|
||||||
browser = None
|
browser = None
|
||||||
browser_context = None
|
browser_context = None
|
||||||
page = None
|
page = None
|
||||||
@@ -685,6 +711,7 @@ def resolve_tco_with_playwright(url):
|
|||||||
"Chrome/145.0.7632.6 Safari/537.36"
|
"Chrome/145.0.7632.6 Safari/537.36"
|
||||||
),
|
),
|
||||||
viewport={"width": 1280, "height": 900},
|
viewport={"width": 1280, "height": 900},
|
||||||
|
locale=locale,
|
||||||
)
|
)
|
||||||
page = browser_context.new_page()
|
page = browser_context.new_page()
|
||||||
|
|
||||||
@@ -2337,7 +2364,7 @@ def build_video_embed(video_blob, alt_text):
|
|||||||
|
|
||||||
|
|
||||||
# --- Twitter Scraping ---
|
# --- Twitter Scraping ---
|
||||||
def scrape_tweets_via_playwright(username, password, email, target_handle):
|
def scrape_tweets_via_playwright(username, password, email, target_handle, locale="en-US"):
|
||||||
tweets = []
|
tweets = []
|
||||||
state_file = "twitter_browser_state.json"
|
state_file = "twitter_browser_state.json"
|
||||||
|
|
||||||
@@ -2372,6 +2399,7 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
|||||||
user_agent=clean_ua,
|
user_agent=clean_ua,
|
||||||
viewport={"width": 1920, "height": 1080},
|
viewport={"width": 1920, "height": 1080},
|
||||||
storage_state=state_file,
|
storage_state=state_file,
|
||||||
|
locale=locale,
|
||||||
)
|
)
|
||||||
session_check_page = browser_context.new_page()
|
session_check_page = browser_context.new_page()
|
||||||
session_check_page.goto("https://x.com/home")
|
session_check_page.goto("https://x.com/home")
|
||||||
@@ -2406,6 +2434,7 @@ def scrape_tweets_via_playwright(username, password, email, target_handle):
|
|||||||
browser_context = browser.new_context(
|
browser_context = browser.new_context(
|
||||||
user_agent=clean_ua,
|
user_agent=clean_ua,
|
||||||
viewport={"width": 1920, "height": 1080},
|
viewport={"width": 1920, "height": 1080},
|
||||||
|
locale=locale, # ✅ add this
|
||||||
)
|
)
|
||||||
login_page = browser_context.new_page()
|
login_page = browser_context.new_page()
|
||||||
|
|
||||||
@@ -2980,9 +3009,10 @@ def candidate_matches_existing_bsky(candidate, recent_bsky_posts):
|
|||||||
# --- Main Sync Logic ---
|
# --- Main Sync Logic ---
|
||||||
def sync_feeds(args):
|
def sync_feeds(args):
|
||||||
logging.info("🔄 Starting sync cycle...")
|
logging.info("🔄 Starting sync cycle...")
|
||||||
|
|
||||||
dry_run = getattr(args, "dry_run", False)
|
dry_run = getattr(args, "dry_run", False)
|
||||||
bsky_langs = getattr(args, "bsky_langs", None) or DEFAULT_BSKY_LANGS
|
bsky_langs = getattr(args, "bsky_langs", None) or DEFAULT_BSKY_LANGS
|
||||||
|
bot_locale = bsky_langs_to_playwright_locale(bsky_langs) # ✅ now defined
|
||||||
|
_cache.locale = bot_locale
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
logging.info("🧪 DRY RUN MODE — no posts will be created on Bluesky.")
|
logging.info("🧪 DRY RUN MODE — no posts will be created on Bluesky.")
|
||||||
@@ -2996,6 +3026,7 @@ def sync_feeds(args):
|
|||||||
args.twitter_password,
|
args.twitter_password,
|
||||||
args.twitter_email,
|
args.twitter_email,
|
||||||
args.twitter_handle,
|
args.twitter_handle,
|
||||||
|
locale=bot_locale,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not tweets:
|
if not tweets:
|
||||||
@@ -3213,6 +3244,7 @@ def sync_feeds(args):
|
|||||||
"Chrome/145.0.7632.6 Safari/537.36"
|
"Chrome/145.0.7632.6 Safari/537.36"
|
||||||
),
|
),
|
||||||
"viewport": {"width": 1920, "height": 1080},
|
"viewport": {"width": 1920, "height": 1080},
|
||||||
|
"locale": bot_locale,
|
||||||
}
|
}
|
||||||
if os.path.exists(browser_state_file):
|
if os.path.exists(browser_state_file):
|
||||||
context_kwargs["storage_state"] = browser_state_file
|
context_kwargs["storage_state"] = browser_state_file
|
||||||
|
|||||||
Reference in New Issue
Block a user