From 2401da8e5f416b2bd7716a4f24518229e3a8150e Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Thu, 23 Apr 2026 20:01:57 +0200 Subject: [PATCH] Fixed some truncation --- jenkins/3catTw | 3 ++- twitter2bsky_daemon.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/jenkins/3catTw b/jenkins/3catTw index e8bae75..a38bbdc 100644 --- a/jenkins/3catTw +++ b/jenkins/3catTw @@ -46,7 +46,8 @@ pipeline { fastfeedparser \ beautifulsoup4 \ charset-normalizer \ - Pillow + Pillow \ + grapheme # Verify required imports "${VENV_DIR}/bin/python" -c "import fastfeedparser; print('fastfeedparser OK')" diff --git a/twitter2bsky_daemon.py b/twitter2bsky_daemon.py index f3d2ca9..699f69c 100644 --- a/twitter2bsky_daemon.py +++ b/twitter2bsky_daemon.py @@ -19,6 +19,7 @@ from playwright.sync_api import sync_playwright from moviepy import VideoFileClip from bs4 import BeautifulSoup from PIL import Image +import grapheme # add to imports at top # --- Configuration --- LOG_PATH = "twitter2bsky.log" @@ -26,7 +27,7 @@ STATE_PATH = "twitter2bsky_state.json" SCRAPE_TWEET_LIMIT = 30 DEDUPE_BSKY_LIMIT = 30 TWEET_MAX_AGE_DAYS = 3 -BSKY_TEXT_MAX_LENGTH = 275 +BSKY_TEXT_MAX_LENGTH = 300 DEFAULT_BSKY_LANGS = ["ca"] VIDEO_MAX_DURATION_SECONDS = 179 @@ -108,6 +109,9 @@ _cache = _RunCache() def reset_caches(): _cache.clear() +def grapheme_len(text): + """Return the grapheme cluster count, matching Bluesky's character counting.""" + return grapheme.length(text) # --- Custom Classes --- class ScrapedMedia: @@ -1160,16 +1164,17 @@ def find_tail_preservation_start(text, primary_non_x_url): def truncate_text_safely(text, max_length=BSKY_TEXT_MAX_LENGTH): - if len(text) <= max_length: + if grapheme_len(text) <= max_length: return text - truncated = text[: max_length - 3] + # Truncate by grapheme clusters + clusters = list(grapheme.graphemes(text)) + truncated = "".join(clusters[: max_length - 3]) last_space = truncated.rfind(" ") if last_space > TRUNCATE_MIN_PREFIX_CHARS: return truncated[:last_space] + "..." return truncated + "..." - def truncate_text_preserving_tail(text, tail_start, max_length=BSKY_TEXT_MAX_LENGTH): if ( not text