Fixed some truncation
This commit is contained in:
@@ -46,7 +46,8 @@ pipeline {
|
||||
fastfeedparser \
|
||||
beautifulsoup4 \
|
||||
charset-normalizer \
|
||||
Pillow
|
||||
Pillow \
|
||||
grapheme
|
||||
|
||||
# Verify required imports
|
||||
"${VENV_DIR}/bin/python" -c "import fastfeedparser; print('fastfeedparser OK')"
|
||||
|
||||
@@ -19,6 +19,7 @@ from playwright.sync_api import sync_playwright
|
||||
from moviepy import VideoFileClip
|
||||
from bs4 import BeautifulSoup
|
||||
from PIL import Image
|
||||
import grapheme  # grapheme-cluster counting, used to match Bluesky's character counting
|
||||
|
||||
# --- Configuration ---
|
||||
LOG_PATH = "twitter2bsky.log"
|
||||
@@ -26,7 +27,7 @@ STATE_PATH = "twitter2bsky_state.json"
|
||||
SCRAPE_TWEET_LIMIT = 30
|
||||
DEDUPE_BSKY_LIMIT = 30
|
||||
TWEET_MAX_AGE_DAYS = 3
|
||||
BSKY_TEXT_MAX_LENGTH = 275
|
||||
BSKY_TEXT_MAX_LENGTH = 300
|
||||
DEFAULT_BSKY_LANGS = ["ca"]
|
||||
|
||||
VIDEO_MAX_DURATION_SECONDS = 179
|
||||
@@ -108,6 +109,9 @@ _cache = _RunCache()
|
||||
def reset_caches():
|
||||
_cache.clear()
|
||||
|
||||
def grapheme_len(text):
    """Count the grapheme clusters in ``text``.

    This is the length measure used to match Bluesky's character counting,
    as opposed to ``len()``, which counts code points.
    """
    cluster_count = grapheme.length(text)
    return cluster_count
|
||||
|
||||
# --- Custom Classes ---
|
||||
class ScrapedMedia:
|
||||
@@ -1160,16 +1164,17 @@ def find_tail_preservation_start(text, primary_non_x_url):
|
||||
|
||||
|
||||
def truncate_text_safely(text, max_length=BSKY_TEXT_MAX_LENGTH):
    """Shorten ``text`` to at most ``max_length`` grapheme clusters.

    Text that already fits is returned unchanged. Otherwise the text is cut
    on grapheme-cluster boundaries, so a multi-code-point character is never
    split, and "..." is appended. If the cut prefix contains a space at an
    index greater than ``TRUNCATE_MIN_PREFIX_CHARS``, the cut is moved back
    to that space so the result does not end in the middle of a word.

    Args:
        text: The string to shorten.
        max_length: Maximum length of the result in grapheme clusters,
            including the trailing "...".

    Returns:
        ``text`` itself when it fits, otherwise a truncated copy.
    """
    ellipsis = "..."

    # Segment once and reuse the clusters for both the length check and the
    # cut, instead of walking the text a second time.
    clusters = list(grapheme.graphemes(text))
    if len(clusters) <= max_length:
        return text

    keep = max_length - len(ellipsis)
    if keep < 0:
        # No room for the ellipsis. A negative slice bound would count from
        # the end and keep almost the whole text, so hard-cut instead.
        return "".join(clusters[: max(max_length, 0)])

    truncated = "".join(clusters[:keep])

    # Prefer ending on a word boundary, unless that would leave too short
    # a prefix.
    last_space = truncated.rfind(" ")
    if last_space > TRUNCATE_MIN_PREFIX_CHARS:
        return truncated[:last_space] + ellipsis
    return truncated + ellipsis
|
||||
|
||||
|
||||
def truncate_text_preserving_tail(text, tail_start, max_length=BSKY_TEXT_MAX_LENGTH):
|
||||
if (
|
||||
not text
|
||||
|
||||
Reference in New Issue
Block a user