Fixed text truncation: count grapheme clusters instead of code points and raise the Bluesky text limit from 275 to 300
This commit is contained in:
@@ -46,7 +46,8 @@ pipeline {
|
|||||||
fastfeedparser \
|
fastfeedparser \
|
||||||
beautifulsoup4 \
|
beautifulsoup4 \
|
||||||
charset-normalizer \
|
charset-normalizer \
|
||||||
Pillow
|
Pillow \
|
||||||
|
grapheme
|
||||||
|
|
||||||
# Verify required imports
|
# Verify required imports
|
||||||
"${VENV_DIR}/bin/python" -c "import fastfeedparser; print('fastfeedparser OK')"
|
"${VENV_DIR}/bin/python" -c "import fastfeedparser; print('fastfeedparser OK')"
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ from playwright.sync_api import sync_playwright
|
|||||||
from moviepy import VideoFileClip
|
from moviepy import VideoFileClip
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
import grapheme # grapheme cluster counting, matching Bluesky's character counting
|
||||||
|
|
||||||
# --- Configuration ---
|
# --- Configuration ---
|
||||||
LOG_PATH = "twitter2bsky.log"
|
LOG_PATH = "twitter2bsky.log"
|
||||||
@@ -26,7 +27,7 @@ STATE_PATH = "twitter2bsky_state.json"
|
|||||||
SCRAPE_TWEET_LIMIT = 30
|
SCRAPE_TWEET_LIMIT = 30
|
||||||
DEDUPE_BSKY_LIMIT = 30
|
DEDUPE_BSKY_LIMIT = 30
|
||||||
TWEET_MAX_AGE_DAYS = 3
|
TWEET_MAX_AGE_DAYS = 3
|
||||||
BSKY_TEXT_MAX_LENGTH = 275
|
BSKY_TEXT_MAX_LENGTH = 300
|
||||||
DEFAULT_BSKY_LANGS = ["ca"]
|
DEFAULT_BSKY_LANGS = ["ca"]
|
||||||
|
|
||||||
VIDEO_MAX_DURATION_SECONDS = 179
|
VIDEO_MAX_DURATION_SECONDS = 179
|
||||||
@@ -108,6 +109,9 @@ _cache = _RunCache()
|
|||||||
def reset_caches():
|
def reset_caches():
|
||||||
_cache.clear()
|
_cache.clear()
|
||||||
|
|
||||||
|
def grapheme_len(text):
|
||||||
|
"""Return the grapheme cluster count, matching Bluesky's character counting."""
|
||||||
|
return grapheme.length(text)
|
||||||
|
|
||||||
# --- Custom Classes ---
|
# --- Custom Classes ---
|
||||||
class ScrapedMedia:
|
class ScrapedMedia:
|
||||||
@@ -1160,16 +1164,17 @@ def find_tail_preservation_start(text, primary_non_x_url):
|
|||||||
|
|
||||||
|
|
||||||
def truncate_text_safely(text, max_length=BSKY_TEXT_MAX_LENGTH):
|
def truncate_text_safely(text, max_length=BSKY_TEXT_MAX_LENGTH):
|
||||||
if len(text) <= max_length:
|
if grapheme_len(text) <= max_length:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
truncated = text[: max_length - 3]
|
# Truncate by grapheme clusters
|
||||||
|
clusters = list(grapheme.graphemes(text))
|
||||||
|
truncated = "".join(clusters[: max_length - 3])
|
||||||
last_space = truncated.rfind(" ")
|
last_space = truncated.rfind(" ")
|
||||||
if last_space > TRUNCATE_MIN_PREFIX_CHARS:
|
if last_space > TRUNCATE_MIN_PREFIX_CHARS:
|
||||||
return truncated[:last_space] + "..."
|
return truncated[:last_space] + "..."
|
||||||
return truncated + "..."
|
return truncated + "..."
|
||||||
|
|
||||||
|
|
||||||
def truncate_text_preserving_tail(text, tail_start, max_length=BSKY_TEXT_MAX_LENGTH):
|
def truncate_text_preserving_tail(text, tail_start, max_length=BSKY_TEXT_MAX_LENGTH):
|
||||||
if (
|
if (
|
||||||
not text
|
not text
|
||||||
|
|||||||
Reference in New Issue
Block a user