diff --git a/rss2bsky.py b/rss2bsky.py
index 1247645..100792b 100644
--- a/rss2bsky.py
+++ b/rss2bsky.py
@@ -359,8 +359,7 @@ def is_html(text: str) -> bool:
 def strip_trailing_url_punctuation(url: str) -> str:
     if not url:
         return url
-    # Fix the unterminated string literal issue
-    return re.sub(r"[\s…,\.;:!?)\]'\"]+$", "", url.strip())
+    return re.sub(r"[\s…\.,;:!?)\]\"']+$", "", url.strip())
 
 
 def canonicalize_url(url: str):
@@ -402,7 +401,6 @@ def process_title(title: str) -> str:
     title_text = clean_whitespace(title_text)
     return title_text
 
-
 def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
     title_text = clean_whitespace(title_text)
     link = canonicalize_url(link) or link or ""
@@ -416,35 +414,36 @@ def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
             seen.add(cleaned)
             variants.append(cleaned)
 
-    # Variant 1: title + link (if fits completely)
+    # Variant 1: títol + link (si cap sencer)
     if title_text and link:
         full = f"{title_text}\n\n{link}"
         if len(full) <= max_length:
             add_variant(full)
         else:
-            # Truncate the title to make space for the link
-            # Reserve space for "\n\n" + link
+            # Trunca el títol per fer-hi lloc al link
+            # Reserva espai per "\n\n" + link
             reserve = len(link) + 2
             available = max_length - reserve
             if available > 20:
-                truncated_title = title_text[:available].rstrip()
+                # FIX: Use single char '…' and strip trailing dots/spaces
+                truncated_title = title_text[:available - 1].rstrip(" .") + "…"
                 add_variant(f"{truncated_title}\n\n{link}")
 
-    # Variant 2: title only (truncated if necessary)
+    # Variant 2: només títol (truncat si cal)
     if title_text:
         if len(title_text) <= max_length:
             add_variant(title_text)
         else:
-            truncated = title_text[:max_length].rstrip()
+            # FIX: Use single char '…' and strip trailing dots/spaces
+            truncated = title_text[:max_length - 1].rstrip(" .") + "…"
             add_variant(truncated)
 
-    # Variant 3: link only (if no title)
+    # Variant 3: només link (si no hi ha títol)
     if link and not title_text:
         add_variant(link)
 
     return variants
 
-
 def is_x_or_twitter_domain(url: str) -> bool:
     try:
         hostname = (urlparse(url).hostname or "").lower()
@@ -501,7 +500,7 @@ def make_rich(content: str):
                     text_builder.text(trailing)
 
             elif cleaned_word.startswith("#") and len(cleaned_word) > 1:
-                tag_name = cleaned_word[1:].rstrip(".,;:!?)'"…")
+                tag_name = cleaned_word[1:].rstrip(".,;:!?)'\"…")
                 if tag_name:
                     text_builder.tag(cleaned_word, tag_name)
                     trailing = word[len(cleaned_word):]