This commit is contained in:
Guillem Hernandez Sola
2026-04-28 17:23:08 +02:00
parent b36c06b529
commit c850965147

View File

@@ -359,8 +359,7 @@ def is_html(text: str) -> bool:
def strip_trailing_url_punctuation(url: str) -> str:
    """Remove whitespace and sentence punctuation stuck to the end of a URL.

    Surrounding whitespace is stripped first, then any trailing run of
    whitespace, the ellipsis character, ``. , ; : ! ?`` and quote characters
    is removed. A trailing ``)`` or ``]`` is removed only when it has no
    matching opener earlier in the string, so URLs that legitimately end in
    a bracket (``.../Python_(programming_language)``, ``http://[::1]``) are
    left intact while ``(see https://example.com/page)`` style leftovers are
    still cleaned up.

    Args:
        url: Candidate URL text; may be empty or ``None``.

    Returns:
        The cleaned URL. Falsy input is returned unchanged.
    """
    if not url:
        return url

    openers = {")": "(", "]": "["}
    cleaned = url.strip()
    while cleaned:
        # Punctuation that is never meaningful as the last URL character.
        trimmed = re.sub(r"[\s…\.,;:!?\"']+$", "", cleaned)
        # Drop a closing bracket only if it is unbalanced, i.e. it belongs
        # to the surrounding prose rather than to the URL itself.
        if trimmed and trimmed[-1] in openers:
            closer = trimmed[-1]
            if trimmed.count(closer) > trimmed.count(openers[closer]):
                trimmed = trimmed[:-1]
        if trimmed == cleaned:
            break
        cleaned = trimmed
    return cleaned
def canonicalize_url(url: str): def canonicalize_url(url: str):
@@ -402,7 +401,6 @@ def process_title(title: str) -> str:
title_text = clean_whitespace(title_text) title_text = clean_whitespace(title_text)
return title_text return title_text
def build_post_text_variants(title_text: str, link: str, max_length: int = 300): def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
title_text = clean_whitespace(title_text) title_text = clean_whitespace(title_text)
link = canonicalize_url(link) or link or "" link = canonicalize_url(link) or link or ""
@@ -416,35 +414,36 @@ def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
seen.add(cleaned) seen.add(cleaned)
variants.append(cleaned) variants.append(cleaned)
# Variant 1: title + link (if fits completely) # Variant 1: títol + link (si cap sencer)
if title_text and link: if title_text and link:
full = f"{title_text}\n\n{link}" full = f"{title_text}\n\n{link}"
if len(full) <= max_length: if len(full) <= max_length:
add_variant(full) add_variant(full)
else: else:
# Truncate the title to make space for the link # Trunca el títol per fer-hi lloc al link
# Reserve space for "\n\n" + link # Reserva espai per "\n\n" + link
reserve = len(link) + 2 reserve = len(link) + 2
available = max_length - reserve available = max_length - reserve
if available > 20: if available > 20:
truncated_title = title_text[:available].rstrip() # FIX: Use single char '…' and strip trailing dots/spaces
truncated_title = title_text[:available - 1].rstrip(" .") + ""
add_variant(f"{truncated_title}\n\n{link}") add_variant(f"{truncated_title}\n\n{link}")
# Variant 2: title only (truncated if necessary) # Variant 2: només títol (truncat si cal)
if title_text: if title_text:
if len(title_text) <= max_length: if len(title_text) <= max_length:
add_variant(title_text) add_variant(title_text)
else: else:
truncated = title_text[:max_length].rstrip() # FIX: Use single char '…' and strip trailing dots/spaces
truncated = title_text[:max_length - 1].rstrip(" .") + ""
add_variant(truncated) add_variant(truncated)
# Variant 3: link only (if no title) # Variant 3: només link (si no hi ha títol)
if link and not title_text: if link and not title_text:
add_variant(link) add_variant(link)
return variants return variants
def is_x_or_twitter_domain(url: str) -> bool: def is_x_or_twitter_domain(url: str) -> bool:
try: try:
hostname = (urlparse(url).hostname or "").lower() hostname = (urlparse(url).hostname or "").lower()
@@ -501,7 +500,7 @@ def make_rich(content: str):
text_builder.text(trailing) text_builder.text(trailing)
elif cleaned_word.startswith("#") and len(cleaned_word) > 1: elif cleaned_word.startswith("#") and len(cleaned_word) > 1:
tag_name = cleaned_word[1:].rstrip(".,;:!?)'"") tag_name = cleaned_word[1:].rstrip(".,;:!?)'\"")
if tag_name: if tag_name:
text_builder.tag(cleaned_word, tag_name) text_builder.tag(cleaned_word, tag_name)
trailing = word[len(cleaned_word):] trailing = word[len(cleaned_word):]