Reverted
This commit is contained in:
23
rss2bsky.py
23
rss2bsky.py
@@ -359,8 +359,7 @@ def is_html(text: str) -> bool:
|
|||||||
def strip_trailing_url_punctuation(url: str) -> str:
|
def strip_trailing_url_punctuation(url: str) -> str:
|
||||||
if not url:
|
if not url:
|
||||||
return url
|
return url
|
||||||
# Fix the unterminated string literal issue
|
return re.sub(r"[\s…\.,;:!?)\]\"']+$", "", url.strip())
|
||||||
return re.sub(r"[\s…,\.;:!?)\]'\"]+$", "", url.strip())
|
|
||||||
|
|
||||||
|
|
||||||
def canonicalize_url(url: str):
|
def canonicalize_url(url: str):
|
||||||
@@ -402,7 +401,6 @@ def process_title(title: str) -> str:
|
|||||||
title_text = clean_whitespace(title_text)
|
title_text = clean_whitespace(title_text)
|
||||||
return title_text
|
return title_text
|
||||||
|
|
||||||
|
|
||||||
def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
|
def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
|
||||||
title_text = clean_whitespace(title_text)
|
title_text = clean_whitespace(title_text)
|
||||||
link = canonicalize_url(link) or link or ""
|
link = canonicalize_url(link) or link or ""
|
||||||
@@ -416,35 +414,36 @@ def build_post_text_variants(title_text: str, link: str, max_length: int = 300):
|
|||||||
seen.add(cleaned)
|
seen.add(cleaned)
|
||||||
variants.append(cleaned)
|
variants.append(cleaned)
|
||||||
|
|
||||||
# Variant 1: title + link (if fits completely)
|
# Variant 1: títol + link (si cap sencer)
|
||||||
if title_text and link:
|
if title_text and link:
|
||||||
full = f"{title_text}\n\n{link}"
|
full = f"{title_text}\n\n{link}"
|
||||||
if len(full) <= max_length:
|
if len(full) <= max_length:
|
||||||
add_variant(full)
|
add_variant(full)
|
||||||
else:
|
else:
|
||||||
# Truncate the title to make space for the link
|
# Trunca el títol per fer-hi lloc al link
|
||||||
# Reserve space for "\n\n" + link
|
# Reserva espai per "\n\n" + link
|
||||||
reserve = len(link) + 2
|
reserve = len(link) + 2
|
||||||
available = max_length - reserve
|
available = max_length - reserve
|
||||||
if available > 20:
|
if available > 20:
|
||||||
truncated_title = title_text[:available].rstrip()
|
# FIX: Use single char '…' and strip trailing dots/spaces
|
||||||
|
truncated_title = title_text[:available - 1].rstrip(" .") + "…"
|
||||||
add_variant(f"{truncated_title}\n\n{link}")
|
add_variant(f"{truncated_title}\n\n{link}")
|
||||||
|
|
||||||
# Variant 2: title only (truncated if necessary)
|
# Variant 2: només títol (truncat si cal)
|
||||||
if title_text:
|
if title_text:
|
||||||
if len(title_text) <= max_length:
|
if len(title_text) <= max_length:
|
||||||
add_variant(title_text)
|
add_variant(title_text)
|
||||||
else:
|
else:
|
||||||
truncated = title_text[:max_length].rstrip()
|
# FIX: Use single char '…' and strip trailing dots/spaces
|
||||||
|
truncated = title_text[:max_length - 1].rstrip(" .") + "…"
|
||||||
add_variant(truncated)
|
add_variant(truncated)
|
||||||
|
|
||||||
# Variant 3: link only (if no title)
|
# Variant 3: només link (si no hi ha títol)
|
||||||
if link and not title_text:
|
if link and not title_text:
|
||||||
add_variant(link)
|
add_variant(link)
|
||||||
|
|
||||||
return variants
|
return variants
|
||||||
|
|
||||||
|
|
||||||
def is_x_or_twitter_domain(url: str) -> bool:
|
def is_x_or_twitter_domain(url: str) -> bool:
|
||||||
try:
|
try:
|
||||||
hostname = (urlparse(url).hostname or "").lower()
|
hostname = (urlparse(url).hostname or "").lower()
|
||||||
@@ -501,7 +500,7 @@ def make_rich(content: str):
|
|||||||
text_builder.text(trailing)
|
text_builder.text(trailing)
|
||||||
|
|
||||||
elif cleaned_word.startswith("#") and len(cleaned_word) > 1:
|
elif cleaned_word.startswith("#") and len(cleaned_word) > 1:
|
||||||
tag_name = cleaned_word[1:].rstrip(".,;:!?)'"…")
|
tag_name = cleaned_word[1:].rstrip(".,;:!?)'\"…")
|
||||||
if tag_name:
|
if tag_name:
|
||||||
text_builder.tag(cleaned_word, tag_name)
|
text_builder.tag(cleaned_word, tag_name)
|
||||||
trailing = word[len(cleaned_word):]
|
trailing = word[len(cleaned_word):]
|
||||||
|
|||||||
Reference in New Issue
Block a user