diff --git a/rss2bsky.py b/rss2bsky.py index cb563fb..b7fc621 100644 --- a/rss2bsky.py +++ b/rss2bsky.py @@ -401,6 +401,7 @@ def process_title(title: str) -> str: title_text = clean_whitespace(title_text) return title_text + def build_post_text_variants(title_text: str, link: str, max_length: int = 300): title_text = clean_whitespace(title_text) link = canonicalize_url(link) or link or "" @@ -414,13 +415,13 @@ def build_post_text_variants(title_text: str, link: str, max_length: int = 300): seen.add(cleaned) variants.append(cleaned) - # Variant 1: títol + link (si cap sencer) + # Variant 1: title + link (if fits whole) if title_text and link: full = f"{title_text}\n\n{link}" if len(full) <= max_length: add_variant(full) - # Variant 2: només títol complet (sense truncar) + # Variant 2: full title only (no truncation) if title_text: if len(title_text) <= max_length: add_variant(title_text) @@ -428,7 +429,7 @@ def build_post_text_variants(title_text: str, link: str, max_length: int = 300): truncated = title_text[:max_length].rstrip(" .") add_variant(truncated) - # Variant 3: títol truncat + link (si el títol complet+link no hi cap) + # Variant 3: truncated title + link (when full title+link doesn't fit) if title_text and link: full = f"{title_text}\n\n{link}" if len(full) > max_length: @@ -438,12 +439,13 @@ def build_post_text_variants(title_text: str, link: str, max_length: int = 300): truncated_title = title_text[:available].rstrip(" .") add_variant(f"{truncated_title}\n\n{link}") - # Variant 4: només link (si no hi ha títol) + # Variant 4: link only (when no title) if link and not title_text: add_variant(link) return variants + def is_x_or_twitter_domain(url: str) -> bool: try: hostname = (urlparse(url).hostname or "").lower() @@ -1076,7 +1078,7 @@ def build_candidates_from_feed(feed, max_length: int = 300) -> List[EntryCandida # ============================================================ -# Orchestration +# Login # ============================================================ def login_with_backoff( client: Client, @@ -1110,7 +1112,6 @@ def login_with_backoff( except Exception as e: logging.exception("❌ Login exception") - # Rate-limited login: retry first, cooldown only if exhausted if is_rate_limited_error(e): if attempt < max_attempts: wait_seconds = get_rate_limit_wait_seconds(e, base_delay, cfg) @@ -1125,19 +1126,16 @@ def login_with_backoff( activate_post_creation_cooldown_from_error(e, cooldown_path, cfg) return False - # Bad credentials: fail fast if is_auth_error(e): logging.error("❌ Authentication failed (bad handle/password/app-password).") return False - # Network/transient: bounded retry if attempt < max_attempts and (is_network_error(e) or is_timeout_error(e)): delay = min(base_delay * attempt, max_delay) + random.uniform(0, jitter_max) logging.warning(f"⏳ Transient login failure. Retrying in {delay:.1f}s...") time.sleep(delay) continue - # Other errors: bounded retry if attempt < max_attempts: delay = min(base_delay * attempt, max_delay) + random.uniform(0, jitter_max) logging.warning(f"⏳ Login retry in {delay:.1f}s...") @@ -1148,6 +1146,10 @@ def login_with_backoff( return False + +# ============================================================ +# Orchestration +# ============================================================ def run_once( rss_feed: str, bsky_handle: str, @@ -1158,13 +1160,26 @@ def run_once( state_path: str, cooldown_path: str, cfg: AppConfig, - max_posts: int = 5 # ← NEW PARAMETER + max_posts: int = 5, + max_age_days: int = 7, # ← NEW: 0 = disabled ) -> RunResult: if not PIL_AVAILABLE: logging.warning("🟡 Pillow is not installed. External card thumbnail compression is disabled.") logging.info(f"🌍 Posting language(s): {post_langs}") + # ── Age-filter cutoff ──────────────────────────────────────── + if max_age_days > 0: + cutoff = arrow.utcnow().shift(days=-max_age_days) + logging.info( + f"📅 Age filter active: skipping entries published before " + f"{cutoff.isoformat()} (max_age_days={max_age_days})" + ) + else: + cutoff = None + logging.info("📅 Age filter disabled (max_age_days=0).") + # ──────────────────────────────────────────────────────────── + if check_post_cooldown_or_log(cooldown_path): return RunResult(published_count=0, stopped_reason="global_post_cooldown_active") @@ -1197,12 +1212,35 @@ def run_once( logging.info(f"📰 Prepared {len(candidates)} feed entry candidates for duplicate comparison.") entries_to_post: List[EntryCandidate] = [] + for candidate in candidates: + + # ── Age filter ─────────────────────────────────────────── + if cutoff is not None: + pub = candidate.published_arrow + if pub is None: + logging.info( + f"⏭️ Skipping entry with no publication date " + f"(age filter active, max_age_days={max_age_days}): " + f"{candidate.canonical_link or candidate.title_text}" + ) + continue + if pub < cutoff: + logging.info( + f"⏭️ Skipping old entry published {pub.isoformat()} " + f"(cutoff: {cutoff.isoformat()}): " + f"{candidate.canonical_link or candidate.title_text}" + ) + continue + # ──────────────────────────────────────────────────────── + + # ── Deduplication: local state ─────────────────────────── is_dup_state, reason_state = candidate_matches_state(candidate, state) if is_dup_state: logging.info(f"⏭️ Skipping candidate due to local state duplicate match on: {reason_state}") continue + # ── Deduplication: recent Bluesky posts ────────────────── is_dup_bsky, reason_bsky = candidate_matches_existing_bsky(candidate, recent_bsky_posts) if is_dup_bsky: logging.info(f"⏭️ Skipping candidate due to recent Bluesky duplicate match on: {reason_bsky}") @@ -1212,9 +1250,11 @@ def run_once( logging.info(f"📬 {len(entries_to_post)} entries remain after duplicate filtering.") - # ← NEW: log the effective cap before starting the loop if len(entries_to_post) > max_posts: - logging.info(f"🔢 max-posts cap is {max_posts}: will publish at most {max_posts} of {len(entries_to_post)} entries this run.") + logging.info( + f"🔢 max-posts cap is {max_posts}: will publish at most {max_posts} " + f"of {len(entries_to_post)} entries this run." + ) if not entries_to_post: logging.info("ℹ️ Execution finished: no new entries to publish.") @@ -1227,7 +1267,6 @@ def run_once( for candidate in entries_to_post: - # ← NEW: hard cap check at the top of every iteration if published >= max_posts: logging.info(f"🔢 === MAX POSTS REACHED === Stopping after {published} posts (limit: {max_posts}).") break @@ -1311,6 +1350,7 @@ def run_once( return RunResult(published_count=published) + # ============================================================ # CLI # ============================================================ @@ -1330,10 +1370,15 @@ def main(): ) parser.add_argument("--state-path", default=DEFAULT_STATE_PATH, help="Path to local JSON state file") parser.add_argument("--cooldown-path", default=DEFAULT_COOLDOWN_STATE_PATH, help="Path to shared cooldown JSON state file") - parser.add_argument('--max-posts', type=int, default=5, help='Max new posts to publish per run') + parser.add_argument("--max-posts", type=int, default=5, help="Max new posts to publish per run (default: 5)") + parser.add_argument( + "--max-age-days", + type=int, + default=7, + help="Skip entries older than this many days (default: 7). Use 0 to disable the age filter.", + ) args = parser.parse_args() - # Parse comma-separated langs: "ca,es" → ["ca", "es"] post_langs = [lang.strip() for lang in args.lang.split(",") if lang.strip()] if not post_langs: post_langs = ["ca"] @@ -1348,13 +1393,14 @@ def main(): bsky_username=args.bsky_username, bsky_password=args.bsky_app_password, service_url=args.service, - post_langs=args.lang.split(","), + post_langs=post_langs, state_path=args.state_path, cooldown_path=args.cooldown_path, - cfg=AppConfig(), - max_posts=args.max_posts + cfg=cfg, + max_posts=args.max_posts, + max_age_days=args.max_age_days, # ← NEW ) if __name__ == "__main__": - main() \ No newline at end of file + main()