Files
scripts/bsky/remove_english_followers.py
Guillem Hernandez Sola 1acb9955f2 Added all
2026-04-11 15:14:42 +02:00

264 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import argparse
import logging
import sys
import time
from atproto import Client
# --- Logging ---
# Root logger: timestamped records at INFO and above, written to stdout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
# ──────────────────────────────────────────────────────────────────────────────
# AUTH
# ──────────────────────────────────────────────────────────────────────────────
def login(email: str, password: str) -> Client:
    """Create a client and authenticate it with the given credentials.

    Args:
        email: Login identifier (email or handle) handed to ``Client.login``.
        password: App password handed to ``Client.login``.

    Returns:
        The client object once ``login`` has completed without raising.

    Any exception raised during the login attempt is logged as an error and
    the process exits with status 1.
    """
    session = Client()
    try:
        logging.info(f"Attempting login: {email}")
        session.login(email, password)
        logging.info("✅ Login successful!")
    except Exception as err:
        logging.error(f"❌ Login failed: {err}")
        sys.exit(1)
    return session
# ──────────────────────────────────────────────────────────────────────────────
# FETCH FOLLOWING
# ──────────────────────────────────────────────────────────────────────────────
def get_all_following(client, handle: str) -> list:
    """Page through ``get_follows`` and return every follow entry collected.

    Args:
        client: Object exposing ``app.bsky.graph.get_follows(params)``.
        handle: Value sent as the ``actor`` request parameter.

    Returns:
        The concatenated ``follows`` of all pages fetched. If a request
        raises, a warning is logged and whatever was gathered so far is
        returned.
    """
    logging.info("📋 Fetching accounts you follow...")
    accounts = []
    page_token = None
    fetching = True
    while fetching:
        try:
            page = client.app.bsky.graph.get_follows(
                {'actor': handle, 'cursor': page_token, 'limit': 100}
            )
            accounts.extend(page.follows)
            page_token = page.cursor
            # A falsy cursor marks the last page; otherwise pause before the next one.
            fetching = bool(page_token)
            if fetching:
                time.sleep(0.3)
        except Exception as err:
            logging.warning(f"⚠️ Error fetching following: {err}")
            fetching = False
    logging.info(f"➡️ You are following {len(accounts)} accounts.")
    return accounts
# ──────────────────────────────────────────────────────────────────────────────
# LANGUAGE DETECTION
# ──────────────────────────────────────────────────────────────────────────────
# Post language codes that shield an account from removal: Catalan and Spanish.
PROTECTED_LANGS = {
    'ca',
    'es',
}

# Post language codes that mark an account as a removal candidate.
TARGET_LANGS = {
    'en',
}

# Bio substrings that shield an account from removal.
PROTECTED_BIO_KEYWORDS = [
    # Catalan
    'català',
    'catala',
    'catalunya',
    'ca-es',
    'catalan',
    'valencian',
    'valencià',
    '📍 cat',
    # Spanish
    'español',
    'castellano',
    'españa',
    'es-es',
    'spanish',
    '🇪🇸',
]

# Bio substrings treated as a sign of an English-language account.
ENGLISH_BIO_KEYWORDS = [
    'english',
    'en-us',
    'en-gb',
    '🇬🇧',
    '🇺🇸',
    '🇦🇺',
    '🇨🇦',
    'united kingdom',
    'united states',
    'australia',
    'new zealand',
]
def get_user_post_languages(client, did: str, sample_size: int) -> set:
    """Collect the language codes tagged on a user's recent feed items.

    Every tag is reduced to its primary subtag and lower-cased, so
    'en-US' and 'en-GB' both become 'en'.

    Args:
        client: Object exposing ``app.bsky.feed.get_author_feed(params)``.
        did: Value sent as the ``actor`` request parameter.
        sample_size: Number of recent feed items to inspect. Values above
            100 are capped at 100, the most the endpoint accepts per
            request; values below 1 return an empty set without a request.

    Returns:
        The set of normalized language codes found. Empty when no item
        carries language tags or when the request fails (the failure is
        logged as a warning).
    """
    langs_found = set()
    if sample_size < 1:
        # Nothing to sample; a limit below 1 would only be rejected by the API.
        return langs_found

    # An out-of-range limit makes the whole request fail, which would
    # silently disable post-based detection for every user.
    max_feed_limit = 100
    limit = min(sample_size, max_feed_limit)

    try:
        res = client.app.bsky.feed.get_author_feed({'actor': did, 'limit': limit})
        # NOTE(review): the author feed may also contain reposts, whose
        # language tags describe the original author — confirm whether
        # those should count towards this user's languages.
        for item in res.feed:
            tags = getattr(item.post.record, 'langs', None)
            if not tags:
                continue
            for tag in tags:
                primary = tag.split('-')[0].strip().lower()
                if primary:  # ignore blank tags instead of recording ''
                    langs_found.add(primary)
    except Exception as e:
        logging.warning(f"⚠️ Could not fetch posts for {did}: {e}")
    return langs_found
def is_english_only(client, user, sample_size: int) -> bool:
    """Decide whether an account looks English-speaking and unprotected.

    Signals are consulted in this order; the first that matches wins:

    1. Language tags on recent posts (via ``get_user_post_languages``):
       - any code in ``PROTECTED_LANGS``  -> False (keep)
       - otherwise any code in ``TARGET_LANGS`` -> True (remove)
    2. Substrings of the lower-cased bio (``user.description``):
       - any entry of ``PROTECTED_BIO_KEYWORDS`` -> False (keep)
       - any entry of ``ENGLISH_BIO_KEYWORDS``   -> True (remove)
    3. No signal at all -> False (keep, the safe default).

    Args:
        client: Passed through to ``get_user_post_languages``.
        user: Object providing ``did`` and ``description``.
        sample_size: Passed through to ``get_user_post_languages``.
    """
    tagged = get_user_post_languages(client, user.did, sample_size)
    # Intersections with an empty set are empty, so "no tags" falls through.
    if tagged & PROTECTED_LANGS:
        return False
    if tagged & TARGET_LANGS:
        return True

    # Bio fallback: the protected list is checked first so it takes priority.
    bio_text = (user.description or "").lower()
    ordered_checks = (
        (PROTECTED_BIO_KEYWORDS, False),
        (ENGLISH_BIO_KEYWORDS, True),
    )
    for keywords, verdict in ordered_checks:
        if any(word in bio_text for word in keywords):
            return verdict
    return False
# ──────────────────────────────────────────────────────────────────────────────
# UNFOLLOW ENGLISH USERS
# ──────────────────────────────────────────────────────────────────────────────
def unfollow_english_users(client, following: list, dry_run: bool, sample_size: int):
    """Detect English-only accounts among *following* and unfollow them.

    The flow is: analyse every account, print a summary and the list of
    candidates, then (unless *dry_run*) ask for confirmation and delete the
    follow record of each candidate.

    Args:
        client: Client used for language detection and for
            ``com.atproto.repo.delete_record``.
        following: Followed accounts; each needs ``did``, ``description``,
            ``handle``, ``display_name`` and ``viewer``.
        dry_run: When true, stop after printing the candidate list.
        sample_size: Number of recent posts inspected per account.

    Returns:
        None. Results are reported through ``print`` and ``logging``.
    """
    logging.info(f"🔍 Analyzing {len(following)} accounts you follow for English-only signal...")
    logging.info(f" Checking last {sample_size} posts per user — this may take a while...")
    english_users = _collect_english_only(client, following, sample_size)

    # --- Summary ---
    print("\n" + "=" * 60)
    print("📊 ANALYSIS COMPLETE")
    print(f" Total following checked : {len(following)}")
    print(f" English-only detected : {len(english_users)}")
    print(f" Will be kept : {len(following) - len(english_users)}")
    print("=" * 60)
    if not english_users:
        logging.info("✅ No English-only accounts found in your following list.")
        return

    # --- Preview list ---
    print(f"\n🇬🇧 ENGLISH-ONLY ACCOUNTS TO UNFOLLOW ({len(english_users)} total):")
    print("-" * 60)
    for u in english_users:
        display = f" ({u.display_name})" if u.display_name else ""
        print(f" - @{u.handle}{display}")
    print("-" * 60)
    if dry_run:
        logging.info("🔍 Dry run mode — no changes made.")
        return

    if not _confirm_unfollow(len(english_users)):
        logging.info("❌ Cancelled. No one was unfollowed.")
        return

    # --- Unfollow using the follow record URI ---
    logging.info(" Unfollowing English-only accounts...")
    success = 0
    failed = 0
    for user in english_users:
        try:
            if _delete_follow_record(client, user):
                logging.info(f" ✅ Unfollowed: @{user.handle}")
                success += 1
            else:
                logging.warning(f" ⚠️ No follow record found for @{user.handle}")
                failed += 1
        except Exception as e:
            logging.error(f" ❌ Failed to unfollow @{user.handle}: {e}")
            failed += 1
        # Pause after every attempt, including failed ones, so an error such
        # as rate limiting is not followed by an immediate burst of requests.
        time.sleep(0.5)
    print("\n" + "=" * 60)
    logging.info(f"✅ Done! Unfollowed {success} accounts. ({failed} failed)")
    print("=" * 60)


# England flag emoji (black flag + "gbeng" tag sequence), written with escapes
# because the tag characters are invisible in most editors.
_ENGLAND_FLAG = "\U0001F3F4\U000E0067\U000E0062\U000E0065\U000E006E\U000E0067\U000E007F"


def _collect_english_only(client, following: list, sample_size: int) -> list:
    """Return the accounts in *following* that ``is_english_only`` flags."""
    flagged = []
    total = len(following)
    for i, user in enumerate(following):
        if i > 0 and i % 25 == 0:
            logging.info(f" Progress: {i}/{total} checked | Found so far: {len(flagged)}")
        if is_english_only(client, user, sample_size):
            flagged.append(user)
            logging.debug(f" {_ENGLAND_FLAG} English-only detected: @{user.handle}")
        time.sleep(0.4)  # Rate limit protection
    return flagged


def _confirm_unfollow(count: int) -> bool:
    """Ask on stdin whether to proceed; only 'y' or 'yes' counts as consent."""
    try:
        answer = input(f"\nUnfollow these {count} accounts? (y/N): ")
    except EOFError:
        # Closed or non-interactive stdin: take the safe default instead of
        # crashing after the analysis has already been done.
        logging.warning("⚠️ No input available for confirmation.")
        return False
    return answer.strip().lower() in ('y', 'yes')


def _delete_follow_record(client, user) -> bool:
    """Delete the follow record for *user*; return False if none is known."""
    follow_uri = user.viewer.following if user.viewer else None
    if not follow_uri:
        return False
    # Split on "/": index 2 is used as the repo, the last segment as the rkey.
    parts = follow_uri.split("/")
    client.com.atproto.repo.delete_record({
        "repo": parts[2],
        "collection": "app.bsky.graph.follow",
        "rkey": parts[-1],
    })
    return True
# ──────────────────────────────────────────────────────────────────────────────
# MAIN
# ──────────────────────────────────────────────────────────────────────────────
def main():
    """Command-line entry point.

    Parses the arguments, logs in, fetches the accounts followed by the
    logged-in user, and hands them to ``unfollow_english_users``.
    """
    cli = argparse.ArgumentParser(
        description="Unfollow Bluesky accounts whose default language is English."
    )
    cli.add_argument("bsky_email", help="Bluesky login email or handle")
    cli.add_argument("bsky_app_password", help="Bluesky app password (Settings > App Passwords)")
    cli.add_argument(
        "--dry_run",
        action="store_true",
        help="Preview English-only accounts without unfollowing them",
    )
    cli.add_argument(
        "--sample_size",
        type=int,
        default=25,
        help="Number of recent posts to check per user for language detection (default: 25)",
    )
    opts = cli.parse_args()

    # --- Login ---
    client = login(opts.bsky_email, opts.bsky_app_password)
    own_handle = client.me.handle
    logging.info(f"👤 Logged in as: @{own_handle}")

    # --- Fetch following ---
    followed = get_all_following(client, own_handle)

    # --- Detect & unfollow ---
    unfollow_english_users(
        client,
        followed,
        dry_run=opts.dry_run,
        sample_size=opts.sample_size,
    )
    logging.info("🎉 All done!")


if __name__ == "__main__":
    main()