"""De-duplicate a CSV of users by their normalized ``Handle`` column.

Reads ``INPUT_FILE``, strips and lower-cases the ``Handle`` values, drops
rows whose normalized handle has already been seen, and writes the result
to ``OUTPUT_FILE``.
"""

import os

import pandas as pd

# ── Config ────────────────────────────────────────────────────────────────────
INPUT_FILE = 'users_to_keep.csv'
OUTPUT_FILE = 'users_to_keep_clean.csv'
# ──────────────────────────────────────────────────────────────────────────────


def load_csv(filepath: str) -> pd.DataFrame:
    """Load a CSV file and return its contents as a DataFrame.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"File not found: {filepath}")
    return pd.read_csv(filepath)


def remove_duplicates(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize handles and remove duplicate rows.

    The ``Handle`` column is stripped of surrounding whitespace and
    lower-cased; for each normalized handle only the first row is kept.
    The input DataFrame is left untouched.

    Args:
        df: DataFrame containing a ``Handle`` column of strings.

    Returns:
        A new DataFrame with normalized handles and duplicates dropped.

    Raises:
        KeyError: If ``df`` has no ``Handle`` column.
    """
    if 'Handle' not in df.columns:
        raise KeyError("Input data has no 'Handle' column")
    # ``assign`` returns a new frame, so the caller's DataFrame is not
    # modified by the normalization step.
    normalized = df.assign(Handle=df['Handle'].str.strip().str.lower())
    return normalized.drop_duplicates(subset='Handle', keep='first')


def save_csv(df: pd.DataFrame, filepath: str) -> None:
    """Write ``df`` to ``filepath`` as CSV, omitting the index column."""
    df.to_csv(filepath, index=False)


def print_report(before: int, after: int, output: str) -> None:
    """Print a summary of how many rows were removed and where output went.

    Args:
        before: Row count prior to de-duplication.
        after: Row count after de-duplication.
        output: Path the cleaned data was saved to.
    """
    removed = before - after
    rule = "─" * 40
    print(rule)
    print(f" Rows before : {before}")
    print(f" Rows after : {after}")
    print(f" Removed : {removed}")
    print(f" Saved to : {output}")
    print(rule)


def main() -> None:
    """Run the load → de-duplicate → save pipeline and print a report."""
    print(f"\n📂 Loading '{INPUT_FILE}'...")
    df = load_csv(INPUT_FILE)
    before = len(df)

    print("🔍 Removing duplicates...")
    df_clean = remove_duplicates(df)
    after = len(df_clean)

    print(f"💾 Saving to '{OUTPUT_FILE}'...")
    save_csv(df_clean, OUTPUT_FILE)

    print_report(before, after, OUTPUT_FILE)
    print("✅ Done!\n")


if __name__ == '__main__':
    main()