# manga-translator/clean_bubbles.py

import cv2
import numpy as np
import json
import os
import argparse

def clean_text_from_box(image_bgr, box, bg_type="complex"):
    """Erase the text inside one bounding box of ``image_bgr``, in place.

    The box is clipped to the image on all four sides. A box that starts
    off-image is shrunk rather than shifted, so only pixels that really lie
    inside the requested rectangle are touched. Boxes that end up empty
    (zero/negative size, or entirely outside the image) are a no-op.

    Args:
        image_bgr: Image array indexed as ``[row, col, ...]``. The inpainting
            path converts the region with ``COLOR_BGR2GRAY``, so it expects a
            3-channel BGR image.
        box: Mapping with ``"x"``, ``"y"``, ``"w"`` and ``"h"`` entries
            (anything ``int()`` accepts). The box covers columns
            ``x .. x+w-1`` and rows ``y .. y+h-1``.
        bg_type: ``"white"`` fills the region with solid white; any other
            value inpaints the dark pixels of the region.

    Returns:
        The same ``image_bgr`` object, modified in place.
    """
    img_h, img_w = image_bgr.shape[:2]

    left, top = int(box["x"]), int(box["y"])

    # Clip each edge independently. Computing the far edge from the
    # *unclamped* origin keeps a partially off-image box from sliding inward.
    x1 = max(0, left)
    y1 = max(0, top)
    x2 = min(img_w, left + int(box["w"]))
    y2 = min(img_h, top + int(box["h"]))

    # Nothing of the box is visible: leave the image untouched.
    if x2 <= x1 or y2 <= y1:
        return image_bgr

    # 1. Fast path: plain white background.
    # A half-open slice covers exactly w x h pixels, the same region the
    # inpainting path below operates on.
    if bg_type == "white":
        image_bgr[y1:y2, x1:x2] = 255
        return image_bgr

    # 2. Complex path: inpainting for screentones/artwork.
    roi = image_bgr[y1:y2, x1:x2]
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Inverted threshold: pixels at or below 120 become 255 (the text mask),
    # brighter pixels become 0.
    _, mask = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV)

    # Grow the mask by one 3x3 dilation so the edges of the letters are
    # covered as well.
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)

    # Fill the masked pixels from their surroundings and paste the result back.
    image_bgr[y1:y2, x1:x2] = cv2.inpaint(
        roi, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA
    )

    return image_bgr

def main():
    """Command-line entry point: erase text from an image using a bubbles JSON.

    Reads the image given by ``--image`` and the JSON object given by
    ``--json``. Every JSON entry that is an object with a ``"box"`` key is
    passed to ``clean_text_from_box`` together with its ``"background_type"``
    (defaulting to ``"complex"``). The result is written to ``--output`` or,
    when that is omitted, next to the input as ``<name>_cleaned<ext>``.

    Problems with the inputs or the output are reported on stdout and the
    function returns early without raising.
    """
    parser = argparse.ArgumentParser(description="Clean manga text using bubbles.json")
    parser.add_argument("-i", "--image", required=True, help="Path to the original manga image")
    parser.add_argument("-j", "--json", required=True, help="Path to the bubbles.json file")
    parser.add_argument("-o", "--output", help="Path to save the cleaned image (optional)")

    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f"❌ Error: Image file not found at {args.image}")
        return

    if not os.path.exists(args.json):
        print(f"❌ Error: JSON file not found at {args.json}")
        return

    # Load the image
    print(f"📂 Loading image: {args.image}")
    image = cv2.imread(args.image)
    # imread signals an unreadable/undecodable file by returning None
    # instead of raising, so it has to be checked explicitly.
    if image is None:
        print(f"❌ Error: Could not read image at {args.image}")
        return

    # Load the JSON data
    print(f"📂 Loading JSON: {args.json}")
    try:
        with open(args.json, "r", encoding="utf-8") as f:
            bubbles_data = json.load(f)
    except json.JSONDecodeError as err:
        print(f"❌ Error: Invalid JSON in {args.json}: {err}")
        return

    if not isinstance(bubbles_data, dict):
        print(f"❌ Error: Expected a JSON object at the top level of {args.json}")
        return

    # Process each box
    print("🧹 Cleaning text from bounding boxes...")
    white_count = 0
    complex_count = 0

    for data in bubbles_data.values():
        # Entries without a box (or that are not objects at all) are skipped.
        if not isinstance(data, dict) or "box" not in data:
            continue

        # Default to "complex" if the flag is missing for backward compatibility
        bg_type = data.get("background_type", "complex")

        if bg_type == "white":
            white_count += 1
        else:
            complex_count += 1

        image = clean_text_from_box(image, data["box"], bg_type)

    print(f"   ✓ Cleaned {white_count} white boxes (fast fill)")
    print(f"   ✓ Cleaned {complex_count} complex boxes (inpainting)")

    # Determine output path
    if args.output:
        out_path = args.output
    else:
        base_name, ext = os.path.splitext(args.image)
        out_path = f"{base_name}_cleaned{ext}"

    # Save the result; imwrite reports failure through its return value,
    # so only announce success when it actually wrote the file.
    if not cv2.imwrite(out_path, image):
        print(f"❌ Error: Failed to save cleaned image to {out_path}")
        return

    print(f"✅ Cleaned image saved successfully to: {out_path}")

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()