"""Erase text from manga speech bubbles described by a bubbles.json file."""

import argparse
import json
import os

import cv2
import numpy as np

# Grayscale values at or below this are treated as text when building the mask.
_TEXT_THRESHOLD = 120
# Neighbourhood radius (in pixels) handed to cv2.inpaint.
_INPAINT_RADIUS = 3


def clean_text_from_box(image_bgr, box, bg_type="complex"):
    """Erase the text inside one bounding box of ``image_bgr``.

    The image is modified in place and also returned.

    Args:
        image_bgr: Image array indexed as ``[row, column, ...]``; the
            "complex" path converts the region with ``COLOR_BGR2GRAY``.
        box: Mapping with ``"x"``, ``"y"``, ``"w"`` and ``"h"`` entries
            (anything ``int()`` accepts), in pixels.
        bg_type: ``"white"`` fills the box with solid white; any other value
            inpaints the dark pixels of the box from their surroundings.

    Returns:
        The same ``image_bgr`` object. It is returned untouched when the box
        does not overlap the image.
    """
    img_h, img_w = image_bgr.shape[:2]

    left = int(box["x"])
    top = int(box["y"])
    right = left + int(box["w"])
    bottom = top + int(box["h"])

    # Clip BOTH corners to the image. Clamping only the origin would leave
    # the width/height unchanged and push the far edge of a partially
    # off-image box further right/down than the box really extends.
    x1 = min(max(left, 0), img_w)
    y1 = min(max(top, 0), img_h)
    x2 = min(max(right, 0), img_w)
    y2 = min(max(bottom, 0), img_h)

    # Empty or inverted after clipping: nothing inside the image to clean.
    if x2 <= x1 or y2 <= y1:
        return image_bgr

    # 1. Fast path: plain white background.
    if bg_type == "white":
        # Slice assignment covers exactly w x h pixels; cv2.rectangle treats
        # its second corner as inclusive and would paint one extra row/column.
        image_bgr[y1:y2, x1:x2] = 255
        return image_bgr

    # 2. Complex path: inpaint over screentones/artwork.
    roi = image_bgr[y1:y2, x1:x2]
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

    # Inverted threshold: dark pixels become 255 (to be inpainted), the rest 0.
    _, mask = cv2.threshold(gray, _TEXT_THRESHOLD, 255, cv2.THRESH_BINARY_INV)

    # Grow the mask by one pixel so the soft edges of the glyphs are covered.
    mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=1)

    # No dark pixels found: inpainting would just copy the region back.
    if not mask.any():
        return image_bgr

    image_bgr[y1:y2, x1:x2] = cv2.inpaint(
        roi, mask, inpaintRadius=_INPAINT_RADIUS, flags=cv2.INPAINT_TELEA
    )
    return image_bgr


def main():
    """Command-line entry point.

    Reads the image and the bubbles JSON, cleans every entry that has a
    ``"box"``, and writes the result.

    Returns:
        ``0`` on success, ``1`` when an input cannot be read or the output
        cannot be written.
    """
    parser = argparse.ArgumentParser(description="Clean manga text using bubbles.json")
    parser.add_argument("-i", "--image", required=True, help="Path to the original manga image")
    parser.add_argument("-j", "--json", required=True, help="Path to the bubbles.json file")
    parser.add_argument("-o", "--output", help="Path to save the cleaned image (optional)")
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f"❌ Error: Image file not found at {args.image}")
        return 1
    if not os.path.exists(args.json):
        print(f"❌ Error: JSON file not found at {args.json}")
        return 1

    # Load the image. cv2.imread signals failure by returning None instead of
    # raising, so the result has to be checked explicitly.
    print(f"📂 Loading image: {args.image}")
    image = cv2.imread(args.image)
    if image is None:
        print(f"❌ Error: Could not read image at {args.image}")
        return 1

    # Load the JSON data.
    print(f"📂 Loading JSON: {args.json}")
    try:
        with open(args.json, "r", encoding="utf-8") as f:
            bubbles_data = json.load(f)
    except json.JSONDecodeError as err:
        print(f"❌ Error: Invalid JSON in {args.json}: {err}")
        return 1
    if not isinstance(bubbles_data, dict):
        print(f"❌ Error: Expected a JSON object at the top level of {args.json}")
        return 1

    # Process each box.
    print("🧹 Cleaning text from bounding boxes...")
    white_count = 0
    complex_count = 0
    for data in bubbles_data.values():
        # Entries that are not objects or carry no box are skipped.
        if not isinstance(data, dict) or "box" not in data:
            continue

        # Default to "complex" if the flag is missing, for backward compatibility.
        bg_type = data.get("background_type", "complex")
        image = clean_text_from_box(image, data["box"], bg_type)
        if bg_type == "white":
            white_count += 1
        else:
            complex_count += 1

    print(f" ✓ Cleaned {white_count} white boxes (fast fill)")
    print(f" ✓ Cleaned {complex_count} complex boxes (inpainting)")

    # Determine the output path: explicit, or "<input>_cleaned<ext>".
    if args.output:
        out_path = args.output
    else:
        base_name, ext = os.path.splitext(args.image)
        out_path = f"{base_name}_cleaned{ext}"

    # Save the result. cv2.imwrite returns False when the file was not written.
    if not cv2.imwrite(out_path, image):
        print(f"❌ Error: Could not write image to {out_path}")
        return 1

    print(f"✅ Cleaned image saved successfully to: {out_path}")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so failures are visible to the calling shell.
    raise SystemExit(main())