"""Render translated text back into manga speech bubbles.

Pipeline: parse translations from output.txt, load bubble bounding boxes
from bubbles.json, sample each bubble's background color, erase the
original text, then draw the translation centered and auto-sized inside
the same bounding box.
"""

import json
import os
import re

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
INPUT_IMAGE = "page.png"
OUTPUT_IMAGE = "page_translated.png"
TRANSLATIONS_FILE = "output.txt"
BUBBLES_FILE = "bubbles.json"
FONT_PATH = "font.ttf"
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"
FONT_COLOR = (0, 0, 0)  # RGB ink color (black)


# ─────────────────────────────────────────────
# PARSE output.txt
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parses output.txt → {bubble_id: translated_text}.

    Only bubbles present in the file are returned. Absent IDs are left
    completely untouched on the page.

    Expected line shape: "#<id>  <original>  <translation>" where columns
    are separated by runs of two or more spaces. Entries whose translation
    starts with "[" (placeholder markers) are skipped.
    """
    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            line = line.rstrip("\n")
            # Only lines beginning with "#<digits>" carry a translation.
            if not re.match(r"^\s*#\d+", line):
                continue
            parts = re.split(r" {2,}", line.strip())
            if len(parts) < 3:
                continue
            # Strip non-digits from the "#12"-style ID column.
            bubble_id = int(re.sub(r"[^0-9]", "", parts[0]))
            translated = parts[-1].strip()
            if translated.startswith("["):
                continue  # placeholder, not a real translation
            translations[bubble_id] = translated
    print(f" ✅ {len(translations)} bubble(s) to translate: "
          f"{sorted(translations.keys())}")
    for bid, text in sorted(translations.items()):
        print(f" #{bid}: {text}")
    return translations


# ─────────────────────────────────────────────
# LOAD bubbles.json
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Loads bubbles.json → {bubble_id: {'x','y','w','h'}}.

    JSON object keys are strings; they are converted to int IDs so they
    match the IDs produced by parse_translations().
    """
    with open(filepath, "r", encoding="utf-8") as f:
        raw = json.load(f)
    boxes = {int(k): v for k, v in raw.items()}
    print(f" ✅ Loaded {len(boxes)} bubble(s)")
    for bid, val in sorted(boxes.items()):
        print(f" #{bid}: ({val['x']},{val['y']}) "
              f"{val['w']}×{val['h']}px")
    return boxes


# ─────────────────────────────────────────────
# SAMPLE BACKGROUND COLOR
# ─────────────────────────────────────────────
def sample_bubble_background(cv_image, bubble_data):
    """
    Samples the dominant background color inside the bbox by averaging
    the brightest 10% of pixels. Returns (B, G, R).

    Bright pixels are assumed to be bubble background rather than ink;
    degenerate/empty boxes fall back to white.
    """
    x = max(0, bubble_data["x"])
    y = max(0, bubble_data["y"])
    x2 = min(cv_image.shape[1], x + bubble_data["w"])
    y2 = min(cv_image.shape[0], y + bubble_data["h"])
    region = cv_image[y:y2, x:x2]
    if region.size == 0:
        return (255, 255, 255)  # box entirely off-image → assume white
    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    threshold = np.percentile(gray, 90)
    bg_mask = gray >= threshold
    if not np.any(bg_mask):
        return (255, 255, 255)
    return tuple(int(c) for c in region[bg_mask].mean(axis=0))


# ─────────────────────────────────────────────
# ERASE ORIGINAL TEXT
# Fills the tight OCR bbox with the sampled
# background color. No extra expansion —
# the bbox from bubbles.json is already the
# exact size of the red squares.
# ─────────────────────────────────────────────
def erase_bubble_text(cv_image, bubble_data, bg_color=(255, 255, 255)):
    """
    Fills the bubble bounding box with bg_color.

    Args:
        cv_image    : BGR numpy array (modified in place)
        bubble_data : Dict with 'x','y','w','h'
        bg_color    : (B,G,R) fill color
    """
    img_h, img_w = cv_image.shape[:2]
    # Clamp the box to the image so slicing never goes out of bounds.
    x = max(0, bubble_data["x"])
    y = max(0, bubble_data["y"])
    x2 = min(img_w, bubble_data["x"] + bubble_data["w"])
    y2 = min(img_h, bubble_data["y"] + bubble_data["h"])
    cv_image[y:y2, x:x2] = list(bg_color)


# ─────────────────────────────────────────────
# FIT FONT SIZE
# ─────────────────────────────────────────────
def _wrap_words(draw, text, font, max_w):
    """Greedy word-wrap: pack words into lines no wider than max_w px.

    A single word wider than max_w still becomes its own (overflowing)
    line — words are never split mid-word.
    """
    words, lines, current = text.split(), [], ""
    for word in words:
        candidate = (current + " " + word).strip()
        bb = draw.textbbox((0, 0), candidate, font=font)
        if (bb[2] - bb[0]) <= max_w:
            current = candidate
        else:
            if current:
                lines.append(current)
            current = word
    if current:
        lines.append(current)
    return lines


def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=7, max_size=48):
    """
    Finds the largest font size where word-wrapped text fits inside
    (max_w × max_h).

    Returns (font, lines). If no size fits vertically, returns the
    min-size font with its word-wrapped lines — so the text still wraps
    horizontally instead of rendering as one overflowing line.
    """
    last_font, last_lines = None, [text]
    for size in range(max_size, min_size - 1, -1):
        try:
            font = (ImageFont.truetype(font_path, size)
                    if font_path else ImageFont.load_default())
        except Exception:
            # Unreadable/corrupt font file → PIL's built-in bitmap font.
            font = ImageFont.load_default()
        lines = _wrap_words(draw, text, font, max_w)
        last_font, last_lines = font, lines
        # Line height from a reference string with ascender + descender.
        lh = draw.textbbox((0, 0), "Ay", font=font)
        line_h = (lh[3] - lh[1]) + 2
        if line_h * len(lines) <= max_h:
            return font, lines
    # Nothing fit vertically: best-effort fallback at the smallest size.
    return (last_font or ImageFont.load_default()), last_lines


# ─────────────────────────────────────────────
# RENDER TEXT INTO BUBBLE
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, bubble_data, text, font_path,
                          padding=8, font_color=(0, 0, 0)):
    """
    Renders translated text centered inside the tight bbox.
    Font auto-sizes to fill the same w×h the original occupied.

    Args:
        pil_image   : PIL Image (modified in place)
        bubble_data : Dict with 'x','y','w','h'
        text        : Translated string to draw
        font_path   : Path to a .ttf/.ttc font, or None for PIL default
        padding     : Inner margin in px on all four sides
        font_color  : (R,G,B) ink color
    """
    x, y = bubble_data["x"], bubble_data["y"]
    w, h = bubble_data["w"], bubble_data["h"]
    draw = ImageDraw.Draw(pil_image)
    inner_w = max(1, w - padding * 2)
    inner_h = max(1, h - padding * 2)
    font, lines = fit_font_size(draw, text, inner_w, inner_h, font_path)
    lh_bb = draw.textbbox((0, 0), "Ay", font=font)
    line_h = (lh_bb[3] - lh_bb[1]) + 2
    total_h = line_h * len(lines)
    # Center the whole text block vertically, each line horizontally.
    start_y = y + padding + max(0, (inner_h - total_h) // 2)
    for line in lines:
        lb = draw.textbbox((0, 0), line, font=font)
        line_w = lb[2] - lb[0]
        start_x = x + padding + max(0, (inner_w - line_w) // 2)
        draw.text((start_x, start_y), line, font=font, fill=font_color)
        start_y += line_h


# ─────────────────────────────────────────────
# RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Returns the first existing font path of (font_path, fallback),
    or None to signal PIL's built-in default font.
    """
    if font_path and os.path.exists(font_path):
        print(f" ✅ Using font: {font_path}")
        return font_path
    if fallback and os.path.exists(fallback):
        print(f" ⚠️ Fallback: {fallback}")
        return fallback
    print(" ⚠️ No font found. Using PIL default.")
    return None


# ─────────────────────────────────────────────
# MAIN RENDERER
# ─────────────────────────────────────────────
def render_translated_page(
    input_image=INPUT_IMAGE,
    output_image=OUTPUT_IMAGE,
    translations_file=TRANSLATIONS_FILE,
    bubbles_file=BUBBLES_FILE,
    font_path=FONT_PATH,
    font_fallback=FONT_FALLBACK,
    font_color=FONT_COLOR,
    text_padding=8,
    debug=False,
):
    """
    Pipeline:
      1. Parse translations (only present IDs processed)
      2. Load bubble boxes from bubbles.json
      3. Cross-check IDs — absent ones left untouched
      4. Sample background color per bubble
      5. Erase original text (fill tight bbox)
      6. Render translated text sized to fit the bbox
      7. Save output
    """
    print("=" * 55)
    print(" MANGA TRANSLATOR — RENDERER")
    print("=" * 55)

    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    print("\n📦 Loading bubble data...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble data. Re-run manga-translator.py.")
        return

    # Cross-check: only IDs present in BOTH files are processed.
    translate_ids = set(translations.keys())
    box_ids = set(bubble_boxes.keys())
    to_process = sorted(translate_ids & box_ids)
    untouched = sorted(box_ids - translate_ids)
    missing = sorted(translate_ids - box_ids)
    print(f"\n🔗 To process : {to_process}")
    print(f" Untouched : {untouched}")
    if missing:
        print(f" ⚠️ In output.txt but no box: {missing}")
    if not to_process:
        print("❌ No matching IDs. Aborting.")
        return

    print(f"\n🖼️ Loading: {input_image}")
    cv_image = cv2.imread(input_image)
    if cv_image is None:
        print(f"❌ Could not load: {input_image}")
        return
    print(f" {cv_image.shape[1]}×{cv_image.shape[0]}px")

    # Sample backgrounds BEFORE erasing — erasing overwrites the pixels
    # the sampler needs.
    print("\n🎨 Sampling backgrounds...")
    bg_colors = {}
    for bid in to_process:
        bg_bgr = sample_bubble_background(cv_image, bubble_boxes[bid])
        bg_colors[bid] = bg_bgr
        bg_rgb = (bg_bgr[2], bg_bgr[1], bg_bgr[0])
        brightness = sum(bg_rgb) / 3
        ink = "black" if brightness > 128 else "white"
        print(f" #{bid}: RGB{bg_rgb} ink→{ink}")

    # Erase
    print("\n🧹 Erasing original text...")
    for bid in to_process:
        bd = bubble_boxes[bid]
        erase_bubble_text(cv_image, bd, bg_color=bg_colors[bid])
        print(f" ✅ #{bid} ({bd['w']}×{bd['h']}px)")

    # OpenCV is BGR; PIL wants RGB.
    pil_image = Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))

    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    # Render
    print("\n✍️ Rendering...")
    for bid in to_process:
        text = translations[bid]
        bd = bubble_boxes[bid]
        bg_rgb = (bg_colors[bid][2], bg_colors[bid][1], bg_colors[bid][0])
        brightness = sum(bg_rgb) / 3
        # Pick ink with contrast against the sampled background.
        txt_color = (0, 0, 0) if brightness > 128 else (255, 255, 255)
        render_text_in_bubble(
            pil_image, bd, text,
            font_path=resolved_font,
            padding=text_padding,
            font_color=txt_color,
        )
        print(f" ✅ #{bid}: '{text}' "
              f"({bd['x']},{bd['y']}) {bd['w']}×{bd['h']}px")

    if debug:
        # Overlay every known box: green = translated, grey = untouched.
        dbg = pil_image.copy()
        dbg_draw = ImageDraw.Draw(dbg)
        for bid, bd in sorted(bubble_boxes.items()):
            color = (0, 200, 0) if bid in translate_ids \
                else (160, 160, 160)
            dbg_draw.rectangle(
                [bd["x"], bd["y"],
                 bd["x"] + bd["w"], bd["y"] + bd["h"]],
                outline=color, width=2)
            dbg_draw.text((bd["x"] + 3, bd["y"] + 3),
                          f"#{bid}", fill=color)
        dbg.save("debug_render.png")
        print("\n 🐛 debug_render.png saved "
              "(green=translated, grey=untouched)")

    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print(" ✅ Done!")
    print("=" * 55)


# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    render_translated_page(
        input_image="page.png",
        output_image="page_translated.png",
        translations_file="output.txt",
        bubbles_file="bubbles.json",
        font_path="font.ttf",
        font_fallback="/System/Library/Fonts/Helvetica.ttc",
        font_color=(0, 0, 0),
        text_padding=8,
        debug=True,
    )