"""Render translated text back into the speech bubbles of a manga page.

Pipeline: parse ``output.txt`` for translations, load bubble bounding
boxes from ``bubbles.json``, blank the interior of each bubble on the
page image, then draw the translated text centered inside each bubble.
"""

import json
import os
import re

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
INPUT_IMAGE = "page.png"
OUTPUT_IMAGE = "page_translated.png"
TRANSLATIONS_FILE = "output.txt"
BUBBLES_FILE = "bubbles.json"
FONT_PATH = "font.ttf"
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"
FONT_COLOR = (0, 0, 0)
BUBBLE_FILL = (255, 255, 255)

# Extra vertical pixels added below every rendered text line.
_LINE_SPACING = 3

# A translation row starts with "#<digits>"; columns are separated by 2+ spaces.
_BUBBLE_ID_RE = re.compile(r"^#(\d+)")
_COLUMN_SPLIT_RE = re.compile(r" {2,}")


# ─────────────────────────────────────────────
# STEP 1: PARSE output.txt
# Always takes the LAST column (columns are
# separated by 2+ spaces) as the translation.
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parse output.txt and return {bubble_id: translated_text}.

    Only lines starting with ``#N`` are considered. Each such line is
    split on runs of two or more spaces; lines with fewer than three
    columns are skipped, and the LAST column is used as the translation.
    Single spaces inside a column are therefore preserved.

    The bubble id is taken from the leading ``#N`` itself, so stray digits
    elsewhere in the first column cannot corrupt it.

    Args:
        filepath : Path to output.txt

    Returns:
        Dict {1: "LA NOIA ESTÀ IL·LESA!", ...}
    """
    translations = {}

    with open(filepath, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()

            id_match = _BUBBLE_ID_RE.match(line)
            if id_match is None:
                continue

            # [bubble_id_col, original_col, ..., translated_col]
            columns = _COLUMN_SPLIT_RE.split(line)
            if len(columns) < 3:
                continue

            translations[int(id_match.group(1))] = columns[-1].strip()

    print(f" ✅ Parsed {len(translations)} translation(s) from {filepath}")
    for bid, text in sorted(translations.items()):
        print(f" #{bid}: {text}")

    return translations


# ─────────────────────────────────────────────
# STEP 2: LOAD BUBBLE BOXES from bubbles.json
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Load bubble bounding boxes from bubbles.json.

    Expected format:
        {
            "1": {"x": 120, "y": 45, "w": 180, "h": 210},
            "2": { ... },
            ...
        }

    Coordinates are coerced to ``int`` so they can be used directly as
    array slice bounds even if the JSON stores them as floats.

    Args:
        filepath : Path to bubbles.json

    Returns:
        Dict {bubble_id (int): (x, y, w, h)}
    """
    with open(filepath, "r", encoding="utf-8") as f:
        raw = json.load(f)

    boxes = {
        int(key): tuple(int(val[field]) for field in ("x", "y", "w", "h"))
        for key, val in raw.items()
    }

    print(f" ✅ Loaded {len(boxes)} bubble box(es) from {filepath}")
    for bid, (x, y, w, h) in sorted(boxes.items()):
        print(f" #{bid}: ({x},{y}) {w}×{h}px")

    return boxes


# ─────────────────────────────────────────────
# STEP 3: ERASE BUBBLE CONTENT
# Fills a rectangular region with white, inset
# by `padding` so the box edge is left untouched.
# ─────────────────────────────────────────────
def erase_bubble_rect(image, x, y, w, h, padding=6):
    """
    Fill the interior of a bounding box with white (255), leaving a
    border of `padding` pixels intact.

    The rectangle is clamped to the image bounds; if the inset rectangle
    is empty nothing is written.

    Args:
        image   : numpy image array indexed [row, col] (modified in place)
        x,y,w,h : Bounding box
        padding : Pixels to leave as border (default: 6)
    """
    x1 = max(0, x + padding)
    y1 = max(0, y + padding)
    x2 = min(image.shape[1], x + w - padding)
    y2 = min(image.shape[0], y + h - padding)

    if x2 > x1 and y2 > y1:
        image[y1:y2, x1:x2] = 255


# ─────────────────────────────────────────────
# STEP 4: FIT FONT SIZE
# Finds the largest font size where the text
# fits inside (max_w × max_h) with word wrap.
# ─────────────────────────────────────────────
def _load_font(font_path, size):
    """Return (font, scalable); falls back to PIL's default font, which is not scalable here."""
    if font_path:
        try:
            return ImageFont.truetype(font_path, size), True
        except (OSError, ValueError, ImportError):
            # Unreadable font file, invalid size, or FreeType unavailable.
            pass
    return ImageFont.load_default(), False


def _text_width(draw, text, font):
    """Return the pixel width of `text` as measured by draw.textbbox."""
    left, _, right, _ = draw.textbbox((0, 0), text, font=font)
    return right - left


def _line_metrics(draw, font):
    """Return (top_offset, line_height) measured on the sample string "Ay"."""
    _, top, _, bottom = draw.textbbox((0, 0), "Ay", font=font)
    return top, (bottom - top) + _LINE_SPACING


def _wrap_text(draw, text, font, max_w):
    """Greedily wrap `text` on whitespace so each line is at most `max_w` wide where possible."""
    lines = []
    current = ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        if _text_width(draw, candidate, font) <= max_w:
            current = candidate
        else:
            if current:
                lines.append(current)
            # A word wider than max_w still gets its own line.
            current = word
    if current:
        lines.append(current)
    return lines


def fit_font_size(draw, text, max_w, max_h, font_path, min_size=8, max_size=48):
    """
    Find the largest font size at which word-wrapped text fits the box.

    Sizes are tried from `max_size` down to `min_size` (linear scan).
    A size is accepted when every wrapped line is at most `max_w` wide
    and the stacked lines are at most `max_h` tall. If no size fits, the
    smallest attempted size is returned with its wrapped lines, so the
    caller still gets wrapped text rather than one long line.

    Args:
        draw      : PIL ImageDraw instance
        text      : Text string to fit
        max_w     : Available width in pixels
        max_h     : Available height in pixels
        font_path : Path to .ttf font (or None for default)
        min_size  : Smallest font size to try (default: 8)
        max_size  : Largest font size to try (default: 48)

    Returns:
        (font, list_of_wrapped_lines)
    """
    last_attempt = None

    for size in range(max_size, min_size - 1, -1):
        font, scalable = _load_font(font_path, size)
        lines = _wrap_text(draw, text, font, max_w)
        last_attempt = (font, lines)

        widest = max((_text_width(draw, ln, font) for ln in lines), default=0)
        _, line_h = _line_metrics(draw, font)

        if widest <= max_w and line_h * len(lines) <= max_h:
            return font, lines

        if not scalable:
            # The fallback font is identical at every size; retrying is pointless.
            break

    if last_attempt is None:
        # Empty size range (min_size > max_size): use the default font.
        font = ImageFont.load_default()
        return font, _wrap_text(draw, text, font, max_w)

    return last_attempt


# ─────────────────────────────────────────────
# STEP 5: RENDER TEXT INTO BUBBLE
# Draws translated text centered inside
# the bubble bounding box.
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, x, y, w, h, text, font_path,
                          padding=12, font_color=(0, 0, 0)):
    """
    Render text centered (horizontally + vertically) inside a bubble
    bounding box.

    Args:
        pil_image  : PIL Image (modified in place)
        x,y,w,h    : Bubble bounding box
        text       : Translated text to render
        font_path  : Path to .ttf font (or None)
        padding    : Inner padding in pixels (default: 12)
        font_color : RGB color tuple (default: black)
    """
    draw = ImageDraw.Draw(pil_image)

    inner_w = max(1, w - padding * 2)
    inner_h = max(1, h - padding * 2)

    font, lines = fit_font_size(draw, text, inner_w, inner_h, font_path)

    top_offset, line_h = _line_metrics(draw, font)
    total_h = line_h * len(lines)
    cursor_y = y + padding + max(0, (inner_h - total_h) // 2)

    for line in lines:
        left, _, right, _ = draw.textbbox((0, 0), line, font=font)
        line_x = x + padding + max(0, (inner_w - (right - left)) // 2)
        # textbbox reports where the ink starts relative to the draw origin;
        # subtract that offset so the measured box, not the origin, is centered.
        draw.text((line_x - left, cursor_y - top_offset), line,
                  font=font, fill=font_color)
        cursor_y += line_h


# ─────────────────────────────────────────────
# RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Pick the first existing font file among `font_path` and `fallback`.

    Args:
        font_path : Preferred font file path
        fallback  : Font file path used when `font_path` does not exist

    Returns:
        The chosen path, or None when neither file exists (callers then
        use PIL's default font).
    """
    if font_path and os.path.exists(font_path):
        print(f" ✅ Using font: {font_path}")
        return font_path

    if fallback and os.path.exists(fallback):
        print(f" ⚠️ '{font_path}' not found → fallback: {fallback}")
        return fallback

    print(" ⚠️ No font found. Using PIL default.")
    return None


# ─────────────────────────────────────────────
# MAIN RENDERER
# ─────────────────────────────────────────────
def render_translated_page(
    input_image=INPUT_IMAGE,
    output_image=OUTPUT_IMAGE,
    translations_file=TRANSLATIONS_FILE,
    bubbles_file=BUBBLES_FILE,
    font_path=FONT_PATH,
    font_fallback=FONT_FALLBACK,
    font_color=FONT_COLOR,
    erase_padding=6,
    text_padding=12,
    debug=False,
    debug_image="debug_render.png",
):
    """
    Full rendering pipeline:
        1. Parse translations from output.txt
        2. Load bubble boxes from bubbles.json
        3. Load original manga page
        4. Erase original text from each bubble
        5. Render translated text into each bubble
        6. Save output image

    Returns None in all cases; on missing translations, missing boxes or
    an unreadable image it prints a message and returns early. Errors
    raised while opening/parsing the input files propagate to the caller.

    Args:
        input_image       : Source manga page (default: 'page.png')
        output_image      : Output path (default: 'page_translated.png')
        translations_file : Path to output.txt (default: 'output.txt')
        bubbles_file      : Path to bubbles.json (default: 'bubbles.json')
        font_path         : Primary .ttf font path
        font_fallback     : Fallback font path
        font_color        : RGB text color (default: black)
        erase_padding     : Border px when erasing (default: 6)
        text_padding      : Inner padding for text (default: 12)
        debug             : Save a debug overlay image (default: False)
        debug_image       : Path of the debug overlay
                            (default: 'debug_render.png')
    """
    print("=" * 55)
    print(" MANGA TRANSLATOR — RENDERER")
    print("=" * 55)

    # ── 1. Parse translations ────────────────────────────────
    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    # ── 2. Load bubble boxes ─────────────────────────────────
    print(f"\n📦 Loading bubble boxes from {bubbles_file}...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble boxes found. Re-run manga-translator.py first.")
        return

    # ── 3. Load image ────────────────────────────────────────
    print(f"\n🖼️ Loading image: {input_image}")
    cv_image = cv2.imread(input_image)
    if cv_image is None:
        print(f"❌ Could not load: {input_image}")
        return
    print(f" Image size: {cv_image.shape[1]}×{cv_image.shape[0]}px")

    # ── 4. Erase original text ───────────────────────────────
    # Bubbles that have both a translation and a box are collected here so
    # the render pass below does not need to repeat the membership check.
    print("\n🧹 Erasing original bubble text...")
    matched = []
    for bubble_id, text in sorted(translations.items()):
        box = bubble_boxes.get(bubble_id)
        if box is None:
            print(f" ⚠️ #{bubble_id}: no box in bubbles.json, skipping")
            continue
        x, y, w, h = box
        erase_bubble_rect(cv_image, x, y, w, h, padding=erase_padding)
        print(f" Erased #{bubble_id} at ({x},{y}) {w}×{h}px")
        matched.append((bubble_id, text, box))

    # ── 5. Convert to PIL ────────────────────────────────────
    pil_image = Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))

    # ── 6. Resolve font ──────────────────────────────────────
    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    # ── 7. Render translated text ────────────────────────────
    print("\n✍️ Rendering translated text...")
    for bubble_id, text, (x, y, w, h) in matched:
        render_text_in_bubble(
            pil_image, x, y, w, h, text,
            font_path=resolved_font,
            padding=text_padding,
            font_color=font_color,
        )
        print(f" #{bubble_id}: '{text}' → ({x},{y}) {w}×{h}px")

    # ── 8. Debug overlay ─────────────────────────────────────
    if debug:
        dbg = pil_image.copy()
        dbg_draw = ImageDraw.Draw(dbg)
        for bubble_id, (x, y, w, h) in sorted(bubble_boxes.items()):
            dbg_draw.rectangle([x, y, x + w, y + h], outline=(255, 0, 0), width=2)
            dbg_draw.text((x + 4, y + 4), f"#{bubble_id}", fill=(255, 0, 0))
        dbg.save(debug_image)
        print(f"\n 🐛 Debug render saved → {debug_image}")

    # ── 9. Save output ───────────────────────────────────────
    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print(f" ✅ Done! Open: {output_image}")
    print("=" * 55)


# ─────────────────────────────────────────────
# ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # All paths and paddings come from the CONFIG defaults above.
    render_translated_page(debug=True)