#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
manga-renderer.py

Inputs:  page image (IMAGE_PATH) + bubbles.json + output.txt
Output:  translated page image (OUTPUT_PATH)

Strategy:
  1. For every bubble that has a translation and is NOT in SKIP_BUBBLE_IDS,
     white-fill all its OCR quads (erases the original text cleanly).
  2. Render the translated text centered inside the bubble bounding box.
  3. Bubbles in SKIP_BUBBLE_IDS, and bubbles without a translation, are left
     completely untouched (neither erased nor re-rendered).
"""

import functools
import json
import textwrap
from typing import Dict, List, Tuple, Optional, Set

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ============================================================
# CONFIG — edit these paths to match your setup
# ============================================================
IMAGE_PATH = "003.jpg"
BUBBLES_PATH = "bubbles.json"
TRANSLATIONS_PATH = "output.txt"
OUTPUT_PATH = "translated_page_003.png"

# Font candidates — first one that loads wins
FONT_CANDIDATES = [
    "fonts/ComicNeue-Bold.ttf",
]

FONT_SIZE = 20
MIN_FONT_SIZE = 10
QUAD_PAD = 4       # extra pixels added around each quad before white-fill
LINE_SPACING = 8   # vertical gap added to the font size to get the line height

# ============================================================
# SKIP LIST
# ── Add any bubble IDs you do NOT want touched here.
# ── Skipped bubbles are neither erased nor re-rendered: the
# ── original artwork and text stay exactly as they are.
# ──
# ── Examples of why you'd skip a bubble:
# ──   • Sound effects (BURP, BAM, POW …)
# ──   • Untranslatable single characters
# ──   • Bubbles with bad OCR you want to fix manually later
# ──   • Narrator boxes you want to leave in the source language
# ============================================================
# NOTE: built with set([...]) on purpose. A bare `{ }` whose entries are all
# commented out is an empty *dict*, not a set.
SKIP_BUBBLE_IDS: Set[int] = set([
    # 8,   # BURP BURP — sound effect
    # 2,   # example: bad OCR, fix manually
])


# ============================================================
# FONT LOADER
# ============================================================
@functools.lru_cache(maxsize=64)
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a TrueType font at the given size, or return None on failure.

    For ``.ttc`` collections face indices 0-3 are tried in order; for any
    other file only index 0. A face is accepted only if ``getbbox`` works
    on it. Results are memoised because fit_text() requests the same
    (path, size) pairs for every bubble.
    """
    face_indices = range(4) if path.lower().endswith(".ttc") else (0,)
    for idx in face_indices:
        try:
            font = ImageFont.truetype(path, size, index=idx)
            font.getbbox("A")  # raises if face metrics are broken
        except Exception:
            # Deliberately broad: this is a probe, any failure just means
            # "try the next face".
            continue
        return font
    return None


def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """Return (path, font) for the first working entry of FONT_CANDIDATES.

    When no candidate loads, returns ``("", <Pillow default bitmap font>)``;
    the empty path tells fit_text() to use the bitmap fallback as well.
    """
    for candidate in FONT_CANDIDATES:
        font = load_font(candidate, FONT_SIZE)
        if font is not None:
            print(f" ✅ Font: {candidate}")
            return candidate, font
    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()


# ============================================================
# PARSE output.txt → {bid: translated_string}
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
    """Read output.txt and return {bubble_id: translated_text}.

    Lines look like:
        #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS

    Lines that do not start with ``#``, have fewer than five ``|``-separated
    fields, or carry a non-numeric id are ignored. Empty translations and
    the placeholder ``-`` are dropped.
    """
    translations: Dict[int, str] = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue
            parts = line.split("|")
            if len(parts) < 5:
                continue
            try:
                bid = int(parts[0].lstrip("#"))
            except ValueError:
                continue
            translated = parts[4].strip()
            if translated and translated != "-":
                translations[bid] = translated
    return translations


# ============================================================
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
# ============================================================
def parse_bubbles(
    filepath: str
) -> Tuple[Dict[int, Tuple[int, int, int, int]], Dict[int, List]]:
    """Read bubbles.json.

    Returns:
        bubble_boxes     : {bid: (x1, y1, x2, y2)}
        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}

    Box coordinates are truncated to int so they can be used directly as
    pixel indices. A missing or null "quads" entry yields an empty list.
    Raises ValueError for a non-numeric bubble key and KeyError when one of
    "x", "y", "w", "h" is missing.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    bubble_boxes: Dict[int, Tuple[int, int, int, int]] = {}
    quads_per_bubble: Dict[int, List] = {}
    for key, val in data.items():
        bid = int(key)
        x1 = int(val["x"])
        y1 = int(val["y"])
        x2 = int(val["x"] + val["w"])
        y2 = int(val["y"] + val["h"])
        bubble_boxes[bid] = (x1, y1, x2, y2)
        quads_per_bubble[bid] = val.get("quads") or []
    return bubble_boxes, quads_per_bubble


# ============================================================
# ERASE — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
    image_bgr,
    quads_per_bubble: Dict[int, List],
    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
):
    """White-fill the OCR quads of every bubble that will be re-rendered.

    A bubble is erased ONLY when it:
      - has a translation in ``translations`` AND
      - is NOT in ``skip_ids``
    Everything else is left completely untouched.

    Each quad is filled as a polygon and then again as its axis-aligned
    bounding box grown by ``pad`` pixels (clamped to the image). Works on a
    copy; the input image is not modified. Returns the cleaned copy.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    white = (255, 255, 255)
    pad = int(pad)

    erased_count = 0
    skipped_count = 0
    for bid, quads in quads_per_bubble.items():
        # Leave untouched: explicitly skipped, or no translation to draw.
        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue

        for quad in quads or ():
            if not quad:
                continue  # empty quad: nothing to erase
            # int32 conversion also truncates float coordinates, which
            # cv2.rectangle would otherwise reject.
            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(result, [pts], white)

            x1 = max(0, int(pts[:, 0].min()) - pad)
            y1 = max(0, int(pts[:, 1].min()) - pad)
            x2 = min(iw - 1, int(pts[:, 0].max()) + pad)
            y2 = min(ih - 1, int(pts[:, 1].max()) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), white, -1)
        erased_count += 1

    print(f" Erased : {erased_count} bubbles")
    print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result


# ============================================================
# FONT SIZING + TEXT WRAP
# ============================================================
def _estimate_chars_per_line(box_w: int, size: int) -> int:
    """Estimate how many characters fit in box_w (avg. glyph ≈ 0.62 × size)."""
    return max(1, int(box_w / (size * 0.62)))


def _wrap_to_width(text: str, font, box_w: int, size: int) -> List[str]:
    """Wrap text so every measured line is at most box_w pixels wide.

    Starts from the character-count estimate and narrows the wrap width one
    character at a time while any line is still too wide. Stops at one
    character per line, so the result may still overflow a very narrow box.
    """
    chars = _estimate_chars_per_line(box_w, size)
    while True:
        lines = textwrap.fill(text, width=chars).split("\n")
        if chars == 1:
            return lines
        widest = 0
        for line in lines:
            left, _, right, _ = font.getbbox(line)
            widest = max(widest, right - left)
        if widest <= box_w:
            return lines
        chars -= 1


def fit_text(
    text: str,
    box_w: int,
    box_h: int,
    font_path: str,
    max_size: int = FONT_SIZE,
    min_size: int = MIN_FONT_SIZE
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
    """Return (fitted_size, font, wrapped_lines).

    Picks the largest size in [min_size, max_size] whose wrapped text block
    fits inside box_w × box_h. Width is measured with the real font metrics;
    height is ``(size + LINE_SPACING) * n_lines`` and must leave an 8 px
    margin. If no size fits, the text is wrapped at ``min_size`` anyway.
    If the font cannot be loaded (or ``font_path`` is empty), Pillow's
    default bitmap font is returned.
    """
    height_margin = 8

    for size in range(max_size, min_size - 1, -1):
        font = load_font(font_path, size) if font_path else None
        if font is None:
            return min_size, ImageFont.load_default(), [text]

        lines = _wrap_to_width(text, font, box_w, size)
        total_h = (size + LINE_SPACING) * len(lines)
        if total_h <= box_h - height_margin:
            return size, font, lines

    # Nothing fit — use minimum size
    font = load_font(font_path, min_size) if font_path else None
    if font is None:
        chars_per_line = _estimate_chars_per_line(box_w, min_size)
        lines = textwrap.fill(text, width=chars_per_line).split("\n")
        return min_size, ImageFont.load_default(), lines
    return min_size, font, _wrap_to_width(text, font, box_w, min_size)


# ============================================================
# COLOR HELPERS
# ============================================================
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """Estimate a bubble's background color as (R, G, B).

    Samples four points 4 px inside the corners of the box (clamped to the
    image) and returns the per-channel median.
    """
    ih, iw = image_bgr.shape[:2]
    corners = [
        (x1 + 4, y1 + 4), (x2 - 4, y1 + 4),
        (x1 + 4, y2 - 4), (x2 - 4, y2 - 4),
    ]
    samples = []
    for sx, sy in corners:
        sx = max(0, min(iw - 1, int(sx)))
        sy = max(0, min(ih - 1, int(sy)))
        b, g, r = image_bgr[sy, sx]
        samples.append((int(r), int(g), int(b)))
    return (
        int(np.median([s[0] for s in samples])),
        int(np.median([s[1] for s in samples])),
        int(np.median([s[2] for s in samples])),
    )


def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Return black text for a light background, white for a dark one.

    Uses the weighted luminance 0.299 R + 0.587 G + 0.114 B with a
    threshold of 128.
    """
    lum = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2]
    return (0, 0, 0) if lum > 128 else (255, 255, 255)


def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """Return the text bounding box, estimating it if Pillow cannot.

    Falls back to a box of ``len(text) * size * 0.6`` by ``size * 1.2``
    (size defaults to 12 when the font has no ``size`` attribute).
    """
    try:
        return draw.textbbox(pos, text, font=font)
    except Exception:
        # Deliberately broad: any measuring failure falls back to the estimate.
        size = getattr(font, "size", 12)
        return (
            pos[0], pos[1],
            pos[0] + int(len(text) * size * 0.6),
            pos[1] + int(size * 1.2)
        )


# ============================================================
# RENDER
# ============================================================
def render_translations(
    image_bgr,
    bubble_boxes: Dict[int, Tuple],
    translations: Dict[int, str],
    skip_ids: Set[int],
    font_path: str,
    font_size: int = FONT_SIZE,
    bold_outline: bool = True,
    auto_color: bool = True,
    output_path: str = OUTPUT_PATH
):
    """Draw each translation centered in its bubble box and save the page.

    Bubbles are processed in ascending id order. Bubbles in ``skip_ids`` or
    without a translation are left as they are in ``image_bgr``. Boxes
    narrower or shorter than 10 px are reported and not rendered.

    Args:
        image_bgr:    page image in OpenCV BGR order (already erased).
        bubble_boxes: {bid: (x1, y1, x2, y2)}.
        translations: {bid: text}.
        skip_ids:     bubble ids to leave alone.
        font_path:    TrueType font path, or "" for the bitmap fallback.
        font_size:    largest font size to try.
        bold_outline: draw a 1 px outline in the contrasting color.
        auto_color:   choose black/white text from the sampled background;
                      otherwise black text with a white outline.
        output_path:  where the resulting image is saved.

    Returns:
        The rendered PIL image.
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)
    outline_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))

    rendered = 0
    skipped = 0
    missing = 0
    failed = 0

    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):

        # ── skip list check ────────────────────────────────────────
        if bid in skip_ids:
            print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
            skipped += 1
            continue

        text = translations.get(bid, "").strip()
        if not text:
            # erase_quads() does not touch bubbles without a translation,
            # so the original text is still there.
            print(f" ⚠️ Bubble #{bid:<3} — no translation found, left untouched")
            missing += 1
            continue

        box_w = x2 - x1
        box_h = y2 - y1
        if box_w < 10 or box_h < 10:
            print(f" ❌ Bubble #{bid:<3} — box too small ({box_w}x{box_h}), not rendered")
            failed += 1
            continue

        # ── fit font + wrap ────────────────────────────────────────
        size, font, lines = fit_text(
            text, box_w, box_h, font_path, max_size=font_size
        )

        # ── colors ─────────────────────────────────────────────────
        if auto_color:
            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
            fg = pick_fg_color(bg)
            ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
        else:
            fg, ol = (0, 0, 0), (255, 255, 255)

        # ── vertical center ────────────────────────────────────────
        line_h = size + LINE_SPACING
        total_h = line_h * len(lines)
        y_cur = y1 + max(4, (box_h - total_h) // 2)

        draw_ok = True
        for line in lines:
            bb = safe_textbbox(draw, (0, 0), line, font)
            line_w = bb[2] - bb[0]
            x_cur = x1 + max(2, (box_w - line_w) // 2)

            try:
                if bold_outline:
                    for dx, dy in outline_offsets:
                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
                draw.text((x_cur, y_cur), line, font=font, fill=fg)
            except Exception as e:
                # Keep going with the remaining lines/bubbles, but report it.
                print(f" ❌ Draw error bubble #{bid}: {e}")
                draw_ok = False

            y_cur += line_h

        if draw_ok:
            print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
            rendered += 1
        else:
            failed += 1

    pil_img.save(output_path)

    print()
    print(f"{'─'*50}")
    print(f" Rendered : {rendered}")
    print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
    print(f" No text : {missing} (not in output.txt)")
    print(f" Failed : {failed} (too small or draw error)")
    print(f"{'─'*50}")
    print(f"✅ Saved → {output_path}")

    return pil_img


# ============================================================
# MAIN
# ============================================================
def main() -> int:
    """Run the full pipeline using the module-level CONFIG paths.

    Returns a process exit code: 0 on success, 1 when an input file cannot
    be loaded or parsed.
    """
    print(f"📖 Loading image : {IMAGE_PATH}")
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        print(f"❌ Cannot load: {IMAGE_PATH}")
        return 1

    print(f"📦 Loading bubbles : {BUBBLES_PATH}")
    try:
        bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
    except (OSError, ValueError, KeyError, TypeError) as exc:
        # ValueError also covers json.JSONDecodeError.
        print(f"❌ Cannot load: {BUBBLES_PATH} ({exc})")
        return 1
    print(f" {len(bubble_boxes)} bubbles | "
          f"{sum(len(v) for v in quads_per_bubble.values())} quads total")

    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    try:
        translations = parse_translations(TRANSLATIONS_PATH)
    except (OSError, ValueError) as exc:
        # ValueError also covers UnicodeDecodeError.
        print(f"❌ Cannot load: {TRANSLATIONS_PATH} ({exc})")
        return 1
    print(f" {len(translations)} translations found")

    if SKIP_BUBBLE_IDS:
        print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}")
    else:
        print(f"⏭️ Skip list : (empty — all bubbles will be rendered)")

    print("🔤 Resolving font...")
    font_path, _ = resolve_font()

    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
    clean_image = erase_quads(
        image,
        quads_per_bubble,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD
    )

    print("✍️ Rendering translated text...")
    render_translations(
        image_bgr=clean_image,
        bubble_boxes=bubble_boxes,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        font_path=font_path,
        font_size=FONT_SIZE,
        bold_outline=True,
        auto_color=True,
        output_path=OUTPUT_PATH
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())