# manga-translator/manga-renderer.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
manga-renderer.py

Inputs:  001.jpg  +  bubbles.json  +  output.txt
Output:  translated_page.png

Strategy:
  1. For every bubble, white-fill all its OCR quads  (erases original text cleanly)
  2. Render the translated text centered inside the bubble bounding box
  3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
"""

import json
import textwrap
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set

# ============================================================
# CONFIG  — edit these paths to match your setup
# ============================================================
IMAGE_PATH = "004.png"                    # source page, read with cv2.imread
BUBBLES_PATH = "bubbles.json"             # bubble boxes + OCR quads
TRANSLATIONS_PATH = "output_004.txt"      # pipe-delimited translation lines
OUTPUT_PATH = "translated_page_004.png"   # rendered page is saved here

# Font candidates — tried in order, first one that loads wins
FONT_CANDIDATES = ["fonts/ComicNeue-Bold.ttf"]

FONT_SIZE = 24       # largest font size tried when fitting text
MIN_FONT_SIZE = 18   # smallest font size tried when fitting text
QUAD_PAD = 4         # extra pixels added around each quad before white-fill

# ============================================================
# SKIP LIST
# ── Add any bubble IDs you do NOT want processed here.
# ── Skipped bubbles are left completely untouched: their
# ── original text is NOT erased and no translated text is
# ── drawn inside them.
# ──
# ── Examples of why you'd skip a bubble:
# ──   • Sound effects  (BURP, BAM, POW …)
# ──   • Untranslatable single characters
# ──   • Bubbles with bad OCR you want to fix manually later
# ──   • Narrator boxes you want to leave in the source language
# ============================================================
# NOTE: built with set([...]) on purpose. A brace literal that contains only
# comments is an empty *dict*, not a set; wrapping a list keeps the value a
# real set whether zero or more IDs are uncommented below.
SKIP_BUBBLE_IDS: Set[int] = set([
    # 8,    # BURP BURP — sound effect
    # 2,    # example: bad OCR, fix manually
])


# ============================================================
# FONT LOADER
# ============================================================
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load *path* at *size* and return the first usable face, else None.

    Paths ending in ``.ttc`` are probed at face indices 0-3; every other
    path is probed at index 0 only. A face is accepted when it loads and a
    ``getbbox`` probe on it succeeds.
    """
    is_collection = path.lower().endswith(".ttc")
    face_indices = range(4) if is_collection else [0]

    for face in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face)
            candidate.getbbox("A")   # probe call: any exception rejects this face
        except Exception:
            continue
        return candidate

    return None


def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """Pick the first entry of FONT_CANDIDATES that loads at FONT_SIZE.

    Returns:
        (path, font) for the first working candidate, or ("", Pillow's
        default font) when none of the candidates can be loaded.
    """
    # Lazy generator: candidates are still loaded one at a time, in order.
    attempts = ((path, load_font(path, FONT_SIZE)) for path in FONT_CANDIDATES)
    for path, font in attempts:
        if font is None:
            continue
        print(f"   ✅ Font: {path}")
        return path, font

    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()


# ============================================================
# PARSE output.txt  →  {bid: translated_string}
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
    """
    Reads output.txt and returns {bubble_id: translated_text}.

    Lines look like:  #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
    Only field 0 (the ``#``-prefixed bubble id) and field 4 (the translation)
    are used. A line is ignored when it does not start with ``#``, has fewer
    than five ``|``-separated fields, has a non-integer id, or has a
    translation that is empty or the placeholder ``-``.

    Args:
        filepath: path to the pipe-delimited translations file.

    Returns:
        Mapping of bubble id to its stripped translated text. If an id occurs
        more than once, the last usable line wins.

    Raises:
        OSError: if the file cannot be opened.
    """
    translations: Dict[int, str] = {}
    # utf-8-sig reads plain UTF-8 unchanged but also drops a leading BOM.
    # With plain "utf-8" the BOM stays glued to the first "#", the
    # startswith() check fails, and the first translation is silently lost.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line.startswith("#"):
                continue
            parts = line.split("|")
            if len(parts) < 5:
                continue
            try:
                bid = int(parts[0].lstrip("#"))
            except ValueError:
                continue   # id field is not a number — not a bubble line
            translated = parts[4].strip()
            if translated and translated != "-":
                translations[bid] = translated
    return translations


# ============================================================
# PARSE bubbles.json  →  bubble_boxes, quads_per_bubble
# ============================================================
def parse_bubbles(filepath: str):
    """
    Load bubbles.json and split it into boxes and OCR quads.

    The file is read as a JSON object keyed by bubble id; each entry supplies
    "x", "y", "w", "h" and, optionally, "quads".

    Returns:
        bubble_boxes     : {bid: (x1, y1, x2, y2)}  with x2 = x + w, y2 = y + h
        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
                           (an empty list when the entry has no "quads")
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        raw = json.load(handle)

    bubble_boxes = {}
    quads_per_bubble = {}

    for raw_id, entry in raw.items():
        bubble_id = int(raw_id)   # JSON keys are strings; ids are ints downstream

        left, top = entry["x"], entry["y"]
        right, bottom = left + entry["w"], top + entry["h"]

        bubble_boxes[bubble_id] = (left, top, right, bottom)
        quads_per_bubble[bubble_id] = entry.get("quads", [])

    return bubble_boxes, quads_per_bubble


# ============================================================
# ERASE  — white-fill every OCR quad (with small padding)
# ============================================================
def erase_quads(
    image_bgr,
    quads_per_bubble: Dict[int, List],
    translations: Dict[int, str],   # only bubbles present here are erased
    skip_ids: Set[int],
    pad: int = QUAD_PAD
):
    """
    White-fills OCR quads ONLY for bubbles that:
      - have a translation in output.txt  AND
      - are NOT in SKIP_BUBBLE_IDS
    Everything else is left completely untouched.

    Each quad is filled twice: once as a polygon, then as its axis-aligned
    bounding rectangle grown by ``pad`` pixels and clamped to the image.

    Args:
        image_bgr: image array; only ``.shape[:2]`` and ``.copy()`` are used
            here before it is handed to OpenCV drawing calls.
        quads_per_bubble: {bid: [quad, ...]}, each quad a list of [x, y] points.
        translations: {bid: text}; a bubble missing from it is not erased.
        skip_ids: bubble ids to leave untouched.
        pad: extra pixels added on every side of each quad's bounding box.

    Returns:
        A modified copy of ``image_bgr``; the input array is not altered.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    white = (255, 255, 255)

    erased_count  = 0
    skipped_count = 0

    for bid, quads in quads_per_bubble.items():

        # leave untouched: explicitly skipped, or no translation to draw
        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue

        for quad in quads:
            pts = np.array(quad, dtype=np.int32)
            if pts.size == 0:
                continue   # empty quad: nothing to fill, and min()/max() would fail

            cv2.fillPoly(result, [pts], white)

            # Bounds come from the int32 array, not the raw JSON values:
            # cv2.rectangle only accepts integer points, so float quad
            # coordinates would otherwise raise here.
            x1 = max(0,      int(pts[:, 0].min()) - pad)
            y1 = max(0,      int(pts[:, 1].min()) - pad)
            x2 = min(iw - 1, int(pts[:, 0].max()) + pad)
            y2 = min(ih - 1, int(pts[:, 1].max()) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), white, -1)

        erased_count += 1

    print(f"   Erased : {erased_count} bubbles")
    print(f"   Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result


# ============================================================
# FONT SIZING  +  TEXT WRAP
# ============================================================
def fit_text(
    text: str,
    box_w: int,
    box_h: int,
    font_path: str,
    max_size: int = FONT_SIZE,
    min_size: int = MIN_FONT_SIZE
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
    """
    Returns (fitted_size, font, wrapped_lines) — largest size where
    the text block fits inside box_h.

    Sizes are tried from ``max_size`` down to ``min_size``. For each size the
    text is wrapped to an estimated characters-per-line (average glyph width
    taken as 0.62 × size), and the size is accepted when
    ``(size + 8) * line_count <= box_h - 8``. Width is only estimated, never
    measured against the real font.

    If nothing fits, the ``min_size`` layout is returned. If no TrueType font
    can be loaded (empty ``font_path`` or a load failure), Pillow's default
    font is returned with ``min_size`` as the reported size. The text is
    wrapped in that case too, so it does not run out of the bubble as one
    long line.
    """
    def wrap(glyph_px: float) -> List[str]:
        # Width heuristic shared by every path: ~0.62 × glyph size per character.
        chars_per_line = max(1, int(box_w / (glyph_px * 0.62)))
        return textwrap.fill(text, width=chars_per_line).split("\n")

    def default_font_layout() -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
        fallback = ImageFont.load_default()
        # The bitmap default font has no .size attribute; 10 is an assumed
        # stand-in for its glyph size, used only for the wrap estimate.
        return min_size, fallback, wrap(getattr(fallback, "size", 10))

    for size in range(max_size, min_size - 1, -1):
        font = load_font(font_path, size) if font_path else None
        if font is None:
            return default_font_layout()

        lines = wrap(size)
        if (size + 8) * len(lines) <= box_h - 8:
            return size, font, lines

    # Nothing fit (or max_size < min_size) — use minimum size
    font = load_font(font_path, min_size) if font_path else None
    if font is None:
        return default_font_layout()
    return min_size, font, wrap(min_size)


# ============================================================
# COLOR HELPERS
# ============================================================
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """Estimate the background color of a box as (R, G, B).

    Four pixels are probed, each 4 px inside one corner of the box and
    clamped to the image bounds; the result is the per-channel median of
    those four probes, truncated to int.
    """
    height, width = image_bgr.shape[:2]
    inset = 4
    probes = (
        (x1 + inset, y1 + inset),
        (x2 - inset, y1 + inset),
        (x1 + inset, y2 - inset),
        (x2 - inset, y2 - inset),
    )

    reds, greens, blues = [], [], []
    for px, py in probes:
        col = max(0, min(width - 1, px))
        row = max(0, min(height - 1, py))
        blue, green, red = image_bgr[row, col]   # pixel is unpacked in B, G, R order
        reds.append(int(red))
        greens.append(int(green))
        blues.append(int(blue))

    return (
        int(np.median(reds)),
        int(np.median(greens)),
        int(np.median(blues)),
    )


def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Return black for a light background, white for a dark one.

    Lightness is the weighted sum 0.299*R + 0.587*G + 0.114*B of ``bg``;
    values strictly above 128 count as light.
    """
    red, green, blue = bg[0], bg[1], bg[2]
    luminance = 0.299 * red + 0.587 * green + 0.114 * blue
    if luminance > 128:
        return (0, 0, 0)
    return (255, 255, 255)


def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """Return a bounding box for *text* drawn at *pos*, never failing on measurement.

    ``draw.textbbox`` is used when it works. If it raises, the box is
    estimated from the font's ``size`` attribute (12 when absent):
    0.6 × size per character wide and 1.2 × size tall, anchored at *pos*.
    """
    try:
        box = draw.textbbox(pos, text, font=font)
    except Exception:
        left, top = pos[0], pos[1]
        size = getattr(font, "size", 12)
        box = (
            left, top,
            left + int(len(text) * size * 0.6),
            top + int(size * 1.2),
        )
    return box


# ============================================================
# RENDER
# ============================================================
def render_translations(
    image_bgr,
    bubble_boxes: Dict[int, Tuple],
    translations: Dict[int, str],
    skip_ids: Set[int],
    font_path: str,
    font_size: int     = FONT_SIZE,
    bold_outline: bool = True,
    auto_color: bool   = True,
    output_path: str   = OUTPUT_PATH
):
    """
    Draw each bubble's translated text onto the page and save the result.

    Bubbles are processed in ascending id order. A bubble is not drawn when
    it is in ``skip_ids``, has no (non-blank) translation, or its box is
    narrower or shorter than 10 px. Every such case is reported and counted,
    so the printed totals account for every bubble.

    Args:
        image_bgr: page image in BGR channel order (converted to RGB here).
        bubble_boxes: {bid: (x1, y1, x2, y2)}.
        translations: {bid: translated text}.
        skip_ids: bubble ids that must not be rendered.
        font_path: TrueType font path handed to fit_text ("" → default font).
        font_size: largest font size fit_text may choose.
        bold_outline: draw a 1 px outline (four offset copies) behind the text.
        auto_color: choose text color from the sampled bubble background;
            otherwise black text with a white outline.
        output_path: where the rendered image is saved.

    Returns:
        The rendered PIL image (also written to ``output_path``).
    """
    pil_img = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    draw    = ImageDraw.Draw(pil_img)

    rendered  = 0
    skipped   = 0
    missing   = 0
    too_small = 0

    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):

        # ── skip list check ────────────────────────────────────────
        if bid in skip_ids:
            print(f"   ⏭️  Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
            skipped += 1
            continue

        text = translations.get(bid, "").strip()
        if not text:
            print(f"   ⚠️  Bubble #{bid:<3} — no translation found, left blank")
            missing += 1
            continue

        box_w = x2 - x1
        box_h = y2 - y1
        if box_w < 10 or box_h < 10:
            # Report instead of dropping silently, so the user can see why
            # this bubble has no rendered text.
            print(f"   ⚠️  Bubble #{bid:<3} — box too small "
                  f"({box_w}x{box_h}px), not rendered")
            too_small += 1
            continue

        # ── fit font + wrap ────────────────────────────────────────
        size, font, lines = fit_text(
            text, box_w, box_h, font_path, max_size=font_size
        )

        # ── colors ─────────────────────────────────────────────────
        if auto_color:
            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
            fg = pick_fg_color(bg)
            ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
        else:
            fg, ol = (0, 0, 0), (255, 255, 255)

        # ── vertical center ────────────────────────────────────────
        line_h  = size + 8   # same line pitch fit_text used to check the height
        total_h = line_h * len(lines)
        y_cur   = y1 + max(4, (box_h - total_h) // 2)

        for line in lines:
            bb     = safe_textbbox(draw, (0, 0), line, font)
            line_w = bb[2] - bb[0]
            x_cur  = x1 + max(2, (box_w - line_w) // 2)

            if bold_outline:
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    try:
                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
                    except Exception:
                        # Deliberately quiet: the main draw below is attempted
                        # with the same arguments and reports any failure.
                        pass

            try:
                draw.text((x_cur, y_cur), line, font=font, fill=fg)
            except Exception as e:
                print(f"   ❌ Draw error bubble #{bid}: {e}")

            y_cur += line_h

        print(f"   ✅ Bubble #{bid:<3} — rendered  ({len(lines)} lines, size {size}px)")
        rendered += 1

    pil_img.save(output_path)

    print()
    print(f"{'─'*50}")
    print(f"  Rendered : {rendered}")
    print(f"  Skipped  : {skipped}  (SKIP_BUBBLE_IDS)")
    print(f"  No text  : {missing}  (not in output.txt)")
    if too_small:
        print(f"  Too small: {too_small}  (box under 10px)")
    print(f"{'─'*50}")
    print(f"✅ Saved → {output_path}")

    return pil_img


# ============================================================
# MAIN
# ============================================================
def main():
    """Run the pipeline: load inputs, erase original text, render translations.

    All paths and settings come from the module-level CONFIG constants.
    Returns early, after printing an error, when the page image cannot be read.
    """
    print(f"📖 Loading image        : {IMAGE_PATH}")
    page = cv2.imread(IMAGE_PATH)
    if page is None:
        print(f"❌ Cannot load: {IMAGE_PATH}")
        return

    print(f"📦 Loading bubbles      : {BUBBLES_PATH}")
    boxes, quads = parse_bubbles(BUBBLES_PATH)
    quad_total = sum(len(group) for group in quads.values())
    print(f"   {len(boxes)} bubbles  |  {quad_total} quads total")

    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    texts = parse_translations(TRANSLATIONS_PATH)
    print(f"   {len(texts)} translations found")

    if SKIP_BUBBLE_IDS:
        skip_summary = f"bubbles {sorted(SKIP_BUBBLE_IDS)}"
    else:
        skip_summary = "(empty — all bubbles will be rendered)"
    print(f"⏭️  Skip list            : {skip_summary}")

    print("🔤 Resolving font...")
    chosen_font_path = resolve_font()[0]   # only the path is needed from here on

    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
    cleaned = erase_quads(
        page,
        quads,
        translations=texts,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD,
    )

    print("✍️  Rendering translated text...")
    render_translations(
        image_bgr=cleaned,
        bubble_boxes=boxes,
        translations=texts,
        skip_ids=SKIP_BUBBLE_IDS,
        font_path=chosen_font_path,
        font_size=FONT_SIZE,
        bold_outline=True,
        auto_color=True,
        output_path=OUTPUT_PATH,
    )


if __name__ == "__main__":
    main()