# manga-translator/manga-renderer.py

import re
import json
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os


# ─────────────────────────────────────────────
#  CONFIG
# ─────────────────────────────────────────────
# Default file locations. These are the default argument values of
# render_translated_page().
INPUT_IMAGE       = "page.png"
OUTPUT_IMAGE      = "page_translated.png"
TRANSLATIONS_FILE = "output.txt"
BUBBLES_FILE      = "bubbles.json"

# Font settings. resolve_font() uses FONT_PATH when that file exists and
# otherwise tries FONT_FALLBACK.
# NOTE(review): the fallback looks like a macOS system font path — confirm
# what should be used on other platforms.
FONT_PATH         = "font.ttf"
FONT_FALLBACK     = "/System/Library/Fonts/Helvetica.ttc"
# Text fill colour handed to ImageDraw.text() on the RGB page image.
FONT_COLOR        = (0, 0, 0)
# Not referenced anywhere else in this file as shown; erase_bubble_rect()
# writes the literal value 255 instead.
BUBBLE_FILL       = (255, 255, 255)


# ─────────────────────────────────────────────
#  STEP 1: PARSE output.txt
#  Robust parser: always takes the LAST column,
#  where columns are separated by 2+ spaces.
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parses output.txt and returns {bubble_id: translated_text}.

    Only lines that start with "#<digits>" (after trimming surrounding
    whitespace) are considered. Each such line is split on runs of two
    or more spaces; lines with fewer than three columns are ignored and
    the LAST column is taken as the translation, so single spaces inside
    the original or translated text do not break the parse.

    The bubble id is taken from the digits that directly follow the
    leading '#'. Other digits that happen to share the first column
    (e.g. "#2 7pm  ...") are not merged into the id.

    If the same id appears more than once, the last occurrence wins.

    Args:
        filepath : Path to output.txt

    Returns:
        Dict {1: "LA NOIA ESTÀ IL·LESA!", ...}
    """
    id_pattern = re.compile(r"#(\d+)")
    column_gap = re.compile(r" {2,}")
    translations = {}

    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise hide the '#' of the very first line from the id match.
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()

            # Must start with #N
            id_match = id_pattern.match(line)
            if id_match is None:
                continue

            # Split on 2+ spaces → [bubble_id_col, original_col, translated_col]
            parts = column_gap.split(line)
            if len(parts) < 3:
                continue

            bubble_id = int(id_match.group(1))
            translations[bubble_id] = parts[-1].strip()   # always last column

    print(f"  ✅ Parsed {len(translations)} translation(s) from {filepath}")
    for bid, text in sorted(translations.items()):
        print(f"     #{bid}: {text}")

    return translations


# ─────────────────────────────────────────────
#  STEP 2: LOAD BUBBLE BOXES from bubbles.json
#  These were saved by manga-translator.py
#  and are guaranteed to match the clusters.
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Reads bubbles.json and converts it into a dict of bounding boxes.

    The file is expected to hold one JSON object whose keys are bubble
    ids written as strings and whose values carry the box geometry:
        {
          "1": {"x": 120, "y": 45, "w": 180, "h": 210},
          "2": { ... },
          ...
        }

    A summary of the loaded boxes is printed to stdout.

    Args:
        filepath : Path to bubbles.json

    Returns:
        Dict {bubble_id (int): (x, y, w, h)}
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    # String keys become int ids; each geometry dict becomes a flat tuple.
    boxes = {
        int(raw_id): (rect["x"], rect["y"], rect["w"], rect["h"])
        for raw_id, rect in payload.items()
    }

    print(f"  ✅ Loaded {len(boxes)} bubble box(es) from {filepath}")
    for bid in sorted(boxes):
        x, y, w, h = boxes[bid]
        print(f"     #{bid}: ({x},{y}) {w}×{h}px")

    return boxes


# ─────────────────────────────────────────────
#  STEP 3: ERASE BUBBLE CONTENT
#  Fills a rectangular region with white.
#  Uses a slightly inset rect to preserve
#  the bubble border.
# ─────────────────────────────────────────────
def erase_bubble_rect(image, x, y, w, h, padding=6):
    """
    Paints the inside of a bounding box with the value 255, keeping a
    frame of `padding` pixels around it untouched.

    The inset rectangle is clipped to the image bounds. If nothing is
    left after insetting and clipping, the image is not modified.

    Args:
        image   : BGR numpy array (modified in place)
        x,y,w,h : Bounding box
        padding : Pixels to leave as border (default: 6)
    """
    img_h, img_w = image.shape[0], image.shape[1]

    left   = max(0, x + padding)
    top    = max(0, y + padding)
    right  = min(img_w, x + w - padding)
    bottom = min(img_h, y + h - padding)

    # Degenerate (empty or inverted) region: nothing to erase.
    if right <= left or bottom <= top:
        return

    image[top:bottom, left:right] = 255


# ─────────────────────────────────────────────
#  STEP 4: FIT FONT SIZE
#  Finds the largest font size where the text
#  fits inside (max_w × max_h) with word wrap.
# ─────────────────────────────────────────────
def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=8, max_size=48):
    """
    Finds the largest font size at which the word-wrapped text fits
    inside a (max_w × max_h) box.

    Sizes are tried one by one from max_size down to min_size (a linear
    scan, not a binary search). For each size the text is greedily
    wrapped on whitespace, and the size is accepted only when BOTH the
    total block height and every wrapped line's width fit the box. The
    width check matters because a single word wider than max_w is placed
    on a line of its own by the wrapper and would otherwise overflow.

    If no size fits, the wrap computed at the smallest size tried is
    returned, so the caller still gets wrapped lines rather than one
    long unwrapped string.

    When font_path is falsy, PIL's default font is used. It is loaded
    without a size, so only a single attempt is made.

    Args:
        draw      : PIL ImageDraw instance
        text      : Text string to fit
        max_w     : Available width in pixels
        max_h     : Available height in pixels
        font_path : Path to .ttf font (or None for default)
        min_size  : Smallest font size to try (default: 8)
        max_size  : Largest font size to try  (default: 48)

    Returns:
        (font, list_of_wrapped_lines)
    """
    def load_font(size):
        # Fall back to PIL's built-in font when the file cannot be used.
        if not font_path:
            return ImageFont.load_default()
        try:
            return ImageFont.truetype(font_path, size)
        except (OSError, ValueError, ImportError):
            return ImageFont.load_default()

    def text_width(sample, font):
        left, _, right, _ = draw.textbbox((0, 0), sample, font=font)
        return right - left

    def wrap(words, font):
        # Greedy wrap: keep appending words while the line still fits.
        lines = []
        current = ""
        for word in words:
            candidate = f"{current} {word}".strip()
            if text_width(candidate, font) <= max_w:
                current = candidate
            else:
                if current:
                    lines.append(current)
                current = word
        if current:
            lines.append(current)
        return lines

    words = text.split()
    sizes = range(max_size, min_size - 1, -1)
    if not font_path:
        # The default font is loaded without a size; one pass is enough.
        sizes = sizes[:1]

    smallest_attempt = None
    for size in sizes:
        font = load_font(size)
        lines = wrap(words, font)

        # Line height: ink height of a reference string plus 3px of leading.
        ref = draw.textbbox((0, 0), "Ay", font=font)
        line_h = (ref[3] - ref[1]) + 3

        fits_height = line_h * len(lines) <= max_h
        fits_width = all(text_width(line, font) <= max_w for line in lines)
        if fits_height and fits_width:
            return font, lines

        smallest_attempt = (font, lines)

    if smallest_attempt is None:
        # Empty size range (max_size < min_size): nothing was tried.
        return ImageFont.load_default(), [text]

    return smallest_attempt


# ─────────────────────────────────────────────
#  STEP 5: RENDER TEXT INTO BUBBLE
#  Draws translated text centered inside
#  the bubble bounding box.
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, x, y, w, h, text,
                           font_path, padding=12,
                           font_color=(0, 0, 0)):
    """
    Renders text centered (horizontally + vertically)
    inside a bubble bounding box.

    The font size and line breaks come from fit_font_size(), called with
    the box shrunk by `padding` on every side. Lines are stacked with a
    fixed line height (ink height of "Ay" plus 3px).

    Args:
        pil_image  : PIL Image (modified in place)
        x,y,w,h    : Bubble bounding box
        text       : Translated text to render
        font_path  : Path to .ttf font (or None)
        padding    : Inner padding in pixels (default: 12)
        font_color : RGB color tuple (default: black)
    """
    draw    = ImageDraw.Draw(pil_image)
    inner_w = max(1, w - padding * 2)
    inner_h = max(1, h - padding * 2)

    font, lines = fit_font_size(draw, text, inner_w, inner_h, font_path)
    if not lines:
        return

    _, ref_top, _, ref_bottom = draw.textbbox((0, 0), "Ay", font=font)
    line_h  = (ref_bottom - ref_top) + 3
    total_h = line_h * len(lines)

    # textbbox() is measured relative to the drawing anchor, and the ink
    # usually starts a few pixels below/right of it. Subtracting the bbox
    # offsets puts the measured ink box, not the anchor, at the centered
    # position; otherwise the text sits lower than intended.
    cursor_y = y + padding + max(0, (inner_h - total_h) // 2) - ref_top

    for line in lines:
        left, _, right, _ = draw.textbbox((0, 0), line, font=font)
        line_w = right - left
        line_x = x + padding + max(0, (inner_w - line_w) // 2) - left
        draw.text((line_x, cursor_y), line, font=font, fill=font_color)
        cursor_y += line_h


# ─────────────────────────────────────────────
#  RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Picks the font file to use and reports the choice on stdout.

    Args:
        font_path : Preferred font file path (may be None or empty)
        fallback  : Font file path tried when the preferred one is missing

    Returns:
        font_path if it exists on disk, else fallback if that exists,
        else None (the caller then uses PIL's default font).
    """
    def available(candidate):
        # Empty/None paths are never considered available.
        return bool(candidate) and os.path.exists(candidate)

    if available(font_path):
        chosen = font_path
        print(f"  ✅ Using font: {font_path}")
    elif available(fallback):
        chosen = fallback
        print(f"  ⚠️  '{font_path}' not found → fallback: {fallback}")
    else:
        chosen = None
        print("  ⚠️  No font found. Using PIL default.")

    return chosen


# ─────────────────────────────────────────────
#  MAIN RENDERER
# ─────────────────────────────────────────────
def render_translated_page(
    input_image       = INPUT_IMAGE,
    output_image      = OUTPUT_IMAGE,
    translations_file = TRANSLATIONS_FILE,
    bubbles_file      = BUBBLES_FILE,
    font_path         = FONT_PATH,
    font_fallback     = FONT_FALLBACK,
    font_color        = FONT_COLOR,
    erase_padding     = 6,
    text_padding      = 12,
    debug             = False,
):
    """
    Runs the whole render pipeline and writes the translated page.

    Stages, in order:
      1. Parse translations from output.txt
      2. Load bubble boxes from bubbles.json
      3. Load the original manga page
      4. Erase the original text of every translated bubble
      5. Draw the translated text into every translated bubble
      6. Optionally save a debug overlay, then save the output image

    The function returns early (after printing a message) when there
    are no translations, no bubble boxes, or the image cannot be read.
    Translations whose id has no box are skipped with a warning.

    Args:
        input_image       : Source manga page      (default: 'page.png')
        output_image      : Output path            (default: 'page_translated.png')
        translations_file : Path to output.txt     (default: 'output.txt')
        bubbles_file      : Path to bubbles.json   (default: 'bubbles.json')
        font_path         : Primary .ttf font path
        font_fallback     : Fallback font path
        font_color        : RGB text color         (default: black)
        erase_padding     : Border px when erasing (default: 6)
        text_padding      : Inner padding for text (default: 12)
        debug             : Save debug_render.png  (default: False)
    """
    banner = "=" * 55
    print(banner)
    print("  MANGA TRANSLATOR — RENDERER")
    print(banner)

    # --- Stage 1: translations -------------------------------------------
    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    # --- Stage 2: bubble geometry ----------------------------------------
    print(f"\n📦 Loading bubble boxes from {bubbles_file}...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble boxes found. Re-run manga-translator.py first.")
        return

    # --- Stage 3: source page --------------------------------------------
    print(f"\n🖼️  Loading image: {input_image}")
    cv_image = cv2.imread(input_image)
    if cv_image is None:
        print(f"❌ Could not load: {input_image}")
        return
    page_h, page_w = cv_image.shape[:2]
    print(f"  Image size: {page_w}×{page_h}px")

    # Both passes below walk the translated bubbles in ascending id order.
    ordered_ids = sorted(translations)

    # --- Stage 4: wipe the original text (on the OpenCV array) -----------
    print("\n🧹 Erasing original bubble text...")
    for bubble_id in ordered_ids:
        box = bubble_boxes.get(bubble_id)
        if box is None:
            print(f"  ⚠️  #{bubble_id}: no box in bubbles.json, skipping")
            continue
        x, y, w, h = box
        erase_bubble_rect(cv_image, x, y, w, h, padding=erase_padding)
        print(f"  Erased #{bubble_id} at ({x},{y}) {w}×{h}px")

    # --- Stage 5: hand over to PIL (BGR → RGB) for text drawing ----------
    rgb_array = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_array)

    # --- Stage 6: font ---------------------------------------------------
    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    # --- Stage 7: draw the translations ----------------------------------
    print("\n✍️  Rendering translated text...")
    for bubble_id in ordered_ids:
        if bubble_id not in bubble_boxes:
            continue
        text = translations[bubble_id]
        x, y, w, h = bubble_boxes[bubble_id]
        render_text_in_bubble(
            pil_image, x, y, w, h, text,
            font_path  = resolved_font,
            padding    = text_padding,
            font_color = font_color,
        )
        print(f"  #{bubble_id}: '{text}' → ({x},{y}) {w}×{h}px")

    # --- Stage 8: optional overlay with every known box outlined ---------
    if debug:
        overlay = pil_image.copy()
        pen = ImageDraw.Draw(overlay)
        red = (255, 0, 0)
        for bubble_id in sorted(bubble_boxes):
            x, y, w, h = bubble_boxes[bubble_id]
            pen.rectangle([x, y, x + w, y + h], outline=red, width=2)
            pen.text((x + 4, y + 4), f"#{bubble_id}", fill=red)
        overlay.save("debug_render.png")
        print("\n  🐛 Debug render saved → debug_render.png")

    # --- Stage 9: write the result ---------------------------------------
    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print(f"  ✅ Done! Open: {output_image}")
    print(banner)


# ─────────────────────────────────────────────
#  ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":

    # Pass the CONFIG constants from the top of the file instead of repeating
    # their values as literals, so editing CONFIG actually changes what a
    # script run does. Debug overlay is enabled for script runs.
    render_translated_page(
        input_image       = INPUT_IMAGE,
        output_image      = OUTPUT_IMAGE,
        translations_file = TRANSLATIONS_FILE,
        bubbles_file      = BUBBLES_FILE,
        font_path         = FONT_PATH,
        font_fallback     = FONT_FALLBACK,
        font_color        = FONT_COLOR,
        erase_padding     = 6,
        text_padding      = 12,
        debug             = True,
    )