# manga-translator/manga-renderer.py

import re
import json
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os


# ─────────────────────────────────────────────
#  CONFIG
# ─────────────────────────────────────────────
# Page image read by the renderer and the image it writes.
INPUT_IMAGE = "page.png"
OUTPUT_IMAGE = "page_translated.png"

# Sidecar files: the translations table and the per-bubble bounding boxes.
TRANSLATIONS_FILE = "output.txt"
BUBBLES_FILE = "bubbles.json"

# Preferred font, and the path tried when the preferred font is missing.
FONT_PATH = "font.ttf"
FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc"

# Default text color as an (R, G, B) tuple.
FONT_COLOR = (0, 0, 0)


# ─────────────────────────────────────────────
#  PARSE output.txt
# ─────────────────────────────────────────────
def parse_translations(filepath):
    """
    Parse the translations table into {bubble_id: translated_text}.

    A row is used only when it:
      * starts (after optional whitespace) with ``#<digits>``,
      * has at least three columns separated by runs of 2+ spaces,
      * has a last column that does not start with ``[``.
    Every other line is skipped, so bubbles absent from the file are
    simply not returned (and are left untouched on the page).

    The bubble ID is taken from the ``#<digits>`` prefix itself, so
    other digits that happen to appear in the first column cannot
    leak into the ID.

    Args:
        filepath : Path to the UTF-8 translations file.

    Returns:
        Dict mapping int bubble ID → translated text. When an ID
        appears more than once, the last usable row wins.
    """
    # Compiled once: both patterns are applied to every line.
    id_prefix  = re.compile(r"^\s*#(\d+)")
    column_gap = re.compile(r" {2,}")

    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            id_match = id_prefix.match(line)
            if id_match is None:
                continue
            parts = column_gap.split(line.strip())
            if len(parts) < 3:
                continue
            translated = parts[-1].strip()
            # A bracketed last column is treated as "no translation".
            if translated.startswith("["):
                continue
            translations[int(id_match.group(1))] = translated

    print(f"  ✅ {len(translations)} bubble(s) to translate: "
          f"{sorted(translations.keys())}")
    for bid, text in sorted(translations.items()):
        print(f"     #{bid}: {text}")
    return translations


# ─────────────────────────────────────────────
#  LOAD bubbles.json
# ─────────────────────────────────────────────
def load_bubble_boxes(filepath):
    """
    Read the bubble bounding boxes from a JSON file.

    The file holds an object whose keys are bubble IDs (as strings)
    and whose values are dicts with 'x', 'y', 'w', 'h'. Keys are
    converted to int; values are returned as loaded.

    Args:
        filepath : Path to the UTF-8 JSON file.

    Returns:
        Dict mapping int bubble ID → box dict.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    boxes = {}
    for key, box in payload.items():
        boxes[int(key)] = box

    print(f"  ✅ Loaded {len(boxes)} bubble(s)")
    for bubble_id in sorted(boxes):
        box = boxes[bubble_id]
        print(f"     #{bubble_id}: ({box['x']},{box['y']}) "
              f"{box['w']}×{box['h']}px")
    return boxes


# ─────────────────────────────────────────────
#  SAMPLE BACKGROUND COLOR
# ─────────────────────────────────────────────
def sample_bubble_background(cv_image, bubble_data):
    """
    Estimate the background color inside a bubble's bounding box.

    The visible part of the bbox is converted to grayscale and the
    pixels at or above the 90th brightness percentile are averaged
    per channel.

    The sampled rectangle is the bbox clipped to the image, using the
    same edges that erase_bubble_text fills, so a box that starts
    off-image never samples pixels outside the box.

    Args:
        cv_image    : BGR numpy array
        bubble_data : Dict with 'x','y','w','h'

    Returns:
        (B, G, R) tuple of ints; (255, 255, 255) when no part of
        the bbox is visible on the image.
    """
    img_h, img_w = cv_image.shape[:2]
    x1 = max(0, bubble_data["x"])
    y1 = max(0, bubble_data["y"])
    # Far edges come from the unclamped origin so the rectangle is
    # the true intersection of the bbox with the image.
    x2 = min(img_w, bubble_data["x"] + bubble_data["w"])
    y2 = min(img_h, bubble_data["y"] + bubble_data["h"])

    # No visible area (also guards against negative slice bounds,
    # which numpy would interpret as counting from the end).
    if x2 <= x1 or y2 <= y1:
        return (255, 255, 255)

    region = cv_image[y1:y2, x1:x2]
    if region.size == 0:
        return (255, 255, 255)

    gray      = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    threshold = np.percentile(gray, 90)
    bg_mask   = gray >= threshold
    if not np.any(bg_mask):
        return (255, 255, 255)

    return tuple(int(c) for c in region[bg_mask].mean(axis=0))


# ─────────────────────────────────────────────
#  ERASE ORIGINAL TEXT
#  Fills the tight OCR bbox with the sampled
#  background color. No extra expansion —
#  the bbox from bubbles.json is already the
#  exact size of the red squares.
# ─────────────────────────────────────────────
def erase_bubble_text(cv_image, bubble_data,
                      bg_color=(255, 255, 255)):
    """
    Fills the visible part of the bubble bounding box with bg_color.

    The bbox is clipped to the image. A bbox with no visible area
    (entirely off-image, or with non-positive width/height) leaves
    the image unchanged.

    Args:
        cv_image    : BGR numpy array (modified in place)
        bubble_data : Dict with 'x','y','w','h'
        bg_color    : (B,G,R) fill color
    """
    img_h, img_w = cv_image.shape[:2]
    x1 = max(0,     bubble_data["x"])
    y1 = max(0,     bubble_data["y"])
    x2 = min(img_w, bubble_data["x"] + bubble_data["w"])
    y2 = min(img_h, bubble_data["y"] + bubble_data["h"])

    # A box fully left of / above the image gives a negative x2/y2.
    # Used as a slice bound, numpy would count it from the end and
    # paint over unrelated pixels, so bail out instead.
    if x2 <= x1 or y2 <= y1:
        return

    cv_image[y1:y2, x1:x2] = list(bg_color)


# ─────────────────────────────────────────────
#  FIT FONT SIZE
# ─────────────────────────────────────────────
def fit_font_size(draw, text, max_w, max_h, font_path,
                  min_size=7, max_size=48):
    """
    Finds the largest font size where word-wrapped text
    fits inside (max_w × max_h).

    Sizes are tried from max_size down to min_size. At each size the
    text is greedily wrapped on whitespace; the size is accepted when
    the stacked lines fit max_h AND no single line is wider than
    max_w (a word that cannot be wrapped must shrink instead of
    overflowing).

    If no size fits, the smallest size tried is returned together
    with its wrapped lines, so the caller still gets wrapped text in
    the requested font rather than one unwrapped line.

    Args:
        draw      : ImageDraw object used for text measurement
        text      : Text to lay out
        max_w     : Available width in pixels
        max_h     : Available height in pixels
        font_path : TrueType font path, or None/"" for PIL's default
        min_size  : Smallest font size to try
        max_size  : Largest font size to try

    Returns:
        (font, lines) — the chosen font and the wrapped lines.
    """
    def text_width(font, s):
        left, _, right, _ = draw.textbbox((0, 0), s, font=font)
        return right - left

    words    = text.split()
    smallest = None   # (font, lines) of the most recent size tried

    for size in range(max_size, min_size - 1, -1):
        try:
            font = (ImageFont.truetype(font_path, size)
                    if font_path else ImageFont.load_default())
        except Exception:
            # Deliberate best effort: any font-loading failure
            # degrades to PIL's built-in font instead of aborting.
            font = ImageFont.load_default()

        # Greedy wrap: extend the current line while it still fits.
        lines, current = [], ""
        for word in words:
            candidate = (current + " " + word).strip()
            if text_width(font, candidate) <= max_w:
                current = candidate
            else:
                if current:
                    lines.append(current)
                current = word
        if current:
            lines.append(current)

        smallest = (font, lines)

        # Line height is measured on a fixed sample plus 2px leading.
        ref    = draw.textbbox((0, 0), "Ay", font=font)
        line_h = (ref[3] - ref[1]) + 2
        fits_h = line_h * len(lines) <= max_h
        fits_w = all(text_width(font, ln) <= max_w for ln in lines)
        if fits_h and fits_w:
            return font, lines

        # Without a font path every iteration measures the same
        # default font, so further sizes cannot change the outcome.
        if not font_path:
            break

    if smallest is not None:
        return smallest
    # Empty size range (max_size < min_size): nothing was tried.
    return ImageFont.load_default(), [text]


# ─────────────────────────────────────────────
#  RENDER TEXT INTO BUBBLE
# ─────────────────────────────────────────────
def render_text_in_bubble(pil_image, bubble_data, text,
                           font_path, padding=8,
                           font_color=(0, 0, 0)):
    """
    Draw the translated text centered inside a bubble's bbox.

    The drawable area is the bbox shrunk by `padding` on every side
    (never smaller than 1×1). fit_font_size picks the font and the
    wrapped lines; the block of lines is centered vertically and
    each line is centered horizontally.

    Args:
        pil_image   : PIL image to draw on (modified in place)
        bubble_data : Dict with 'x','y','w','h'
        text        : Text to render
        font_path   : Font path handed to fit_font_size
        padding     : Inner margin in pixels
        font_color  : Fill color for the text
    """
    box_x, box_y = bubble_data["x"], bubble_data["y"]
    box_w, box_h = bubble_data["w"], bubble_data["h"]

    canvas  = ImageDraw.Draw(pil_image)
    avail_w = max(1, box_w - padding * 2)
    avail_h = max(1, box_h - padding * 2)

    font, wrapped = fit_font_size(canvas, text, avail_w, avail_h,
                                  font_path)

    # Same line-height rule as fit_font_size: "Ay" height + 2px.
    _, ref_top, _, ref_bottom = canvas.textbbox((0, 0), "Ay", font=font)
    row_h   = (ref_bottom - ref_top) + 2
    block_h = row_h * len(wrapped)

    left_edge = box_x + padding
    cursor_y  = box_y + padding + max(0, (avail_h - block_h) // 2)

    for row in wrapped:
        row_left, _, row_right, _ = canvas.textbbox((0, 0), row,
                                                    font=font)
        slack = avail_w - (row_right - row_left)
        canvas.text((left_edge + max(0, slack // 2), cursor_y), row,
                    font=font, fill=font_color)
        cursor_y += row_h

# ─────────────────────────────────────────────
#  RESOLVE FONT
# ─────────────────────────────────────────────
def resolve_font(font_path, fallback):
    """
    Pick the first font file that exists on disk.

    Tries `font_path`, then `fallback`; prints which one was chosen.

    Returns:
        The chosen path, or None when neither exists (the caller
        then uses PIL's default font).
    """
    candidates = (
        (font_path, "  ✅ Using font: "),
        (fallback,  "  ⚠️  Fallback: "),
    )
    for path, label in candidates:
        if path and os.path.exists(path):
            print(f"{label}{path}")
            return path

    print("  ⚠️  No font found. Using PIL default.")
    return None


# ─────────────────────────────────────────────
#  MAIN RENDERER
# ─────────────────────────────────────────────
def _ink_for_background(bg_bgr, light_bg_ink=(0, 0, 0)):
    """Return (bg_rgb, ink_color, ink_label) for a sampled (B,G,R) color."""
    bg_rgb = (bg_bgr[2], bg_bgr[1], bg_bgr[0])
    if sum(bg_rgb) / 3 > 128:
        # Light background: use the caller's ink (black by default).
        ink   = (0, 0, 0) if light_bg_ink is None else light_bg_ink
        label = "black" if ink == (0, 0, 0) else str(ink)
    else:
        # Dark background: white is the only readable choice here.
        ink, label = (255, 255, 255), "white"
    return bg_rgb, ink, label


def render_translated_page(
    input_image       = INPUT_IMAGE,
    output_image      = OUTPUT_IMAGE,
    translations_file = TRANSLATIONS_FILE,
    bubbles_file      = BUBBLES_FILE,
    font_path         = FONT_PATH,
    font_fallback     = FONT_FALLBACK,
    font_color        = FONT_COLOR,
    text_padding      = 8,
    debug             = False,
):
    """
    Pipeline:
      1. Parse translations (only present IDs processed)
      2. Load bubble boxes from bubbles.json
      3. Cross-check IDs — absent ones left untouched
      4. Sample background color per bubble
      5. Erase original text (fill tight bbox)
      6. Render translated text sized to fit the bbox
      7. Save output

    Args:
        input_image       : Path of the page image to read
        output_image      : Path the rendered page is saved to (PNG)
        translations_file : Path of the translations table
        bubbles_file      : Path of the bubble bbox JSON
        font_path         : Preferred font file
        font_fallback     : Font file tried when font_path is missing
        font_color        : Ink used on light bubble backgrounds;
                            dark backgrounds always get white ink
        text_padding      : Inner margin (px) between bbox and text
        debug             : When True, also writes debug_render.png
                            with every bbox outlined

    Returns:
        None. On any unrecoverable condition (no translations, no
        boxes, no matching IDs, unreadable image) a message is
        printed and the function returns without writing output.
    """
    print("=" * 55)
    print("  MANGA TRANSLATOR — RENDERER")
    print("=" * 55)

    print("\n📄 Parsing translations...")
    translations = parse_translations(translations_file)
    if not translations:
        print("❌ No translations found. Aborting.")
        return

    print("\n📦 Loading bubble data...")
    bubble_boxes = load_bubble_boxes(bubbles_file)
    if not bubble_boxes:
        print("❌ No bubble data. Re-run manga-translator.py.")
        return

    translate_ids = set(translations.keys())
    box_ids       = set(bubble_boxes.keys())
    to_process    = sorted(translate_ids & box_ids)
    untouched     = sorted(box_ids - translate_ids)
    missing       = sorted(translate_ids - box_ids)

    print(f"\n🔗 To process : {to_process}")
    print(f"   Untouched  : {untouched}")
    if missing:
        print(f"   ⚠️  In output.txt but no box: {missing}")

    if not to_process:
        print("❌ No matching IDs. Aborting.")
        return

    print(f"\n🖼️  Loading: {input_image}")
    cv_image = cv2.imread(input_image)
    if cv_image is None:
        print(f"❌ Could not load: {input_image}")
        return
    print(f"  {cv_image.shape[1]}×{cv_image.shape[0]}px")

    # Sample backgrounds BEFORE erasing, so every sample sees the
    # original pixels even when boxes overlap. The ink color is
    # decided once here and reused by the render pass.
    print("\n🎨 Sampling backgrounds...")
    bg_colors  = {}
    ink_colors = {}
    for bid in to_process:
        bg_bgr = sample_bubble_background(
            cv_image, bubble_boxes[bid])
        bg_rgb, ink, ink_label = _ink_for_background(
            bg_bgr, font_color)
        bg_colors[bid]  = bg_bgr
        ink_colors[bid] = ink
        print(f"  #{bid}: RGB{bg_rgb}  ink→{ink_label}")

    # Erase
    print("\n🧹 Erasing original text...")
    for bid in to_process:
        bd = bubble_boxes[bid]
        erase_bubble_text(cv_image, bd, bg_color=bg_colors[bid])
        print(f"  ✅ #{bid}  ({bd['w']}×{bd['h']}px)")

    # Text is drawn with PIL, which expects RGB channel order.
    pil_image = Image.fromarray(
        cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))

    print("\n🔤 Resolving font...")
    resolved_font = resolve_font(font_path, font_fallback)

    # Render
    print("\n✍️  Rendering...")
    for bid in to_process:
        text = translations[bid]
        bd   = bubble_boxes[bid]
        render_text_in_bubble(
            pil_image, bd, text,
            font_path  = resolved_font,
            padding    = text_padding,
            font_color = ink_colors[bid],
        )
        print(f"  ✅ #{bid}: '{text}'  "
              f"({bd['x']},{bd['y']}) {bd['w']}×{bd['h']}px")

    if debug:
        # Outlines go on a copy so the saved page stays clean.
        dbg      = pil_image.copy()
        dbg_draw = ImageDraw.Draw(dbg)
        for bid, bd in sorted(bubble_boxes.items()):
            color = (0, 200, 0) if bid in translate_ids \
                    else (160, 160, 160)
            dbg_draw.rectangle(
                [bd["x"], bd["y"],
                 bd["x"] + bd["w"], bd["y"] + bd["h"]],
                outline=color, width=2)
            dbg_draw.text((bd["x"] + 3, bd["y"] + 3),
                          f"#{bid}", fill=color)
        dbg.save("debug_render.png")
        print("\n  🐛 debug_render.png saved "
              "(green=translated, grey=untouched)")

    print(f"\n💾 Saving → {output_image}")
    pil_image.save(output_image, "PNG")
    print("  ✅ Done!")
    print("=" * 55)


# ─────────────────────────────────────────────
#  ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Paths, font and color come from the CONFIG constants at the top
    # of the file, so editing CONFIG actually changes a script run.
    # Only the script-specific choices (padding, debug) are set here.
    render_translated_page(
        input_image       = INPUT_IMAGE,
        output_image      = OUTPUT_IMAGE,
        translations_file = TRANSLATIONS_FILE,
        bubbles_file      = BUBBLES_FILE,
        font_path         = FONT_PATH,
        font_fallback     = FONT_FALLBACK,
        font_color        = FONT_COLOR,
        text_padding      = 8,
        debug             = True,
    )