# manga-translator/manga-renderer.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
manga-renderer.py

Inputs:  16_cleaned.png + bubbles.json + output.txt
Output:  16_translated.png
"""

import json
import textwrap
import cv2
import numpy as np
import os
import argparse
from PIL import Image, ImageDraw, ImageFont
from typing import Dict, List, Tuple, Optional, Set, Any

# ============================================================
# CONFIG
# ============================================================
# Font files to try, in priority order: the bundled comic fonts first, then
# common system fonts on macOS, Windows and Linux. The "fonts/..." entries are
# relative paths, so they are resolved against the current working directory.
# If no candidate loads, resolve_font_path() returns "" and rendering falls
# back to Pillow's built-in default font.
FONT_CANDIDATES = [
    "fonts/animeace2_reg.ttf",
    "fonts/ComicNeue-Bold.ttf",
    "/Library/Fonts/Arial.ttf",                 # macOS
    "/System/Library/Fonts/Helvetica.ttc",      # macOS
    "C:\\Windows\\Fonts\\arial.ttf",            # Windows
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" # Linux
]

# Size used to validate font candidates and as the fallback when a bubble has
# no usable "box"/"lines" data to estimate a size from.
DEFAULT_FONT_SIZE = 18
# Lower bound for both the estimated size and the shrink-to-fit loop.
MIN_FONT_SIZE     = 8

# Integer bubble IDs that must NOT be rendered; main() passes this set to
# render_text(), which compares it against the int-converted JSON keys.
SKIP_BUBBLE_IDS: Set[int] = set()

# ============================================================
# FONT LOADER
# ============================================================
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Return the first usable face of the font file at ``path``, or None.

    For a ``.ttc`` collection (matched case-insensitively by extension) face
    indices 0-3 are tried in order; for any other file only index 0 is tried.
    A face counts as usable when it can be opened at ``size`` and a probe
    ``getbbox("A")`` call completes without raising.

    Args:
        path: Font file to open.
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The loaded font, or None when every attempted face failed.
    """
    is_collection = path.lower().endswith(".ttc")
    face_indices = range(4) if is_collection else [0]

    for face_index in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face_index)
            # Probe call: any exception here marks this face as unusable.
            candidate.getbbox("A")
        except Exception:
            # Deliberate best-effort: move on to the next face index.
            continue
        return candidate

    return None

def resolve_font_path() -> str:
    """Pick the first entry of FONT_CANDIDATES that exists and loads.

    Each candidate must exist on disk and load successfully through
    ``load_font`` at DEFAULT_FONT_SIZE. The outcome is reported on stdout.

    Returns:
        The chosen font path, or an empty string when no candidate works
        (callers treat "" as "use Pillow's default font").
    """
    for candidate in FONT_CANDIDATES:
        # Cheap existence test first, so missing files are never opened.
        if not os.path.exists(candidate):
            continue
        if load_font(candidate, DEFAULT_FONT_SIZE) is None:
            continue
        print(f"   ✅ Font loaded: {candidate}")
        return candidate

    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback (Text may look small)")
    return ""

# ============================================================
# PARSERS
# ============================================================
def parse_translations(filepath: str) -> Dict[int, str]:
    """Read output.txt and return ``{bubble_id: translated_text}``.

    Only lines that start with ``#`` and contain at least nine
    ``|``-separated fields are considered. The first field is the bubble id
    (leading ``#`` removed) and the ninth field (index 8) is the translation.
    Lines whose id is not an integer, or whose translation is empty or ``-``,
    are skipped. If an id appears more than once, the last occurrence wins.

    The file is decoded as ``utf-8-sig``: a leading byte-order mark is
    dropped, so it cannot hide the ``#`` of the first line. Files without a
    BOM decode exactly as they would with plain ``utf-8``.

    Args:
        filepath: Path to the translations text file.

    Returns:
        Mapping of integer bubble id to stripped translated text.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    translations: Dict[int, str] = {}
    with open(filepath, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue

            parts = line.split("|")
            if len(parts) < 9:
                continue

            # Only the id conversion can raise; keep the try body minimal.
            try:
                bid = int(parts[0].lstrip("#"))
            except ValueError:
                continue

            translated = parts[8].strip()  # Index 8 is TRANSLATED
            if translated and translated != "-":
                translations[bid] = translated
    return translations

def parse_bubbles(filepath: str):
    """Load the bubble description file and return the decoded JSON as-is.

    The file is read as UTF-8; no validation of the JSON structure is done
    here.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        raw_json = handle.read()
    return json.loads(raw_json)

# ============================================================
# DYNAMIC TEXT FITTING
# ============================================================
def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Estimate a font size for a bubble from its box height and line count.

    The box height is divided evenly among the entries of ``"lines"`` and 85%
    of that per-line height is taken as the size. The result is capped at 60
    and never goes below MIN_FONT_SIZE.

    Args:
        bubble_data: Bubble record; ``"box"`` (with an ``"h"`` entry) and
            ``"lines"`` are read from it.
        fallback_size: Returned unchanged when ``"box"`` or ``"lines"`` is
            missing or empty.

    Returns:
        The estimated size, or ``fallback_size``.
    """
    box = bubble_data.get("box")
    source_lines = bubble_data.get("lines", [])

    if not (box and source_lines):
        return fallback_size

    per_line_height = box["h"] / max(1, len(source_lines))
    guess = int(per_line_height * 0.85)

    # Clamp: upper bound 60 first, then lower bound MIN_FONT_SIZE.
    capped = guess if guess < 60 else 60
    return capped if capped > MIN_FONT_SIZE else MIN_FONT_SIZE

def fit_text_dynamically(
    text: str,
    font_path: str,
    max_w: int,
    max_h: int,
    target_font_size: int
) -> Tuple[List[str], Any, int, int]:
    """Wrap ``text`` and choose a font size so it fits a ``max_w`` x ``max_h`` box.

    Sizes are tried from ``target_font_size`` downwards in steps of 2 while
    they stay at or above MIN_FONT_SIZE. For each size the text is wrapped to
    a character count derived from the width of "A" (using 95% of ``max_w``),
    then measured. The first size whose widest line and total height both fit
    is returned.

    Fallbacks:
        * Empty ``font_path``: Pillow's default font, wrapped assuming 6 px
          per character; spacing 4 and size 10 are reported.
        * ``load_font`` fails inside the loop: default font with the text as
          one unwrapped line; spacing 4 and size 10 are reported.
        * Nothing fits: text wrapped at MIN_FONT_SIZE and returned even
          though it may overflow the box.

    Returns:
        ``(wrapped_lines, font, line_spacing, font_size)``.
    """
    if not font_path:
        default_font = ImageFont.load_default()
        columns = max(1, int(max_w / 6))
        return textwrap.wrap(text, width=columns), default_font, 4, 10

    size = target_font_size
    while size >= MIN_FONT_SIZE:
        candidate = load_font(font_path, size)
        if candidate is None:
            return [text], ImageFont.load_default(), 4, 10

        # Estimate how many characters fit per line from the width of "A".
        left, _, right, _ = candidate.getbbox("A")
        glyph_w = (right - left) or 10
        columns = max(1, int((max_w * 0.95) / glyph_w))
        lines = textwrap.wrap(text, width=columns)

        gap = max(2, int(size * 0.15))
        if hasattr(candidate, 'getmetrics'):
            line_h = sum(candidate.getmetrics())  # ascent + descent
        else:
            line_h = size
        block_h = line_h * len(lines) + gap * max(0, len(lines) - 1)

        # Measure the real rendered widths; the estimate above is only a guess.
        widths = []
        for piece in lines:
            piece_left, _, piece_right, _ = candidate.getbbox(piece)
            widths.append(piece_right - piece_left)
        widest = max([0, *widths])

        if widest <= max_w and block_h <= max_h:
            return lines, candidate, gap, size

        size -= 2

    # Nothing fit: use the minimum size anyway rather than dropping the text.
    smallest = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
    if hasattr(smallest, 'getbbox'):
        left, _, right, _ = smallest.getbbox("A")
    else:
        left, right = 0, 6
    glyph_w = (right - left) or 6
    columns = max(1, int(max_w / glyph_w))
    lines = textwrap.wrap(text, width=columns)

    return lines, smallest, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE

# ============================================================
# RENDER
# ============================================================
def render_text(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    font_path: str,
    skip_ids: Set[int]
):
    """Draw each translated bubble text, centred, onto the page image.

    Args:
        image_bgr: Page image as accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2RGB``.
        bubbles_data: Bubble records keyed by id string. Each record's
            ``"box"`` (``x``, ``y``, ``w``, ``h``) is required for rendering;
            ``"background_type"`` is optional and defaults to ``"white"``.
        translations: Translated text keyed by integer bubble id.
        font_path: TrueType font path, or "" to use Pillow's default font.
        skip_ids: Bubble ids that must not be rendered.

    Returns:
        The rendered page converted back to BGR channel order.

    The number of rendered bubbles is printed to stdout.
    """
    canvas = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    pen = ImageDraw.Draw(canvas)

    drawn = 0

    for raw_id, bubble in bubbles_data.items():
        bubble_id = int(raw_id)

        if bubble_id in skip_ids:
            continue
        if bubble_id not in translations:
            continue

        box = bubble.get("box")
        if not box:
            continue
        text = translations[bubble_id]

        # Grow the box by 10% per axis, keeping it centred on the original.
        left, top, width, height = (box[key] for key in ("x", "y", "w", "h"))
        grow_w = int(width * 0.1)
        grow_h = int(height * 0.1)
        left -= grow_w // 2
        top -= grow_h // 2
        width += grow_w
        height += grow_h

        wanted_size = get_original_font_size(bubble)
        lines, font, gap, used_size = fit_text_dynamically(text, font_path, width, height, wanted_size)

        if hasattr(font, 'getmetrics'):
            line_h = sum(font.getmetrics())  # ascent + descent
        else:
            line_h = used_size

        block_h = line_h * len(lines) + gap * max(0, len(lines) - 1)
        cursor_y = top + (height - block_h) // 2

        # White outline only on "complex" backgrounds, and only when a real
        # TrueType font is in use; otherwise the stroke is disabled.
        wants_outline = bubble.get("background_type", "white") == "complex" and font_path
        stroke = max(1, int(used_size * 0.05)) if wants_outline else 0

        for piece in lines:
            if hasattr(font, 'getbbox'):
                piece_left, _, piece_right, _ = font.getbbox(piece)
                piece_w = piece_right - piece_left
            else:
                piece_w = len(piece) * 6

            cursor_x = left + (width - piece_w) // 2
            pen.text(
                (cursor_x, cursor_y),
                piece,
                fill=(0, 0, 0),
                font=font,
                stroke_width=stroke,
                stroke_fill=(255, 255, 255)
            )
            cursor_y += line_h + gap

        drawn += 1

    print(f"   Rendered: {drawn} bubbles")
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)

# ============================================================
# MAIN
# ============================================================
def main():
    """Command-line entry point: render translations onto a cleaned page.

    Parses ``--image``, ``--json``, ``--txt`` and the optional ``--output``,
    loads the inputs, renders the text and saves the result.

    When ``--output`` is omitted the result is written next to the input
    image: ``_cleaned`` is removed from the file name (never from directory
    names) and ``_translated`` is appended before the extension, e.g.
    ``pages_cleaned/16_cleaned.png`` -> ``pages_cleaned/16_translated.png``.

    On any input/output problem a one-line error is printed and the function
    returns early without raising.
    """
    parser = argparse.ArgumentParser(description="Render translated text onto cleaned manga pages.")
    parser.add_argument("-i", "--image", required=True, help="Path to the CLEANED manga image")
    parser.add_argument("-j", "--json", required=True, help="Path to bubbles.json")
    parser.add_argument("-t", "--txt", required=True, help="Path to output.txt")
    parser.add_argument("-o", "--output", help="Path to save the final translated image")

    args = parser.parse_args()

    # Check every input path up front so a typo in any of them gives a short
    # message instead of a traceback part-way through the run.
    required_inputs = (
        ("Image file", args.image),
        ("Translations file", args.txt),
        ("Bubble data file", args.json),
    )
    for label, path in required_inputs:
        if not os.path.exists(path):
            print(f"❌ Error: {label} not found at {path}")
            return

    print(f"📂 Loading cleaned image: {args.image}")
    image_bgr = cv2.imread(args.image)
    if image_bgr is None:
        # cv2.imread reports an unreadable or undecodable file by returning
        # None rather than raising.
        print(f"❌ Error: Could not decode image at {args.image}")
        return

    print(f"📂 Loading translations: {args.txt}")
    translations = parse_translations(args.txt)

    print(f"📂 Loading bubble data: {args.json}")
    bubbles_data = parse_bubbles(args.json)

    print("🔍 Resolving font...")
    font_path = resolve_font_path()

    print("\n--- Rendering translated text ---")
    final_bgr = render_text(
        image_bgr=image_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        font_path=font_path,
        skip_ids=SKIP_BUBBLE_IDS
    )

    if args.output:
        out_path = args.output
    else:
        # Rewrite only the file name; a "_cleaned" in a parent directory name
        # must stay intact or the output lands in the wrong folder.
        folder, filename = os.path.split(args.image)
        stem, ext = os.path.splitext(filename)
        stem = stem.replace("_cleaned", "")
        out_path = os.path.join(folder, f"{stem}_translated{ext}")

    print(f"\n💾 Saving final image to: {out_path}")
    # cv2.imwrite returns False when the image could not be saved.
    if not cv2.imwrite(out_path, final_bgr):
        print(f"❌ Error: Could not write image to {out_path}")
        return
    print("✅ Done!")

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()