# manga-translator/manga-renderer.py

import os
import json
import re
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont


# ─────────────────────────────────────────────
#  CONFIG
# ─────────────────────────────────────────────
# Font files tried in order by load_font_from_candidates(); the first path that
# exists on disk and loads is used, otherwise PIL's built-in default font.
DEFAULT_FONT_CANDIDATES = [
    "fonts/AnimeAce2_reg.ttf",
    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
    "fonts/NotoSans-Regular.ttf",
]
# Default text fill and outline colors; they are drawn onto an RGB PIL image.
DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)

# Inclusive font-size range that fit_font() walks from largest to smallest.
MAX_FONT_SIZE = 20
MIN_FONT_SIZE = 6

# Guarantee full wipe of yellow squares: padding (in pixels) added on every
# side of a box before it is painted into the mask by build_yellow_mask().
# The BOX pads apply to each "line_bboxes" entry; the UNION pads apply to the
# "line_union_bbox" and "text_bbox" fallbacks.
YELLOW_BOX_PAD_X = 1
YELLOW_BOX_PAD_Y = 1
YELLOW_UNION_PAD_X = 4
YELLOW_UNION_PAD_Y = 4

# Optional extra cleanup expansion: when enabled, build_clean_mask() grows the
# yellow mask with a 3x3 dilate / close (iteration counts below) and keeps the
# grown pixels only where bubble_interior_mask() is set.
ENABLE_EXTRA_CLEAN = True
EXTRA_DILATE_ITERS = 1
EXTRA_CLOSE_ITERS = 1

# Bubble detection (for optional extra mask / border preservation):
# passed as both loDiff and upDiff to cv2.floodFill in bubble_interior_mask().
FLOOD_TOL = 30

# Border restoration: keep very conservative. When enabled, draw_bubble()
# copies original pixels back along the bubble-mask edge; the edge band is
# widened by this many 3x3 dilate iterations.
ENABLE_EDGE_RESTORE = True
EDGE_RESTORE_DILATE = 1

# Text layout inside yellow-union: fraction of the yellow bounding box's width
# and height that draw_bubble() allows the rendered text to occupy.
TEXT_INSET = 0.92


# ─────────────────────────────────────────────
#  PARSERS
# ─────────────────────────────────────────────
def parse_translations(translations_file):
    """Parse the pipe-delimited translations file.

    Only lines starting with ``#`` are considered. Each such line is split on
    ``|``; the first field, minus its leading ``#`` characters, must parse as
    an integer bubble id, otherwise the line is skipped (this is what drops a
    textual header row). The second field is ignored. Accepted layouts:

    * 5+ fields: ``#id | _ | original | translated | flags``
    * 4 fields:  ``#id | _ | original | translated``  (flags default to "-")
    * 3 fields:  ``#id | _ | translated``             (original is "")

    Records whose translated text starts with ``[`` are skipped. When an id
    appears more than once, the last record wins.

    Args:
        translations_file: Path of the UTF-8 text file to read.

    Returns:
        A ``(translations, originals, flags_map)`` tuple of dicts, each keyed
        by the integer bubble id.
    """
    translations = {}
    originals = {}
    flags_map = {}

    with open(translations_file, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith("#"):
                continue

            parts = [part.strip() for part in line.split("|")]
            if len(parts) < 3:
                continue

            try:
                bubble_id = int(parts[0].lstrip("#"))
            except ValueError:
                # Non-numeric id (e.g. a header row): not a record.
                continue

            if len(parts) >= 4:
                original, translated = parts[2], parts[3]
            else:
                original, translated = "", parts[2]
            flags = parts[4] if len(parts) >= 5 else "-"

            if translated.startswith("["):
                continue

            translations[bubble_id] = translated
            originals[bubble_id] = original
            flags_map[bubble_id] = flags

    return translations, originals, flags_map


def parse_bubbles(bubbles_file):
    """Load the bubbles JSON file and key its entries by integer bubble id.

    The file's top level must be a JSON object; each of its keys is converted
    with ``int()`` and the associated value is kept unchanged.
    """
    with open(bubbles_file, "r", encoding="utf-8") as handle:
        by_text_id = json.load(handle)

    bubbles = {}
    for text_id, entry in by_text_id.items():
        bubbles[int(text_id)] = entry
    return bubbles


# ─────────────────────────────────────────────
#  HELPERS
# ─────────────────────────────────────────────
def clamp(v, lo, hi):
    """Limit ``v`` to the range ``[lo, hi]``; ``lo`` wins if ``lo > hi``."""
    capped = v if v < hi else hi
    return capped if capped > lo else lo


def xywh_to_xyxy(box):
    """Convert an ``{"x", "y", "w", "h"}`` mapping to ``(x1, y1, x2, y2)``.

    Missing keys count as 0 and every value is passed through ``int()``.
    Returns None when ``box`` is falsy (None or an empty mapping).
    """
    if not box:
        return None
    left, top, width, height = (int(box.get(key, 0)) for key in ("x", "y", "w", "h"))
    return (left, top, left + width, top + height)


def union_xyxy(boxes):
    """Return the bounding ``(x1, y1, x2, y2)`` of all non-None boxes.

    Returns None when there is no usable box or when the resulting rectangle
    has no positive width or height.
    """
    usable = [candidate for candidate in boxes if candidate is not None]
    if not usable:
        return None

    first = usable[0]
    left, top, right, bottom = first[0], first[1], first[2], first[3]
    for candidate in usable[1:]:
        if candidate[0] < left:
            left = candidate[0]
        if candidate[1] < top:
            top = candidate[1]
        if candidate[2] > right:
            right = candidate[2]
        if candidate[3] > bottom:
            bottom = candidate[3]

    has_area = right > left and bottom > top
    return (left, top, right, bottom) if has_area else None


def bbox_from_mask(mask):
    """Return the ``(x1, y1, x2, y2)`` box around all positive mask pixels.

    ``x2`` / ``y2`` are exclusive (max index + 1). Returns None when no pixel
    is greater than 0.
    """
    rows, cols = np.nonzero(mask > 0)
    if cols.size == 0:
        return None
    left, right = int(cols.min()), int(cols.max()) + 1
    top, bottom = int(rows.min()), int(rows.max()) + 1
    return (left, top, right, bottom)


def normalize_text(s):
    """Upper-case ``s``, trim it, and delete every run of non-word characters.

    "Word character" is whatever the regex class ``\\w`` matches, so letters,
    digits and underscores are kept.
    """
    upper = s.upper().strip()
    return re.sub(r"[^\w]+", "", upper)


def is_sfx_like(text):
    """Tell whether ``text`` normalizes to a short, known sound-effect word.

    The normalized text must be at most 8 characters long and consist entirely
    of one of the patterns SHA+, BIP+, BEEP+, HN+, AH+ or OH+ (last letter
    repeated one or more times).
    """
    token = normalize_text(text)
    if len(token) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", token) is not None


# ─────────────────────────────────────────────
#  FONT
# ─────────────────────────────────────────────
def load_font_from_candidates(candidates, size):
    """Load the first usable TrueType font from ``candidates`` at ``size``.

    Empty entries and paths that do not exist are skipped, as is any font
    file that fails to load. Returns ``(font, path)`` for the first success,
    or ``(PIL default font, "PIL_DEFAULT")`` when none can be used.
    """
    existing_paths = (p for p in candidates if p and os.path.exists(p))
    for font_path in existing_paths:
        try:
            font = ImageFont.truetype(font_path, size)
        except Exception:
            # Best effort: an unreadable font just means "try the next one".
            continue
        return font, font_path
    return ImageFont.load_default(), "PIL_DEFAULT"


def measure_text(draw, text, font):
    """Return ``(width, height)`` of ``text`` from ``draw.textbbox`` at (0, 0)."""
    box = draw.textbbox((0, 0), text, font=font)
    width = box[2] - box[0]
    height = box[3] - box[1]
    return width, height


def wrap_text(draw, text, font, max_width):
    """Greedily word-wrap ``text`` so each line fits within ``max_width``.

    Words are never split: a single word wider than ``max_width`` still gets
    a line of its own. Returns ``(lines, total_height, widest_line)``, where
    ``total_height`` is the sum of the measured line heights plus a fixed gap
    (``max(2, first_line_height // 5)``) between consecutive lines. Text with
    no words yields ``([""], 0, 0)``.
    """
    wrapped = []
    current = ""

    for word in text.split():
        candidate = f"{current} {word}".strip()
        candidate_w, _ = measure_text(draw, candidate, font)
        if current and candidate_w > max_width:
            # Adding this word would overflow: close the line, start a new one.
            wrapped.append(current)
            current = word
        else:
            current = candidate
    if current:
        wrapped.append(current)

    if not wrapped:
        return [""], 0, 0

    sizes = [measure_text(draw, entry, font) for entry in wrapped]
    line_widths = [size[0] for size in sizes]
    line_heights = [size[1] for size in sizes]

    spacing = max(2, line_heights[0] // 5)
    block_height = sum(line_heights) + spacing * (len(wrapped) - 1)
    return wrapped, block_height, max(line_widths)


def fit_font(draw, text, font_candidates, safe_w, safe_h):
    """Pick the largest font size whose wrapped text fits the safe area.

    Sizes are tried from MAX_FONT_SIZE down to MIN_FONT_SIZE. The first size
    whose wrapped block is no taller than ``safe_h`` and no wider than
    ``safe_w`` is returned as ``(font, lines, total_height)``. If nothing
    fits, the MIN_FONT_SIZE layout is returned even though it overflows.
    """
    size = MAX_FONT_SIZE
    smallest = MIN_FONT_SIZE
    while size >= smallest:
        candidate_font, _ = load_font_from_candidates(font_candidates, size)
        wrapped, block_h, block_w = wrap_text(draw, text, candidate_font, safe_w)
        fits = block_h <= safe_h and block_w <= safe_w
        if fits:
            return candidate_font, wrapped, block_h
        size -= 1

    # Nothing fits: fall back to the smallest allowed size.
    fallback_font, _ = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
    wrapped, block_h, _ = wrap_text(draw, text, fallback_font, safe_w)
    return fallback_font, wrapped, block_h


def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw ``text`` at ``pos`` with a simulated outline.

    The outline is produced by drawing the text in ``stroke_fill`` at every
    integer offset within a square around ``pos`` (2 px reach when the
    measured text height is 11 px or less, otherwise 1 px), and then drawing
    the text once more in ``fill`` on top.
    """
    origin_x, origin_y = pos
    _, text_h = measure_text(draw, text, font)
    reach = 2 if text_h <= 11 else 1

    span = range(-reach, reach + 1)
    outline_offsets = [(dx, dy) for dx in span for dy in span if (dx, dy) != (0, 0)]
    for dx, dy in outline_offsets:
        draw.text((origin_x + dx, origin_y + dy), text, font=font, fill=stroke_fill)

    draw.text((origin_x, origin_y), text, font=font, fill=fill)


# ─────────────────────────────────────────────
#  MASK BUILDERS
# ─────────────────────────────────────────────
def build_yellow_mask(bubble_data, img_h, img_w):
    """
    Build the 0/255 mask of the "yellow squares" (detected text boxes).

    HARD GUARANTEE:
    Returned mask always covers all yellow squares (line_bboxes), each padded
    by YELLOW_BOX_PAD_* and clipped to the image.

    Fallbacks, used only while the mask is still empty: first
    "line_union_bbox", then "text_bbox", both padded by YELLOW_UNION_PAD_*.
    """
    mask = np.zeros((img_h, img_w), dtype=np.uint8)

    def paint(box, pad_x, pad_y):
        # Pad the box, clip it to the image, and fill it if anything is left.
        corners = xywh_to_xyxy(box)
        if not corners:
            return
        left, top, right, bottom = corners
        left = clamp(left - pad_x, 0, img_w - 1)
        top = clamp(top - pad_y, 0, img_h - 1)
        right = clamp(right + pad_x, 1, img_w)
        bottom = clamp(bottom + pad_y, 1, img_h)
        if right > left and bottom > top:
            cv2.rectangle(mask, (left, top), (right, bottom), 255, -1)

    # Preferred: exact line boxes
    for line_box in bubble_data.get("line_bboxes", []):
        paint(line_box, YELLOW_BOX_PAD_X, YELLOW_BOX_PAD_Y)

    # Fallbacks, in priority order; stop as soon as the mask has any pixel.
    for fallback_key in ("line_union_bbox", "text_bbox"):
        if np.count_nonzero(mask) != 0:
            break
        paint(bubble_data.get(fallback_key), YELLOW_UNION_PAD_X, YELLOW_UNION_PAD_Y)

    return mask


def _first_bright_pixel(panel_bin, sx, sy, max_radius):
    """Return ``(x, y)`` of the first 255-valued pixel found around the seed.

    Square rings of growing radius (1..max_radius) centred on ``(sx, sy)``
    are scanned row by row; coordinates outside the array are ignored.
    Returns None when no such pixel is within reach.
    """
    height, width = panel_bin.shape[:2]
    for radius in range(1, max_radius + 1):
        for dy in range(-radius, radius + 1):
            for dx in range(-radius, radius + 1):
                if max(abs(dx), abs(dy)) != radius:
                    continue  # inner pixels were covered by smaller radii
                nx, ny = sx + dx, sy + dy
                if 0 <= nx < width and 0 <= ny < height and panel_bin[ny, nx] == 255:
                    return nx, ny
    return None


def bubble_interior_mask(img_bgr, bubble_data):
    """
    Optional helper to expand clean region safely; never used to shrink yellow coverage.

    Estimates the bubble interior as a 0/255 mask of the image size:

    1. The image is binarised (gray > 200 -> 255) and everything outside
       "panel_bbox" (clipped to the image; whole image if absent) is zeroed.
    2. The seed is "seed_point" if given, else the centre of the bubble box
       ("x", "y", "w", "h"). If the seed is not on a bright in-panel pixel,
       a nearby bright pixel is searched within about a third of the
       bubble's smaller side.
    3. If a bright seed exists, the connected bright region is flood-filled
       and closed with a 3x3 kernel. Otherwise a filled ellipse inscribed in
       the bubble box is returned as a fallback.

    Args:
        img_bgr: BGR image array (as passed to cv2.cvtColor).
        bubble_data: Mapping with optional "panel_bbox", "seed_point",
            "x", "y", "w", "h" entries.

    Returns:
        uint8 array of shape ``(h, w)`` holding 0 or 255.
    """
    h, w = img_bgr.shape[:2]

    panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
    if panel is None:
        panel = (0, 0, w, h)
    # Clip the panel to the image: a negative coordinate would otherwise wrap
    # around in the slices below.
    px1 = clamp(panel[0], 0, w)
    py1 = clamp(panel[1], 0, h)
    px2 = clamp(panel[2], 0, w)
    py2 = clamp(panel[3], 0, h)

    seed = bubble_data.get("seed_point") or {}
    sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
    sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
    sx = clamp(sx, 1, w - 2)
    sy = clamp(sy, 1, h - 2)

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

    panel_bin = np.zeros_like(binary)
    panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]

    # The flood fill runs on panel_bin, so the seed must be bright *there*.
    # Testing the gray image against a different threshold would let a
    # mid-gray or out-of-panel seed flood the dark/background region instead.
    if panel_bin[sy, sx] != 255:
        search_r = max(2, int(min(bubble_data.get("w", 20), bubble_data.get("h", 20))) // 3)
        hit = _first_bright_pixel(panel_bin, sx, sy, search_r)

        if hit is None:
            # No bright pixel nearby: approximate the bubble by an ellipse.
            m = np.zeros((h, w), dtype=np.uint8)
            bx = int(bubble_data.get("x", 0))
            by = int(bubble_data.get("y", 0))
            bw = int(bubble_data.get("w", 20))
            bh = int(bubble_data.get("h", 20))
            cv2.ellipse(m, (bx + bw // 2, by + bh // 2), (max(4, bw // 2), max(4, bh // 2)), 0, 0, 360, 255, -1)
            return m

        sx, sy = hit

    # cv2.floodFill needs a mask 2 px larger than the image in each dimension.
    ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
    flood = panel_bin.copy()
    cv2.floodFill(
        flood, ff_mask, (sx, sy), 255,
        loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
        flags=cv2.FLOODFILL_FIXED_RANGE
    )

    # Drop the 1 px border and scale the fill marks up to 255.
    m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
    return m


def build_clean_mask(img_bgr, bubble_data):
    """
    Build the mask of pixels to paint white, plus the yellow mask it is based on.

    FINAL RULE:
    clean_mask MUST cover yellow_mask completely.

    With ENABLE_EXTRA_CLEAN the yellow mask is grown (3x3 dilate, then close)
    and the grown pixels are kept only inside bubble_interior_mask(); the
    result is always OR-ed with the yellow mask. Returns ``(clean, yellow)``.
    """
    img_h, img_w = img_bgr.shape[:2]
    yellow = build_yellow_mask(bubble_data, img_h, img_w)

    # start with guaranteed yellow
    clean = yellow.copy()

    if ENABLE_EXTRA_CLEAN:
        kernel = np.ones((3, 3), np.uint8)
        interior = bubble_interior_mask(img_bgr, bubble_data)
        grown = cv2.dilate(yellow, kernel, iterations=EXTRA_DILATE_ITERS)
        grown = cv2.morphologyEx(grown, cv2.MORPH_CLOSE, kernel, iterations=EXTRA_CLOSE_ITERS)

        # IMPORTANT: union with yellow (never subtract yellow)
        clean = cv2.bitwise_or(yellow, cv2.bitwise_and(grown, interior))

    # final guarantee (defensive)
    return cv2.bitwise_or(clean, yellow), yellow


# ─────────────────────────────────────────────
#  DRAW BUBBLE
# ─────────────────────────────────────────────
def draw_bubble(
    pil_img,
    img_bgr_ref,
    bubble_data,
    original_text,
    translated_text,
    font_candidates,
    font_color,
    stroke_color
):
    """Wipe one bubble's text area and draw the translated text into it.

    ``pil_img`` is modified in place; ``img_bgr_ref`` is only read (for mask
    building and edge restoration).

    Args:
        pil_img: RGB PIL image being rendered onto.
        img_bgr_ref: Unmodified source image as a BGR array.
        bubble_data: Bubble description consumed by the mask builders.
        original_text: Source text of the bubble (may be empty).
        translated_text: Text to draw (may be empty).
        font_candidates: Font paths forwarded to fit_font().
        font_color: Fill color for the text.
        stroke_color: Outline color for the text.

    Returns:
        One of the status strings:
        "skip_sfx"     - untranslated sound effect, nothing touched;
        "skip_no_area" - no mask area could be derived;
        "clean_only"   - area wiped, but there was no text to draw;
        "rendered"     - area wiped and text drawn.
    """
    if original_text and translated_text:
        if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text):
            return "skip_sfx"

    rgb = np.array(pil_img)

    clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data)
    if np.count_nonzero(clean_mask) == 0:
        return "skip_no_area"

    # 1) FORCE white fill on clean mask (includes full yellow by guarantee)
    rgb[clean_mask == 255] = [255, 255, 255]

    # 2) Optional edge restore, but NEVER overwrite yellow coverage
    if ENABLE_EDGE_RESTORE:
        bubble_m = bubble_interior_mask(img_bgr_ref, bubble_data)
        edge = cv2.morphologyEx(bubble_m, cv2.MORPH_GRADIENT, np.ones((3, 3), np.uint8))
        edge = cv2.dilate(edge, np.ones((3, 3), np.uint8), iterations=EDGE_RESTORE_DILATE)

        # Don't restore where yellow exists (hard guarantee)
        edge[yellow_mask == 255] = 0

        orig_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
        rgb[edge == 255] = orig_rgb[edge == 255]

    pil_img.paste(Image.fromarray(rgb))

    if not translated_text:
        return "clean_only"

    # text region based on yellow area (exact requirement)
    text_bbox = bbox_from_mask(yellow_mask)
    if text_bbox is None:
        text_bbox = bbox_from_mask(clean_mask)
        if text_bbox is None:
            return "skip_no_area"

    x1, y1, x2, y2 = text_bbox

    draw = ImageDraw.Draw(pil_img)
    text_cx = int((x1 + x2) / 2)
    text_cy = int((y1 + y2) / 2)
    safe_w = max(16, int((x2 - x1) * TEXT_INSET))
    safe_h = max(16, int((y2 - y1) * TEXT_INSET))

    font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)

    # Bounding box of each line when drawn at (0, 0). Its top-left corner is
    # usually not (0, 0), so it is subtracted below to put the visible glyphs,
    # not the text origin, at the computed position.
    line_boxes = [draw.textbbox((0, 0), line, font=font) for line in lines]

    # total_h was computed by wrap_text() with ONE gap derived from the first
    # line's height; use the same gap here so the drawn block is exactly
    # total_h tall and therefore truly centred.
    first_box = line_boxes[0]
    gap = max(2, (first_box[3] - first_box[1]) // 5)

    y_cursor = int(round(text_cy - total_h / 2.0))
    for line, (bx1, by1, bx2, by2) in zip(lines, line_boxes):
        lw = bx2 - bx1
        lh = by2 - by1
        x = text_cx - lw // 2 - bx1
        y = y_cursor - by1
        draw_text_with_stroke(draw, (x, y), line, font, fill=font_color, stroke_fill=stroke_color)
        y_cursor += lh + gap

    return "rendered"


# ─────────────────────────────────────────────
#  MAIN
# ─────────────────────────────────────────────
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_candidates=DEFAULT_FONT_CANDIDATES,
    font_color=DEFAULT_FONT_COLOR,
    stroke_color=DEFAULT_STROKE_COLOR
):
    """Render every translated bubble onto the page and save the result.

    Bubbles are processed in "reading_order" (falling back to the bubble id).
    Translations whose id is missing from the bubbles file are counted as
    skipped, as is every bubble for which draw_bubble() returns a "skip*"
    status. A short summary is printed when done.

    Args:
        input_image: Path of the source page image.
        output_image: Path the rendered page is written to.
        translations_file: Path read by parse_translations().
        bubbles_file: Path read by parse_bubbles().
        font_candidates: Font paths tried in order (not modified).
        font_color: Fill color for the text.
        stroke_color: Outline color for the text.

    Raises:
        FileNotFoundError: If the input image cannot be loaded.
        OSError: If the output image cannot be written.
    """
    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot load image: {input_image}")

    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    translations, originals, _ = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)

    rendered, skipped = 0, 0

    def sort_key(item):
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))

    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        if bubble_id not in bubbles:
            skipped += 1
            continue

        status = draw_bubble(
            pil_img=img_pil,
            img_bgr_ref=img_bgr,
            bubble_data=bubbles[bubble_id],
            original_text=originals.get(bubble_id, ""),
            translated_text=translated_text,
            font_candidates=font_candidates,
            font_color=font_color,
            stroke_color=stroke_color
        )

        if status.startswith("skip"):
            skipped += 1
        else:
            rendered += 1

    out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its return value; without this
    # check a failed write would still be announced as success below.
    if not cv2.imwrite(output_image, out_bgr):
        raise OSError(f"Cannot write image: {output_image}")

    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
    print(f"📄 Output → {output_image}")
    print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")


if __name__ == "__main__":
    # Script entry point: render the sample page with the default font setup.
    run_options = {
        "input_image": "002-page.png",
        "output_image": "page_translated.png",
        "translations_file": "output.txt",
        "bubbles_file": "bubbles.json",
        "font_candidates": DEFAULT_FONT_CANDIDATES,
        "font_color": DEFAULT_FONT_COLOR,
        "stroke_color": DEFAULT_STROKE_COLOR,
    }
    render_translations(**run_options)