# manga-translator/manga-renderer.py

"""
manga-renderer.py
─────────────────────────────────────────────────────────────────
Pipeline:
  1. Detect panel boundaries
  2. Assign bubble -> panel
  3. Detect/fallback bubble ellipse
  4. Clean original text region:
       - OCR union mask (default)
       - Hybrid mask fallback
       - Ellipse mode optional
  5. Render translated text with ellipse-aware wrapping
"""

import os
import math
import json
import re

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont


# ─────────────────────────────────────────────
#  CONSTANTS
# ─────────────────────────────────────────────
# Font files tried in order by load_font_from_candidates(); the first path that
# exists and loads wins, otherwise PIL's built-in default font is used.
DEFAULT_FONT_CANDIDATES = [
    "fonts/AnimeAce2_reg.ttf",
    "fonts/WildWordsRoman.ttf",
    "fonts/ComicRelief-Regular.ttf",
    "fonts/NotoSans-Regular.ttf",
]
# Default text fill and outline colors (RGB tuples; the PIL page image is RGB).
DEFAULT_FONT_COLOR = (0, 0, 0)
DEFAULT_STROKE_COLOR = (255, 255, 255)

# best_fit_font() tries sizes from MAX_FONT_SIZE down to MIN_FONT_SIZE in
# decrements of FONT_SIZE_STEP and keeps the first size whose layout fits.
MAX_FONT_SIZE = 20
MIN_FONT_SIZE = 6
FONT_SIZE_STEP = 1
# get_render_ellipse() sizes the text box as semi_axis * sqrt(2) * TEXT_RATIO.
TEXT_RATIO = 0.76

# loDiff/upDiff handed to cv2.floodFill() in detect_bubble_ellipse().
FLOOD_TOLERANCE = 30
# Default inset (pixels) applied to the panel edges and to both ellipse
# semi-axes in clip_ellipse_to_panel().
BORDER_SHRINK_PX = 4
# detect_panels() ignores connected components whose area is below this
# fraction of the page area.
MIN_PANEL_AREA_RATIO = 0.02
# Per pass, clip_ellipse_to_panel() moves the ellipse center by at most this
# fraction of the corresponding semi-axis.
MAX_NUDGE_RATIO = 0.30

# Cleaning mode (read by draw_bubble()):
#   "ocr_union" -> precise cleanup from OCR quad boxes (recommended);
#                  falls back to the hybrid mask when the OCR mask is empty
#   "hybrid"    -> rounded-rect + inner ellipse
#   "ellipse"   -> legacy large ellipse fill (also used for any other value)
CLEAN_MODE = "ocr_union"

# OCR-union cleaning tuning (build_ocr_union_clean_mask())
# Padding in pixels added on each side of every OCR box.
OCR_CLEAN_PAD_X = 12
OCR_CLEAN_PAD_Y = 10
# Boxes narrower/shorter than this are grown symmetrically before padding.
OCR_CLEAN_MIN_W = 24
OCR_CLEAN_MIN_H = 24
# Side of the square kernel used for the morphological close (forced odd, >= 3).
OCR_CLEAN_CLOSE_KERNEL = 5
# Number of 3x3 dilation iterations applied after the close; 0 disables it.
OCR_CLEAN_DILATE = 1

# Hybrid cleanup mask tuning (build_hybrid_clean_mask())
# Rounded-rectangle size as a multiple of the safe text box width/height.
CLEAN_MASK_RECT_SCALE_W = 1.08
CLEAN_MASK_RECT_SCALE_H = 1.20
# Inner ellipse semi-axes as a multiple of the fill ellipse semi-axes.
CLEAN_MASK_ELLIPSE_SCALE = 0.84
# Gaussian sigma applied to the mask before re-thresholding; 0 disables the blur.
CLEAN_MASK_BLUR = 0


# ─────────────────────────────────────────────
#  PARSERS
# ─────────────────────────────────────────────
def parse_translations(translations_file):
    """Read the pipe-delimited translation file.

    Only lines starting with ``#`` whose first field is ``#<int>`` are used.
    Supported layouts (newest first):

        #ID|ORDER|ORIGINAL|TRANSLATED|FLAGS
        #ID|ORDER|ORIGINAL|TRANSLATED        (flags default to "-")
        #ID|ORDER|TRANSLATED                 (original is "", flags "-")

    Records whose translated text starts with ``[`` are dropped.

    Returns:
        Three dicts keyed by bubble id: translations, originals, flags.
    """
    translations, originals, flags_map = {}, {}, {}

    with open(translations_file, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record.startswith("#"):
                continue

            fields = record.split("|")
            try:
                bubble_id = int(fields[0].lstrip("#"))
            except Exception:
                # Header rows such as "#ID|..." and malformed ids are ignored.
                continue

            field_count = len(fields)
            if field_count < 3:
                continue

            if field_count >= 4:
                original = fields[2].strip()
                translated = fields[3].strip()
                flags = fields[4].strip() if field_count >= 5 else "-"
            else:
                original, translated, flags = "", fields[2].strip(), "-"

            if translated.startswith("["):
                continue

            translations[bubble_id] = translated
            originals[bubble_id] = original
            flags_map[bubble_id] = flags

    return translations, originals, flags_map


def parse_bubbles(bubbles_file):
    """Load the bubbles JSON object and return it with its keys cast to int."""
    with open(bubbles_file, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    by_id = {}
    for key, entry in payload.items():
        by_id[int(key)] = entry
    return by_id


# ─────────────────────────────────────────────
#  HELPERS
# ─────────────────────────────────────────────
def normalize_text(s):
    """Upper-case *s*, trim it, and drop every run of non-word characters."""
    return re.sub(r"[^\w]+", "", s.upper().strip())


def is_sfx_like(text):
    """Return True when the normalized text is a short known sound effect.

    The normalized form must be at most 8 characters and consist entirely of
    one of the SFX stems with its last letter optionally repeated.
    """
    compact = normalize_text(text)
    if len(compact) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", compact) is not None


# ─────────────────────────────────────────────
#  FONT HELPERS
# ─────────────────────────────────────────────
def load_font_from_candidates(candidates, size):
    """Load the first usable TrueType font from *candidates* at *size*.

    Returns:
        ``(font, path)`` for the first candidate that exists on disk and loads,
        or ``(ImageFont.load_default(), "PIL_DEFAULT")`` when none does.
    """
    for candidate in candidates:
        if not candidate or not os.path.exists(candidate):
            continue
        try:
            loaded = ImageFont.truetype(candidate, size)
        except Exception:
            # Unreadable/corrupt font file: move on to the next candidate.
            continue
        return loaded, candidate
    return ImageFont.load_default(), "PIL_DEFAULT"


def measure_text(draw, text, font):
    """Return ``(width, height)`` of *text* from ``draw.textbbox`` at the origin."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top


def ellipse_line_max_width(y_offset, a, b):
    """Chord width of an axis-aligned ellipse at a vertical offset from center.

    Args:
        y_offset: Signed distance from the ellipse center.
        a: Horizontal semi-axis.
        b: Vertical semi-axis.

    Returns:
        ``2 * a * sqrt(1 - (y_offset / b)^2)``, 0.0 once the offset lies on or
        beyond the ellipse, and 0 when *b* is not positive.
    """
    if b <= 0:
        return 0
    remaining = max(0.0, 1.0 - (y_offset * y_offset) / (b * b))
    return 2.0 * a * math.sqrt(remaining)


def wrap_text(draw, text, font, max_width):
    """Greedy word wrap of *text* to lines no wider than *max_width*.

    A single word wider than *max_width* still gets a line of its own.

    Returns:
        ``(lines, total_height, widest_line)``. The total height is the sum of
        the line heights plus a gap between consecutive lines, where the gap
        is a fifth of the first line's height (at least 2). Text without any
        words yields ``([""], 0, 0)``.
    """
    lines = []
    pending = ""
    for word in text.split():
        attempt = (pending + " " + word).strip()
        attempt_w, _ = measure_text(draw, attempt, font)
        if pending and attempt_w > max_width:
            # The word does not fit: close the current line and start a new one.
            lines.append(pending)
            pending = word
        else:
            pending = attempt
    if pending:
        lines.append(pending)

    if not lines:
        return [""], 0, 0

    sizes = [measure_text(draw, ln, font) for ln in lines]
    widths = [size[0] for size in sizes]
    heights = [size[1] for size in sizes]

    gap = max(heights[0] // 5, 2)
    total_h = sum(heights) + gap * (len(lines) - 1)
    return lines, total_h, max(widths)


def wrap_text_ellipse_aware(draw, text, font, safe_w, safe_h, tall_bubble=False):
    """Wrap *text* so each line respects the ellipse width at its own height.

    A first rectangular wrap (85% of *safe_w* for tall bubbles) estimates the
    block height. If that block already exceeds *safe_h* it is returned as is.
    Otherwise the words are re-flowed, limiting each row to 95% of the chord
    of the ellipse with semi-axes ``target_w / 2`` and ``safe_h / 2`` at the
    row's vertical midpoint (never less than 20 px).

    Returns:
        ``(lines, total_height)``.
    """
    extra_gap = 1 if tall_bubble else 0
    target_w = safe_w * (0.85 if tall_bubble else 1.0)

    rough_lines, rough_h, _ = wrap_text(draw, text, font, target_w)
    if not rough_lines:
        return rough_lines, rough_h

    rough_heights = [measure_text(draw, ln, font)[1] for ln in rough_lines]
    gap = max(rough_heights[0] // 5, 2) + extra_gap
    block_h = sum(rough_heights) + gap * (len(rough_lines) - 1)
    if block_h > safe_h:
        return rough_lines, block_h

    semi_w = target_w / 2.0
    semi_h = safe_h / 2.0
    last_row = len(rough_heights) - 1

    refined = []
    pending = ""
    row_top = -block_h / 2.0  # y of the current row, relative to block center
    row_index = 0

    for word in text.split():
        # Rows beyond the rough layout reuse the last measured row height.
        row_h = rough_heights[min(row_index, last_row)]
        row_mid = row_top + row_h / 2.0
        row_limit = max(20, ellipse_line_max_width(row_mid, semi_w, semi_h) * 0.95)

        attempt = (pending + " " + word).strip()
        attempt_w, _ = measure_text(draw, attempt, font)

        if pending and attempt_w > row_limit:
            refined.append(pending)
            row_top += row_h + gap
            row_index += 1
            pending = word
        else:
            pending = attempt

    if pending:
        refined.append(pending)

    final_heights = [measure_text(draw, ln, font)[1] for ln in refined]
    if not final_heights:
        return refined, 0

    final_gap = max(final_heights[0] // 5, 2) + extra_gap
    return refined, sum(final_heights) + final_gap * (len(refined) - 1)


def best_fit_font(draw, text, font_candidates, safe_w, safe_h, tall_bubble=False):
    """Pick the largest font size whose wrapped text fits the safe box.

    Sizes are tried from MAX_FONT_SIZE down to MIN_FONT_SIZE. A size fits when
    the wrapped block is no taller than *safe_h* and no line is wider than
    *safe_w*. If nothing fits, the MIN_FONT_SIZE layout is returned anyway.

    Returns:
        ``(font, lines, total_height, font_path_used)``.
    """
    def layout(point_size):
        face, source = load_font_from_candidates(font_candidates, point_size)
        wrapped, height = wrap_text_ellipse_aware(
            draw, text, face, safe_w, safe_h, tall_bubble=tall_bubble
        )
        return face, wrapped, height, source

    for point_size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -FONT_SIZE_STEP):
        face, wrapped, height, source = layout(point_size)
        widest = max([0] + [measure_text(draw, ln, face)[0] for ln in wrapped])
        if height <= safe_h and widest <= safe_w:
            return face, wrapped, height, source

    # Nothing fit: fall back to the smallest size even though it overflows.
    return layout(MIN_FONT_SIZE)


def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw *text* at *pos* with an outline made of offset copies.

    The outline is 2 px when the measured text height is 11 px or less and
    1 px otherwise. It is painted first in *stroke_fill*, then the text itself
    is drawn on top in *fill*.
    """
    origin_x, origin_y = pos
    glyph_h = measure_text(draw, text, font)[1]
    reach = 1 if glyph_h > 11 else 2
    spread = range(-reach, reach + 1)

    for off_x in spread:
        for off_y in spread:
            if off_x or off_y:
                draw.text(
                    (origin_x + off_x, origin_y + off_y),
                    text,
                    font=font,
                    fill=stroke_fill,
                )

    draw.text((origin_x, origin_y), text, font=font, fill=fill)


# ─────────────────────────────────────────────
#  CLEAN MASK BUILDERS
# ─────────────────────────────────────────────
def draw_rounded_rect_mask(mask, x1, y1, x2, y2, radius, color=255):
    """Fill a rounded rectangle into *mask* in place and return the mask.

    The shape is the union of two overlapping rectangles and four corner
    discs. The corner radius is clamped to at least 1 and at most half of the
    shorter side. A degenerate rectangle leaves the mask untouched.
    """
    left, top, right, bottom = (int(v) for v in (x1, y1, x2, y2))
    if right <= left or bottom <= top:
        return mask

    corner = int(max(1, min(radius, (right - left) // 2, (bottom - top) // 2)))

    # Cross-shaped body: full-height middle strip plus full-width middle strip.
    cv2.rectangle(mask, (left + corner, top), (right - corner, bottom), color, -1)
    cv2.rectangle(mask, (left, top + corner), (right, bottom - corner), color, -1)

    corner_centers = (
        (left + corner, top + corner),
        (right - corner, top + corner),
        (left + corner, bottom - corner),
        (right - corner, bottom - corner),
    )
    for center in corner_centers:
        cv2.circle(mask, center, corner, color, -1)
    return mask


def build_hybrid_clean_mask(img_h, img_w, cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, panel):
    """Build a cleanup mask as the union of a rounded rectangle and an ellipse.

    The rounded rectangle is centered on ``(cx, cy)`` and sized from the safe
    text box; the ellipse uses the scaled fill semi-axes and *angle*. The
    union is closed with a 3x3 kernel, clipped to *panel*, and optionally
    blurred and re-thresholded when CLEAN_MASK_BLUR is positive.

    Returns:
        A ``(img_h, img_w)`` uint8 mask.
    """
    px1, py1, px2, py2 = panel
    canvas = np.zeros((img_h, img_w), dtype=np.uint8)

    rect_w = max(8, int(safe_w * CLEAN_MASK_RECT_SCALE_W))
    rect_h = max(8, int(safe_h * CLEAN_MASK_RECT_SCALE_H))
    draw_rounded_rect_mask(
        canvas,
        int(cx - rect_w / 2),
        int(cy - rect_h / 2),
        int(cx + rect_w / 2),
        int(cy + rect_h / 2),
        int(min(rect_w, rect_h) * 0.22),
        color=255,
    )

    ellipse_axes = (
        max(3, int(sa_fill * CLEAN_MASK_ELLIPSE_SCALE)),
        max(3, int(sb_fill * CLEAN_MASK_ELLIPSE_SCALE)),
    )
    ellipse_center = (int(round(cx)), int(round(cy)))
    cv2.ellipse(canvas, ellipse_center, ellipse_axes, angle, 0, 360, 255, -1)

    canvas = cv2.morphologyEx(canvas, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)

    # Keep only the part of the mask that lies inside the panel.
    inside_panel = np.zeros_like(canvas)
    inside_panel[py1:py2, px1:px2] = 255
    canvas = cv2.bitwise_and(canvas, inside_panel)

    if CLEAN_MASK_BLUR > 0:
        softened = cv2.GaussianBlur(canvas, (0, 0), CLEAN_MASK_BLUR)
        _, canvas = cv2.threshold(softened, 127, 255, cv2.THRESH_BINARY)

    return canvas


def build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel):
    """
    Build a text cleanup mask from the bubble's OCR quad bounding boxes.

    Each entry of ``bubble_data["quad_bboxes"]`` is read as a mapping with
    ``x``/``y``/``w``/``h``. Boxes with a non-positive side are ignored, small
    boxes are grown to the configured minimum, every box is padded and clamped
    to *panel*, and the filled rectangles are merged with a morphological
    close, optionally dilated, and finally clipped to the panel.

    Returns a ``(img_h, img_w)`` uint8 mask; all zeros when there are no boxes.
    """
    px1, py1, px2, py2 = panel
    boxes = bubble_data.get("quad_bboxes", [])
    canvas = np.zeros((img_h, img_w), dtype=np.uint8)

    if not boxes:
        return canvas

    def grow_to_minimum(start, length, minimum):
        # Expand symmetrically around the box center when it is too small.
        if length < minimum:
            slack = (minimum - length) // 2
            return start - slack, length + 2 * slack
        return start, length

    for box in boxes:
        bx = int(box.get("x", 0))
        by = int(box.get("y", 0))
        bw = int(box.get("w", 0))
        bh = int(box.get("h", 0))

        if bw <= 0 or bh <= 0:
            continue

        bx, bw = grow_to_minimum(bx, bw, OCR_CLEAN_MIN_W)
        by, bh = grow_to_minimum(by, bh, OCR_CLEAN_MIN_H)

        left = max(px1, bx - OCR_CLEAN_PAD_X)
        top = max(py1, by - OCR_CLEAN_PAD_Y)
        right = min(px2, bx + bw + OCR_CLEAN_PAD_X)
        bottom = min(py2, by + bh + OCR_CLEAN_PAD_Y)

        if right > left and bottom > top:
            cv2.rectangle(canvas, (left, top), (right, bottom), 255, -1)

    # Merge nearby fragments; the kernel side is forced odd and >= 3.
    close_side = max(3, int(OCR_CLEAN_CLOSE_KERNEL) | 1)
    canvas = cv2.morphologyEx(
        canvas, cv2.MORPH_CLOSE, np.ones((close_side, close_side), np.uint8), iterations=1
    )

    if OCR_CLEAN_DILATE > 0:
        canvas = cv2.dilate(canvas, np.ones((3, 3), np.uint8), iterations=OCR_CLEAN_DILATE)

    # Clip to panel bounds
    inside_panel = np.zeros_like(canvas)
    inside_panel[py1:py2, px1:px2] = 255
    return cv2.bitwise_and(canvas, inside_panel)


# ─────────────────────────────────────────────
#  PANEL DETECTION
# ─────────────────────────────────────────────
def merge_nested_panels(panels):
    """Drop panels that lie (mostly) inside a larger, already-kept panel.

    Panels are visited from largest to smallest area. A panel is discarded
    when more than 80% of its own area overlaps a panel kept before it.

    Returns:
        The kept panels, largest first. Inputs with at most one panel are
        returned unchanged.
    """
    if len(panels) <= 1:
        return panels

    def area(rect):
        return (rect[2] - rect[0]) * (rect[3] - rect[1])

    def mostly_inside(inner, outer):
        ix1, iy1 = max(inner[0], outer[0]), max(inner[1], outer[1])
        ix2, iy2 = min(inner[2], outer[2]), min(inner[3], outer[3])
        if ix2 <= ix1 or iy2 <= iy1:
            return False
        shared = (ix2 - ix1) * (iy2 - iy1)
        return shared / max(1, area(inner)) > 0.80

    survivors = []
    for candidate in sorted(panels, key=area, reverse=True):
        if not any(mostly_inside(candidate, bigger) for bigger in survivors):
            survivors.append(candidate)
    return survivors


def split_panels_on_internal_borders(panels, v_lines, img_w, img_h):
    """Split wide panels in two along a strong internal vertical line.

    For each panel at least 30% of the page width, the middle 60% of its
    columns is searched in *v_lines* for columns where the line mask covers
    more than 40% of the panel height. The panel is cut at the median such
    column, provided both halves are wider than 10% of the page width.

    Returns:
        A new list of ``(x1, y1, x2, y2)`` panels, at most one split per panel.
    """
    min_split_width = img_w * 0.30
    min_half_width = img_w * 0.10
    output = []

    for left, top, right, bottom in panels:
        whole = (left, top, right, bottom)
        width = right - left
        if width < min_split_width:
            output.append(whole)
            continue

        inset = int(width * 0.20)
        scan_from, scan_to = left + inset, right - inset
        if scan_to <= scan_from:
            output.append(whole)
            continue

        column_totals = v_lines[top:bottom, scan_from:scan_to].sum(axis=0)
        cutoff = (bottom - top) * 255 * 0.40
        strong_columns = np.where(column_totals > cutoff)[0]
        if len(strong_columns) == 0:
            output.append(whole)
            continue

        cut = int(np.median(strong_columns)) + scan_from
        if cut - left > min_half_width and right - cut > min_half_width:
            output.append((left, top, cut, bottom))
            output.append((cut, top, right, bottom))
        else:
            output.append(whole)

    return output


def detect_panels(img_bgr):
    """Detect panel rectangles on a page from its long dark border lines.

    Dark pixels (gray < 81) are opened with long horizontal and vertical
    kernels to isolate straight border lines. The dilated lines are inverted
    and the remaining connected regions become panel candidates. Candidates
    are filtered by area, size and aspect ratio, de-nested, split on internal
    vertical borders, and sorted by 100-px row bucket and then by x.

    Returns:
        A list of ``(x1, y1, x2, y2)`` panels; the whole page when none is found.
    """
    page_h, page_w = img_bgr.shape[:2]
    page_area = page_h * page_w
    smallest_area = page_area * MIN_PANEL_AREA_RATIO

    grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, ink = cv2.threshold(grayscale, 80, 255, cv2.THRESH_BINARY_INV)

    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(40, page_w // 25), 1))
    h_lines = cv2.morphologyEx(ink, cv2.MORPH_OPEN, horizontal_kernel)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(40, page_h // 25)))
    v_lines = cv2.morphologyEx(ink, cv2.MORPH_OPEN, vertical_kernel)

    # Thicken the border lines so small gaps do not leak between panels.
    frame = cv2.dilate(cv2.bitwise_or(h_lines, v_lines), np.ones((5, 5), np.uint8), iterations=2)

    component_count, _, stats, _ = cv2.connectedComponentsWithStats(
        cv2.bitwise_not(frame), connectivity=8
    )

    candidates = []
    # Label 0 is skipped, as in the usual background convention.
    for component in range(1, component_count):
        if stats[component, cv2.CC_STAT_AREA] < smallest_area:
            continue

        left = stats[component, cv2.CC_STAT_LEFT]
        top = stats[component, cv2.CC_STAT_TOP]
        box_w = stats[component, cv2.CC_STAT_WIDTH]
        box_h = stats[component, cv2.CC_STAT_HEIGHT]

        covers_page = box_w * box_h > page_area * 0.90
        too_elongated = max(box_w, box_h) / max(min(box_w, box_h), 1) > 15
        too_small = box_w < page_w * 0.05 or box_h < page_h * 0.05
        if covers_page or too_elongated or too_small:
            continue

        candidates.append((left, top, left + box_w, top + box_h))

    candidates = merge_nested_panels(candidates)
    candidates = split_panels_on_internal_borders(candidates, v_lines, page_w, page_h)
    candidates.sort(key=lambda rect: (rect[1] // 100, rect[0]))

    return candidates if candidates else [(0, 0, page_w, page_h)]


# ─────────────────────────────────────────────
#  BUBBLE GEOMETRY
# ─────────────────────────────────────────────
def assign_panel(bubble_data, panels, img_w, img_h):
    """Choose the panel a bubble belongs to.

    The panel with the largest positive overlap with the bubble's box wins
    (first one on ties). Without any overlap, the first panel containing the
    bubble center is used, and failing that the whole image.

    Returns:
        An ``(x1, y1, x2, y2)`` tuple.
    """
    left, width = bubble_data["x"], bubble_data["w"]
    top, height = bubble_data["y"], bubble_data["h"]
    right, bottom = left + width, top + height
    center_x, center_y = left + width / 2.0, top + height / 2.0

    def overlap_area(rect):
        rx1, ry1, rx2, ry2 = rect
        span_x = min(right, rx2) - max(left, rx1)
        span_y = min(bottom, ry2) - max(top, ry1)
        return span_x * span_y if span_x > 0 and span_y > 0 else 0

    winner, winner_area = None, 0
    for rect in panels:
        shared = overlap_area(rect)
        if shared > winner_area:
            winner, winner_area = tuple(rect), shared

    if winner is not None:
        return winner

    for rx1, ry1, rx2, ry2 in panels:
        if rx1 <= center_x <= rx2 and ry1 <= center_y <= ry2:
            return (rx1, ry1, rx2, ry2)
    return (0, 0, img_w, img_h)


def detect_bubble_ellipse(img_bgr, bubble_data, panel):
    """Fit an ellipse to the white speech-bubble interior around a bubble.

    The page is thresholded (gray > 200 -> white) and restricted to *panel*.
    A flood fill is seeded at the center of the bubble box; if that pixel is
    not white inside the panel, the nearest white pixel within a growing
    square window (up to about a third of the shorter box side) is used
    instead. The largest contour of the filled region is fitted with an
    ellipse.

    Args:
        img_bgr: Page image as a BGR array.
        bubble_data: Mapping with the bubble box ``x``, ``y``, ``w``, ``h``.
        panel: ``(x1, y1, x2, y2)`` panel the bubble belongs to.

    Returns:
        ``(cx, cy, half_axis_1, half_axis_2, angle)`` as floats, where the two
        half axes are half of the sizes reported by ``cv2.fitEllipse``; or
        ``None`` when no white seed is found or the filled region is too
        small (fewer than 5 contour points or area below 100) to fit.
    """
    x, w = bubble_data["x"], bubble_data["w"]
    y, h = bubble_data["y"], bubble_data["h"]

    img_h, img_w = img_bgr.shape[:2]
    px1, py1, px2, py2 = panel

    # floodFill needs a seed strictly inside the image.
    seed_x = max(1, min(img_w - 2, int(x + w / 2.0)))
    seed_y = max(1, min(img_h - 2, int(y + h / 2.0)))

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

    panel_mask = np.zeros_like(binary)
    panel_mask[py1:py2, px1:px2] = binary[py1:py2, px1:px2]

    # The seed must be white in the image that is actually flood-filled.
    # Testing the raw gray level against a different threshold let seeds with
    # gray in [150, 200] (or outside the panel) start on a 0-valued pixel, so
    # the fill captured the dark/background region instead of the bubble.
    if panel_mask[seed_y, seed_x] != 255:
        max_radius = max(2, int(min(w, h)) // 3)
        for r in range(1, max_radius):
            win_y0, win_y1 = max(0, seed_y - r), min(img_h, seed_y + r + 1)
            win_x0, win_x1 = max(0, seed_x - r), min(img_w, seed_x + r + 1)
            # argwhere is row-major: first hit is the top-most, then left-most.
            hits = np.argwhere(panel_mask[win_y0:win_y1, win_x0:win_x1] == 255)
            if len(hits):
                seed_y = int(hits[0][0]) + win_y0
                seed_x = int(hits[0][1]) + win_x0
                break
        else:
            return None

    # floodFill requires a mask 2 px larger than the image in each dimension.
    flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8)
    flood_fill_img = panel_mask.copy()
    cv2.floodFill(
        flood_fill_img,
        flood_mask,
        (seed_x, seed_y),
        255,
        loDiff=FLOOD_TOLERANCE,
        upDiff=FLOOD_TOLERANCE,
        flags=cv2.FLOODFILL_FIXED_RANGE
    )

    # The mask marks filled pixels with 1; drop the border and scale to 0/255.
    filled_region = flood_mask[1:-1, 1:-1] * 255
    filled_region = cv2.morphologyEx(filled_region, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)

    contours, _ = cv2.findContours(filled_region, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    bubble_contour = max(contours, key=cv2.contourArea)
    # fitEllipse needs at least 5 points.
    if len(bubble_contour) < 5:
        return None
    if cv2.contourArea(bubble_contour) < 100:
        return None

    (ecx, ecy), (ew, eh), angle = cv2.fitEllipse(bubble_contour)
    return float(ecx), float(ecy), float(ew / 2), float(eh / 2), float(angle)


def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, shrink=BORDER_SHRINK_PX):
    """Fit a rotated ellipse inside a panel by nudging, then shrinking it.

    The panel is inset by *shrink* on every side and both semi-axes are
    reduced by *shrink* (never below 1.0). Up to three passes follow: while
    the ellipse's axis-aligned bounding box overflows the inset panel, the
    center is moved back by at most MAX_NUDGE_RATIO of each semi-axis, and
    any overflow that remains is subtracted from both semi-axes.

    Returns:
        ``(cx, cy, semi_a, semi_b)`` after adjustment.
    """
    px1, py1, px2, py2 = panel
    lo_x, lo_y = px1 + shrink, py1 + shrink
    hi_x, hi_y = px2 - shrink, py2 - shrink

    semi_a = max(sa - shrink, 1.0)
    semi_b = max(sb - shrink, 1.0)

    def half_extents(axis_a, axis_b):
        # Half width/height of the rotated ellipse's bounding box.
        theta = math.radians(angle)
        half_w = math.sqrt((axis_a * math.cos(theta))**2 + (axis_b * math.sin(theta))**2)
        half_h = math.sqrt((axis_a * math.sin(theta))**2 + (axis_b * math.cos(theta))**2)
        return half_w, half_h

    def overflow(center_x, center_y, half_w, half_h):
        # Amount by which the box sticks out: (left, right, top, bottom).
        return (
            max(0, lo_x - (center_x - half_w)),
            max(0, (center_x + half_w) - hi_x),
            max(0, lo_y - (center_y - half_h)),
            max(0, (center_y + half_h) - hi_y),
        )

    for _ in range(3):
        half_w, half_h = half_extents(semi_a, semi_b)
        out_l, out_r, out_t, out_b = overflow(cx, cy, half_w, half_h)
        if max(out_l, out_r, out_t, out_b) == 0:
            break

        nudge_cap_x = semi_a * MAX_NUDGE_RATIO
        nudge_cap_y = semi_b * MAX_NUDGE_RATIO
        cx += min(out_l, nudge_cap_x) - min(out_r, nudge_cap_x)
        cy += min(out_t, nudge_cap_y) - min(out_b, nudge_cap_y)

        # The axes are unchanged so far, so the extents are still valid.
        worst = max(overflow(cx, cy, half_w, half_h))
        if worst > 0:
            semi_a = max(semi_a - worst, 1.0)
            semi_b = max(semi_b - worst, 1.0)

    return cx, cy, semi_a, semi_b


def get_render_ellipse(img_bgr, bubble_data, panel):
    """Return the ellipse and safe text box to use for a bubble.

    The ellipse detected from the image is preferred. When detection fails,
    an unrotated ellipse inscribed in the bubble box is used. Either way the
    ellipse is clipped to *panel*, and the safe text box is each clipped
    semi-axis times ``sqrt(2) * TEXT_RATIO``.

    Returns:
        ``(cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method)`` where
        *method* is ``"detected"`` or ``"fallback"``.
    """
    x, w = bubble_data["x"], bubble_data["w"]
    y, h = bubble_data["y"], bubble_data["h"]

    detected = detect_bubble_ellipse(img_bgr, bubble_data, panel)
    if detected is None:
        method = "fallback"
        raw_cx, raw_cy = x + w / 2.0, y + h / 2.0
        raw_sa, raw_sb = w / 2.0, h / 2.0
        angle = 0.0
    else:
        method = "detected"
        raw_cx, raw_cy, raw_sa, raw_sb, angle = detected

    cx, cy, sa_fill, sb_fill = clip_ellipse_to_panel(raw_cx, raw_cy, raw_sa, raw_sb, angle, panel)
    safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO
    safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO
    return (cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method)


# ─────────────────────────────────────────────
#  DRAW ONE BUBBLE
# ─────────────────────────────────────────────
def draw_bubble(
    pil_img,
    img_bgr,
    bubble_data,
    original_text,
    translated_text,
    flags,
    font_candidates,
    font_color,
    stroke_color,
    panel
):
    """Erase one bubble's original text and draw its translation.

    Steps: skip sound effects whose translation equals the original; pick the
    render ellipse; build the cleanup mask for CLEAN_MODE (an empty OCR mask
    falls back to the hybrid mask); paint the mask white on *pil_img* in
    place; then fit, wrap and draw the translated text centered on the mask's
    bounding box. *flags* is accepted but not used here.

    Returns:
        ``(method, font_used)``. *method* is ``"skip_sfx"``, ``"detected"``
        or ``"fallback"``; *font_used* is ``"NO_FONT"`` when nothing is drawn.
    """
    # skip unchanged SFX
    unchanged_sfx = (
        original_text
        and translated_text
        and normalize_text(original_text) == normalize_text(translated_text)
        and is_sfx_like(original_text)
    )
    if unchanged_sfx:
        return "skip_sfx", "NO_FONT"

    cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method = get_render_ellipse(
        img_bgr, bubble_data, panel
    )
    center_x, center_y = int(round(cx)), int(round(cy))
    img_h, img_w = img_bgr.shape[:2]

    def hybrid_mask():
        return build_hybrid_clean_mask(
            img_h=img_h, img_w=img_w,
            cx=cx, cy=cy,
            sa_fill=sa_fill, sb_fill=sb_fill, angle=angle,
            safe_w=safe_w, safe_h=safe_h,
            panel=panel
        )

    # choose cleaning mask
    if CLEAN_MODE == "ocr_union":
        mask = build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel)
        if mask is None or int(mask.sum()) == 0:
            # No usable OCR boxes: fall back to the geometric mask.
            mask = hybrid_mask()
    elif CLEAN_MODE == "hybrid":
        mask = hybrid_mask()
    else:  # ellipse
        mask = np.zeros((img_h, img_w), dtype=np.uint8)
        fill_axes = (int(math.ceil(sa_fill)), int(math.ceil(sb_fill)))
        cv2.ellipse(mask, (center_x, center_y), fill_axes, angle, 0, 360, 255, -1)

    # paint white over mask
    pixels = np.array(pil_img)
    pixels[mask == 255] = [255, 255, 255]
    pil_img.paste(Image.fromarray(pixels))

    if not translated_text:
        return method, "NO_FONT"

    draw = ImageDraw.Draw(pil_img)

    # Center text in the cleaned region bbox (red-box style target)
    rows, cols = np.where(mask > 0)
    if len(cols) > 0 and len(rows) > 0:
        box_x1, box_y1, box_x2, box_y2 = cols.min(), rows.min(), cols.max(), rows.max()
        text_cx = int((box_x1 + box_x2) / 2)
        text_cy = int((box_y1 + box_y2) / 2)
        fit_w = max(20, int((box_x2 - box_x1) * 0.92))
        fit_h = max(20, int((box_y2 - box_y1) * 0.92))
    else:
        text_cx, text_cy = center_x, center_y
        fit_w, fit_h = max(int(safe_w), 1), max(int(safe_h), 1)

    box_w = max(1, bubble_data.get("w", 1))
    box_h = max(1, bubble_data.get("h", 1))
    tall_bubble = (box_h / box_w) > 1.25
    extra_gap = 1 if tall_bubble else 0

    font, lines, total_h, font_used = best_fit_font(
        draw, translated_text, font_candidates, fit_w, fit_h, tall_bubble=tall_bubble
    )

    if not lines:
        return method, font_used

    # Start slightly above exact vertical centering (2% of the fit height).
    y_cursor = int(round(text_cy - total_h / 2.0 - 0.02 * fit_h))

    for line in lines:
        line_w, line_h = measure_text(draw, line, font)
        draw_text_with_stroke(
            draw,
            (text_cx - line_w // 2, y_cursor),
            line,
            font,
            fill=font_color,
            stroke_fill=stroke_color,
        )
        y_cursor += line_h + max(line_h // 5, 2) + extra_gap

    return method, font_used


# ─────────────────────────────────────────────
#  MAIN RENDER FUNCTION
# ─────────────────────────────────────────────
def render_translations(
    input_image,
    output_image,
    translations_file,
    bubbles_file,
    font_candidates=DEFAULT_FONT_CANDIDATES,
    font_color=DEFAULT_FONT_COLOR,
    stroke_color=DEFAULT_STROKE_COLOR,
):
    """Render every translated bubble onto a page image and save the result.

    Args:
        input_image: Path of the source page image.
        output_image: Path the rendered page is written to.
        translations_file: Pipe-delimited translations (see parse_translations).
        bubbles_file: JSON file mapping bubble ids to bubble geometry.
        font_candidates: Font paths tried in order for the lettering.
        font_color: Fill color of the text.
        stroke_color: Outline color of the text.

    Raises:
        FileNotFoundError: If *input_image* cannot be loaded.
        OSError: If the rendered page cannot be written to *output_image*.
    """
    img_bgr = cv2.imread(input_image)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot load image: {input_image}")

    img_h, img_w = img_bgr.shape[:2]
    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    translations, originals, flags_map = parse_translations(translations_file)
    bubbles = parse_bubbles(bubbles_file)
    panels = detect_panels(img_bgr)

    rendered = 0
    skipped = 0

    def sort_key(item):
        # Process bubbles in reading order; fall back to the bubble id.
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))

    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        if bubble_id not in bubbles:
            # A translation without geometry cannot be placed.
            skipped += 1
            continue

        bubble_data = bubbles[bubble_id]
        panel = assign_panel(bubble_data, panels, img_w, img_h)

        original_text = originals.get(bubble_id, "")
        flags = flags_map.get(bubble_id, "-")

        method, font_used = draw_bubble(
            pil_img=img_pil,
            img_bgr=img_bgr,
            bubble_data=bubble_data,
            original_text=original_text,
            translated_text=translated_text,
            flags=flags,
            font_candidates=font_candidates,
            font_color=font_color,
            stroke_color=stroke_color,
            panel=panel
        )

        if method == "skip_sfx":
            skipped += 1
        else:
            rendered += 1

    result_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    # imwrite reports failure through its return value rather than raising,
    # so check it instead of announcing success for a file never written.
    if not cv2.imwrite(output_image, result_cv):
        raise OSError(f"Cannot write image: {output_image}")

    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
    print(f"📄 Output → {output_image}")
    print(f"🧼 Clean mode: {CLEAN_MODE}")


# ─────────────────────────────────────────────
#  ENTRY POINT
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Script mode: translate the sample page using the module defaults.
    render_translations(
        "001-page.png",
        "page_translated.png",
        "output.txt",
        "bubbles.json",
        font_candidates=DEFAULT_FONT_CANDIDATES,
        font_color=DEFAULT_FONT_COLOR,
        stroke_color=DEFAULT_STROKE_COLOR,
    )