First beta

2026-04-15 21:12:41 +02:00
parent 5ef8c39f69
commit dd1cf54f86
7 changed files with 736 additions and 905 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,53 @@
 # Manga Translator OCR Pipeline
 A robust manga/comic OCR + translation pipeline with:
 - EasyOCR (default, reliable on macOS M1)
 - Optional PaddleOCR (auto-fallback if unavailable)
 - Bubble clustering and line-level boxes
 - Robust reread pass (multi-preprocessing + slight rotation)
 - Translation export + debug overlays
 ---
 ## ✨ Features
 - OCR from raw manga pages
 - Noise filtering (`BOX` debug artifacts, tiny garbage tokens, symbols)
 - Speech bubble grouping
 - Reading order estimation (`ltr` / `rtl`)
 - Translation output (`output.txt`)
 - Structured bubble metadata (`bubbles.json`)
 - Visual debug output (`debug_clusters.png`)
 ---
 ## 🧰 Requirements
 - macOS (Apple Silicon supported)
 - Python **3.11** recommended
 - Homebrew (for Python install)
 ---
 ## 🚀 Setup (Python 3.11 venv)
 ```bash
 cd /path/to/manga-translator
 # 1) Create venv with 3.11
 /opt/homebrew/bin/python3.11 -m venv venv
 # 2) Activate
 source venv/bin/activate
 # 3) Verify interpreter
 python -V
 # expected: Python 3.11.x
 # 4) Install dependencies
 python -m pip install --upgrade pip setuptools wheel
 python -m pip install -r requirements.txt
 # Optional Paddle runtime
 python -m pip install paddlepaddle || true
--- a/fonts/ComicNeue-Bold.ttf
+++ b/fonts/ComicNeue-Bold.ttf
--- a/fonts/ComicRelief-Bold.ttf
+++ b/fonts/ComicRelief-Bold.ttf
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -1,509 +1,412 @@
-import os
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 manga-renderer.py
 Inputs:  001.jpg  +  bubbles.json  +  output.txt
 Output:  translated_page.png
 Strategy:
  1. For every bubble that has a translation and is not skipped, white-fill its OCR quads  (erases original text cleanly)
  2. Render the translated text centered inside the bubble bounding box
  3. Bubbles in SKIP_BUBBLE_IDS (or without a translation) are left untouched: neither erased nor re-rendered
 """
 import json
-import re
+import textwrap
 import cv2
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 from typing import Dict, List, Tuple, Optional, Set
 # ============================================================
 # CONFIG  — edit these paths to match your setup
 # ============================================================
 IMAGE_PATH        = "003.jpg"
 BUBBLES_PATH      = "bubbles.json"
 TRANSLATIONS_PATH = "output.txt"
 OUTPUT_PATH       = "translated_page_003.png"
-# ─────────────────────────────────────────────
+# Font candidates — first one that loads wins
-#  CONFIG
+FONT_CANDIDATES = [
-# ─────────────────────────────────────────────
+    "fonts/ComicNeue-Bold.ttf",
 DEFAULT_FONT_CANDIDATES = [
    "fonts/ComicRelief-Regular.ttf",
    "fonts/ComicNeue-Regular.ttf",
 ]
 DEFAULT_FONT_COLOR = (0, 0, 0)
 DEFAULT_STROKE_COLOR = (255, 255, 255)
-MAX_FONT_SIZE = 20
+FONT_SIZE     = 20
-MIN_FONT_SIZE = 6
+MIN_FONT_SIZE = 10
 QUAD_PAD      = 4    # extra pixels added around each quad before white-fill
-# Guarantee full wipe of yellow squares
+# ============================================================
-YELLOW_BOX_PAD_X = 1
+# SKIP LIST
-YELLOW_BOX_PAD_Y = 1
+# ── Add any bubble IDs you do NOT want rendered here.
-YELLOW_UNION_PAD_X = 4
+# ── The quads will still be erased (white-filled) but no
-YELLOW_UNION_PAD_Y = 4
+# ── translated text will be drawn inside them.
-
+# ──
-# Optional extra cleanup expansion
+# ── Examples of why you'd skip a bubble:
-ENABLE_EXTRA_CLEAN = True
+# ──   • Sound effects  (BURP, BAM, POW …)
-EXTRA_DILATE_ITERS = 1
+# ──   • Untranslatable single characters
-EXTRA_CLOSE_ITERS = 1
+# ──   • Bubbles with bad OCR you want to fix manually later
-
+# ──   • Narrator boxes you want to leave in the source language
-# Bubble detection (for optional extra mask / border preservation)
+# ============================================================
-FLOOD_TOL = 30
+SKIP_BUBBLE_IDS: Set[int] = {
-
+    # 8,    # BURP BURP — sound effect
-# Border restoration: keep very conservative
+    # 2,    # example: bad OCR, fix manually
-ENABLE_EDGE_RESTORE = True
+}
 EDGE_RESTORE_DILATE = 1
 # Text layout inside yellow-union
 TEXT_INSET = 0.92
-# ─────────────────────────────────────────────
+# ============================================================
-#  PARSERS
+# FONT LOADER
-# ─────────────────────────────────────────────
+# ============================================================
-def parse_translations(translations_file):
+def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """
    Load a TrueType/OpenType font at the given size, or return None.

    For paths ending in ".ttc" (case-insensitive) face indices 0-3 are tried
    in order; for any other path only index 0 is tried. A face is accepted
    only if font.getbbox("A") succeeds. Any exception raised while loading or
    validating a face is swallowed and the next index is tried.
    """
    # Only the first four faces of a collection are probed.
    indices = range(4) if path.lower().endswith(".ttc") else [0]
    for idx in indices:
        try:
            font = ImageFont.truetype(path, size, index=idx)
            font.getbbox("A")   # raises if face metrics are broken
            return font
        except Exception:
            # Best-effort: a missing file or broken face just moves on.
            continue
    # No index produced a usable face.
    return None
 def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """
    Pick the first entry of FONT_CANDIDATES that load_font can open at
    FONT_SIZE and return (path, font). When none loads, return an empty
    path together with Pillow's built-in default font.
    """
    for candidate in FONT_CANDIDATES:
        loaded = load_font(candidate, FONT_SIZE)
        if loaded is None:
            # This candidate is unusable; try the next one.
            continue
        print(f"   ✅ Font: {candidate}")
        return candidate, loaded
    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()
 # ============================================================
 # PARSE output.txt  →  {bid: translated_string}
 # ============================================================
 def parse_translations(filepath: str) -> Dict[int, str]:
    """
    Reads output.txt and returns {bubble_id: translated_text}.
    Lines look like:  #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
    """
    translations = {}
-    originals = {}
+    with open(filepath, "r", encoding="utf-8") as f:
    flags_map = {}
    with open(translations_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line.startswith("#"):
                continue
            parts = line.split("|")
            if len(parts) < 5:
                continue
            try:
-                bubble_id = int(parts[0].lstrip("#"))
+                bid        = int(parts[0].lstrip("#"))
-            except Exception:
+                translated = parts[4].strip()
                if translated and translated != "-":
                    translations[bid] = translated
            except ValueError:
                continue
-
+    return translations
            if len(parts) >= 5:
                original = parts[2].strip()
                translated = parts[3].strip()
                flags = parts[4].strip()
            elif len(parts) >= 4:
                original = parts[2].strip()
                translated = parts[3].strip()
                flags = "-"
            elif len(parts) >= 3:
                original = ""
                translated = parts[2].strip()
                flags = "-"
            else:
                continue
            if translated.startswith("["):
                continue
            translations[bubble_id] = translated
            originals[bubble_id] = original
            flags_map[bubble_id] = flags
    return translations, originals, flags_map
-def parse_bubbles(bubbles_file):
+# ============================================================
-    with open(bubbles_file, "r", encoding="utf-8") as f:
+# PARSE bubbles.json  →  bubble_boxes, quads_per_bubble
-        raw = json.load(f)
+# ============================================================
-    return {int(k): v for k, v in raw.items()}
+def parse_bubbles(filepath: str):
 # ─────────────────────────────────────────────
 #  HELPERS
 # ─────────────────────────────────────────────
 def clamp(v, lo, hi):
    """Limit v to the range [lo, hi]; lo wins if the bounds are inverted."""
    capped = min(hi, v)
    return max(lo, capped)
 def xywh_to_xyxy(box):
    """
    Convert a mapping with "x", "y", "w", "h" keys into an
    (x1, y1, x2, y2) tuple of ints. Missing keys count as 0.
    Returns None for a falsy box (None or empty mapping).
    """
    if not box:
        return None
    left, top, width, height = (int(box.get(key, 0)) for key in ("x", "y", "w", "h"))
    return (left, top, left + width, top + height)
 def union_xyxy(boxes):
    """
    Return the smallest (x1, y1, x2, y2) rectangle enclosing every
    non-None box in boxes. Returns None when there are no boxes or the
    resulting rectangle has no positive width or height.
    """
    present = [rect for rect in boxes if rect is not None]
    if len(present) == 0:
        return None
    left = min(rect[0] for rect in present)
    top = min(rect[1] for rect in present)
    right = max(rect[2] for rect in present)
    bottom = max(rect[3] for rect in present)
    is_degenerate = right <= left or bottom <= top
    return None if is_degenerate else (left, top, right, bottom)
 def bbox_from_mask(mask):
    """
    Bounding box of all mask entries greater than 0, as
    (x_min, y_min, x_max + 1, y_max + 1). Returns None when no entry
    is greater than 0.
    """
    rows, cols = np.nonzero(mask > 0)
    if cols.size == 0:
        return None
    x_lo, x_hi = int(cols.min()), int(cols.max())
    y_lo, y_hi = int(rows.min()), int(rows.max())
    # Upper bounds are exclusive, hence the + 1.
    return (x_lo, y_lo, x_hi + 1, y_hi + 1)
 def normalize_text(s):
    """Upper-case s, trim it, and drop every run of non-word characters."""
    upper_trimmed = s.upper().strip()
    return re.sub(r"[^\w]+", "", upper_trimmed)
 def is_sfx_like(text):
    """
    True when the normalized text is at most 8 characters long and
    consists entirely of one of the known sound-effect stems
    (SHA, BIP, BEEP, HN, AH, OH) with its last letter repeated.
    """
    normalized = normalize_text(text)
    if len(normalized) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", normalized) is not None
 # ─────────────────────────────────────────────
 #  FONT
 # ─────────────────────────────────────────────
 def load_font_from_candidates(candidates, size):
    """
    Return (font, path) for the first candidate path that exists on disk
    and can be opened by ImageFont.truetype at the given size. Falls back
    to (ImageFont.load_default(), "PIL_DEFAULT") when none works.
    """
    existing = (p for p in candidates if p and os.path.exists(p))
    for font_path in existing:
        try:
            loaded = ImageFont.truetype(font_path, size)
        except Exception:
            # Unreadable font file: keep trying the remaining candidates.
            continue
        return loaded, font_path
    return ImageFont.load_default(), "PIL_DEFAULT"
 def measure_text(draw, text, font):
    """Return (width, height) of text as reported by draw.textbbox at (0, 0)."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top
 def wrap_text(draw, text, font, max_width):
    """
    Greedily word-wrap text so that each line measures at most max_width
    with the given font.

    Returns (lines, total_height, widest_line_width). A single word wider
    than max_width is kept whole on its own line. Text without any words
    yields ([""], 0, 0).
    """
    wrapped = []
    current = ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        candidate_w, _ = measure_text(draw, candidate, font)
        if current and candidate_w > max_width:
            # The word does not fit: close the current line, start a new one.
            wrapped.append(current)
            current = word
        else:
            current = candidate
    if current:
        wrapped.append(current)
    if not wrapped:
        return [""], 0, 0
    sizes = [measure_text(draw, ln, font) for ln in wrapped]
    line_widths = [lw for lw, _ in sizes]
    line_heights = [lh for _, lh in sizes]
    # Inter-line spacing is derived from the first line's height.
    spacing = max(2, line_heights[0] // 5)
    block_height = sum(line_heights) + spacing * (len(wrapped) - 1)
    return wrapped, block_height, max(line_widths)
 def fit_font(draw, text, font_candidates, safe_w, safe_h):
    """
    Find the largest font size between MAX_FONT_SIZE and MIN_FONT_SIZE
    (inclusive) at which the wrapped text fits inside safe_w x safe_h.

    Returns (font, lines, total_height). When no size fits, the text is
    wrapped at MIN_FONT_SIZE and returned anyway.
    """
    size = MAX_FONT_SIZE
    while size >= MIN_FONT_SIZE:
        candidate_font, _ = load_font_from_candidates(font_candidates, size)
        wrapped, block_h, widest = wrap_text(draw, text, candidate_font, safe_w)
        fits = block_h <= safe_h and widest <= safe_w
        if fits:
            return candidate_font, wrapped, block_h
        size -= 1
    # Nothing fit: fall back to the minimum size even if it overflows.
    smallest_font, _ = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
    wrapped, block_h, _ = wrap_text(draw, text, smallest_font, safe_w)
    return smallest_font, wrapped, block_h
 def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """
    Draw text at pos with a simulated outline: the text is first drawn in
    stroke_fill at every offset within the stroke radius, then once in
    fill on top. The radius is 2 px when the measured text height is at
    most 11, otherwise 1 px.
    """
    x, y = pos
    _, text_h = measure_text(draw, text, font)
    stroke_w = 1 if text_h > 11 else 2
    span = range(-stroke_w, stroke_w + 1)
    offsets = [(dx, dy) for dx in span for dy in span if (dx, dy) != (0, 0)]
    for dx, dy in offsets:
        draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill)
    # The actual glyphs go last so they sit above the outline.
    draw.text((x, y), text, font=font, fill=fill)
 # ─────────────────────────────────────────────
 #  MASK BUILDERS
 # ─────────────────────────────────────────────
 def build_yellow_mask(bubble_data, img_h, img_w):
    """
-    HARD GUARANTEE:
+    Returns:
-    Returned mask always covers all yellow squares (line_bboxes).
+        bubble_boxes     : {bid: (x1, y1, x2, y2)}
        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
    """
-    mask = np.zeros((img_h, img_w), dtype=np.uint8)
+    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
-    # Preferred: exact line boxes
+    bubble_boxes     = {}
-    line_boxes = bubble_data.get("line_bboxes", [])
+    quads_per_bubble = {}
-    for lb in line_boxes:
+
-        b = xywh_to_xyxy(lb)
+    for key, val in data.items():
-        if not b:
+        bid = int(key)
        x1 = val["x"];       y1 = val["y"]
        x2 = x1 + val["w"]; y2 = y1 + val["h"]
        bubble_boxes[bid] = (x1, y1, x2, y2)
        quads_per_bubble[bid] = val.get("quads", [])
    return bubble_boxes, quads_per_bubble
 # ============================================================
 # ERASE  — white-fill every OCR quad (with small padding)
 # ============================================================
 def erase_quads(
    image_bgr,
    quads_per_bubble: Dict[int, List],
    translations: Dict[int, str],   # ← NEW: only erase what we'll render
    skip_ids: Set[int],
    pad: int = QUAD_PAD
 ):
    """
    White-fills OCR quads ONLY for bubbles that:
      - have a translation in output.txt  AND
      - are NOT in SKIP_BUBBLE_IDS
    Everything else is left completely untouched.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    erased_count  = 0
    skipped_count = 0
    for bid, quads in quads_per_bubble.items():
        # ignore if explicitly skipped
        if bid in skip_ids:
            skipped_count += 1
            continue
        x1, y1, x2, y2 = b
        x1 -= YELLOW_BOX_PAD_X
        y1 -= YELLOW_BOX_PAD_Y
        x2 += YELLOW_BOX_PAD_X
        y2 += YELLOW_BOX_PAD_Y
        x1 = clamp(x1, 0, img_w - 1)
        y1 = clamp(y1, 0, img_h - 1)
        x2 = clamp(x2, 1, img_w)
        y2 = clamp(y2, 1, img_h)
        if x2 > x1 and y2 > y1:
            cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
-    # If no line boxes available, use line_union fallback
+        # ignore if no translation exists (deleted from output.txt)
-    if np.count_nonzero(mask) == 0:
+        if bid not in translations:
-        ub = xywh_to_xyxy(bubble_data.get("line_union_bbox"))
+            skipped_count += 1
-        if ub:
+            continue
            x1, y1, x2, y2 = ub
            x1 -= YELLOW_UNION_PAD_X
            y1 -= YELLOW_UNION_PAD_Y
            x2 += YELLOW_UNION_PAD_X
            y2 += YELLOW_UNION_PAD_Y
            x1 = clamp(x1, 0, img_w - 1)
            y1 = clamp(y1, 0, img_h - 1)
            x2 = clamp(x2, 1, img_w)
            y2 = clamp(y2, 1, img_h)
            if x2 > x1 and y2 > y1:
                cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
-    # Last fallback: text_bbox
+        for quad in quads:
-    if np.count_nonzero(mask) == 0:
+            pts = np.array(quad, dtype=np.int32)
-        tb = xywh_to_xyxy(bubble_data.get("text_bbox"))
+            cv2.fillPoly(result, [pts], (255, 255, 255))
        if tb:
            x1, y1, x2, y2 = tb
            x1 -= YELLOW_UNION_PAD_X
            y1 -= YELLOW_UNION_PAD_Y
            x2 += YELLOW_UNION_PAD_X
            y2 += YELLOW_UNION_PAD_Y
            x1 = clamp(x1, 0, img_w - 1)
            y1 = clamp(y1, 0, img_h - 1)
            x2 = clamp(x2, 1, img_w)
            y2 = clamp(y2, 1, img_h)
            if x2 > x1 and y2 > y1:
                cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
-    return mask
+            xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
            x1 = max(0,      min(xs) - pad)
            y1 = max(0,      min(ys) - pad)
            x2 = min(iw - 1, max(xs) + pad)
            y2 = min(ih - 1, max(ys) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
        erased_count += 1
    print(f"   Erased : {erased_count} bubbles")
    print(f"   Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
-def bubble_interior_mask(img_bgr, bubble_data):
+# ============================================================
 # FONT SIZING  +  TEXT WRAP
 # ============================================================
 def fit_text(
    text: str,
    box_w: int,
    box_h: int,
    font_path: str,
    max_size: int = FONT_SIZE,
    min_size: int = MIN_FONT_SIZE
 ) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
    """
-    Optional helper to expand clean region safely; never used to shrink yellow coverage.
+    Returns (fitted_size, font, wrapped_lines) — largest size where
    the text block fits inside box_w × box_h.
    """
-    h, w = img_bgr.shape[:2]
+    for size in range(max_size, min_size - 1, -1):
        font = load_font(font_path, size) if font_path else None
        if font is None:
            return min_size, ImageFont.load_default(), [text]
-    panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
+        chars_per_line = max(1, int(box_w / (size * 0.62)))
-    if panel is None:
+        wrapped        = textwrap.fill(text, width=chars_per_line)
-        panel = (0, 0, w, h)
+        lines          = wrapped.split("\n")
-    px1, py1, px2, py2 = panel
+        total_h        = (size + 8) * len(lines)
-    seed = bubble_data.get("seed_point", {})
+        if total_h <= box_h - 8:
-    sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
+            return size, font, lines
    sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
    sx = clamp(sx, 1, w - 2)
    sy = clamp(sy, 1, h - 2)
-    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
+    # Nothing fit — use minimum size
-    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+    font = load_font(font_path, min_size) if font_path else None
    if font is None:
        font = ImageFont.load_default()
    chars_per_line = max(1, int(box_w / (min_size * 0.62)))
    lines = textwrap.fill(text, width=chars_per_line).split("\n")
    return min_size, font, lines
    panel_bin = np.zeros_like(binary)
    panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]
-    # if seed on dark pixel, search nearby white
+# ============================================================
-    if gray[sy, sx] < 150:
+# COLOR HELPERS
-        found = False
+# ============================================================
-        search_r = max(2, min(bubble_data.get("w", 20), bubble_data.get("h", 20)) // 3)
+def sample_bg_color(
-        for r in range(1, search_r + 1):
+    image_bgr,
-            for dy in range(-r, r + 1):
+    x1: int, y1: int,
-                for dx in range(-r, r + 1):
+    x2: int, y2: int
-                    nx, ny = sx + dx, sy + dy
+) -> Tuple[int, int, int]:
-                    if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200:
+    """Sample four corners of a bubble to estimate background color (R, G, B)."""
-                        sx, sy = nx, ny
+    ih, iw = image_bgr.shape[:2]
-                        found = True
+    samples = []
-                        break
+    for sx, sy in [(x1+4, y1+4), (x2-4, y1+4), (x1+4, y2-4), (x2-4, y2-4)]:
-                if found:
+        sx = max(0, min(iw-1, sx)); sy = max(0, min(ih-1, sy))
-                    break
+        b, g, r = image_bgr[sy, sx]
-            if found:
+        samples.append((int(r), int(g), int(b)))
-                break
+    return (
-
+        int(np.median([s[0] for s in samples])),
-        if not found:
+        int(np.median([s[1] for s in samples])),
-            m = np.zeros((h, w), dtype=np.uint8)
+        int(np.median([s[2] for s in samples])),
            bx = bubble_data.get("x", 0)
            by = bubble_data.get("y", 0)
            bw = bubble_data.get("w", 20)
            bh = bubble_data.get("h", 20)
            cv2.ellipse(m, (bx + bw // 2, by + bh // 2), (max(4, bw // 2), max(4, bh // 2)), 0, 0, 360, 255, -1)
            return m
    ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
    flood = panel_bin.copy()
    cv2.floodFill(
        flood, ff_mask, (sx, sy), 255,
        loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
        flags=cv2.FLOODFILL_FIXED_RANGE
    )
-    m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
+
-    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
+def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
-    return m
+    lum = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2]
    return (0, 0, 0) if lum > 128 else (255, 255, 255)
-def build_clean_mask(img_bgr, bubble_data):
+def safe_textbbox(
-    """
+    draw, pos, text, font
-    FINAL RULE:
+) -> Tuple[int, int, int, int]:
-    clean_mask MUST cover yellow_mask completely.
+    try:
-    """
+        return draw.textbbox(pos, text, font=font)
-    h, w = img_bgr.shape[:2]
+    except Exception:
-    yellow = build_yellow_mask(bubble_data, h, w)
+        size = getattr(font, "size", 12)
-
+        return (
-    # start with guaranteed yellow
+            pos[0], pos[1],
-    clean = yellow.copy()
+            pos[0] + int(len(text) * size * 0.6),
-
+            pos[1] + int(size * 1.2)
-    if ENABLE_EXTRA_CLEAN:
+        )
        bubble_m = bubble_interior_mask(img_bgr, bubble_data)
        extra = cv2.dilate(yellow, np.ones((3, 3), np.uint8), iterations=EXTRA_DILATE_ITERS)
        extra = cv2.morphologyEx(extra, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=EXTRA_CLOSE_ITERS)
        extra = cv2.bitwise_and(extra, bubble_m)
        # IMPORTANT: union with yellow (never subtract yellow)
        clean = cv2.bitwise_or(yellow, extra)
    # final guarantee (defensive)
    clean = cv2.bitwise_or(clean, yellow)
    return clean, yellow
-# ─────────────────────────────────────────────
+# ============================================================
-#  DRAW BUBBLE
+# RENDER
-# ─────────────────────────────────────────────
+# ============================================================
 def draw_bubble(
    pil_img,
    img_bgr_ref,
    bubble_data,
    original_text,
    translated_text,
    font_candidates,
    font_color,
    stroke_color
 ):
    """
    Clean one bubble's text area on pil_img and draw its translation.

    pil_img is modified in place (cleaned pixels are pasted back, then text
    is drawn on it). img_bgr_ref is only read: it is handed to the mask
    helpers and used as the source of original pixels for edge restoration.
    bubble_data is passed through to build_clean_mask / bubble_interior_mask.
    NOTE(review): presumably one entry of bubbles.json — confirm the schema
    against those helpers.

    Returns a status string:
      "skip_sfx"     - original and translation normalize to the same text
                       and the original matches is_sfx_like; nothing is changed
      "skip_no_area" - the clean mask is empty, or no bounding box could be
                       derived from either mask
      "clean_only"   - the area was whitened but translated_text is empty
      "rendered"     - the area was whitened and the text was drawn
    """
    # Leave untranslated sound effects exactly as they are in the source art.
    if original_text and translated_text:
        if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text):
            return "skip_sfx"
    # Work on a NumPy copy of the page; it is pasted back into pil_img below.
    rgb = np.array(pil_img)
    # NOTE: h and w are not used anywhere below in this function.
    h, w = rgb.shape[:2]
    clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data)
    if np.count_nonzero(clean_mask) == 0:
        return "skip_no_area"
    # 1) FORCE white fill on clean mask (includes full yellow by guarantee)
    rgb[clean_mask == 255] = [255, 255, 255]
    # 2) Optional edge restore, but NEVER overwrite yellow coverage
    if ENABLE_EDGE_RESTORE:
        bubble_m = bubble_interior_mask(img_bgr_ref, bubble_data)
        # Morphological gradient of the interior mask gives its outline,
        # which is then thickened by EDGE_RESTORE_DILATE iterations.
        edge = cv2.morphologyEx(bubble_m, cv2.MORPH_GRADIENT, np.ones((3, 3), np.uint8))
        edge = cv2.dilate(edge, np.ones((3, 3), np.uint8), iterations=EDGE_RESTORE_DILATE)
        # Don't restore where yellow exists (hard guarantee)
        edge[yellow_mask == 255] = 0
        # Copy the original pixels back along the outline.
        orig_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
        rgb[edge == 255] = orig_rgb[edge == 255]
    # Write the cleaned pixels back into the caller's image.
    pil_img.paste(Image.fromarray(rgb))
    if not translated_text:
        return "clean_only"
    # text region based on yellow area (exact requirement)
    text_bbox = bbox_from_mask(yellow_mask)
    if text_bbox is None:
        # Fall back to the bounding box of the whole cleaned area.
        text_bbox = bbox_from_mask(clean_mask)
        if text_bbox is None:
            return "skip_no_area"
    x1, y1, x2, y2 = text_bbox
    draw = ImageDraw.Draw(pil_img)
    # Text is centered on the middle of the text box.
    text_cx = int((x1 + x2) / 2)
    text_cy = int((y1 + y2) / 2)
    # Usable area is the box shrunk by TEXT_INSET, but never below 16 px.
    safe_w = max(16, int((x2 - x1) * TEXT_INSET))
    safe_h = max(16, int((y2 - y1) * TEXT_INSET))
    font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)
    # Start so that the whole text block is vertically centered.
    y_cursor = int(round(text_cy - total_h / 2.0))
    for line in lines:
        lw, lh = measure_text(draw, line, font)
        # Center each line horizontally on its own measured width.
        x = text_cx - lw // 2
        draw_text_with_stroke(draw, (x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color)
        # Advance by the line height plus a gap of lh // 5 (at least 2 px).
        y_cursor += lh + max(lh // 5, 2)
    return "rendered"
 # ─────────────────────────────────────────────
 #  MAIN
 # ─────────────────────────────────────────────
 def render_translations(
-    input_image,
+    image_bgr,
-    output_image,
+    bubble_boxes: Dict[int, Tuple],
-    translations_file,
+    translations: Dict[int, str],
-    bubbles_file,
+    skip_ids: Set[int],
-    font_candidates=DEFAULT_FONT_CANDIDATES,
+    font_path: str,
-    font_color=DEFAULT_FONT_COLOR,
+    font_size: int     = FONT_SIZE,
-    stroke_color=DEFAULT_STROKE_COLOR
+    bold_outline: bool = True,
    auto_color: bool   = True,
    output_path: str   = OUTPUT_PATH
 ):
-    img_bgr = cv2.imread(input_image)
+    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
-    if img_bgr is None:
+    pil_img   = Image.fromarray(image_rgb)
-        raise FileNotFoundError(f"Cannot load image: {input_image}")
+    draw      = ImageDraw.Draw(pil_img)
-    img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
+    rendered = 0
    skipped  = 0
    missing  = 0
-    translations, originals, flags_map = parse_translations(translations_file)
+    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
    bubbles = parse_bubbles(bubbles_file)
-    rendered, skipped = 0, 0
+        # ── skip list check ────────────────────────────────────────
-
+        if bid in skip_ids:
-    def sort_key(item):
+            print(f"   ⏭️  Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
        bid, _ = item
        b = bubbles.get(bid, {})
        return int(b.get("reading_order", bid))
    for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
        if bubble_id not in bubbles:
            skipped += 1
            continue
-        bubble_data = bubbles[bubble_id]
+        text = translations.get(bid, "").strip()
-        original_text = originals.get(bubble_id, "")
+        if not text:
            print(f"   ⚠️  Bubble #{bid:<3} — no translation found, left blank")
            missing += 1
            continue
-        status = draw_bubble(
+        box_w = x2 - x1
-            pil_img=img_pil,
+        box_h = y2 - y1
-            img_bgr_ref=img_bgr,
+        if box_w < 10 or box_h < 10:
-            bubble_data=bubble_data,
+            continue
-            original_text=original_text,
+
-            translated_text=translated_text,
+        # ── fit font + wrap ────────────────────────────────────────
-            font_candidates=font_candidates,
+        size, font, lines = fit_text(
-            font_color=font_color,
+            text, box_w, box_h, font_path, max_size=font_size
            stroke_color=stroke_color
        )
-        if status.startswith("skip"):
+        # ── colors ─────────────────────────────────────────────────
-            skipped += 1
+        if auto_color:
            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
            fg = pick_fg_color(bg)
            ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
        else:
-            rendered += 1
+            fg, ol = (0, 0, 0), (255, 255, 255)
-    out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
+        # ── vertical center ────────────────────────────────────────
-    cv2.imwrite(output_image, out_bgr)
+        line_h  = size + 8
        total_h = line_h * len(lines)
        y_cur   = y1 + max(4, (box_h - total_h) // 2)
-    print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
+        for line in lines:
-    print(f"📄 Output → {output_image}")
+            bb     = safe_textbbox(draw, (0, 0), line, font)
-    print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")
+            line_w = bb[2] - bb[0]
            x_cur  = x1 + max(2, (box_w - line_w) // 2)
            if bold_outline:
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    try:
                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
                    except Exception:
                        pass
            try:
                draw.text((x_cur, y_cur), line, font=font, fill=fg)
            except Exception as e:
                print(f"   ❌ Draw error bubble #{bid}: {e}")
            y_cur += line_h
        print(f"   ✅ Bubble #{bid:<3} — rendered  ({len(lines)} lines, size {size}px)")
        rendered += 1
    pil_img.save(output_path)
    print()
    print(f"{'─'*50}")
    print(f"  Rendered : {rendered}")
    print(f"  Skipped  : {skipped}  (SKIP_BUBBLE_IDS)")
    print(f"  No text  : {missing}  (not in output.txt)")
    print(f"{'─'*50}")
    print(f"✅ Saved → {output_path}")
    return pil_img
 # ============================================================
 # MAIN
 # ============================================================
 def main():
    """
    Run the whole rendering pipeline using the module-level configuration:
    load the page image, parse bubbles and translations, resolve a font,
    erase the original text and render the translated text.
    Returns early (after printing an error) when the image cannot be read.
    """
    print(f"📖 Loading image        : {IMAGE_PATH}")
    page = cv2.imread(IMAGE_PATH)
    if page is None:
        # cv2.imread reports failure by returning None.
        print(f"❌ Cannot load: {IMAGE_PATH}")
        return
    # ── bubbles ────────────────────────────────────────────────
    print(f"📦 Loading bubbles      : {BUBBLES_PATH}")
    boxes, quads = parse_bubbles(BUBBLES_PATH)
    quad_total = sum(len(q) for q in quads.values())
    print(f"   {len(boxes)} bubbles  |  {quad_total} quads total")
    # ── translations ───────────────────────────────────────────
    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    texts = parse_translations(TRANSLATIONS_PATH)
    print(f"   {len(texts)} translations found")
    # ── skip list summary ──────────────────────────────────────
    if SKIP_BUBBLE_IDS:
        skip_summary = f"bubbles {sorted(SKIP_BUBBLE_IDS)}"
    else:
        skip_summary = "(empty — all bubbles will be rendered)"
    print(f"⏭️  Skip list            : {skip_summary}")
    # ── font ───────────────────────────────────────────────────
    print("🔤 Resolving font...")
    chosen_font_path, _ = resolve_font()
    # ── erase, then render on the cleaned copy ─────────────────
    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
    cleaned = erase_quads(
        page,
        quads,
        translations=texts,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD,
    )
    print("✍️  Rendering translated text...")
    render_translations(
        image_bgr=cleaned,
        bubble_boxes=boxes,
        translations=texts,
        skip_ids=SKIP_BUBBLE_IDS,
        font_path=chosen_font_path,
        font_size=FONT_SIZE,
        bold_outline=True,
        auto_color=True,
        output_path=OUTPUT_PATH,
    )
 if __name__ == "__main__":
-    render_translations(
+    main()
        input_image="001-page.png",
        output_image="page_translated.png",
        translations_file="output.txt",
        bubbles_file="bubbles.json",
        font_candidates=DEFAULT_FONT_CANDIDATES,
        font_color=DEFAULT_FONT_COLOR,
        stroke_color=DEFAULT_STROKE_COLOR
    )
--- a/manga-translator.py
+++ b/manga-translator.py
--- a/79
+++ b/79
@@ -0,0 +1,79 @@
 aistudio-sdk==0.3.8
 annotated-doc==0.0.4
 annotated-types==0.7.0
 anyio==4.13.0
 bce-python-sdk==0.9.70
 beautifulsoup4==4.14.3
 certifi==2026.2.25
 chardet==7.4.3
 charset-normalizer==3.4.7
 click==8.3.2
 colorlog==6.10.1
 crc32c==2.8
 deep-translator==1.11.4
 easyocr==1.7.2
 filelock==3.28.0
 fsspec==2026.3.0
 future==1.0.0
 h11==0.16.0
 hf-xet==1.4.3
 httpcore==1.0.9
 httpx==0.28.1
 huggingface_hub==1.10.2
 idna==3.11
 ImageIO==2.37.3
 imagesize==2.0.0
 Jinja2==3.1.6
 lazy-loader==0.5
 markdown-it-py==4.0.0
 MarkupSafe==3.0.3
 mdurl==0.1.2
 modelscope==1.35.4
 mpmath==1.3.0
 networkx==3.6.1
 ninja==1.13.0
 numpy==1.26.4
 opencv-contrib-python==4.10.0.84
 opencv-python==4.11.0.86
 opencv-python-headless==4.11.0.86
 opt-einsum==3.3.0
 packaging==26.1
 paddleocr==3.4.1
 paddlepaddle==3.3.1
 paddlex==3.4.3
 pandas==3.0.2
 pillow==12.2.0
 prettytable==3.17.0
 protobuf==7.34.1
 psutil==7.2.2
 py-cpuinfo==9.0.0
 pyclipper==1.4.0
 pycryptodome==3.23.0
 pydantic==2.13.1
 pydantic_core==2.46.1
 Pygments==2.20.0
 pypdfium2==5.7.0
 python-bidi==0.6.7
 python-dateutil==2.9.0.post0
 PyYAML==6.0.2
 requests==2.33.1
 rich==15.0.0
 ruamel.yaml==0.19.1
 safetensors==0.7.0
 scikit-image==0.26.0
 scipy==1.17.1
 shapely==2.1.2
 shellingham==1.5.4
 six==1.17.0
 soupsieve==2.8.3
 sympy==1.14.0
 tifffile==2026.3.3
 torch==2.11.0
 torchvision==0.26.0
 tqdm==4.67.3
 typer==0.24.1
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 ujson==5.12.0
 urllib3==2.6.3
 wcwidth==0.6.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,19 +1,12 @@
-# ─────────────────────────────────────────────
+numpy<2.0
-#  manga-translator + manga-renderer
+opencv-python>=4.8
-#  Python >= 3.9 recommended
+easyocr>=1.7.1
-# ─────────────────────────────────────────────
+deep-translator>=1.11.4
-
+manga-ocr>=0.1.14
-# Computer vision + image processing
+torch
-opencv-python>=4.8.0
+torchvision
-numpy>=1.24.0
+Pillow
-Pillow>=10.0.0
+transformers
-
+fugashi
-# OCR engine (manga-translator)
+unidic-lite
 manga-ocr>=0.1.8
 # Translation (manga-translator)
 deep-translator>=1.11.0
 # HTTP / file handling used internally by manga-ocr
 requests>=2.31.0