First beta
This commit is contained in:
53
README.md
53
README.md
@@ -0,0 +1,53 @@
|
||||
# Manga Translator OCR Pipeline
|
||||
|
||||
A robust manga/comic OCR + translation pipeline with:
|
||||
|
||||
- EasyOCR (default, reliable on macOS M1)
|
||||
- Optional PaddleOCR (auto-fallback if unavailable)
|
||||
- Bubble clustering and line-level boxes
|
||||
- Robust reread pass (multi-preprocessing + slight rotation)
|
||||
- Translation export + debug overlays
|
||||
|
||||
---
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- OCR from raw manga pages
|
||||
- Noise filtering (`BOX` debug artifacts, tiny garbage tokens, symbols)
|
||||
- Speech bubble grouping
|
||||
- Reading order estimation (`ltr` / `rtl`)
|
||||
- Translation output (`output.txt`)
|
||||
- Structured bubble metadata (`bubbles.json`)
|
||||
- Visual debug output (`debug_clusters.png`)
|
||||
|
||||
---
|
||||
|
||||
## 🧰 Requirements
|
||||
|
||||
- macOS (Apple Silicon supported)
|
||||
- Python **3.11** recommended
|
||||
- Homebrew (for Python install)
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Setup (Python 3.11 venv)
|
||||
|
||||
```bash
|
||||
cd /path/to/manga-translator
|
||||
|
||||
# 1) Create venv with 3.11
|
||||
/opt/homebrew/bin/python3.11 -m venv venv
|
||||
|
||||
# 2) Activate
|
||||
source venv/bin/activate
|
||||
|
||||
# 3) Verify interpreter
|
||||
python -V
|
||||
# expected: Python 3.11.x
|
||||
|
||||
# 4) Install dependencies
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
python -m pip install -r requirements.txt
|
||||
|
||||
# Optional Paddle runtime
|
||||
python -m pip install paddlepaddle || true
|
||||
|
||||
BIN
fonts/ComicNeue-Bold.ttf
Executable file
BIN
fonts/ComicNeue-Bold.ttf
Executable file
Binary file not shown.
BIN
fonts/ComicRelief-Bold.ttf
Executable file
BIN
fonts/ComicRelief-Bold.ttf
Executable file
Binary file not shown.
@@ -1,509 +1,412 @@
|
||||
import os
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
manga-renderer.py
|
||||
|
||||
Inputs: 001.jpg + bubbles.json + output.txt
|
||||
Output: translated_page.png
|
||||
|
||||
Strategy:
|
||||
1. For every bubble, white-fill all its OCR quads (erases original text cleanly)
|
||||
2. Render the translated text centered inside the bubble bounding box
|
||||
3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import textwrap
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from typing import Dict, List, Tuple, Optional, Set
|
||||
|
||||
# ============================================================
|
||||
# CONFIG — edit these paths to match your setup
|
||||
# ============================================================
|
||||
IMAGE_PATH = "003.jpg"
|
||||
BUBBLES_PATH = "bubbles.json"
|
||||
TRANSLATIONS_PATH = "output.txt"
|
||||
OUTPUT_PATH = "translated_page_003.png"
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# CONFIG
|
||||
# ─────────────────────────────────────────────
|
||||
DEFAULT_FONT_CANDIDATES = [
|
||||
"fonts/ComicRelief-Regular.ttf",
|
||||
"fonts/ComicNeue-Regular.ttf",
|
||||
# Font candidates — first one that loads wins
|
||||
FONT_CANDIDATES = [
|
||||
"fonts/ComicNeue-Bold.ttf",
|
||||
]
|
||||
DEFAULT_FONT_COLOR = (0, 0, 0)
|
||||
DEFAULT_STROKE_COLOR = (255, 255, 255)
|
||||
|
||||
MAX_FONT_SIZE = 20
|
||||
MIN_FONT_SIZE = 6
|
||||
FONT_SIZE = 20
|
||||
MIN_FONT_SIZE = 10
|
||||
QUAD_PAD = 4 # extra pixels added around each quad before white-fill
|
||||
|
||||
# Guarantee full wipe of yellow squares
|
||||
YELLOW_BOX_PAD_X = 1
|
||||
YELLOW_BOX_PAD_Y = 1
|
||||
YELLOW_UNION_PAD_X = 4
|
||||
YELLOW_UNION_PAD_Y = 4
|
||||
|
||||
# Optional extra cleanup expansion
|
||||
ENABLE_EXTRA_CLEAN = True
|
||||
EXTRA_DILATE_ITERS = 1
|
||||
EXTRA_CLOSE_ITERS = 1
|
||||
|
||||
# Bubble detection (for optional extra mask / border preservation)
|
||||
FLOOD_TOL = 30
|
||||
|
||||
# Border restoration: keep very conservative
|
||||
ENABLE_EDGE_RESTORE = True
|
||||
EDGE_RESTORE_DILATE = 1
|
||||
|
||||
# Text layout inside yellow-union
|
||||
TEXT_INSET = 0.92
|
||||
# ============================================================
|
||||
# SKIP LIST
|
||||
# ── Add any bubble IDs you do NOT want rendered here.
|
||||
# ── The quads will still be erased (white-filled) but no
|
||||
# ── translated text will be drawn inside them.
|
||||
# ──
|
||||
# ── Examples of why you'd skip a bubble:
|
||||
# ── • Sound effects (BURP, BAM, POW …)
|
||||
# ── • Untranslatable single characters
|
||||
# ── • Bubbles with bad OCR you want to fix manually later
|
||||
# ── • Narrator boxes you want to leave in the source language
|
||||
# ============================================================
|
||||
SKIP_BUBBLE_IDS: Set[int] = {
|
||||
# 8, # BURP BURP — sound effect
|
||||
# 2, # example: bad OCR, fix manually
|
||||
}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# PARSERS
|
||||
# ─────────────────────────────────────────────
|
||||
def parse_translations(translations_file):
|
||||
# ============================================================
|
||||
# FONT LOADER
|
||||
# ============================================================
|
||||
def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
    """Load a usable font face from *path* at the given pixel *size*.

    For a ``.ttc`` collection every face index 0-3 is probed; a plain
    ``.ttf`` only has index 0.  Each candidate face is validated with a
    ``getbbox`` call (broken metrics raise).  Returns None when no face
    at *path* is usable.
    """
    # .ttc bundles can hold several faces; probe the first four indices.
    face_indices = [0, 1, 2, 3] if path.lower().endswith(".ttc") else [0]
    for face_idx in face_indices:
        try:
            candidate = ImageFont.truetype(path, size, index=face_idx)
            candidate.getbbox("A")  # raises if face metrics are broken
        except Exception:
            continue
        return candidate
    return None
|
||||
|
||||
|
||||
def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
    """Pick the first candidate in FONT_CANDIDATES that actually loads.

    Returns ``(path, font)``.  When no TrueType candidate works, falls
    back to Pillow's built-in bitmap font with an empty path string.
    """
    for path in FONT_CANDIDATES:
        loaded = load_font(path, FONT_SIZE)
        if loaded is None:
            continue
        print(f" ✅ Font: {path}")
        return path, loaded
    print(" ⚠️ No TrueType font found — using Pillow bitmap fallback")
    return "", ImageFont.load_default()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# PARSE output.txt → {bid: translated_string}
|
||||
# ============================================================
|
||||
def parse_translations(filepath: str) -> Dict[int, str]:
|
||||
"""
|
||||
Reads output.txt and returns {bubble_id: translated_text}.
|
||||
Lines look like: #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
|
||||
"""
|
||||
translations = {}
|
||||
originals = {}
|
||||
flags_map = {}
|
||||
|
||||
with open(translations_file, "r", encoding="utf-8") as f:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line.startswith("#"):
|
||||
continue
|
||||
|
||||
parts = line.split("|")
|
||||
if len(parts) < 5:
|
||||
continue
|
||||
try:
|
||||
bubble_id = int(parts[0].lstrip("#"))
|
||||
except Exception:
|
||||
bid = int(parts[0].lstrip("#"))
|
||||
translated = parts[4].strip()
|
||||
if translated and translated != "-":
|
||||
translations[bid] = translated
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if len(parts) >= 5:
|
||||
original = parts[2].strip()
|
||||
translated = parts[3].strip()
|
||||
flags = parts[4].strip()
|
||||
elif len(parts) >= 4:
|
||||
original = parts[2].strip()
|
||||
translated = parts[3].strip()
|
||||
flags = "-"
|
||||
elif len(parts) >= 3:
|
||||
original = ""
|
||||
translated = parts[2].strip()
|
||||
flags = "-"
|
||||
else:
|
||||
continue
|
||||
|
||||
if translated.startswith("["):
|
||||
continue
|
||||
|
||||
translations[bubble_id] = translated
|
||||
originals[bubble_id] = original
|
||||
flags_map[bubble_id] = flags
|
||||
|
||||
return translations, originals, flags_map
|
||||
return translations
|
||||
|
||||
|
||||
def parse_bubbles(bubbles_file):
    """Load bubbles.json and re-key the top-level mapping by int bubble id."""
    with open(bubbles_file, "r", encoding="utf-8") as f:
        raw = json.load(f)
    bubbles = {}
    for key, value in raw.items():
        bubbles[int(key)] = value
    return bubbles
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# HELPERS
|
||||
# ─────────────────────────────────────────────
|
||||
def clamp(v, lo, hi):
    """Clamp *v* into the range [lo, hi] (equivalent to max(lo, min(hi, v)))."""
    upper_bounded = hi if v > hi else v
    return lo if upper_bounded < lo else upper_bounded
|
||||
|
||||
|
||||
def xywh_to_xyxy(box):
    """Convert an {x, y, w, h} dict into an (x1, y1, x2, y2) tuple.

    Falsy input (None or an empty dict) yields None; missing keys
    default to 0.
    """
    if not box:
        return None
    left = int(box.get("x", 0))
    top = int(box.get("y", 0))
    width = int(box.get("w", 0))
    height = int(box.get("h", 0))
    return (left, top, left + width, top + height)
|
||||
|
||||
|
||||
def union_xyxy(boxes):
    """Union a list of (x1, y1, x2, y2) boxes into one bounding box.

    None entries are ignored.  Returns None when no boxes remain or the
    resulting union is degenerate (zero or negative width/height).
    """
    valid = [b for b in boxes if b is not None]
    if not valid:
        return None
    left = min(b[0] for b in valid)
    top = min(b[1] for b in valid)
    right = max(b[2] for b in valid)
    bottom = max(b[3] for b in valid)
    if right <= left or bottom <= top:
        return None
    return (left, top, right, bottom)
|
||||
|
||||
|
||||
def bbox_from_mask(mask):
    """Return the tight (x1, y1, x2, y2) box around nonzero mask pixels.

    x2/y2 are exclusive.  Returns None for an all-zero mask.
    """
    rows, cols = np.where(mask > 0)
    if cols.size == 0:
        return None
    x1, x2 = int(cols.min()), int(cols.max()) + 1
    y1, y2 = int(rows.min()), int(rows.max()) + 1
    return (x1, y1, x2, y2)
|
||||
|
||||
|
||||
def normalize_text(s):
    """Uppercase *s* and strip all non-word characters, for fuzzy comparison."""
    return re.sub(r"[^\w]+", "", s.upper().strip())
|
||||
|
||||
|
||||
def is_sfx_like(text):
    """Heuristic: short onomatopoeia token (SHAA, BIP, BEEP, HNN, AHH, OHH...)."""
    token = normalize_text(text)
    if len(token) > 8:
        return False
    return re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", token) is not None
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# FONT
|
||||
# ─────────────────────────────────────────────
|
||||
def load_font_from_candidates(candidates, size):
    """Return (font, path) for the first existing candidate that loads.

    Falls back to Pillow's built-in bitmap font with the sentinel path
    "PIL_DEFAULT" when no candidate works.
    """
    for candidate in candidates:
        if not candidate or not os.path.exists(candidate):
            continue
        try:
            return ImageFont.truetype(candidate, size), candidate
        except Exception:
            pass  # unreadable/corrupt font file — try the next one
    return ImageFont.load_default(), "PIL_DEFAULT"
|
||||
|
||||
|
||||
def measure_text(draw, text, font):
    """Return (width, height) of *text* as measured by draw.textbbox."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top
|
||||
|
||||
|
||||
def wrap_text(draw, text, font, max_width):
    """Greedy word-wrap of *text* so each rendered line fits *max_width* px.

    A word that alone exceeds max_width still becomes its own line (no
    mid-word breaking).  Returns (lines, total_height, widest_line_width);
    total_height includes an inter-line gap derived from the first line's
    height.  Empty input yields ([""], 0, 0).
    """
    lines = []
    current = ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        candidate_w, _ = measure_text(draw, candidate, font)
        if candidate_w <= max_width or not current:
            current = candidate
        else:
            lines.append(current)
            current = word
    if current:
        lines.append(current)

    if not lines:
        return [""], 0, 0

    sizes = [measure_text(draw, ln, font) for ln in lines]
    heights = [h for _, h in sizes]
    # gap scales with glyph height, but never collapses below 2px
    gap = max(2, heights[0] // 5)
    total_h = sum(heights) + gap * (len(lines) - 1)
    return lines, total_h, max(w for w, _ in sizes)
|
||||
|
||||
|
||||
def fit_font(draw, text, font_candidates, safe_w, safe_h):
    """Find the largest font size whose wrapped text fits safe_w x safe_h.

    Sizes are tried from MAX_FONT_SIZE down to MIN_FONT_SIZE; when nothing
    fits, the MIN_FONT_SIZE layout is returned anyway (text may overflow).
    Returns (font, lines, total_height).
    """
    size = MAX_FONT_SIZE
    while size >= MIN_FONT_SIZE:
        font, _ = load_font_from_candidates(font_candidates, size)
        lines, total_h, widest = wrap_text(draw, text, font, safe_w)
        if total_h <= safe_h and widest <= safe_w:
            return font, lines, total_h
        size -= 1

    # Nothing fit — fall back to the smallest size and accept overflow.
    font, _ = load_font_from_candidates(font_candidates, MIN_FONT_SIZE)
    lines, total_h, _ = wrap_text(draw, text, font, safe_w)
    return font, lines, total_h
|
||||
|
||||
|
||||
def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill):
    """Draw *text* with a manual stroke: offset copies first, fill pass last.

    Small glyphs (measured height <= 11px) get a 2px stroke for legibility;
    larger text uses 1px.
    """
    x, y = pos
    _, text_h = measure_text(draw, text, font)
    stroke = 2 if text_h <= 11 else 1

    offsets = [
        (dx, dy)
        for dx in range(-stroke, stroke + 1)
        for dy in range(-stroke, stroke + 1)
        if (dx, dy) != (0, 0)
    ]
    for dx, dy in offsets:
        draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill)

    draw.text((x, y), text, font=font, fill=fill)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MASK BUILDERS
|
||||
# ─────────────────────────────────────────────
|
||||
def build_yellow_mask(bubble_data, img_h, img_w):
|
||||
# ============================================================
|
||||
# PARSE bubbles.json → bubble_boxes, quads_per_bubble
|
||||
# ============================================================
|
||||
def parse_bubbles(filepath: str):
|
||||
"""
|
||||
HARD GUARANTEE:
|
||||
Returned mask always covers all yellow squares (line_bboxes).
|
||||
Returns:
|
||||
bubble_boxes : {bid: (x1, y1, x2, y2)}
|
||||
quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
|
||||
"""
|
||||
mask = np.zeros((img_h, img_w), dtype=np.uint8)
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Preferred: exact line boxes
|
||||
line_boxes = bubble_data.get("line_bboxes", [])
|
||||
for lb in line_boxes:
|
||||
b = xywh_to_xyxy(lb)
|
||||
if not b:
|
||||
bubble_boxes = {}
|
||||
quads_per_bubble = {}
|
||||
|
||||
for key, val in data.items():
|
||||
bid = int(key)
|
||||
|
||||
x1 = val["x"]; y1 = val["y"]
|
||||
x2 = x1 + val["w"]; y2 = y1 + val["h"]
|
||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||
|
||||
quads_per_bubble[bid] = val.get("quads", [])
|
||||
|
||||
return bubble_boxes, quads_per_bubble
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ERASE — white-fill every OCR quad (with small padding)
|
||||
# ============================================================
|
||||
def erase_quads(
|
||||
image_bgr,
|
||||
quads_per_bubble: Dict[int, List],
|
||||
translations: Dict[int, str], # ← NEW: only erase what we'll render
|
||||
skip_ids: Set[int],
|
||||
pad: int = QUAD_PAD
|
||||
):
|
||||
"""
|
||||
White-fills OCR quads ONLY for bubbles that:
|
||||
- have a translation in output.txt AND
|
||||
- are NOT in SKIP_BUBBLE_IDS
|
||||
Everything else is left completely untouched.
|
||||
"""
|
||||
ih, iw = image_bgr.shape[:2]
|
||||
result = image_bgr.copy()
|
||||
|
||||
erased_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for bid, quads in quads_per_bubble.items():
|
||||
|
||||
# ignore if explicitly skipped
|
||||
if bid in skip_ids:
|
||||
skipped_count += 1
|
||||
continue
|
||||
x1, y1, x2, y2 = b
|
||||
x1 -= YELLOW_BOX_PAD_X
|
||||
y1 -= YELLOW_BOX_PAD_Y
|
||||
x2 += YELLOW_BOX_PAD_X
|
||||
y2 += YELLOW_BOX_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
|
||||
# If no line boxes available, use line_union fallback
|
||||
if np.count_nonzero(mask) == 0:
|
||||
ub = xywh_to_xyxy(bubble_data.get("line_union_bbox"))
|
||||
if ub:
|
||||
x1, y1, x2, y2 = ub
|
||||
x1 -= YELLOW_UNION_PAD_X
|
||||
y1 -= YELLOW_UNION_PAD_Y
|
||||
x2 += YELLOW_UNION_PAD_X
|
||||
y2 += YELLOW_UNION_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
# ignore if no translation exists (deleted from output.txt)
|
||||
if bid not in translations:
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Last fallback: text_bbox
|
||||
if np.count_nonzero(mask) == 0:
|
||||
tb = xywh_to_xyxy(bubble_data.get("text_bbox"))
|
||||
if tb:
|
||||
x1, y1, x2, y2 = tb
|
||||
x1 -= YELLOW_UNION_PAD_X
|
||||
y1 -= YELLOW_UNION_PAD_Y
|
||||
x2 += YELLOW_UNION_PAD_X
|
||||
y2 += YELLOW_UNION_PAD_Y
|
||||
x1 = clamp(x1, 0, img_w - 1)
|
||||
y1 = clamp(y1, 0, img_h - 1)
|
||||
x2 = clamp(x2, 1, img_w)
|
||||
y2 = clamp(y2, 1, img_h)
|
||||
if x2 > x1 and y2 > y1:
|
||||
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
|
||||
for quad in quads:
|
||||
pts = np.array(quad, dtype=np.int32)
|
||||
cv2.fillPoly(result, [pts], (255, 255, 255))
|
||||
|
||||
return mask
|
||||
xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
|
||||
x1 = max(0, min(xs) - pad)
|
||||
y1 = max(0, min(ys) - pad)
|
||||
x2 = min(iw - 1, max(xs) + pad)
|
||||
y2 = min(ih - 1, max(ys) + pad)
|
||||
cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
|
||||
|
||||
erased_count += 1
|
||||
|
||||
print(f" Erased : {erased_count} bubbles")
|
||||
print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)")
|
||||
return result
|
||||
|
||||
|
||||
def bubble_interior_mask(img_bgr, bubble_data):
|
||||
# ============================================================
|
||||
# FONT SIZING + TEXT WRAP
|
||||
# ============================================================
|
||||
def fit_text(
|
||||
text: str,
|
||||
box_w: int,
|
||||
box_h: int,
|
||||
font_path: str,
|
||||
max_size: int = FONT_SIZE,
|
||||
min_size: int = MIN_FONT_SIZE
|
||||
) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
|
||||
"""
|
||||
Optional helper to expand clean region safely; never used to shrink yellow coverage.
|
||||
Returns (fitted_size, font, wrapped_lines) — largest size where
|
||||
the text block fits inside box_w × box_h.
|
||||
"""
|
||||
h, w = img_bgr.shape[:2]
|
||||
for size in range(max_size, min_size - 1, -1):
|
||||
font = load_font(font_path, size) if font_path else None
|
||||
if font is None:
|
||||
return min_size, ImageFont.load_default(), [text]
|
||||
|
||||
panel = xywh_to_xyxy(bubble_data.get("panel_bbox"))
|
||||
if panel is None:
|
||||
panel = (0, 0, w, h)
|
||||
px1, py1, px2, py2 = panel
|
||||
chars_per_line = max(1, int(box_w / (size * 0.62)))
|
||||
wrapped = textwrap.fill(text, width=chars_per_line)
|
||||
lines = wrapped.split("\n")
|
||||
total_h = (size + 8) * len(lines)
|
||||
|
||||
seed = bubble_data.get("seed_point", {})
|
||||
sx = int(seed.get("x", bubble_data.get("x", 0) + bubble_data.get("w", 1) // 2))
|
||||
sy = int(seed.get("y", bubble_data.get("y", 0) + bubble_data.get("h", 1) // 2))
|
||||
sx = clamp(sx, 1, w - 2)
|
||||
sy = clamp(sy, 1, h - 2)
|
||||
if total_h <= box_h - 8:
|
||||
return size, font, lines
|
||||
|
||||
gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
|
||||
_, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
|
||||
# Nothing fit — use minimum size
|
||||
font = load_font(font_path, min_size) if font_path else None
|
||||
if font is None:
|
||||
font = ImageFont.load_default()
|
||||
chars_per_line = max(1, int(box_w / (min_size * 0.62)))
|
||||
lines = textwrap.fill(text, width=chars_per_line).split("\n")
|
||||
return min_size, font, lines
|
||||
|
||||
panel_bin = np.zeros_like(binary)
|
||||
panel_bin[py1:py2, px1:px2] = binary[py1:py2, px1:px2]
|
||||
|
||||
# if seed on dark pixel, search nearby white
|
||||
if gray[sy, sx] < 150:
|
||||
found = False
|
||||
search_r = max(2, min(bubble_data.get("w", 20), bubble_data.get("h", 20)) // 3)
|
||||
for r in range(1, search_r + 1):
|
||||
for dy in range(-r, r + 1):
|
||||
for dx in range(-r, r + 1):
|
||||
nx, ny = sx + dx, sy + dy
|
||||
if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200:
|
||||
sx, sy = nx, ny
|
||||
found = True
|
||||
break
|
||||
if found:
|
||||
break
|
||||
if found:
|
||||
break
|
||||
|
||||
if not found:
|
||||
m = np.zeros((h, w), dtype=np.uint8)
|
||||
bx = bubble_data.get("x", 0)
|
||||
by = bubble_data.get("y", 0)
|
||||
bw = bubble_data.get("w", 20)
|
||||
bh = bubble_data.get("h", 20)
|
||||
cv2.ellipse(m, (bx + bw // 2, by + bh // 2), (max(4, bw // 2), max(4, bh // 2)), 0, 0, 360, 255, -1)
|
||||
return m
|
||||
|
||||
ff_mask = np.zeros((h + 2, w + 2), dtype=np.uint8)
|
||||
flood = panel_bin.copy()
|
||||
cv2.floodFill(
|
||||
flood, ff_mask, (sx, sy), 255,
|
||||
loDiff=FLOOD_TOL, upDiff=FLOOD_TOL,
|
||||
flags=cv2.FLOODFILL_FIXED_RANGE
|
||||
# ============================================================
|
||||
# COLOR HELPERS
|
||||
# ============================================================
|
||||
def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
) -> Tuple[int, int, int]:
    """Estimate a bubble's background colour.

    Samples one pixel just inside each of the four corners of the
    (x1, y1, x2, y2) box and returns the per-channel median as (R, G, B).
    Sample points are clamped to the image bounds.
    """
    ih, iw = image_bgr.shape[:2]
    corner_points = [
        (x1 + 4, y1 + 4),
        (x2 - 4, y1 + 4),
        (x1 + 4, y2 - 4),
        (x2 - 4, y2 - 4),
    ]
    reds, greens, blues = [], [], []
    for px, py in corner_points:
        px = max(0, min(iw - 1, px))
        py = max(0, min(ih - 1, py))
        b, g, r = image_bgr[py, px]  # OpenCV images are BGR-ordered
        reds.append(int(r))
        greens.append(int(g))
        blues.append(int(b))
    return (
        int(np.median(reds)),
        int(np.median(greens)),
        int(np.median(blues)),
    )
|
||||
|
||||
m = (ff_mask[1:-1, 1:-1] * 255).astype(np.uint8)
|
||||
m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1)
|
||||
return m
|
||||
|
||||
def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
    """Black text on light backgrounds, white on dark (Rec. 601 luma split)."""
    luma = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2]
    if luma > 128:
        return (0, 0, 0)
    return (255, 255, 255)
|
||||
|
||||
|
||||
def build_clean_mask(img_bgr, bubble_data):
    """
    Build the white-out mask for one bubble.

    Returns (clean, yellow) as uint8 masks sized like img_bgr:
      yellow — the mask from build_yellow_mask (the OCR line boxes)
      clean  — yellow, optionally grown by a small dilate+close that is
               clipped to the bubble interior

    FINAL RULE:
    clean_mask MUST cover yellow_mask completely.
    """
    h, w = img_bgr.shape[:2]
    yellow = build_yellow_mask(bubble_data, h, w)

    # start with guaranteed yellow
    clean = yellow.copy()

    if ENABLE_EXTRA_CLEAN:
        # Grow the yellow area slightly, then clip to the detected bubble
        # interior so the expansion never spills past the bubble border.
        bubble_m = bubble_interior_mask(img_bgr, bubble_data)
        extra = cv2.dilate(yellow, np.ones((3, 3), np.uint8), iterations=EXTRA_DILATE_ITERS)
        extra = cv2.morphologyEx(extra, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=EXTRA_CLOSE_ITERS)
        extra = cv2.bitwise_and(extra, bubble_m)

        # IMPORTANT: union with yellow (never subtract yellow)
        clean = cv2.bitwise_or(yellow, extra)

    # final guarantee (defensive)
    clean = cv2.bitwise_or(clean, yellow)

    return clean, yellow
|
||||
def safe_textbbox(
    draw, pos, text, font
) -> Tuple[int, int, int, int]:
    """draw.textbbox with a crude estimate as fallback when it raises."""
    try:
        return draw.textbbox(pos, text, font=font)
    except Exception:
        pass
    # Fallback estimate: ~0.6em average glyph width, 1.2em line height.
    approx_size = getattr(font, "size", 12)
    x0, y0 = pos
    return (
        x0,
        y0,
        x0 + int(len(text) * approx_size * 0.6),
        y0 + int(approx_size * 1.2),
    )
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# DRAW BUBBLE
|
||||
# ─────────────────────────────────────────────
|
||||
def draw_bubble(
    pil_img,
    img_bgr_ref,
    bubble_data,
    original_text,
    translated_text,
    font_candidates,
    font_color,
    stroke_color
):
    """Erase one bubble's original text and render its translation in place.

    Mutates *pil_img*; *img_bgr_ref* is only read (reference pixels for the
    edge-restore step).  Returns a status string:
      "skip_sfx"     — original == translation and looks like a sound effect
      "skip_no_area" — no erase/text region could be derived
      "clean_only"   — erased, but no translation to draw
      "rendered"     — erased and translated text drawn
    """
    # Sound effects whose "translation" is just the original text are
    # left untouched entirely.
    if original_text and translated_text:
        if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text):
            return "skip_sfx"

    rgb = np.array(pil_img)
    h, w = rgb.shape[:2]

    clean_mask, yellow_mask = build_clean_mask(img_bgr_ref, bubble_data)
    if np.count_nonzero(clean_mask) == 0:
        return "skip_no_area"

    # 1) FORCE white fill on clean mask (includes full yellow by guarantee)
    rgb[clean_mask == 255] = [255, 255, 255]

    # 2) Optional edge restore, but NEVER overwrite yellow coverage
    if ENABLE_EDGE_RESTORE:
        # Re-paint a thin band along the bubble border from the original
        # image so the white fill doesn't eat the outline.
        bubble_m = bubble_interior_mask(img_bgr_ref, bubble_data)
        edge = cv2.morphologyEx(bubble_m, cv2.MORPH_GRADIENT, np.ones((3, 3), np.uint8))
        edge = cv2.dilate(edge, np.ones((3, 3), np.uint8), iterations=EDGE_RESTORE_DILATE)

        # Don't restore where yellow exists (hard guarantee)
        edge[yellow_mask == 255] = 0

        orig_rgb = cv2.cvtColor(img_bgr_ref, cv2.COLOR_BGR2RGB)
        rgb[edge == 255] = orig_rgb[edge == 255]

    pil_img.paste(Image.fromarray(rgb))

    if not translated_text:
        return "clean_only"

    # text region based on yellow area (exact requirement)
    text_bbox = bbox_from_mask(yellow_mask)
    if text_bbox is None:
        text_bbox = bbox_from_mask(clean_mask)
    if text_bbox is None:
        return "skip_no_area"

    x1, y1, x2, y2 = text_bbox

    draw = ImageDraw.Draw(pil_img)
    # Center of the text region; TEXT_INSET shrinks the usable area so
    # glyphs keep a margin from the erased-region border.
    text_cx = int((x1 + x2) / 2)
    text_cy = int((y1 + y2) / 2)
    safe_w = max(16, int((x2 - x1) * TEXT_INSET))
    safe_h = max(16, int((y2 - y1) * TEXT_INSET))

    font, lines, total_h = fit_font(draw, translated_text, font_candidates, safe_w, safe_h)

    # Stack the wrapped lines vertically centered on the region midpoint.
    y_cursor = int(round(text_cy - total_h / 2.0))
    for line in lines:
        lw, lh = measure_text(draw, line, font)
        x = text_cx - lw // 2
        draw_text_with_stroke(draw, (x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color)
        y_cursor += lh + max(lh // 5, 2)

    return "rendered"
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# MAIN
|
||||
# ─────────────────────────────────────────────
|
||||
# ============================================================
|
||||
# RENDER
|
||||
# ============================================================
|
||||
def render_translations(
|
||||
input_image,
|
||||
output_image,
|
||||
translations_file,
|
||||
bubbles_file,
|
||||
font_candidates=DEFAULT_FONT_CANDIDATES,
|
||||
font_color=DEFAULT_FONT_COLOR,
|
||||
stroke_color=DEFAULT_STROKE_COLOR
|
||||
image_bgr,
|
||||
bubble_boxes: Dict[int, Tuple],
|
||||
translations: Dict[int, str],
|
||||
skip_ids: Set[int],
|
||||
font_path: str,
|
||||
font_size: int = FONT_SIZE,
|
||||
bold_outline: bool = True,
|
||||
auto_color: bool = True,
|
||||
output_path: str = OUTPUT_PATH
|
||||
):
|
||||
img_bgr = cv2.imread(input_image)
|
||||
if img_bgr is None:
|
||||
raise FileNotFoundError(f"Cannot load image: {input_image}")
|
||||
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
|
||||
pil_img = Image.fromarray(image_rgb)
|
||||
draw = ImageDraw.Draw(pil_img)
|
||||
|
||||
img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
|
||||
rendered = 0
|
||||
skipped = 0
|
||||
missing = 0
|
||||
|
||||
translations, originals, flags_map = parse_translations(translations_file)
|
||||
bubbles = parse_bubbles(bubbles_file)
|
||||
for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
|
||||
|
||||
rendered, skipped = 0, 0
|
||||
|
||||
def sort_key(item):
|
||||
bid, _ = item
|
||||
b = bubbles.get(bid, {})
|
||||
return int(b.get("reading_order", bid))
|
||||
|
||||
for bubble_id, translated_text in sorted(translations.items(), key=sort_key):
|
||||
if bubble_id not in bubbles:
|
||||
# ── skip list check ────────────────────────────────────────
|
||||
if bid in skip_ids:
|
||||
print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
bubble_data = bubbles[bubble_id]
|
||||
original_text = originals.get(bubble_id, "")
|
||||
text = translations.get(bid, "").strip()
|
||||
if not text:
|
||||
print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank")
|
||||
missing += 1
|
||||
continue
|
||||
|
||||
status = draw_bubble(
|
||||
pil_img=img_pil,
|
||||
img_bgr_ref=img_bgr,
|
||||
bubble_data=bubble_data,
|
||||
original_text=original_text,
|
||||
translated_text=translated_text,
|
||||
font_candidates=font_candidates,
|
||||
font_color=font_color,
|
||||
stroke_color=stroke_color
|
||||
box_w = x2 - x1
|
||||
box_h = y2 - y1
|
||||
if box_w < 10 or box_h < 10:
|
||||
continue
|
||||
|
||||
# ── fit font + wrap ────────────────────────────────────────
|
||||
size, font, lines = fit_text(
|
||||
text, box_w, box_h, font_path, max_size=font_size
|
||||
)
|
||||
|
||||
if status.startswith("skip"):
|
||||
skipped += 1
|
||||
# ── colors ─────────────────────────────────────────────────
|
||||
if auto_color:
|
||||
bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
|
||||
fg = pick_fg_color(bg)
|
||||
ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
|
||||
else:
|
||||
fg, ol = (0, 0, 0), (255, 255, 255)
|
||||
|
||||
# ── vertical center ────────────────────────────────────────
|
||||
line_h = size + 8
|
||||
total_h = line_h * len(lines)
|
||||
y_cur = y1 + max(4, (box_h - total_h) // 2)
|
||||
|
||||
for line in lines:
|
||||
bb = safe_textbbox(draw, (0, 0), line, font)
|
||||
line_w = bb[2] - bb[0]
|
||||
x_cur = x1 + max(2, (box_w - line_w) // 2)
|
||||
|
||||
if bold_outline:
|
||||
for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
|
||||
try:
|
||||
draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
draw.text((x_cur, y_cur), line, font=font, fill=fg)
|
||||
except Exception as e:
|
||||
print(f" ❌ Draw error bubble #{bid}: {e}")
|
||||
|
||||
y_cur += line_h
|
||||
|
||||
print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)")
|
||||
rendered += 1
|
||||
|
||||
out_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite(output_image, out_bgr)
|
||||
pil_img.save(output_path)
|
||||
|
||||
print(f"✅ Done — {rendered} rendered, {skipped} skipped.")
|
||||
print(f"📄 Output → {output_image}")
|
||||
print("Guarantee: full yellow-square area is always white-cleaned before drawing text.")
|
||||
print()
|
||||
print(f"{'─'*50}")
|
||||
print(f" Rendered : {rendered}")
|
||||
print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)")
|
||||
print(f" No text : {missing} (not in output.txt)")
|
||||
print(f"{'─'*50}")
|
||||
print(f"✅ Saved → {output_path}")
|
||||
|
||||
return pil_img
|
||||
|
||||
|
||||
# ============================================================
|
||||
# MAIN
|
||||
# ============================================================
|
||||
def main():
|
||||
print(f"📖 Loading image : {IMAGE_PATH}")
|
||||
image = cv2.imread(IMAGE_PATH)
|
||||
if image is None:
|
||||
print(f"❌ Cannot load: {IMAGE_PATH}"); return
|
||||
|
||||
print(f"📦 Loading bubbles : {BUBBLES_PATH}")
|
||||
bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
|
||||
print(f" {len(bubble_boxes)} bubbles | "
|
||||
f"{sum(len(v) for v in quads_per_bubble.values())} quads total")
|
||||
|
||||
print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
|
||||
translations = parse_translations(TRANSLATIONS_PATH)
|
||||
print(f" {len(translations)} translations found")
|
||||
|
||||
if SKIP_BUBBLE_IDS:
|
||||
print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}")
|
||||
else:
|
||||
print(f"⏭️ Skip list : (empty — all bubbles will be rendered)")
|
||||
|
||||
print("🔤 Resolving font...")
|
||||
font_path, _ = resolve_font()
|
||||
|
||||
print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
|
||||
clean_image = erase_quads(
|
||||
image,
|
||||
quads_per_bubble,
|
||||
translations = translations, # ← pass translations here
|
||||
skip_ids = SKIP_BUBBLE_IDS,
|
||||
pad = QUAD_PAD
|
||||
)
|
||||
|
||||
print("✍️ Rendering translated text...")
|
||||
render_translations(
|
||||
image_bgr = clean_image,
|
||||
bubble_boxes = bubble_boxes,
|
||||
translations = translations,
|
||||
skip_ids = SKIP_BUBBLE_IDS,
|
||||
font_path = font_path,
|
||||
font_size = FONT_SIZE,
|
||||
bold_outline = True,
|
||||
auto_color = True,
|
||||
output_path = OUTPUT_PATH
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
render_translations(
|
||||
input_image="001-page.png",
|
||||
output_image="page_translated.png",
|
||||
translations_file="output.txt",
|
||||
bubbles_file="bubbles.json",
|
||||
font_candidates=DEFAULT_FONT_CANDIDATES,
|
||||
font_color=DEFAULT_FONT_COLOR,
|
||||
stroke_color=DEFAULT_STROKE_COLOR
|
||||
)
|
||||
main()
|
||||
|
||||
@@ -6,13 +6,17 @@ import re
|
||||
import json
|
||||
import cv2
|
||||
import numpy as np
|
||||
import warnings
|
||||
from typing import List, Tuple, Dict, Any, Optional
|
||||
|
||||
from deep_translator import GoogleTranslator
|
||||
|
||||
# OCR engines
|
||||
import easyocr
|
||||
from paddleocr import PaddleOCR
|
||||
# macOS Native Vision imports
|
||||
import Vision
|
||||
import Quartz
|
||||
from Foundation import NSData
|
||||
|
||||
warnings.filterwarnings("ignore", category=UserWarning)
|
||||
|
||||
# ============================================================
|
||||
# CONFIG
|
||||
@@ -26,7 +30,7 @@ GLOSSARY = {
|
||||
}
|
||||
|
||||
SOUND_EFFECT_PATTERNS = [
|
||||
r"^b+i+p+$", r"^sha+$", r"^ha+$", r"^ah+$", r"^oh+$",
|
||||
r"^b+i+p+$", r"^sha+$", r"^ha+$", r"^ah+$",
|
||||
r"^ugh+$", r"^bam+$", r"^pow+$", r"^boom+$", r"^bang+$",
|
||||
r"^crash+$", r"^thud+$", r"^zip+$", r"^swoosh+$", r"^chirp+$"
|
||||
]
|
||||
@@ -47,13 +51,13 @@ TOP_BAND_RATIO = 0.08
|
||||
|
||||
|
||||
# ============================================================
|
||||
# TEXT HELPERS
|
||||
# HELPERS
|
||||
# ============================================================
|
||||
def normalize_text(text: str) -> str:
|
||||
t = (text or "").strip().upper()
|
||||
t = t.replace("“", "\"").replace("”", "\"")
|
||||
t = t.replace("’", "'").replace("‘", "'")
|
||||
t = t.replace("…", "...")
|
||||
t = t.replace("\u201c", "\"").replace("\u201d", "\"")
|
||||
t = t.replace("\u2018", "'").replace("\u2019", "'")
|
||||
t = t.replace("\u2026", "...")
|
||||
t = re.sub(r"\s+", " ", t)
|
||||
t = re.sub(r"\s+([,.;:!?])", r"\1", t)
|
||||
t = re.sub(r"([¡¿])\s+", r"\1", t)
|
||||
@@ -88,24 +92,35 @@ def is_title_text(text: str) -> bool:
|
||||
return any(re.fullmatch(p, t, re.IGNORECASE) for p in TITLE_PATTERNS)
|
||||
|
||||
|
||||
def looks_like_box_tag(t: str) -> bool:
|
||||
s = re.sub(r"[^A-Z0-9#]", "", (t or "").upper())
|
||||
if re.fullmatch(r"[BEF]?[O0D]X#?\d{0,3}", s):
|
||||
return True
|
||||
if re.fullmatch(r"B[O0D]X\d{0,3}", s):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def is_noise_text(text: str) -> bool:
|
||||
t = (text or "").strip()
|
||||
|
||||
# Explicitly allow standalone punctuation like ? or !
|
||||
if re.fullmatch(r"[\?\!]+", t):
|
||||
return False
|
||||
|
||||
if any(re.fullmatch(p, t) for p in NOISE_PATTERNS):
|
||||
return True
|
||||
|
||||
if len(t) <= 2 and not re.search(r"[A-Z0-9]", t):
|
||||
if looks_like_box_tag(t):
|
||||
return True
|
||||
if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!]", t):
|
||||
return True
|
||||
|
||||
symbol_ratio = sum(1 for c in t if not c.isalnum() and not c.isspace()) / max(1, len(t))
|
||||
if len(t) <= 6 and symbol_ratio > 0.60:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GEOMETRY HELPERS
|
||||
# ============================================================
|
||||
def quad_bbox(quad):
|
||||
xs = [p[0] for p in quad]
|
||||
ys = [p[1] for p in quad]
|
||||
@@ -150,9 +165,6 @@ def overlap_or_near(a, b, gap=0):
|
||||
return gap_x <= gap and gap_y <= gap
|
||||
|
||||
|
||||
# ============================================================
|
||||
# QUALITY
|
||||
# ============================================================
|
||||
def ocr_candidate_score(text: str) -> float:
|
||||
if not text:
|
||||
return 0.0
|
||||
@@ -179,204 +191,98 @@ def ocr_candidate_score(text: str) -> float:
|
||||
|
||||
|
||||
# ============================================================
|
||||
# OCR ENGINE WRAPPER (PADDLE + EASYOCR HYBRID)
|
||||
# OCR ENGINES (Apple Native Vision)
|
||||
# ============================================================
|
||||
class HybridOCR:
|
||||
def __init__(self, source_lang="en", use_gpu=False):
|
||||
self.source_lang = source_lang
|
||||
class MacVisionDetector:
|
||||
def __init__(self, source_lang="en"):
|
||||
lang_map = {"en": "en-US", "es": "es-ES", "ca": "ca-ES", "fr": "fr-FR", "ja": "ja-JP"}
|
||||
apple_lang = lang_map.get(source_lang, "en-US")
|
||||
self.langs = [apple_lang]
|
||||
print(f"⚡ Using Apple Vision OCR (Language: {self.langs})")
|
||||
|
||||
# Paddle language choice (single lang for Paddle)
|
||||
# For manga EN/ES pages, latin model is robust.
|
||||
if source_lang in ("en", "es", "ca", "fr", "de", "it", "pt"):
|
||||
paddle_lang = "latin"
|
||||
elif source_lang in ("ja",):
|
||||
paddle_lang = "japan"
|
||||
elif source_lang in ("ko",):
|
||||
paddle_lang = "korean"
|
||||
elif source_lang in ("ch", "zh", "zh-cn", "zh-tw"):
|
||||
paddle_lang = "ch"
|
||||
def read(self, image_path_or_array):
|
||||
if isinstance(image_path_or_array, str):
|
||||
img = cv2.imread(image_path_or_array)
|
||||
else:
|
||||
paddle_lang = "latin"
|
||||
img = image_path_or_array
|
||||
|
||||
# EasyOCR language list
|
||||
if source_lang == "ca":
|
||||
easy_langs = ["es", "en"]
|
||||
elif source_lang == "en":
|
||||
easy_langs = ["en", "es"]
|
||||
elif source_lang == "es":
|
||||
easy_langs = ["es", "en"]
|
||||
else:
|
||||
easy_langs = [source_lang]
|
||||
if img is None or img.size == 0:
|
||||
return []
|
||||
|
||||
self.paddle = PaddleOCR(
|
||||
use_angle_cls=True,
|
||||
lang=paddle_lang,
|
||||
use_gpu=use_gpu,
|
||||
show_log=False
|
||||
)
|
||||
self.easy = easyocr.Reader(easy_langs, gpu=use_gpu)
|
||||
ih, iw = img.shape[:2]
|
||||
|
||||
@staticmethod
|
||||
def _paddle_to_std(result):
|
||||
"""
|
||||
Convert Paddle result to Easy-like:
|
||||
[ (quad, text, conf), ... ]
|
||||
"""
|
||||
out = []
|
||||
# paddle.ocr(...) returns list per image
|
||||
# each item line: [ [ [x,y],...4pts ], (text, conf) ]
|
||||
if not result:
|
||||
return out
|
||||
# result can be [None] or nested list
|
||||
blocks = result if isinstance(result, list) else [result]
|
||||
for blk in blocks:
|
||||
if blk is None:
|
||||
continue
|
||||
if len(blk) == 0:
|
||||
continue
|
||||
# some versions wrap once more
|
||||
if isinstance(blk[0], list) and len(blk[0]) > 0 and isinstance(blk[0][0], (list, tuple)) and len(blk[0]) == 2:
|
||||
lines = blk
|
||||
elif isinstance(blk[0], (list, tuple)) and len(blk[0]) >= 2:
|
||||
lines = blk
|
||||
else:
|
||||
# maybe nested once more
|
||||
if len(blk) == 1 and isinstance(blk[0], list):
|
||||
lines = blk[0]
|
||||
else:
|
||||
lines = []
|
||||
success, buffer = cv2.imencode('.png', img)
|
||||
if not success:
|
||||
return []
|
||||
|
||||
for ln in lines:
|
||||
try:
|
||||
pts, rec = ln
|
||||
txt, conf = rec[0], float(rec[1])
|
||||
quad = [[float(p[0]), float(p[1])] for p in pts]
|
||||
out.append((quad, txt, conf))
|
||||
except Exception:
|
||||
continue
|
||||
return out
|
||||
ns_data = NSData.dataWithBytes_length_(buffer.tobytes(), len(buffer.tobytes()))
|
||||
handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(ns_data, None)
|
||||
results = []
|
||||
|
||||
def read_full_image(self, image_path):
|
||||
"""
|
||||
Primary: Paddle
|
||||
Fallback merge: EasyOCR
|
||||
Returns merged standardized detections.
|
||||
"""
|
||||
# Paddle
|
||||
pr = self.paddle.ocr(image_path, cls=True)
|
||||
paddle_det = self._paddle_to_std(pr)
|
||||
def completion_handler(request, error):
|
||||
if error:
|
||||
print(f"Vision API Error: {error}")
|
||||
return
|
||||
|
||||
# Easy
|
||||
easy_det = self.easy.readtext(image_path, paragraph=False)
|
||||
for observation in request.results():
|
||||
candidate = observation.topCandidates_(1)[0]
|
||||
text = candidate.string()
|
||||
confidence = candidate.confidence()
|
||||
|
||||
# Merge by IOU/text proximity
|
||||
merged = list(paddle_det)
|
||||
for eb in easy_det:
|
||||
eq, et, ec = eb
|
||||
ebox = quad_bbox(eq)
|
||||
keep = True
|
||||
for pb in paddle_det:
|
||||
pq, pt, pc = pb
|
||||
pbox = quad_bbox(pq)
|
||||
bbox = observation.boundingBox()
|
||||
x = bbox.origin.x * iw
|
||||
y_bottom_left = bbox.origin.y * ih
|
||||
w = bbox.size.width * iw
|
||||
h = bbox.size.height * ih
|
||||
|
||||
ix1 = max(ebox[0], pbox[0]); iy1 = max(ebox[1], pbox[1])
|
||||
ix2 = min(ebox[2], pbox[2]); iy2 = min(ebox[3], pbox[3])
|
||||
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
||||
a1 = max(1, (ebox[2] - ebox[0]) * (ebox[3] - ebox[1]))
|
||||
a2 = max(1, (pbox[2] - pbox[0]) * (pbox[3] - pbox[1]))
|
||||
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
|
||||
y = ih - y_bottom_left - h
|
||||
|
||||
if iou > 0.55:
|
||||
# if overlapped and paddle exists, keep paddle unless easy much higher conf
|
||||
if float(ec) > float(pc) + 0.20:
|
||||
# replace paddle with easy-like entry
|
||||
try:
|
||||
merged.remove(pb)
|
||||
except Exception:
|
||||
pass
|
||||
merged.append((eq, et, float(ec)))
|
||||
keep = False
|
||||
break
|
||||
quad = [
|
||||
[int(x), int(y)],
|
||||
[int(x + w), int(y)],
|
||||
[int(x + w), int(y + h)],
|
||||
[int(x), int(y + h)]
|
||||
]
|
||||
|
||||
if keep:
|
||||
merged.append((eq, et, float(ec)))
|
||||
results.append((quad, text, confidence))
|
||||
|
||||
return merged
|
||||
request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
|
||||
request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
|
||||
request.setUsesLanguageCorrection_(True)
|
||||
request.setRecognitionLanguages_(self.langs)
|
||||
|
||||
def read_array_with_both(self, arr_gray_or_bgr):
|
||||
"""
|
||||
OCR from array (used in robust reread pass).
|
||||
Returns merged detections in standardized format.
|
||||
"""
|
||||
tmp = "_tmp_ocr_hybrid.png"
|
||||
cv2.imwrite(tmp, arr_gray_or_bgr)
|
||||
try:
|
||||
pr = self.paddle.ocr(tmp, cls=True)
|
||||
paddle_det = self._paddle_to_std(pr)
|
||||
easy_det = self.easy.readtext(tmp, paragraph=False)
|
||||
handler.performRequests_error_([request], None)
|
||||
|
||||
merged = list(paddle_det)
|
||||
|
||||
for eb in easy_det:
|
||||
eq, et, ec = eb
|
||||
ebox = quad_bbox(eq)
|
||||
keep = True
|
||||
for pb in paddle_det:
|
||||
pq, pt, pc = pb
|
||||
pbox = quad_bbox(pq)
|
||||
|
||||
ix1 = max(ebox[0], pbox[0]); iy1 = max(ebox[1], pbox[1])
|
||||
ix2 = min(ebox[2], pbox[2]); iy2 = min(ebox[3], pbox[3])
|
||||
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
||||
a1 = max(1, (ebox[2] - ebox[0]) * (ebox[3] - ebox[1]))
|
||||
a2 = max(1, (pbox[2] - pbox[0]) * (pbox[3] - pbox[1]))
|
||||
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
|
||||
|
||||
if iou > 0.55:
|
||||
if float(ec) > float(pc) + 0.20:
|
||||
try:
|
||||
merged.remove(pb)
|
||||
except Exception:
|
||||
pass
|
||||
merged.append((eq, et, float(ec)))
|
||||
keep = False
|
||||
break
|
||||
|
||||
if keep:
|
||||
merged.append((eq, et, float(ec)))
|
||||
|
||||
return merged
|
||||
finally:
|
||||
if os.path.exists(tmp):
|
||||
os.remove(tmp)
|
||||
return results
|
||||
|
||||
|
||||
# ============================================================
|
||||
# PREPROCESS + ROBUST REREAD
|
||||
# PREPROCESS
|
||||
# ============================================================
|
||||
def preprocess_variant(crop_bgr, mode):
|
||||
gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
if mode == "raw":
|
||||
return gray
|
||||
|
||||
if mode == "clahe":
|
||||
return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
|
||||
|
||||
if mode == "adaptive":
|
||||
den = cv2.GaussianBlur(gray, (3, 3), 0)
|
||||
return cv2.adaptiveThreshold(
|
||||
den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||
cv2.THRESH_BINARY, 35, 11
|
||||
)
|
||||
|
||||
return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11)
|
||||
if mode == "otsu":
|
||||
den = cv2.GaussianBlur(gray, (3, 3), 0)
|
||||
_, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
||||
return th
|
||||
|
||||
if mode == "invert":
|
||||
return 255 - gray
|
||||
if mode == "bilateral":
|
||||
den = cv2.bilateralFilter(gray, 7, 60, 60)
|
||||
_, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
||||
return th
|
||||
if mode == "morph_open":
|
||||
_, th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
||||
k = np.ones((2, 2), np.uint8)
|
||||
return cv2.morphologyEx(th, cv2.MORPH_OPEN, k)
|
||||
|
||||
return gray
|
||||
|
||||
@@ -389,22 +295,18 @@ def rotate_image_keep_bounds(img, angle_deg):
|
||||
|
||||
new_w = int((h * sin) + (w * cos))
|
||||
new_h = int((h * cos) + (w * sin))
|
||||
|
||||
M[0, 2] += (new_w / 2) - c[0]
|
||||
M[1, 2] += (new_h / 2) - c[1]
|
||||
|
||||
return cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderValue=255)
|
||||
|
||||
|
||||
def rebuild_text_from_ocr_result(res):
|
||||
def rebuild_text_from_vision_result(res):
|
||||
if not res:
|
||||
return ""
|
||||
|
||||
norm = []
|
||||
for item in res:
|
||||
if len(item) != 3:
|
||||
continue
|
||||
bbox, txt, conf = item
|
||||
for bbox, txt, conf in res:
|
||||
if not txt or not txt.strip():
|
||||
continue
|
||||
b = quad_bbox(bbox)
|
||||
@@ -419,7 +321,7 @@ def rebuild_text_from_ocr_result(res):
|
||||
med_h = float(np.median([x[5] for x in norm]))
|
||||
row_tol = max(6.0, med_h * 0.75)
|
||||
|
||||
norm.sort(key=lambda z: z[4]) # y
|
||||
norm.sort(key=lambda z: z[4])
|
||||
rows = []
|
||||
for it in norm:
|
||||
placed = False
|
||||
@@ -435,7 +337,7 @@ def rebuild_text_from_ocr_result(res):
|
||||
rows.sort(key=lambda r: r["yc"])
|
||||
lines = []
|
||||
for r in rows:
|
||||
mem = sorted(r["m"], key=lambda z: z[3]) # x
|
||||
mem = sorted(r["m"], key=lambda z: z[3])
|
||||
line = normalize_text(" ".join(x[1] for x in mem))
|
||||
if line:
|
||||
lines.append(line)
|
||||
@@ -443,57 +345,51 @@ def rebuild_text_from_ocr_result(res):
|
||||
return normalize_text(" ".join(lines))
|
||||
|
||||
|
||||
def reread_crop_robust(image, bbox, hybrid_ocr: HybridOCR, upscale=3.0, pad=24):
|
||||
ih, iw = image.shape[:2]
|
||||
x1, y1, x2, y2 = bbox
|
||||
x1 = max(0, int(x1 - pad))
|
||||
y1 = max(0, int(y1 - pad))
|
||||
x2 = min(iw, int(x2 + pad))
|
||||
y2 = min(ih, int(y2 + pad))
|
||||
crop = image[y1:y2, x1:x2]
|
||||
def reread_bubble_with_vision(
|
||||
image_bgr,
|
||||
bbox_xyxy,
|
||||
vision_detector: MacVisionDetector,
|
||||
upscale=3.0,
|
||||
pad=24
|
||||
):
|
||||
ih, iw = image_bgr.shape[:2]
|
||||
x1, y1, x2, y2 = bbox_xyxy
|
||||
x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad))
|
||||
x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad))
|
||||
|
||||
crop = image_bgr[y1:y2, x1:x2]
|
||||
if crop.size == 0:
|
||||
return None, 0.0
|
||||
return None, 0.0, "none"
|
||||
|
||||
up = cv2.resize(
|
||||
crop,
|
||||
(int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)),
|
||||
interpolation=cv2.INTER_CUBIC
|
||||
)
|
||||
|
||||
modes = ["raw", "clahe", "adaptive", "otsu", "invert"]
|
||||
modes = ["raw", "clahe", "adaptive", "otsu", "invert", "bilateral", "morph_open"]
|
||||
angles = [0.0, 1.5, -1.5]
|
||||
|
||||
best_text, best_score = "", 0.0
|
||||
best_v_txt, best_v_sc = "", 0.0
|
||||
up0 = cv2.resize(crop, (int(crop.shape[1]*upscale), int(crop.shape[0]*upscale)), interpolation=cv2.INTER_CUBIC)
|
||||
|
||||
for mode in modes:
|
||||
proc = preprocess_variant(up, mode)
|
||||
|
||||
if len(proc.shape) == 2:
|
||||
proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR)
|
||||
else:
|
||||
proc3 = proc
|
||||
|
||||
proc = preprocess_variant(up0, mode)
|
||||
proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR) if len(proc.shape) == 2 else proc
|
||||
for a in angles:
|
||||
rot = rotate_image_keep_bounds(proc3, a)
|
||||
res = hybrid_ocr.read_array_with_both(rot)
|
||||
txt = rebuild_text_from_ocr_result(res)
|
||||
res = vision_detector.read(rot)
|
||||
txt = rebuild_text_from_vision_result(res)
|
||||
sc = ocr_candidate_score(txt)
|
||||
if sc > best_v_sc:
|
||||
best_v_txt, best_v_sc = txt, sc
|
||||
|
||||
if sc > best_score:
|
||||
best_text, best_score = txt, sc
|
||||
if best_v_txt:
|
||||
return best_v_txt, best_v_sc, "vision-reread"
|
||||
|
||||
if not best_text:
|
||||
return None, 0.0
|
||||
return best_text, best_score
|
||||
return None, 0.0, "none"
|
||||
|
||||
|
||||
# ============================================================
|
||||
# LINE REBUILD + YELLOW BOXES
|
||||
# LINES + BUBBLES
|
||||
# ============================================================
|
||||
def build_lines_from_indices(indices, ocr):
|
||||
if not indices:
|
||||
return []
|
||||
|
||||
items = []
|
||||
for i in indices:
|
||||
b = quad_bbox(ocr[i][0])
|
||||
@@ -526,7 +422,6 @@ def build_lines_from_indices(indices, ocr):
|
||||
txt = normalize_text(" ".join(ocr[i][1] for i, _, _, _ in mem))
|
||||
if txt and not is_noise_text(txt):
|
||||
lines.append(txt)
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
@@ -540,16 +435,10 @@ def build_line_boxes_from_indices(indices, ocr, image_shape=None):
|
||||
txt = normalize_text(ocr[i][1])
|
||||
if is_noise_text(txt):
|
||||
continue
|
||||
|
||||
xc = (b[0] + b[2]) / 2.0
|
||||
yc = (b[1] + b[3]) / 2.0
|
||||
w = max(1.0, b[2] - b[0])
|
||||
h = max(1.0, b[3] - b[1])
|
||||
|
||||
items.append({
|
||||
"i": i, "b": b, "txt": txt,
|
||||
"xc": xc, "yc": yc, "w": w, "h": h
|
||||
})
|
||||
items.append({"i": i, "b": b, "txt": txt, "xc": xc, "yc": yc, "h": h})
|
||||
|
||||
if not items:
|
||||
return []
|
||||
@@ -559,16 +448,8 @@ def build_line_boxes_from_indices(indices, ocr, image_shape=None):
|
||||
gap_x_tol = max(8.0, med_h * 1.25)
|
||||
pad = max(3, int(round(med_h * 0.22)))
|
||||
|
||||
def is_punct_like(t):
|
||||
raw = (t or "").strip()
|
||||
if raw == "":
|
||||
return True
|
||||
punct_ratio = sum(1 for c in raw if not c.isalnum()) / max(1, len(raw))
|
||||
return punct_ratio >= 0.5 or len(raw) <= 2
|
||||
|
||||
items_sorted = sorted(items, key=lambda x: x["yc"])
|
||||
rows = []
|
||||
for it in items_sorted:
|
||||
for it in sorted(items, key=lambda x: x["yc"]):
|
||||
placed = False
|
||||
for r in rows:
|
||||
if abs(it["yc"] - r["yc"]) <= row_tol:
|
||||
@@ -584,16 +465,12 @@ def build_line_boxes_from_indices(indices, ocr, image_shape=None):
|
||||
|
||||
for r in rows:
|
||||
mem = sorted(r["m"], key=lambda z: z["xc"])
|
||||
normal = [t for t in mem if not is_punct_like(t["txt"])]
|
||||
punct = [t for t in mem if is_punct_like(t["txt"])]
|
||||
|
||||
if not normal:
|
||||
normal = mem
|
||||
punct = []
|
||||
if not mem:
|
||||
continue
|
||||
|
||||
chunks = []
|
||||
cur = [normal[0]]
|
||||
for t in normal[1:]:
|
||||
cur = [mem[0]]
|
||||
for t in mem[1:]:
|
||||
prev = cur[-1]["b"]
|
||||
b = t["b"]
|
||||
gap = b[0] - prev[2]
|
||||
@@ -604,106 +481,26 @@ def build_line_boxes_from_indices(indices, ocr, image_shape=None):
|
||||
cur = [t]
|
||||
chunks.append(cur)
|
||||
|
||||
for p in punct:
|
||||
pb = p["b"]
|
||||
pxc, pyc = p["xc"], p["yc"]
|
||||
best_k = -1
|
||||
best_score = 1e18
|
||||
|
||||
for k, ch in enumerate(chunks):
|
||||
ub = boxes_union_xyxy([x["b"] for x in ch])
|
||||
cx = (ub[0] + ub[2]) / 2.0
|
||||
cy = (ub[1] + ub[3]) / 2.0
|
||||
dx = abs(pxc - cx)
|
||||
dy = abs(pyc - cy)
|
||||
score = dx + 1.8 * dy
|
||||
|
||||
near = overlap_or_near(pb, ub, gap=int(med_h * 1.25))
|
||||
if near:
|
||||
score -= med_h * 2.0
|
||||
|
||||
if score < best_score:
|
||||
best_score = score
|
||||
best_k = k
|
||||
|
||||
if best_k >= 0:
|
||||
chunks[best_k].append(p)
|
||||
else:
|
||||
chunks.append([p])
|
||||
|
||||
for ch in chunks:
|
||||
ub = boxes_union_xyxy([x["b"] for x in ch])
|
||||
if ub:
|
||||
x1, y1, x2, y2 = ub
|
||||
pad_x = pad
|
||||
pad_top = int(round(pad * 1.35))
|
||||
pad_bot = int(round(pad * 0.95))
|
||||
out_boxes.append((x1 - pad_x, y1 - pad_top, x2 + pad_x, y2 + pad_bot))
|
||||
|
||||
token_boxes = [it["b"] for it in items]
|
||||
|
||||
def inside(tb, lb):
|
||||
return tb[0] >= lb[0] and tb[1] >= lb[1] and tb[2] <= lb[2] and tb[3] <= lb[3]
|
||||
|
||||
for tb in token_boxes:
|
||||
if not any(inside(tb, lb) for lb in out_boxes):
|
||||
x1, y1, x2, y2 = tb
|
||||
pad_x = pad
|
||||
pad_top = int(round(pad * 1.35))
|
||||
pad_bot = int(round(pad * 0.95))
|
||||
out_boxes.append((x1 - pad_x, y1 - pad_top, x2 + pad_x, y2 + pad_bot))
|
||||
|
||||
merged = []
|
||||
for b in out_boxes:
|
||||
merged_into = False
|
||||
for i, m in enumerate(merged):
|
||||
ix1 = max(b[0], m[0]); iy1 = max(b[1], m[1])
|
||||
ix2 = min(b[2], m[2]); iy2 = min(b[3], m[3])
|
||||
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
|
||||
a1 = max(1, (b[2] - b[0]) * (b[3] - b[1]))
|
||||
a2 = max(1, (m[2] - m[0]) * (m[3] - m[1]))
|
||||
iou = inter / float(a1 + a2 - inter) if (a1 + a2 - inter) > 0 else 0.0
|
||||
if iou > 0.72:
|
||||
merged[i] = boxes_union_xyxy([b, m])
|
||||
merged_into = True
|
||||
break
|
||||
if not merged_into:
|
||||
merged.append(b)
|
||||
|
||||
safe = []
|
||||
for (x1, y1, x2, y2) in merged:
|
||||
w = x2 - x1
|
||||
h = y2 - y1
|
||||
if w < 28:
|
||||
d = (28 - w) // 2 + 2
|
||||
x1 -= d; x2 += d
|
||||
if h < 18:
|
||||
d = (18 - h) // 2 + 2
|
||||
y1 -= d; y2 += d
|
||||
safe.append((x1, y1, x2, y2))
|
||||
merged = safe
|
||||
out_boxes.append((x1 - pad, y1 - int(round(pad*1.35)), x2 + pad, y2 + int(round(pad*0.95))))
|
||||
|
||||
if image_shape is not None:
|
||||
ih, iw = image_shape[:2]
|
||||
clamped = []
|
||||
for b in merged:
|
||||
x1 = max(0, int(b[0]))
|
||||
y1 = max(0, int(b[1]))
|
||||
x2 = min(iw - 1, int(b[2]))
|
||||
y2 = min(ih - 1, int(b[3]))
|
||||
for b in out_boxes:
|
||||
x1 = max(0, int(b[0])); y1 = max(0, int(b[1]))
|
||||
x2 = min(iw - 1, int(b[2])); y2 = min(ih - 1, int(b[3]))
|
||||
if x2 > x1 and y2 > y1:
|
||||
clamped.append((x1, y1, x2, y2))
|
||||
merged = clamped
|
||||
else:
|
||||
merged = [(int(b[0]), int(b[1]), int(b[2]), int(b[3])) for b in merged]
|
||||
out_boxes = clamped
|
||||
|
||||
merged.sort(key=lambda z: (z[1], z[0]))
|
||||
return merged
|
||||
out_boxes.sort(key=lambda z: (z[1], z[0]))
|
||||
return out_boxes
|
||||
|
||||
|
||||
# ============================================================
|
||||
# GROUPING
|
||||
# ============================================================
|
||||
def auto_gap(image_path, base=18, ref_w=750):
|
||||
img = cv2.imread(image_path)
|
||||
if img is None:
|
||||
@@ -750,21 +547,14 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
|
||||
sorted_groups = sorted(
|
||||
groups.values(),
|
||||
key=lambda idxs: (
|
||||
min(boxes[i][1] for i in idxs),
|
||||
min(boxes[i][0] for i in idxs)
|
||||
)
|
||||
key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs))
|
||||
)
|
||||
|
||||
bubbles = {}
|
||||
bubble_boxes = {}
|
||||
bubble_quads = {}
|
||||
bubble_indices = {}
|
||||
|
||||
bubbles, bubble_boxes, bubble_quads, bubble_indices = {}, {}, {}, {}
|
||||
ih, iw = image_shape[:2]
|
||||
|
||||
for bid, idxs in enumerate(sorted_groups, start=1):
|
||||
idxs = sorted(idxs, key=lambda k: boxes[k][1])
|
||||
|
||||
lines = build_lines_from_indices(idxs, ocr)
|
||||
quads = [ocr[k][0] for k in idxs]
|
||||
ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
|
||||
@@ -772,10 +562,8 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
continue
|
||||
|
||||
x1, y1, x2, y2 = ub
|
||||
x1 = max(0, x1 - bbox_padding)
|
||||
y1 = max(0, y1 - bbox_padding)
|
||||
x2 = min(iw - 1, x2 + bbox_padding)
|
||||
y2 = min(ih - 1, y2 + bbox_padding)
|
||||
x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
|
||||
x2 = min(iw - 1, x2 + bbox_padding); y2 = min(ih - 1, y2 + bbox_padding)
|
||||
|
||||
bubbles[bid] = lines
|
||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||
@@ -786,37 +574,63 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
|
||||
|
||||
# ============================================================
|
||||
# DEBUG
|
||||
# DEBUG / EXPORT
|
||||
# ============================================================
|
||||
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, out_path="debug_clusters.png"):
|
||||
def save_debug_clusters(
|
||||
image_path,
|
||||
ocr,
|
||||
bubble_boxes,
|
||||
bubble_indices,
|
||||
clean_lines=None,
|
||||
out_path="debug_clusters.png"
|
||||
):
|
||||
img = cv2.imread(image_path)
|
||||
if img is None:
|
||||
return
|
||||
|
||||
# ── FIX 1: white-fill each OCR quad before drawing its outline ──
|
||||
for bbox, txt, conf in ocr:
|
||||
pts = np.array(bbox, dtype=np.int32)
|
||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
|
||||
cv2.fillPoly(img, [pts], (255, 255, 255)) # ← white background
|
||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1) # ← grey outline
|
||||
|
||||
for bid, bb in bubble_boxes.items():
|
||||
x1, y1, x2, y2 = bb
|
||||
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
||||
cv2.putText(
|
||||
img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2
|
||||
)
|
||||
|
||||
idxs = bubble_indices.get(bid, [])
|
||||
line_boxes = build_line_boxes_from_indices(idxs, ocr, image_shape=img.shape)
|
||||
for lb in line_boxes:
|
||||
lx1, ly1, lx2, ly2 = lb
|
||||
cv2.rectangle(img, (lx1, ly1), (lx2, ly2), (0, 255, 255), 3)
|
||||
# Draw green bubble bounding box + ID label
|
||||
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
||||
cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
||||
|
||||
# ── FIX 2: yellow line-box drawing loop removed entirely ────
|
||||
|
||||
# Draw translated text overlay below each bubble box
|
||||
if clean_lines and bid in clean_lines:
|
||||
text = clean_lines[bid]
|
||||
words = text.split()
|
||||
lines = []
|
||||
current_line = ""
|
||||
|
||||
for word in words:
|
||||
if len(current_line) + len(word) < 25:
|
||||
current_line += word + " "
|
||||
else:
|
||||
lines.append(current_line.strip())
|
||||
current_line = word + " "
|
||||
if current_line:
|
||||
lines.append(current_line.strip())
|
||||
|
||||
y_text = y2 + 18
|
||||
for line in lines:
|
||||
cv2.putText(img, line, (x1, y_text),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 3)
|
||||
cv2.putText(img, line, (x1, y_text),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
|
||||
y_text += 18
|
||||
|
||||
cv2.imwrite(out_path, img)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# EXPORT
|
||||
# ============================================================
|
||||
def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||
items = []
|
||||
for bid, (x1, y1, x2, y2) in bbox_dict.items():
|
||||
@@ -826,8 +640,7 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||
|
||||
items.sort(key=lambda t: t[2])
|
||||
|
||||
rows = []
|
||||
tol = 90
|
||||
rows, tol = [], 90
|
||||
for it in items:
|
||||
placed = False
|
||||
for r in rows:
|
||||
@@ -850,7 +663,6 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
|
||||
|
||||
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
|
||||
out = {}
|
||||
|
||||
for bid, bb in bbox_dict.items():
|
||||
x1, y1, x2, y2 = bb
|
||||
quads = quads_dict.get(bid, [])
|
||||
@@ -870,9 +682,7 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
|
||||
{"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
|
||||
for b in qboxes
|
||||
],
|
||||
"quads": [
|
||||
[[int(p[0]), int(p[1])] for p in q] for q in quads
|
||||
],
|
||||
"quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
|
||||
"text_bbox": xyxy_to_xywh(text_union),
|
||||
"line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
|
||||
"line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
|
||||
@@ -884,10 +694,10 @@ def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_m
|
||||
|
||||
|
||||
# ============================================================
|
||||
# MAIN PIPELINE
|
||||
# PIPELINE
|
||||
# ============================================================
|
||||
def translate_manga_text(
|
||||
image_path,
|
||||
image_path="001-page.png",
|
||||
source_lang="en",
|
||||
target_lang="ca",
|
||||
confidence_threshold=0.12,
|
||||
@@ -898,8 +708,7 @@ def translate_manga_text(
|
||||
export_to_file="output.txt",
|
||||
export_bubbles_to="bubbles.json",
|
||||
reading_mode="ltr",
|
||||
debug=True,
|
||||
use_gpu=False
|
||||
debug=True
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
@@ -908,12 +717,12 @@ def translate_manga_text(
|
||||
|
||||
resolved_gap = auto_gap(image_path) if gap_px == "auto" else float(gap_px)
|
||||
|
||||
print("Loading Hybrid OCR (Paddle + EasyOCR)...")
|
||||
hybrid = HybridOCR(source_lang=source_lang, use_gpu=use_gpu)
|
||||
print("Loading OCR engines...")
|
||||
detector = MacVisionDetector(source_lang=source_lang)
|
||||
|
||||
print("Running OCR...")
|
||||
raw = hybrid.read_full_image(image_path)
|
||||
print(f"Raw detections (merged): {len(raw)}")
|
||||
print("Running detection OCR (Apple Vision)...")
|
||||
raw = detector.read(image_path)
|
||||
print(f"Raw detections: {len(raw)}")
|
||||
|
||||
filtered = []
|
||||
skipped = 0
|
||||
@@ -924,25 +733,18 @@ def translate_manga_text(
|
||||
qb = quad_bbox(bbox)
|
||||
|
||||
if conf < confidence_threshold:
|
||||
skipped += 1
|
||||
continue
|
||||
skipped += 1; continue
|
||||
if len(t) < min_text_length:
|
||||
skipped += 1
|
||||
continue
|
||||
skipped += 1; continue
|
||||
if is_noise_text(t):
|
||||
skipped += 1
|
||||
continue
|
||||
skipped += 1; continue
|
||||
if filter_sound_effects and is_sound_effect(t):
|
||||
skipped += 1
|
||||
continue
|
||||
skipped += 1; continue
|
||||
if is_title_text(t):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
skipped += 1; continue
|
||||
if qb[1] < int(ih * TOP_BAND_RATIO):
|
||||
if conf < 0.70 and len(t) >= 5:
|
||||
skipped += 1
|
||||
continue
|
||||
skipped += 1; continue
|
||||
|
||||
filtered.append((bbox, t, conf))
|
||||
|
||||
@@ -955,75 +757,80 @@ def translate_manga_text(
|
||||
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
||||
)
|
||||
|
||||
if debug:
|
||||
save_debug_clusters(
|
||||
image_path=image_path,
|
||||
ocr=filtered,
|
||||
bubble_boxes=bubble_boxes,
|
||||
bubble_indices=bubble_indices,
|
||||
out_path="debug_clusters.png"
|
||||
)
|
||||
|
||||
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
||||
|
||||
clean_lines = {}
|
||||
clean_lines: Dict[int, str] = {}
|
||||
sources_used: Dict[int, str] = {}
|
||||
|
||||
for bid, lines in bubbles.items():
|
||||
base_txt = normalize_text(" ".join(lines))
|
||||
base_sc = ocr_candidate_score(base_txt)
|
||||
|
||||
txt = base_txt
|
||||
src_used = "vision-base"
|
||||
|
||||
if base_sc < quality_threshold:
|
||||
rr_txt, rr_sc = reread_crop_robust(
|
||||
image,
|
||||
bubble_boxes[bid],
|
||||
hybrid,
|
||||
rr_txt, rr_sc, rr_src = reread_bubble_with_vision(
|
||||
image_bgr=image,
|
||||
bbox_xyxy=bubble_boxes[bid],
|
||||
vision_detector=detector,
|
||||
upscale=3.0,
|
||||
pad=24
|
||||
)
|
||||
if rr_txt and rr_sc > base_sc + 0.06:
|
||||
if rr_txt and rr_sc > base_sc + 0.04:
|
||||
txt = rr_txt
|
||||
else:
|
||||
txt = base_txt
|
||||
else:
|
||||
txt = base_txt
|
||||
src_used = rr_src
|
||||
|
||||
txt = txt.replace(" BOMPORTA", " IMPORTA")
|
||||
txt = txt.replace(" TESTO ", " ESTO ")
|
||||
txt = txt.replace(" MIVERDAD", " MI VERDAD")
|
||||
|
||||
clean_lines[bid] = apply_glossary(normalize_text(txt))
|
||||
sources_used[bid] = src_used
|
||||
|
||||
reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
|
||||
|
||||
if debug:
|
||||
save_debug_clusters(
|
||||
image_path=image_path,
|
||||
ocr=filtered,
|
||||
bubble_boxes=bubble_boxes,
|
||||
bubble_indices=bubble_indices,
|
||||
clean_lines=clean_lines,
|
||||
out_path="debug_clusters.png"
|
||||
)
|
||||
|
||||
divider = "─" * 120
|
||||
out_lines = ["BUBBLE|ORDER|ORIGINAL|TRANSLATED|FLAGS", divider]
|
||||
out_lines = ["BUBBLE|ORDER|OCR_SOURCE|ORIGINAL|TRANSLATED|FLAGS", divider]
|
||||
|
||||
print(divider)
|
||||
print(f"{'BUBBLE':<8} {'ORDER':<6} {'ORIGINAL':<50} {'TRANSLATED':<50} FLAGS")
|
||||
print(f"{'BUBBLE':<8} {'ORDER':<6} {'SOURCE':<12} {'ORIGINAL':<40} {'TRANSLATED':<40} FLAGS")
|
||||
print(divider)
|
||||
|
||||
translated_count = 0
|
||||
for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
|
||||
src = clean_lines[bid].strip()
|
||||
if not src:
|
||||
src_txt = clean_lines[bid].strip()
|
||||
if not src_txt:
|
||||
continue
|
||||
|
||||
flags = []
|
||||
try:
|
||||
tgt = translator.translate(src) or ""
|
||||
tgt = translator.translate(src_txt) or ""
|
||||
except Exception as e:
|
||||
tgt = f"[Translation error: {e}]"
|
||||
flags.append("TRANSLATION_ERROR")
|
||||
|
||||
tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
|
||||
src_u = src.upper()
|
||||
src_u = src_txt.upper()
|
||||
src_engine = sources_used.get(bid, "unknown")
|
||||
|
||||
out_lines.append(
|
||||
f"#{bid}|{reading_map.get(bid,bid)}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
|
||||
f"#{bid}|{reading_map.get(bid,bid)}|{src_engine}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
|
||||
)
|
||||
|
||||
print(
|
||||
f"#{bid:<7} {reading_map.get(bid,bid):<6} "
|
||||
f"{src_u[:50]:<50} {tgt[:50]:<50} {','.join(flags) if flags else '-'}"
|
||||
f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_engine:<12} "
|
||||
f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}"
|
||||
)
|
||||
translated_count += 1
|
||||
|
||||
@@ -1050,22 +857,18 @@ def translate_manga_text(
|
||||
print("Saved: debug_clusters.png")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ENTRYPOINT
|
||||
# ============================================================
|
||||
if __name__ == "__main__":
|
||||
translate_manga_text(
|
||||
image_path="001-page.png",
|
||||
source_lang="it",
|
||||
image_path="003.jpg",
|
||||
source_lang="es",
|
||||
target_lang="ca",
|
||||
confidence_threshold=0.12,
|
||||
min_text_length=1,
|
||||
min_text_length=2,
|
||||
gap_px="auto",
|
||||
filter_sound_effects=True,
|
||||
quality_threshold=0.62,
|
||||
export_to_file="output.txt",
|
||||
export_bubbles_to="bubbles.json",
|
||||
reading_mode="ltr",
|
||||
debug=True,
|
||||
use_gpu=False
|
||||
debug=True
|
||||
)
|
||||
|
||||
79
requirements
Normal file
79
requirements
Normal file
@@ -0,0 +1,79 @@
|
||||
aistudio-sdk==0.3.8
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
anyio==4.13.0
|
||||
bce-python-sdk==0.9.70
|
||||
beautifulsoup4==4.14.3
|
||||
certifi==2026.2.25
|
||||
chardet==7.4.3
|
||||
charset-normalizer==3.4.7
|
||||
click==8.3.2
|
||||
colorlog==6.10.1
|
||||
crc32c==2.8
|
||||
deep-translator==1.11.4
|
||||
easyocr==1.7.2
|
||||
filelock==3.28.0
|
||||
fsspec==2026.3.0
|
||||
future==1.0.0
|
||||
h11==0.16.0
|
||||
hf-xet==1.4.3
|
||||
httpcore==1.0.9
|
||||
httpx==0.28.1
|
||||
huggingface_hub==1.10.2
|
||||
idna==3.11
|
||||
ImageIO==2.37.3
|
||||
imagesize==2.0.0
|
||||
Jinja2==3.1.6
|
||||
lazy-loader==0.5
|
||||
markdown-it-py==4.0.0
|
||||
MarkupSafe==3.0.3
|
||||
mdurl==0.1.2
|
||||
modelscope==1.35.4
|
||||
mpmath==1.3.0
|
||||
networkx==3.6.1
|
||||
ninja==1.13.0
|
||||
numpy==1.26.4
|
||||
opencv-contrib-python==4.10.0.84
|
||||
opencv-python==4.11.0.86
|
||||
opencv-python-headless==4.11.0.86
|
||||
opt-einsum==3.3.0
|
||||
packaging==26.1
|
||||
paddleocr==3.4.1
|
||||
paddlepaddle==3.3.1
|
||||
paddlex==3.4.3
|
||||
pandas==3.0.2
|
||||
pillow==12.2.0
|
||||
prettytable==3.17.0
|
||||
protobuf==7.34.1
|
||||
psutil==7.2.2
|
||||
py-cpuinfo==9.0.0
|
||||
pyclipper==1.4.0
|
||||
pycryptodome==3.23.0
|
||||
pydantic==2.13.1
|
||||
pydantic_core==2.46.1
|
||||
Pygments==2.20.0
|
||||
pypdfium2==5.7.0
|
||||
python-bidi==0.6.7
|
||||
python-dateutil==2.9.0.post0
|
||||
PyYAML==6.0.2
|
||||
requests==2.33.1
|
||||
rich==15.0.0
|
||||
ruamel.yaml==0.19.1
|
||||
safetensors==0.7.0
|
||||
scikit-image==0.26.0
|
||||
scipy==1.17.1
|
||||
shapely==2.1.2
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
soupsieve==2.8.3
|
||||
sympy==1.14.0
|
||||
tifffile==2026.3.3
|
||||
torch==2.11.0
|
||||
torchvision==0.26.0
|
||||
tqdm==4.67.3
|
||||
typer==0.24.1
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
ujson==5.12.0
|
||||
urllib3==2.6.3
|
||||
wcwidth==0.6.0
|
||||
@@ -1,19 +1,12 @@
|
||||
# ─────────────────────────────────────────────
|
||||
# manga-translator + manga-renderer
|
||||
# Python >= 3.9 recommended
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
# Computer vision + image processing
|
||||
opencv-python>=4.8.0
|
||||
numpy>=1.24.0
|
||||
Pillow>=10.0.0
|
||||
|
||||
# OCR engine (manga-translator)
|
||||
manga-ocr>=0.1.8
|
||||
|
||||
# Translation (manga-translator)
|
||||
deep-translator>=1.11.0
|
||||
|
||||
# HTTP / file handling used internally by manga-ocr
|
||||
requests>=2.31.0
|
||||
numpy<2.0
|
||||
opencv-python>=4.8
|
||||
easyocr>=1.7.1
|
||||
deep-translator>=1.11.4
|
||||
manga-ocr>=0.1.14
|
||||
torch
|
||||
torchvision
|
||||
Pillow
|
||||
transformers
|
||||
fugashi
|
||||
unidic-lite
|
||||
|
||||
|
||||
Reference in New Issue
Block a user