Files
manga-translator/manga-translator.py
Guillem Hernandez Sola 512bb32f66 Added all
2026-04-21 23:27:56 +02:00

2120 lines
72 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import json
import cv2
import numpy as np
import warnings
from typing import List, Tuple, Dict, Any, Optional
from deep_translator import GoogleTranslator
# macOS Native Vision imports
import Vision
import Quartz
from Foundation import NSData
warnings.filterwarnings("ignore", category=UserWarning)
# ============================================================
# CONFIG
# ============================================================
# Canonical character-name spellings. apply_glossary() enforces these
# case-insensitively on translated text, matching longest keys first so
# "STARLIGHT ANYA" wins over plain "ANYA".
GLOSSARY = {
    "ANYA": "ANYA",
    "STARLIGHT ANYA": "STARLIGHT ANYA",
    "MR. HENDERSON": "MR. HENDERSON",
    "HENDERSON": "HENDERSON",
    "STELLA STAR": "STELLA STAR",
}
# Onomatopoeia regexes matched by is_sound_effect() against the text
# lowered and stripped to a-z only (so digits/punctuation never block a match).
SOUND_EFFECT_PATTERNS = [
    r"^b+i+p+$", r"^sha+$", r"^ha+$", r"^ah+$",
    r"^ugh+$", r"^bam+$", r"^pow+$", r"^boom+$", r"^bang+$",
    r"^Grr+$", r"^grrp+$", r"^fshoo+$", r"^fwuip+$",
    r"^crash+$", r"^thud+$", r"^zip+$", r"^swoosh+$", r"^chirp+$"
]
# Header/credit lines recognized by is_title_text() (input is lowercased first).
TITLE_PATTERNS = [
    r"^(chapter|episode|vol\.?|volume)\s*\d+$",
    r"^by\s+.+$",
]
# Patterns used by is_noise_text() to reject non-dialogue OCR output:
# pure symbols, BOX#nn debug tags, and NxM dimension strings.
NOISE_PATTERNS = [
    r"^[^a-zA-Z0-9\?!.¡¿]+$",
    r"^BOX[#\s0-9A-Z\-]*$",
    r"^[0-9]{1,3}\s*[Xx]\s*[0-9]{1,3}$",
]
# Fraction of the page height treated as the top band — presumably used
# further down the file for title detection; TODO confirm against caller.
TOP_BAND_RATIO = 0.08
# ============================================================
# HELPERS
# ============================================================
def normalize_text(text: str) -> str:
    """Uppercase *text* and normalize quotes, ellipses, and spacing.

    Smart quotes become ASCII quotes, Unicode ellipsis becomes "...",
    runs of whitespace collapse to one space, and stray spaces around
    punctuation and brackets are removed. None is treated as "".
    """
    t = (text or "").strip().upper()
    # Map typographic characters to their ASCII equivalents in one pass.
    t = t.translate(str.maketrans({
        "\u201c": "\"", "\u201d": "\"",
        "\u2018": "'", "\u2019": "'",
        "\u2026": "...",
    }))
    spacing_rules = (
        (r"\s+", " "),               # collapse whitespace runs
        (r"\s+([,.;:!?])", r"\1"),   # no space before closing punctuation
        (r"([¡¿])\s+", r"\1"),       # no space after inverted punctuation
        (r"\(\s+", "("),             # tighten opening paren
        (r"\s+\)", ")"),             # tighten closing paren
        (r"\.{4,}", "..."),          # long dot runs become an ellipsis
    )
    for pattern, replacement in spacing_rules:
        t = re.sub(pattern, replacement, t)
    return t.strip()
def apply_glossary(text: str) -> str:
    """Enforce canonical GLOSSARY spellings in *text* (case-insensitive).

    Longer keys are applied first so multi-word names are not clobbered
    by their shorter substrings.
    """
    result = text or ""
    for key in sorted(GLOSSARY, key=len, reverse=True):
        pattern = rf"\b{re.escape(key)}\b"
        result = re.sub(pattern, GLOSSARY[key], result, flags=re.IGNORECASE)
    return result
def postprocess_translation_general(text: str) -> str:
    """Normalize a translated string and tame runaway punctuation."""
    cleaned = normalize_text(text)
    cleaned = re.sub(r"\s{2,}", " ", cleaned).strip()
    # "!!!"+ and "???"+ runs shrink to two marks; 4+ dots become "...".
    cleaned = re.sub(r"([!?]){3,}", r"\1\1", cleaned)
    return re.sub(r"\.{4,}", "...", cleaned)
def fix_common_ocr_errors(text: str) -> str:
    """Fix common OCR mistakes in manga text.

    Turns letter "O" into digit "0" when it follows a digit and is not
    followed by a letter, and repairs the frequent |→I and `→' confusions.

    Uses lookaheads instead of consuming the following character: the old
    r'(\d)O(\d)' ate the trailing digit, so alternating runs like "1O2O3"
    were only half-fixed, and r'(\d)O([^a-zA-Z])' could never match a
    trailing "O" at end-of-string.
    """
    result = text
    # "O" sandwiched between digits is a zero ("1O2" -> "102", "1O2O3" -> "10203").
    result = re.sub(r'(\d)O(?=\d)', r'\g<1>0', result)
    # "O" after a digit and not followed by a letter (incl. end of string).
    result = re.sub(r'(\d)O(?![a-zA-Z])', r'\g<1>0', result)
    # Fix common character confusions.
    result = result.replace('|', 'I')
    result = result.replace('`', "'")
    return result
def is_sound_effect(text: str) -> bool:
    """True when *text* reduces to a known onomatopoeia pattern."""
    # Strip to lowercase letters only, so "B-I-P!!" still matches "bip".
    letters_only = re.sub(r"[^a-z]", "", (text or "").strip().lower())
    for pattern in SOUND_EFFECT_PATTERNS:
        if re.fullmatch(pattern, letters_only, re.IGNORECASE):
            return True
    return False
def is_title_text(text: str) -> bool:
    """True for chapter/volume headers or "by <author>" credit lines."""
    lowered = (text or "").strip().lower()
    for pattern in TITLE_PATTERNS:
        if re.fullmatch(pattern, lowered, re.IGNORECASE):
            return True
    return False
def looks_like_box_tag(t: str) -> bool:
    """True when *t* collapses to a BOX#nn-style debug tag.

    Tolerates the usual OCR confusions (O/0/D, leading B/E/F) after
    stripping everything that is not A-Z, 0-9 or '#'.
    """
    compact = re.sub(r"[^A-Z0-9#]", "", (t or "").upper())
    tag_patterns = (
        r"[BEF]?[O0D]X#?\d{0,3}",
        r"B[O0D]X\d{0,3}",
    )
    return any(re.fullmatch(p, compact) is not None for p in tag_patterns)
def is_noise_text(text: str) -> bool:
    """Classify an OCR string as probable noise rather than dialogue."""
    t = (text or "").strip()
    # Pure "?!." runs and single letters are legitimate dialogue — keep them.
    if re.fullmatch(r"[\?\!\.]+", t):
        return False
    if len(t) == 1 and t.isalpha():
        return False
    # Known noise shapes: symbol soup, BOX tags, NxM dimension strings.
    for pattern in NOISE_PATTERNS:
        if re.fullmatch(pattern, t):
            return True
    if looks_like_box_tag(t):
        return True
    # Very short fragments with no uppercase/digit/punctuation signal.
    if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
        return True
    # Short strings dominated by symbols are almost certainly noise.
    symbol_count = sum(1 for c in t if not c.isalnum() and not c.isspace())
    if len(t) <= 6 and symbol_count / max(1, len(t)) > 0.60:
        return True
    return False
def quad_bbox(quad):
    """Axis-aligned bounding box (x1, y1, x2, y2) of a list of points."""
    min_x = min_y = float("inf")
    max_x = max_y = float("-inf")
    for px, py in quad:
        min_x = min(min_x, px)
        min_y = min(min_y, py)
        max_x = max(max_x, px)
        max_y = max(max_y, py)
    return (int(min_x), int(min_y), int(max_x), int(max_y))
def quad_center(quad):
    """Center (cx, cy) of the quad's integer bounding box, as floats."""
    xs = [p[0] for p in quad]
    ys = [p[1] for p in quad]
    # Truncate bounds to ints first, matching quad_bbox()'s rounding.
    left, right = int(min(xs)), int(max(xs))
    top, bottom = int(min(ys)), int(max(ys))
    return ((left + right) / 2.0, (top + bottom) / 2.0)
def boxes_union_xyxy(boxes):
    """Smallest xyxy box covering all non-None boxes; None if none remain."""
    kept = [b for b in boxes if b is not None]
    if not kept:
        return None
    x1s, y1s, x2s, y2s = zip(*kept)
    return (int(min(x1s)), int(min(y1s)), int(max(x2s)), int(max(y2s)))
def bbox_area_xyxy(b):
    """Area of an xyxy box; 0 for None or degenerate/inverted boxes."""
    if b is None:
        return 0
    width = b[2] - b[0]
    height = b[3] - b[1]
    if width < 0:
        width = 0
    if height < 0:
        height = 0
    return int(width * height)
def xyxy_to_xywh(b):
    """Convert an (x1, y1, x2, y2) box to an {x, y, w, h} dict (None passes through)."""
    if b is None:
        return None
    left, top, right, bottom = b
    width = right - left
    height = bottom - top
    # Clamp inverted boxes to zero size rather than emitting negatives.
    return {
        "x": int(left),
        "y": int(top),
        "w": int(width) if width > 0 else 0,
        "h": int(height) if height > 0 else 0,
    }
def overlap_or_near(a, b, gap=0):
    """True if xyxy boxes *a* and *b* overlap or sit within *gap* px on both axes."""
    # Separation on each axis: 0 when the projections overlap.
    horizontal_gap = max(0, max(a[0], b[0]) - min(a[2], b[2]))
    vertical_gap = max(0, max(a[1], b[1]) - min(a[3], b[3]))
    return horizontal_gap <= gap and vertical_gap <= gap
def ocr_candidate_score(text: str) -> float:
    """Heuristic plausibility score in [0, 1] for an OCR text candidate.

    Rewards letters, spaces, and common punctuation; penalizes unusual
    symbols, lone capitals, multi-digit runs, and repeated character pairs.
    """
    if not text:
        return 0.0
    t = text.strip()
    n = len(t)
    if n == 0:
        return 0.0
    allowed_punct = ".,!?'-:;()[]\"¡¿"
    alpha = sum(1 for c in t if c.isalpha()) / n
    spaces = sum(1 for c in t if c.isspace()) / n
    punct_ok = sum(1 for c in t if c in allowed_punct) / n
    bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"¡¿]", t)) / n
    penalty = 0.0
    penalty_rules = (
        (r"\b[A-Z]\b", 0.05),   # isolated capital letter
        (r"[0-9]{2,}", 0.08),   # multi-digit run
        (r"(..)\1\1", 0.08),    # a 2-char sequence repeated 3x
    )
    for pattern, cost in penalty_rules:
        if re.search(pattern, t):
            penalty += cost
    score = 0.62 * alpha + 0.10 * spaces + 0.20 * punct_ok - 0.45 * bad - penalty
    return min(1.0, max(0.0, score))
# ============================================================
# ENHANCED IMAGE PREPROCESSING
# ============================================================
def enhance_image_for_ocr(image_bgr, upscale_factor=2.5):
    """Enhanced preprocessing for better OCR results.

    Pipeline: upscale -> grayscale -> denoise -> CLAHE contrast ->
    sharpen -> adaptive binarize -> morphological close.

    Args:
        image_bgr: BGR page image (numpy array).
        upscale_factor: linear scale applied before processing; callers
            that later map coordinates back must use the same factor.

    Returns:
        A 3-channel BGR image (binarized content) for the Vision API.
    """
    # Upscale first so small manga lettering has enough pixels to OCR.
    h, w = image_bgr.shape[:2]
    new_w = int(w * upscale_factor)
    new_h = int(h * upscale_factor)
    upscaled = cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    # Convert to grayscale
    gray = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY)
    # Denoise (non-local means; strength h=10)
    denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21)
    # Increase contrast with CLAHE (local histogram equalization)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(denoised)
    # Sharpen with a standard 3x3 kernel (center 9, neighbors -1)
    kernel_sharpen = np.array([[-1, -1, -1],
                               [-1, 9, -1],
                               [-1, -1, -1]])
    sharpened = cv2.filter2D(enhanced, -1, kernel_sharpen)
    # Adaptive thresholding for clean text
    binary = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    # Morphological close to fill small gaps inside strokes
    kernel = np.ones((2, 2), np.uint8)
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    # Convert back to BGR because the Vision path expects 3 channels.
    return cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
def detect_small_text_regions(image_bgr, existing_quads):
    """Detect small text regions that might have been missed.

    Masks out areas already covered by *existing_quads*, Otsu-binarizes
    the rest, and returns bounding boxes of contours whose size/aspect
    look text-like.

    Returns:
        List of (x1, y1, x2, y2) candidate boxes in image coordinates.
    """
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    # Create mask of existing detections
    mask = np.zeros(gray.shape, dtype=np.uint8)
    for quad in existing_quads:
        pts = np.array(quad, dtype=np.int32)
        cv2.fillPoly(mask, [pts], 255)
    # Invert mask to find undetected regions
    mask_inv = cv2.bitwise_not(mask)
    # Find text-like regions (dark ink becomes white after INV + Otsu)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    binary_masked = cv2.bitwise_and(binary, binary, mask=mask_inv)
    # Find contours in undetected regions
    contours, _ = cv2.findContours(binary_masked, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Filter for text-like contours
    text_regions = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h
        # Size window 50..5000 px² and aspect ratio 0.1..10 — loose
        # bounds chosen to admit both single glyphs and short words.
        if 50 < area < 5000 and 0.1 < h / max(w, 1) < 10:
            text_regions.append((x, y, x + w, y + h))
    return text_regions
# ============================================================
# SPEECH BUBBLE DETECTION
# ============================================================
def detect_speech_bubbles(image_bgr: np.ndarray) -> List[np.ndarray]:
    """Detect speech bubble contours for box splitting.

    Uses inverted adaptive thresholding plus external contours; anything
    with contour area above 500 px² is treated as a candidate bubble.

    Returns:
        List of OpenCV contours (no shape filtering beyond area, so
        non-bubble dark regions can also appear here).
    """
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    # Apply adaptive thresholding (inverted: dark outlines become white)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    # Find contours
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Filter contours by area
    bubble_contours = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if area > 500:  # Minimum bubble area
            bubble_contours.append(contour)
    return bubble_contours
def is_quad_in_bubble(quad_bbox_xyxy: Tuple[int, int, int, int],
                      bubble_contour: np.ndarray,
                      tolerance: int = 5) -> bool:
    """Check if a quad (text box) is inside a speech bubble.

    Only the box CENTER is tested against the contour, with *tolerance*
    pixels of slack outside the edge (pointPolygonTest returns a signed
    distance: negative means outside).
    """
    x1, y1, x2, y2 = quad_bbox_xyxy
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2
    # Check if center point is inside contour (measureDist=False would
    # return only the sign; True gives the distance used for tolerance).
    result = cv2.pointPolygonTest(bubble_contour, (float(cx), float(cy)), False)
    return result >= -tolerance
def split_indices_by_bubble(indices: List[int],
                            ocr: List[Tuple],
                            bubble_contours: List[np.ndarray]) -> List[List[int]]:
    """Split OCR-item indices into groups based on bubble membership.

    Each index is assigned to the FIRST bubble contour containing its
    quad center; indices matching no bubble are collected into a single
    trailing "outside" group.

    Args:
        indices: positions into *ocr* to partition.
        ocr: list of (quad, text, confidence) tuples.
        bubble_contours: contours from detect_speech_bubbles().

    Returns:
        List of index groups (bubble groups first, then the outside group).
    """
    if not indices:
        return []
    # Group indices by which bubble they belong to
    bubble_groups = {}
    outside_group = []
    for idx in indices:
        bbox = quad_bbox(ocr[idx][0])
        found_bubble = False
        for bubble_idx, bubble in enumerate(bubble_contours):
            if is_quad_in_bubble(bbox, bubble):
                if bubble_idx not in bubble_groups:
                    bubble_groups[bubble_idx] = []
                bubble_groups[bubble_idx].append(idx)
                found_bubble = True
                break
        if not found_bubble:
            outside_group.append(idx)
    # Create result list (insertion order of first membership)
    result = list(bubble_groups.values())
    # Add outside quads as one separate group
    if outside_group:
        result.append(outside_group)
    return result
def check_vertical_alignment_split(indices: List[int],
                                   ocr: List[Tuple],
                                   threshold: int = 20) -> List[List[int]]:
    """Partition *indices* into runs separated by a vertical gap > *threshold* px.

    Items are ordered by the top edge of their quads; a new group starts
    whenever the gap between consecutive boxes exceeds the threshold.
    """
    if len(indices) <= 1:
        return [indices]
    ordered = sorted(
        ((idx, quad_bbox(ocr[idx][0])) for idx in indices),
        key=lambda item: item[1][1],
    )
    groups: List[List[int]] = []
    current = [ordered[0][0]]
    for (_, upper_box), (idx, lower_box) in zip(ordered, ordered[1:]):
        vertical_gap = lower_box[1] - upper_box[3]
        if vertical_gap > threshold:
            groups.append(current)
            current = [idx]
        else:
            current.append(idx)
    if current:
        groups.append(current)
    return groups
# ============================================================
# BOX FIXING FUNCTIONS
# ============================================================
def apply_page_specific_fixes(bubbles: Dict[int, List[str]],
                              bubble_boxes: Dict[int, Tuple],
                              bubble_quads: Dict[int, List],
                              bubble_indices: Dict[int, List[int]],
                              ocr: List[Tuple],
                              image_bgr: np.ndarray,
                              page_identifier: str) -> Tuple[Dict, Dict, Dict, Dict]:
    """Apply page-specific fixes to bubble detection issues.

    HACK: hard-coded merge/split corrections keyed on substrings of
    *page_identifier* ("15", "16", "19") and on fixed bubble ids — these
    only make sense for one specific chapter's pages. All four dicts are
    mutated in place and also returned.

    NOTE(review): relies on build_lines_from_indices(), defined elsewhere
    in this file. Matching is substring-based, so e.g. page "115" would
    also trigger the "15" fixes — TODO confirm intended.
    """
    # Detect speech bubbles for splitting logic
    bubble_contours = detect_speech_bubbles(image_bgr)
    fixes_applied = []
    # PAGE 15 FIXES
    if "15" in page_identifier:
        # Fix: Merge Box 12 and Box 16 into one box
        if 12 in bubbles and 16 in bubbles:
            # Merge indices (sorted, de-duplicated)
            merged_indices = sorted(set(bubble_indices[12] + bubble_indices[16]))
            # Rebuild merged box under id 12
            bubbles[12] = build_lines_from_indices(merged_indices, ocr)
            bubble_boxes[12] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in merged_indices])
            bubble_quads[12] = [ocr[i][0] for i in merged_indices]
            bubble_indices[12] = merged_indices
            # Remove box 16
            del bubbles[16]
            del bubble_boxes[16]
            del bubble_quads[16]
            del bubble_indices[16]
            fixes_applied.append("Page 15: Merged BOX#12 and BOX#16")
    # PAGE 16 FIXES
    if "16" in page_identifier:
        # New boxes get ids above the current maximum (100 if empty).
        next_bid = max(bubbles.keys()) + 1 if bubbles else 100
        # Fix Box 15: Split quads outside bubble
        if 15 in bubbles:
            split_groups = split_indices_by_bubble(bubble_indices[15], ocr, bubble_contours)
            if len(split_groups) > 1:
                # Keep main group in BOX#15
                bubbles[15] = build_lines_from_indices(split_groups[0], ocr)
                bubble_boxes[15] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
                bubble_quads[15] = [ocr[i][0] for i in split_groups[0]]
                bubble_indices[15] = split_groups[0]
                # Create new boxes for other groups
                for group in split_groups[1:]:
                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
                    bubble_indices[next_bid] = group
                    next_bid += 1
                fixes_applied.append(f"Page 16: Split BOX#15 into {len(split_groups)} parts")
        # Fix Box 8: Split bubble vs outside quads
        if 8 in bubbles:
            split_groups = split_indices_by_bubble(bubble_indices[8], ocr, bubble_contours)
            if len(split_groups) > 1:
                bubbles[8] = build_lines_from_indices(split_groups[0], ocr)
                bubble_boxes[8] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
                bubble_quads[8] = [ocr[i][0] for i in split_groups[0]]
                bubble_indices[8] = split_groups[0]
                for group in split_groups[1:]:
                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
                    bubble_indices[next_bid] = group
                    next_bid += 1
                fixes_applied.append(f"Page 16: Split BOX#8 into {len(split_groups)} parts")
        # Fix Box 18: Split into 2 separate boxes
        if 18 in bubbles:
            # Try bubble-based split first
            split_groups = split_indices_by_bubble(bubble_indices[18], ocr, bubble_contours)
            if len(split_groups) == 1:
                # If bubble detection doesn't work, try vertical alignment
                split_groups = check_vertical_alignment_split(bubble_indices[18], ocr, threshold=30)
            if len(split_groups) > 1:
                bubbles[18] = build_lines_from_indices(split_groups[0], ocr)
                bubble_boxes[18] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
                bubble_quads[18] = [ocr[i][0] for i in split_groups[0]]
                bubble_indices[18] = split_groups[0]
                for group in split_groups[1:]:
                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
                    bubble_indices[next_bid] = group
                    next_bid += 1
                fixes_applied.append(f"Page 16: Split BOX#18 into {len(split_groups)} parts")
    # PAGE 19 FIXES
    if "19" in page_identifier:
        next_bid = max(bubbles.keys()) + 1 if bubbles else 100
        # Fix Box 5: Split into 4 different boxes
        if 5 in bubbles:
            # First split by bubble
            split_groups = split_indices_by_bubble(bubble_indices[5], ocr, bubble_contours)
            # Then split each group by vertical alignment
            final_groups = []
            for group in split_groups:
                vertical_splits = check_vertical_alignment_split(group, ocr, threshold=25)
                final_groups.extend(vertical_splits)
            if len(final_groups) > 1:
                bubbles[5] = build_lines_from_indices(final_groups[0], ocr)
                bubble_boxes[5] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in final_groups[0]])
                bubble_quads[5] = [ocr[i][0] for i in final_groups[0]]
                bubble_indices[5] = final_groups[0]
                for group in final_groups[1:]:
                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
                    bubble_indices[next_bid] = group
                    next_bid += 1
                fixes_applied.append(f"Page 19: Split BOX#5 into {len(final_groups)} parts")
        # Fix Box 11: Split into 2 boxes
        if 11 in bubbles:
            split_groups = split_indices_by_bubble(bubble_indices[11], ocr, bubble_contours)
            if len(split_groups) > 1:
                bubbles[11] = build_lines_from_indices(split_groups[0], ocr)
                bubble_boxes[11] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
                bubble_quads[11] = [ocr[i][0] for i in split_groups[0]]
                bubble_indices[11] = split_groups[0]
                for group in split_groups[1:]:
                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
                    bubble_indices[next_bid] = group
                    next_bid += 1
                fixes_applied.append(f"Page 19: Split BOX#11 into {len(split_groups)} parts")
    # Print fixes applied
    if fixes_applied:
        print(f"\n🔧 Page-specific fixes applied:")
        for fix in fixes_applied:
            print(f"{fix}")
    return bubbles, bubble_boxes, bubble_quads, bubble_indices
# ============================================================
# ENHANCED OCR ENGINE
# ============================================================
class ImprovedMacVisionDetector:
    """Multi-pass OCR engine built on Apple's Vision framework (macOS only).

    Runs Vision text recognition over several preprocessed variants of the
    same page image, then merges the detections by IoU clustering and
    confidence-weighted text voting.
    """

    def __init__(self, source_lang="en"):
        """Map a user language code/name onto an Apple Vision locale.

        Unknown codes fall back to "en-US".
        """
        lang_key = source_lang.lower().strip()
        lang_map = {
            "en": "en-US", "english": "en-US",
            "es": "es-ES", "spanish": "es-ES",
            "ca": "ca-ES", "catalan": "ca-ES",
            "fr": "fr-FR", "french": "fr-FR",
            "ja": "ja-JP", "japanese": "ja-JP",
            "it": "it-IT", "italian": "it-IT",
            "de": "de-DE", "german": "de-DE",
            "ko": "ko-KR", "korean": "ko-KR",
            "zh": "zh-Hans", "chinese": "zh-Hans"
        }
        apple_lang = lang_map.get(lang_key, "en-US")
        # Languages handed to VNRecognizeTextRequest, in priority order.
        self.langs = [apple_lang]
        print(f"⚡ Using Enhanced Apple Vision OCR (Language: {self.langs[0]})")

    def preprocess_variants(self, image_bgr):
        """Generate multiple preprocessing variants.

        Every variant is upscaled by exactly 2.5x — merge_multi_pass_results()
        hard-codes the same factor to map coordinates back.

        Returns:
            List of (variant_name, image) pairs.
        """
        variants = []
        # Variant 1: Enhanced standard (denoise + CLAHE + sharpen + binarize)
        variants.append(("enhanced", enhance_image_for_ocr(image_bgr, upscale_factor=2.5)))
        # Variant 2: High contrast (global Otsu binarization)
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        _, high_contrast = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        upscaled_hc = cv2.resize(high_contrast, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
        variants.append(("high_contrast", cv2.cvtColor(upscaled_hc, cv2.COLOR_GRAY2BGR)))
        # Variant 3: Bilateral filter (denoises while preserving edges)
        bilateral = cv2.bilateralFilter(image_bgr, 9, 75, 75)
        upscaled_bil = cv2.resize(bilateral, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
        variants.append(("bilateral", upscaled_bil))
        # Variant 4: Inverted (for white text on black backgrounds)
        inverted = cv2.bitwise_not(image_bgr)
        upscaled_inv = cv2.resize(inverted, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
        variants.append(("inverted", upscaled_inv))
        # Variant 5: Original, only upscaled
        upscaled_orig = cv2.resize(image_bgr, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
        variants.append(("original", upscaled_orig))
        return variants

    def run_vision_ocr(self, image_bgr):
        """Run Vision OCR on a single image.

        Returns:
            List of (quad, text, confidence) where quad is four [x, y]
            points in THIS image's pixel space with a top-left origin.
        """
        if image_bgr is None or image_bgr.size == 0:
            return []
        ih, iw = image_bgr.shape[:2]
        success, buffer = cv2.imencode('.png', image_bgr)
        if not success:
            return []
        ns_data = NSData.dataWithBytes_length_(buffer.tobytes(), len(buffer.tobytes()))
        handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(ns_data, None)
        results = []

        def completion_handler(request, error):
            # Errors are silently dropped here (MacVisionDetector logs them).
            if error:
                return
            for observation in request.results():
                candidate = observation.topCandidates_(1)[0]
                text = candidate.string()
                confidence = candidate.confidence()
                # Vision gives a normalized box with a BOTTOM-LEFT origin;
                # convert to top-left pixel coordinates.
                bbox = observation.boundingBox()
                x = bbox.origin.x * iw
                y_bottom_left = bbox.origin.y * ih
                w = bbox.size.width * iw
                h = bbox.size.height * ih
                y = ih - y_bottom_left - h
                quad = [
                    [int(x), int(y)],
                    [int(x + w), int(y)],
                    [int(x + w), int(y + h)],
                    [int(x), int(y + h)]
                ]
                results.append((quad, text, confidence))

        request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
        request.setUsesLanguageCorrection_(False)  # Disable for manga
        request.setRecognitionLanguages_(self.langs)
        request.setAutomaticallyDetectsLanguage_(True)
        # performRequests_error_ is synchronous, so `results` is populated
        # by the time it returns.
        handler.performRequests_error_([request], None)
        return results

    def merge_multi_pass_results(self, all_results, original_shape):
        """Merge results from multiple preprocessing passes.

        Scales every quad back by the fixed 2.5x upscale, greedily clusters
        detections by IoU > 0.5 against a cluster anchor, then picks the
        highest-confidence text per cluster (overridden by a
        confidence-weighted vote when variants agree on different text).

        NOTE(review): *original_shape* is accepted but never used here.
        """
        if not all_results:
            return []
        # Scale factor to normalize coordinates back to the original image;
        # must match the upscales in preprocess_variants().
        scale_factor = 2.5
        # Normalize all quads to original image coordinates
        normalized_results = []
        for variant_name, results in all_results:
            for quad, text, conf in results:
                # Scale quad back to original size
                scaled_quad = [[int(p[0] / scale_factor), int(p[1] / scale_factor)] for p in quad]
                normalized_results.append((scaled_quad, text, conf, variant_name))

        # Group similar detections (same location, similar text)
        def quads_overlap(q1, q2, threshold=0.5):
            # IoU of the two quads' bounding boxes.
            b1 = quad_bbox(q1)
            b2 = quad_bbox(q2)
            x1 = max(b1[0], b2[0])
            y1 = max(b1[1], b2[1])
            x2 = min(b1[2], b2[2])
            y2 = min(b1[3], b2[3])
            if x2 < x1 or y2 < y1:
                return False
            intersection = (x2 - x1) * (y2 - y1)
            area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
            area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
            union = area1 + area2 - intersection
            iou = intersection / max(union, 1)
            return iou > threshold

        # Greedy O(n^2) clustering, anchored on each first-unused detection.
        clusters = []
        used = set()
        for i, (quad1, text1, conf1, var1) in enumerate(normalized_results):
            if i in used:
                continue
            cluster = [(quad1, text1, conf1, var1)]
            used.add(i)
            for j, (quad2, text2, conf2, var2) in enumerate(normalized_results):
                if j in used or i == j:
                    continue
                if quads_overlap(quad1, quad2, threshold=0.5):
                    cluster.append((quad2, text2, conf2, var2))
                    used.add(j)
            clusters.append(cluster)
        # Vote on best result per cluster
        final_results = []
        for cluster in clusters:
            # Sort by confidence (descending)
            cluster.sort(key=lambda x: x[2], reverse=True)
            # Take highest confidence result as the baseline
            best_quad, best_text, best_conf, best_var = cluster[0]
            # If multiple variants agree on a normalized text, their
            # confidences accumulate as votes.
            text_votes = {}
            for _, text, conf, _ in cluster:
                normalized = normalize_text(text)
                if normalized:
                    text_votes[normalized] = text_votes.get(normalized, 0) + conf
            if text_votes:
                best_voted_text = max(text_votes.items(), key=lambda x: x[1])[0]
                if best_voted_text != normalize_text(best_text):
                    # Use the voted text when it has more support
                    best_text = best_voted_text
            # Apply OCR error fixes
            best_text = fix_common_ocr_errors(best_text)
            final_results.append((best_quad, best_text, best_conf))
        return final_results

    def read(self, image_path_or_array):
        """Enhanced multi-pass OCR.

        Accepts a file path or a BGR numpy array; returns merged
        (quad, text, confidence) detections in original image coordinates.
        """
        if isinstance(image_path_or_array, str):
            img = cv2.imread(image_path_or_array)
        else:
            img = image_path_or_array
        if img is None or img.size == 0:
            return []
        original_shape = img.shape
        # Generate preprocessing variants
        variants = self.preprocess_variants(img)
        # Run OCR on each variant; keep only passes that found something
        all_results = []
        for variant_name, variant_img in variants:
            results = self.run_vision_ocr(variant_img)
            if results:
                all_results.append((variant_name, results))
        # Merge and vote on results
        final_results = self.merge_multi_pass_results(all_results, original_shape)
        return final_results
# ============================================================
# ORIGINAL OCR ENGINE (Fallback)
# ============================================================
class MacVisionDetector:
    """Single-pass Apple Vision OCR engine (fallback for the enhanced one).

    Unlike ImprovedMacVisionDetector this runs one pass on the image as-is,
    keeps Vision's language correction ON, and logs Vision errors.
    """

    def __init__(self, source_lang="en"):
        """Map a user language code/name onto an Apple Vision locale.

        Unknown codes fall back to "en-US".
        """
        lang_key = source_lang.lower().strip()
        lang_map = {
            "en": "en-US", "english": "en-US",
            "es": "es-ES", "spanish": "es-ES",
            "ca": "ca-ES", "catalan": "ca-ES",
            "fr": "fr-FR", "french": "fr-FR",
            "ja": "ja-JP", "japanese": "ja-JP",
            "it": "it-IT", "italian": "it-IT",
            "de": "de-DE", "german": "de-DE",
            "ko": "ko-KR", "korean": "ko-KR",
            "zh": "zh-Hans", "chinese": "zh-Hans"
        }
        apple_lang = lang_map.get(lang_key, "en-US")
        self.langs = [apple_lang]
        print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")

    def read(self, image_path_or_array):
        """OCR an image (path or BGR array).

        Returns:
            List of (quad, text, confidence) with quad as four [x, y]
            points in pixel coordinates, top-left origin.
        """
        if isinstance(image_path_or_array, str):
            img = cv2.imread(image_path_or_array)
        else:
            img = image_path_or_array
        if img is None or img.size == 0:
            return []
        ih, iw = img.shape[:2]
        success, buffer = cv2.imencode('.png', img)
        if not success:
            return []
        ns_data = NSData.dataWithBytes_length_(buffer.tobytes(), len(buffer.tobytes()))
        handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(ns_data, None)
        results = []

        def completion_handler(request, error):
            if error:
                print(f"Vision API Error: {error}")
                return
            for observation in request.results():
                candidate = observation.topCandidates_(1)[0]
                text = candidate.string()
                confidence = candidate.confidence()
                # Vision's normalized, bottom-left-origin box -> top-left pixels.
                bbox = observation.boundingBox()
                x = bbox.origin.x * iw
                y_bottom_left = bbox.origin.y * ih
                w = bbox.size.width * iw
                h = bbox.size.height * ih
                y = ih - y_bottom_left - h
                quad = [
                    [int(x), int(y)],
                    [int(x + w), int(y)],
                    [int(x + w), int(y + h)],
                    [int(x), int(y + h)]
                ]
                results.append((quad, text, confidence))

        request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
        request.setUsesLanguageCorrection_(True)
        request.setRecognitionLanguages_(self.langs)
        # Synchronous call: `results` is filled before this returns.
        handler.performRequests_error_([request], None)
        return results
# ============================================================
# SPLITTERS + QUAD NORMALIZATION
# ============================================================
def estimate_char_capacity_width(text_len, med_h, k=0.72):
    """Estimated pixel width for *text_len* glyphs of height *med_h* (floor 18 px)."""
    estimated = text_len * med_h * k
    return estimated if estimated > 18.0 else 18.0
def shrink_ocr_quad_to_text(quad, text, med_h):
    """Narrow a quad whose width far exceeds what its text could occupy.

    The cap is the larger of 1.35x the estimated glyph width and 1.15x the
    quad height; a too-wide quad is re-centered at that cap. Otherwise the
    quad is returned unchanged.
    """
    x1, y1, x2, y2 = quad_bbox(quad)
    w = max(1, x2 - x1)
    h = max(1, y2 - y1)
    glyph_count = max(1, len((text or "").strip().replace(" ", "")))
    expected_w = estimate_char_capacity_width(glyph_count, med_h, k=0.62)
    width_cap = max(expected_w * 1.35, h * 1.15)
    if w <= width_cap:
        return quad
    center_x = (x1 + x2) / 2.0
    new_w = int(round(width_cap))
    left = int(round(center_x - new_w / 2))
    right = int(round(center_x + new_w / 2))
    return [[left, y1], [right, y1], [right, y2], [left, y2]]
def normalize_ocr_quads(filtered_ocr):
    """Shrink over-wide OCR quads using the page's median line height."""
    if not filtered_ocr:
        return filtered_ocr
    heights = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr]
    med_h = float(np.median(heights)) if heights else 14.0
    return [
        (shrink_ocr_quad_to_text(quad, text, med_h), text, conf)
        for quad, text, conf in filtered_ocr
    ]
def split_abnormal_bridge_quads(image_bgr, filtered_ocr):
    """Split OCR quads that abnormally bridge two separate text blocks.

    A quad wider than 11x the page's median line height (and containing a
    space, length >= 14) is inspected: if the ink column-projection has a
    deep valley in its middle 64%, the quad and its text are split there,
    snapping the text split to the nearest space.

    Returns:
        (new_ocr_list, number_of_splits_performed)
    """
    if not filtered_ocr:
        return filtered_ocr, 0
    hs = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr]
    med_h = float(np.median(hs)) if hs else 14.0
    out = []
    splits = 0
    for quad, text, conf in filtered_ocr:
        x1, y1, x2, y2 = quad_bbox(quad)
        w = max(1, x2 - x1)
        h = max(1, y2 - y1)
        # Only suspiciously wide, space-containing, long texts qualify.
        if w > med_h * 11.0 and " " in text and len(text) >= 14:
            roi = image_bgr[max(0, y1):min(image_bgr.shape[0], y2), max(0, x1):min(image_bgr.shape[1], x2)]
            if roi.size > 0:
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                # Inverted Otsu: ink becomes white, so column sums measure ink.
                _, inv = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
                proj = np.sum(inv, axis=0)
                # Search the middle 18%..82% of the quad for a valley.
                s = int(w * 0.18)
                e = int(w * 0.82)
                if e > s:
                    segment = proj[s:e]
                    valley_rel = int(np.argmin(segment))
                    valley_x = s + valley_rel
                    low = float(segment[valley_rel])
                    meanv = float(np.mean(segment))
                    # Valley must be clearly lighter than the segment mean.
                    if low < meanv * 0.52:
                        split_x = x1 + valley_x
                        # Map the pixel split to a character index, then snap
                        # to the nearest space in the text.
                        char_w = w / max(1, len(text))
                        split_idx = int((split_x - x1) / max(1e-6, char_w))
                        spaces = [i for i, c in enumerate(text) if c == " "]
                        if spaces:
                            split_idx = min(spaces, key=lambda i: abs(i - split_idx))
                        left_t = text[:split_idx].strip()
                        right_t = text[split_idx:].strip()
                        if left_t and right_t:
                            ql = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
                            qr = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
                            out.append((ql, left_t, conf))
                            out.append((qr, right_t, conf))
                            splits += 1
                            continue
        out.append((quad, text, conf))
    return out, splits
def split_wide_ocr_items(image_bgr, filtered_ocr):
    """Split wide OCR items at the largest whitespace gap between words.

    For each quad wider than 2.5x its height (text length > 5 with a
    space), scans the ink column-projection across the middle 60% of the
    quad for the longest low-ink run; if that run is wide enough, the
    quad and text are split there (text split snaps to a nearby space).

    Returns:
        (new_ocr_list, number_of_splits_performed)
    """
    new_filtered = []
    splits_made = 0
    for item in filtered_ocr:
        quad, text, conf = item
        x1, y1, x2, y2 = quad_bbox(quad)
        w = x2 - x1
        h = max(1, y2 - y1)
        if w > h * 2.5 and len(text) > 5 and ' ' in text:
            # Pad the ROI vertically so ascenders/descenders are included.
            pad = 2
            roi_y1 = max(0, y1 - pad)
            roi_y2 = min(image_bgr.shape[0], y2 + pad)
            roi_x1 = max(0, x1)
            roi_x2 = min(image_bgr.shape[1], x2)
            roi = image_bgr[roi_y1:roi_y2, roi_x1:roi_x2]
            if roi.size > 0:
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                # Inverted Otsu: ink -> white, so column sums measure ink.
                _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
                proj = np.sum(thresh, axis=0)
                # Only consider splits in the middle 20%..80% of the quad.
                start_x = int(w * 0.20)
                end_x = int(w * 0.80)
                if start_x < end_x:
                    char_w = w / max(1, len(text))
                    # A gap must span ~2.5 chars or 0.75x the line height.
                    min_gap_width = max(int(char_w * 2.5), int(h * 0.75))
                    gap_threshold = h * 255 * 0.15
                    gap_mask = proj < gap_threshold
                    # Find the longest contiguous low-ink run in the window.
                    best_gap_start = -1
                    best_gap_len = 0
                    current_gap_start = -1
                    current_gap_len = 0
                    for x_rel in range(start_x, end_x):
                        if gap_mask[x_rel]:
                            if current_gap_len == 0:
                                current_gap_start = x_rel
                            current_gap_len += 1
                        else:
                            if current_gap_len > best_gap_len:
                                best_gap_len = current_gap_len
                                best_gap_start = current_gap_start
                            current_gap_len = 0
                    # Account for a run that extends to the window's end.
                    if current_gap_len > best_gap_len:
                        best_gap_len = current_gap_len
                        best_gap_start = current_gap_start
                    if best_gap_len >= min_gap_width:
                        # Split at the middle of the gap.
                        split_x = roi_x1 + best_gap_start + (best_gap_len // 2)
                        split_idx = int((split_x - x1) / max(1e-6, char_w))
                        spaces = [i for i, c in enumerate(text) if c == ' ']
                        if spaces:
                            best_space = min(spaces, key=lambda i: abs(i - split_idx))
                            # Only snap when the space is reasonably close.
                            if abs(best_space - split_idx) < len(text) * 0.35:
                                split_idx = best_space
                        text_left = text[:split_idx].strip()
                        text_right = text[split_idx:].strip()
                        if text_left and text_right:
                            quad_left = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
                            quad_right = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
                            new_filtered.append((quad_left, text_left, conf))
                            new_filtered.append((quad_right, text_right, conf))
                            splits_made += 1
                            continue
        new_filtered.append(item)
    return new_filtered, splits_made
def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
    """Try to split a wide panel-spanning box into left/right halves.

    Scans the middle 50% of the box's ink column-projection. With multiple
    dark columns it splits at the lightest valley between them; with one
    very strong dark column (likely a panel border) it splits on the
    column itself.

    Args:
        image_bgr: full page image.
        bbox_xyxy: (x1, y1, x2, y2) box to consider.
        bubble_quads: optional member quads; fewer than 4 aborts the split.

    Returns:
        (box_left, box_right, split_x) in absolute coordinates, or None.
    """
    x1, y1, x2, y2 = bbox_xyxy
    w = x2 - x1
    h = y2 - y1
    # Require enough member quads for a split to be meaningful.
    if bubble_quads is not None and len(bubble_quads) < 4:
        return None
    if w < 50 or h < 50:
        return None
    roi = image_bgr[y1:y2, x1:x2]
    if roi.size == 0:
        return None
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Fixed threshold 150, inverted: dark pixels become white.
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
    vertical_projection = np.sum(thresh, axis=0)
    # Only consider splits in the middle half of the box.
    search_start = int(w * 0.25)
    search_end = int(w * 0.75)
    if search_start >= search_end:
        return None
    peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start
    peak_val = vertical_projection[peak_x_relative]
    # A column is "significant" if >25% of its pixels are dark.
    threshold_val = h * 255 * 0.25
    significant_peaks = []
    for x_rel in range(search_start, search_end):
        if vertical_projection[x_rel] > threshold_val:
            significant_peaks.append((x_rel, vertical_projection[x_rel]))
    # Multiple dark columns: split at the lightest valley between them.
    if len(significant_peaks) > 1:
        min_proj_val = np.min(vertical_projection[search_start:search_end])
        min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start
        if min_proj_val < threshold_val * 0.6:
            split_x_absolute = x1 + min_proj_idx
            box_left = (x1, y1, split_x_absolute, y2)
            box_right = (split_x_absolute, y1, x2, y2)
            return box_left, box_right, split_x_absolute
    # Single very strong dark column (>40% dark): treat it as a border.
    if peak_val > (h * 255 * 0.40):
        split_x_absolute = x1 + peak_x_relative
        box_left = (x1, y1, split_x_absolute, y2)
        box_right = (split_x_absolute, y1, x2, y2)
        return box_left, box_right, split_x_absolute
    return None
def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False):
    """Split a bubble's quad indices into left/right columns at the widest x-gap.

    A split happens when the widest horizontal gap exceeds an absolute
    threshold, or both a median-height-relative threshold and a floor.

    Returns:
        (left_indices, right_indices) or None when no convincing gap exists.
    """
    if len(indices) < 2:
        return None
    boxes = [quad_bbox(ocr[i][0]) for i in indices]
    ordered = sorted(zip(indices, boxes), key=lambda item: item[1][0])
    # Gap in front of each box, measured against the running right edge.
    gap_records = []
    right_edge = ordered[0][1][2]
    for pos in range(1, len(ordered)):
        box = ordered[pos][1]
        gap_records.append((pos, box[0] - right_edge))
        right_edge = max(right_edge, box[2])
    if not gap_records:
        return None
    split_pos, widest_gap = max(gap_records, key=lambda rec: rec[1])
    heights = [b[3] - b[1] for b in boxes]
    med_h = float(np.median(heights)) if heights else 15.0
    if use_aggressive_thresholds:
        abs_threshold, rel_threshold, gap_floor = 60.0, med_h * 1.0, 20.0
    else:
        abs_threshold, rel_threshold, gap_floor = 90.0, med_h * 1.5, 25.0
    if widest_gap > abs_threshold or (widest_gap > rel_threshold and widest_gap > gap_floor):
        left = [item[0] for item in ordered[:split_pos]]
        right = [item[0] for item in ordered[split_pos:]]
        if not left or not right:
            return None
        return left, right
    return None
def split_bubble_if_multiple_rows(indices, ocr, bid=None):
    """Split a bubble's quad indices into top/bottom rows at the widest y-gap.

    A split requires both a relative gap (>1.8x median line height) and
    an absolute one (>20 px).

    Returns:
        (top_indices, bottom_indices) or None.
    """
    if len(indices) < 2:
        return None
    boxes = [quad_bbox(ocr[i][0]) for i in indices]
    ordered = sorted(zip(indices, boxes), key=lambda item: item[1][1])
    # Gap above each box, measured against the running bottom edge.
    gap_records = []
    bottom_edge = ordered[0][1][3]
    for pos in range(1, len(ordered)):
        box = ordered[pos][1]
        gap_records.append((pos, box[1] - bottom_edge))
        bottom_edge = max(bottom_edge, box[3])
    if not gap_records:
        return None
    split_pos, widest_gap = max(gap_records, key=lambda rec: rec[1])
    heights = [b[3] - b[1] for b in boxes]
    med_h = float(np.median(heights)) if heights else 15.0
    if widest_gap > med_h * 1.8 and widest_gap > 20.0:
        top = [item[0] for item in ordered[:split_pos]]
        bottom = [item[0] for item in ordered[split_pos:]]
        if top and bottom:
            return top, bottom
    return None
def is_vertical_text_like(indices, ocr):
    """Heuristic: does this cluster look like a tall, narrow vertical stack?

    Returns True when the union box is clearly taller than wide (aspect >
    1.35), the token x-centers barely spread horizontally, and consecutive
    y-centers are spaced like stacked lines of text.
    """
    if len(indices) < 2:
        return False
    bounds = [quad_bbox(ocr[i][0]) for i in indices]
    union = boxes_union_xyxy(bounds)
    if union is None:
        return False
    ux1, uy1, ux2, uy2 = union
    width = max(1, ux2 - ux1)
    height = max(1, uy2 - uy1)
    centers_x = [(bb[0] + bb[2]) / 2.0 for bb in bounds]
    spread_x = float(np.std(centers_x)) if len(centers_x) > 1 else 0.0
    median_h = float(np.median([max(1, bb[3] - bb[1]) for bb in bounds]))
    centers_y = sorted((bb[1] + bb[3]) / 2.0 for bb in bounds)
    if len(centers_y) >= 2:
        deltas = [b - a for a, b in zip(centers_y, centers_y[1:])]
    else:
        deltas = [0]
    median_gap = float(np.median(deltas)) if deltas else 0.0
    tall = (height / width) > 1.35
    narrow = spread_x < max(10.0, median_h * 0.9)
    stacked = median_gap > max(6.0, median_h * 0.35)
    return tall and narrow and stacked
def split_cluster_by_big_vertical_gap(indices, ocr, factor=1.9, min_gap=22):
    """Split a cluster at the single largest vertical whitespace band.

    Boxes are ordered by y-center; the widest bottom-to-top gap between
    consecutive boxes must exceed both `min_gap` pixels and `factor` times
    the median box height. Returns (upper_indices, lower_indices) or None.
    """
    if len(indices) < 2:
        return None
    entries = []
    for idx in indices:
        bb = quad_bbox(ocr[idx][0])
        entries.append((idx, bb, (bb[1] + bb[3]) / 2.0, max(1.0, bb[3] - bb[1])))
    entries.sort(key=lambda e: e[2])
    median_h = float(np.median([e[3] for e in entries])) if entries else 12.0
    # Find the widest gap between consecutive boxes (bottom edge of one to
    # top edge of the next).
    widest, cut_at = -1, -1
    for k, (cur, nxt) in enumerate(zip(entries, entries[1:])):
        space = nxt[1][1] - cur[1][3]
        if space > widest:
            widest, cut_at = space, k
    if cut_at < 0:
        return None
    if widest > max(min_gap, median_h * factor):
        upper = [e[0] for e in entries[:cut_at + 1]]
        lower = [e[0] for e in entries[cut_at + 1:]]
        if upper and lower:
            return upper, lower
    return None
def split_nested_or_side_by_side(indices, ocr):
    """Force-split a cluster into left/right halves via 1-D 2-means on x-centers.

    Centroids are seeded at the extreme x-centers and refined with up to 12
    Lloyd iterations; the split is rejected when the halves overlap
    horizontally by more than 8px. Returns (left_indices, right_indices)
    or None.
    """
    if len(indices) < 2:
        return None
    boxes = [quad_bbox(ocr[i][0]) for i in indices]
    xcs = np.array([[(b[0] + b[2]) / 2.0] for b in boxes], dtype=np.float32)
    c1 = float(np.min(xcs))
    c2 = float(np.max(xcs))
    if abs(c2 - c1) < 8:
        # All x-centers nearly identical: nothing to separate.
        return None
    for _ in range(12):
        g1, g2 = [], []
        for idx, v in enumerate(xcs[:, 0]):
            # Assign each token to the nearer centroid (ties go to c1).
            if abs(v - c1) <= abs(v - c2):
                g1.append(idx)
            else:
                g2.append(idx)
        if not g1 or not g2:
            return None
        new_c1 = float(np.mean([xcs[i, 0] for i in g1]))
        new_c2 = float(np.mean([xcs[i, 0] for i in g2]))
        # Converged once both centroids move by less than half a pixel.
        if abs(new_c1 - c1) < 0.5 and abs(new_c2 - c2) < 0.5:
            break
        c1, c2 = new_c1, new_c2
    # g1/g2 from the final iteration; order them left/right by centroid.
    left_group = g1 if c1 < c2 else g2
    right_group = g2 if c1 < c2 else g1
    left_idxs = [indices[i] for i in left_group]
    right_idxs = [indices[i] for i in right_group]
    if not left_idxs or not right_idxs:
        return None
    left_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in left_idxs])
    right_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in right_idxs])
    # Negative separation means the halves overlap; tolerate up to 8px.
    sep = right_box[0] - left_box[2]
    if sep < -8:
        return None
    return left_idxs, right_idxs
def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr):
    """Greedily merge bubble fragments that sit close together.

    Distances are scaled by the page-wide median token height. Two bubbles
    merge when their centers are near, their boxes touch (within ~1.25 line
    heights), and the merged union stays compact relative to the parts.
    Returns fresh, renumbered (bubbles, boxes, quads, indices) dicts.
    """
    bids = sorted(bubbles.keys())
    used = set()
    out_b, out_bb, out_bq, out_bi = {}, {}, {}, {}
    nbid = 1
    # Median token height across the whole page sets the distance scale.
    all_h = []
    for i in range(len(ocr)):
        b = quad_bbox(ocr[i][0])
        all_h.append(max(1, b[3]-b[1]))
    med_h = float(np.median(all_h)) if all_h else 14.0
    for i, a in enumerate(bids):
        if a in used:
            continue
        used.add(a)
        group = [a]
        ax1, ay1, ax2, ay2 = bubble_boxes[a]
        for b in bids[i+1:]:
            if b in used:
                continue
            bx1, by1, bx2, by2 = bubble_boxes[b]
            acx, acy = (ax1+ax2)/2.0, (ay1+ay2)/2.0
            bcx, bcy = (bx1+bx2)/2.0, (by1+by2)/2.0
            dx, dy = abs(acx-bcx), abs(acy-bcy)
            # Centers close: generous horizontally, tighter vertically.
            near = dx < med_h * 10.0 and dy < med_h * 3.6
            touching = overlap_or_near((ax1, ay1, ax2, ay2), (bx1, by1, bx2, by2), gap=int(med_h*1.25))
            ua = boxes_union_xyxy([(ax1, ay1, ax2, ay2), (bx1, by1, bx2, by2)])
            area_a = max(1, (ax2-ax1)*(ay2-ay1))
            area_b = max(1, (bx2-bx1)*(by2-by1))
            area_u = max(1, (ua[2]-ua[0])*(ua[3]-ua[1]))
            # Reject merges whose union is much larger than its parts.
            compact_union = area_u < (area_a + area_b) * 1.65
            if near and touching and compact_union:
                group.append(b)
                used.add(b)
                # Grow the anchor box so later candidates compare against
                # the merged extent, not the original bubble.
                ax1 = min(ax1, bx1); ay1 = min(ay1, by1); ax2 = max(ax2, bx2); ay2 = max(ay2, by2)
        idxs = []
        quads = []
        for g in group:
            idxs.extend(bubble_indices[g])
            quads.extend(bubble_quads[g])
        idxs = sorted(set(idxs))
        ub = boxes_union_xyxy([quad_bbox(ocr[k][0]) for k in idxs])
        if ub is None:
            continue
        out_b[nbid] = build_lines_from_indices(idxs, ocr)
        out_bb[nbid] = ub
        out_bq[nbid] = quads
        out_bi[nbid] = idxs
        nbid += 1
    return out_b, out_bb, out_bq, out_bi
# ============================================================
# PREPROCESS
# ============================================================
def preprocess_variant(crop_bgr, mode):
    """Return a grayscale preprocessing variant of `crop_bgr` for OCR retries.

    Supported modes: "raw", "clahe", "adaptive", "otsu", "invert",
    "bilateral", "morph_open". "raw" and any unknown mode return the plain
    grayscale image.
    """
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
    if mode == "clahe":
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(gray)
    if mode == "adaptive":
        smoothed = cv2.GaussianBlur(gray, (3, 3), 0)
        return cv2.adaptiveThreshold(
            smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11
        )
    if mode == "otsu":
        smoothed = cv2.GaussianBlur(gray, (3, 3), 0)
        return cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    if mode == "invert":
        return 255 - gray
    if mode == "bilateral":
        smoothed = cv2.bilateralFilter(gray, 7, 60, 60)
        return cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    if mode == "morph_open":
        binarized = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # Tiny opening kernel removes speckle noise without eating strokes.
        return cv2.morphologyEx(binarized, cv2.MORPH_OPEN, np.ones((2, 2), np.uint8))
    # "raw" and unrecognized modes: plain grayscale.
    return gray
def rotate_image_keep_bounds(img, angle_deg):
    """Rotate `img` by `angle_deg` degrees, expanding the canvas so nothing
    is clipped; revealed borders are filled white (255)."""
    h, w = img.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, angle_deg, 1.0)
    abs_cos, abs_sin = abs(M[0, 0]), abs(M[0, 1])
    out_w = int((h * abs_sin) + (w * abs_cos))
    out_h = int((h * abs_cos) + (w * abs_sin))
    # Shift the translation so the original center maps to the new center.
    M[0, 2] += (out_w / 2) - center[0]
    M[1, 2] += (out_h / 2) - center[1]
    return cv2.warpAffine(img, M, (out_w, out_h), flags=cv2.INTER_CUBIC, borderValue=255)
def rebuild_text_from_vision_result(res):
    """Reassemble OCR fragments from a Vision pass into one normalized string.

    Fragments are clustered into rows by y-center (tolerance ~0.75x the
    median fragment height), rows read top-to-bottom and fragments
    left-to-right within a row. Returns "" when `res` is empty or contains
    only blank text.
    """
    if not res:
        return ""
    norm = []
    for bbox, txt, conf in res:
        if not txt or not txt.strip():
            continue
        b = quad_bbox(bbox)
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        norm.append((b, txt, conf, xc, yc, h))
    if not norm:
        return ""
    med_h = float(np.median([x[5] for x in norm]))
    row_tol = max(6.0, med_h * 0.75)
    norm.sort(key=lambda z: z[4])
    rows = []
    for it in norm:
        placed = False
        for r in rows:
            # Join the first row whose running-mean y-center is in tolerance.
            if abs(it[4] - r["yc"]) <= row_tol:
                r["m"].append(it)
                r["yc"] = float(np.mean([k[4] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it[4], "m": [it]})
    rows.sort(key=lambda r: r["yc"])
    lines = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z[3])
        line = normalize_text(" ".join(x[1] for x in mem))
        if line:
            lines.append(line)
    return normalize_text(" ".join(lines))
def reread_bubble_with_vision(image_bgr, bbox_xyxy, vision_detector, upscale=3.0, pad=24):
    """Re-OCR a low-quality bubble crop across preprocessing/rotation variants.

    The padded crop is upscaled by `upscale`, then every preprocessing mode
    is tried at small rotations; the candidate with the highest
    ocr_candidate_score wins. Returns (text, score, "vision-reread") on
    success, otherwise (None, 0.0, "none").
    """
    ih, iw = image_bgr.shape[:2]
    x1, y1, x2, y2 = bbox_xyxy
    # Pad and clamp the crop to the image bounds.
    x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad))
    x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad))
    crop = image_bgr[y1:y2, x1:x2]
    if crop.size == 0:
        return None, 0.0, "none"
    modes = ["raw", "clahe", "adaptive", "otsu", "invert", "bilateral", "morph_open"]
    angles = [0.0, 1.5, -1.5]
    best_v_txt, best_v_sc = "", 0.0
    up0 = cv2.resize(crop, (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), interpolation=cv2.INTER_CUBIC)
    for mode in modes:
        proc = preprocess_variant(up0, mode)
        # Vision expects 3-channel input; re-expand binarized variants.
        proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR) if len(proc.shape) == 2 else proc
        for a in angles:
            rot = rotate_image_keep_bounds(proc3, a)
            # Use run_vision_ocr if available (enhanced detector)
            if hasattr(vision_detector, 'run_vision_ocr'):
                res = vision_detector.run_vision_ocr(rot)
            else:
                res = vision_detector.read(rot)
            txt = rebuild_text_from_vision_result(res)
            sc = ocr_candidate_score(txt)
            if sc > best_v_sc:
                best_v_txt, best_v_sc = txt, sc
    if best_v_txt:
        return best_v_txt, best_v_sc, "vision-reread"
    return None, 0.0, "none"
# ============================================================
# LINES + BUBBLES
# ============================================================
def build_lines_from_indices(indices, ocr):
    """Build ordered text lines from a set of OCR token indices.

    Tokens are clustered into rows by y-center (tolerance ~0.75x the median
    token height), rows sorted top-to-bottom, tokens left-to-right within a
    row. Rows that normalize to noise (per is_noise_text) are dropped.
    Returns a list of normalized line strings.
    """
    if not indices:
        return []
    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        items.append((i, b, xc, yc, h))
    med_h = float(np.median([it[4] for it in items])) if items else 10.0
    row_tol = max(6.0, med_h * 0.75)
    items.sort(key=lambda x: x[3])
    rows = []
    for it in items:
        i, b, xc, yc, h = it
        placed = False
        for r in rows:
            # Join the first row whose running-mean y-center is in tolerance.
            if abs(yc - r["yc"]) <= row_tol:
                r["m"].append((i, b, xc, yc))
                r["yc"] = float(np.mean([k[3] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": yc, "m": [(i, b, xc, yc)]})
    rows.sort(key=lambda r: r["yc"])
    lines = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z[2])
        txt = normalize_text(" ".join(ocr[i][1] for i, _, _, _ in mem))
        if txt and not is_noise_text(txt):
            lines.append(txt)
    return lines
def build_line_boxes_from_indices(indices, ocr, image_shape=None):
    """Compute tight per-line bounding boxes for a set of OCR token indices.

    Tokens are clustered into rows by y-center; each row is broken into
    chunks wherever the horizontal gap exceeds ~1.25x the median token
    height, and each chunk's union box is padded slightly (more headroom
    than footroom). When `image_shape` is given, boxes are clamped to the
    image and degenerate ones dropped. Returns (x1, y1, x2, y2) boxes
    sorted top-to-bottom then left-to-right.
    """
    if not indices:
        return []
    items = []
    for i in indices:
        b = quad_bbox(ocr[i][0])
        txt = normalize_text(ocr[i][1])
        if is_noise_text(txt):
            continue
        xc = (b[0] + b[2]) / 2.0
        yc = (b[1] + b[3]) / 2.0
        h = max(1.0, b[3] - b[1])
        items.append({"i": i, "b": b, "txt": txt, "xc": xc, "yc": yc, "h": h})
    if not items:
        return []
    med_h = float(np.median([it["h"] for it in items]))
    row_tol = max(6.0, med_h * 0.90)
    gap_x_tol = max(8.0, med_h * 1.25)
    pad = max(2, int(round(med_h * 0.14)))
    rows = []
    for it in sorted(items, key=lambda x: x["yc"]):
        placed = False
        for r in rows:
            if abs(it["yc"] - r["yc"]) <= row_tol:
                r["m"].append(it)
                r["yc"] = float(np.mean([k["yc"] for k in r["m"]]))
                placed = True
                break
        if not placed:
            rows.append({"yc": it["yc"], "m": [it]})
    rows.sort(key=lambda r: r["yc"])
    out_boxes = []
    for r in rows:
        mem = sorted(r["m"], key=lambda z: z["xc"])
        if not mem:
            continue
        # Split the row into chunks at large horizontal gaps.
        chunks = []
        cur = [mem[0]]
        for t in mem[1:]:
            prev = cur[-1]["b"]
            b = t["b"]
            gap = b[0] - prev[2]
            if gap <= gap_x_tol:
                cur.append(t)
            else:
                chunks.append(cur)
                cur = [t]
        chunks.append(cur)
        for ch in chunks:
            ub = boxes_union_xyxy([x["b"] for x in ch])
            if ub:
                x1, y1, x2, y2 = ub
                # Asymmetric padding: slightly more above than below.
                out_boxes.append((x1 - pad, y1 - int(round(pad * 1.2)), x2 + pad, y2 + int(round(pad * 0.9))))
    if image_shape is not None:
        ih, iw = image_shape[:2]
        clamped = []
        for b in out_boxes:
            x1 = max(0, int(b[0])); y1 = max(0, int(b[1]))
            x2 = min(iw - 1, int(b[2])); y2 = min(ih - 1, int(b[3]))
            if x2 > x1 and y2 > y1:
                clamped.append((x1, y1, x2, y2))
        out_boxes = clamped
    out_boxes.sort(key=lambda z: (z[1], z[0]))
    return out_boxes
def auto_gap(image_path, base=18, ref_w=750):
    """Scale the clustering gap by image width relative to `ref_w` pixels.

    Falls back to `base` unchanged when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        return base
    width = img.shape[1]
    return base * (width / ref_w)
def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=1):
    """Cluster OCR tokens into speech-bubble groups via union-find.

    Two tokens are united when any of four proximity rules fires:
    horizontal overlap + small vertical gap, vertical column alignment,
    small x/y gaps, or close centers on roughly the same line. All
    thresholds scale with the page-wide median token height. Returns
    ({bid: lines}, {bid: bbox}, {bid: quads}, {bid: token indices}) keyed
    by 1-based bubble id in top-left reading order.

    NOTE(review): `bbox_padding` appears unused — padding is derived from
    the median height (`adaptive_pad`) instead; confirm before removing.
    """
    n = len(ocr)
    if n == 0:
        return {}, {}, {}, {}
    boxes = [quad_bbox(r[0]) for r in ocr]
    centers = [quad_center(r[0]) for r in ocr]
    hs = [max(1.0, b[3] - b[1]) for b in boxes]
    med_h = float(np.median(hs)) if hs else 12.0
    dist_thresh = max(20.0, med_h * 1.8)
    adaptive_gap_y = max(gap_px, med_h * 2.5)
    # Union-find with path halving.
    p = list(range(n))
    def find(x):
        while p[x] != x:
            p[x] = p[p[x]]
            x = p[x]
        return x
    def unite(a, b):
        p[find(a)] = find(b)
    for i in range(n):
        for j in range(i + 1, n):
            ax1, ay1, ax2, ay2 = boxes[i]
            bx1, by1, bx2, by2 = boxes[j]
            gap_x = max(0, max(ax1, bx1) - min(ax2, bx2))
            gap_y = max(0, max(ay1, by1) - min(ay2, by2))
            cx1, cy1 = centers[i]
            cx2, cy2 = centers[j]
            is_vertically_aligned = abs(cx1 - cx2) < (med_h * 1.5)
            # Rule 1: horizontally overlapping and vertically close.
            if gap_x == 0 and gap_y <= (med_h * 3.5):
                unite(i, j); continue
            # Rule 2: same text column, stacked lines.
            if is_vertically_aligned and gap_y <= (med_h * 3.2):
                unite(i, j); continue
            # Rule 3: both axis gaps small.
            if gap_x <= gap_px and gap_y <= adaptive_gap_y:
                unite(i, j); continue
            # Rule 4: centers close overall and roughly on the same line.
            d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5
            if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 1.5:
                unite(i, j)
    groups = {}
    for i in range(n):
        groups.setdefault(find(i), []).append(i)
    # Number bubbles top-to-bottom, then left-to-right.
    sorted_groups = sorted(
        groups.values(),
        key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs))
    )
    bubbles, bubble_boxes, bubble_quads, bubble_indices = {}, {}, {}, {}
    ih, iw = image_shape[:2]
    for bid, idxs in enumerate(sorted_groups, start=1):
        idxs = sorted(idxs, key=lambda k: boxes[k][1])
        lines = build_lines_from_indices(idxs, ocr)
        quads = [ocr[k][0] for k in idxs]
        ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
        if ub is None:
            continue
        x1, y1, x2, y2 = ub
        adaptive_pad = max(1, int(round(med_h * 0.16)))
        x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad)
        x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad)
        bubbles[bid] = lines
        bubble_boxes[bid] = (x1, y1, x2, y2)
        bubble_quads[bid] = quads
        bubble_indices[bid] = idxs
    return bubbles, bubble_boxes, bubble_quads, bubble_indices
# ============================================================
# DEBUG / EXPORT
# ============================================================
def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, clean_lines=None, out_path="debug_clusters.png"):
    """Render a debug overlay: whited-out OCR quads, green bubble boxes with
    ids, and (optionally) cleaned text word-wrapped beneath each bubble.

    Silently returns when the image cannot be loaded; writes `out_path`.
    """
    img = cv2.imread(image_path)
    if img is None:
        return
    # Blank out detected text so the overlays are readable.
    for bbox, txt, conf in ocr:
        pts = np.array(bbox, dtype=np.int32)
        cv2.fillPoly(img, [pts], (255, 255, 255))
        cv2.polylines(img, [pts], True, (180, 180, 180), 1)
    for bid, bb in bubble_boxes.items():
        x1, y1, x2, y2 = bb
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
        cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
        if clean_lines and bid in clean_lines:
            text = clean_lines[bid]
            # Greedy word-wrap at ~25 characters per rendered line.
            words = text.split()
            lines = []
            cur = ""
            for w in words:
                if len(cur) + len(w) < 25:
                    cur += w + " "
                else:
                    lines.append(cur.strip())
                    cur = w + " "
            if cur:
                lines.append(cur.strip())
            y_text = y2 + 18
            for line in lines:
                # Thick dark outline first, then colored text on top.
                cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 3)
                cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
                y_text += 18
    cv2.imwrite(out_path, img)
def estimate_reading_order(bbox_dict, mode="ltr"):
    """Assign 1-based reading-order ranks to bubbles.

    Bubbles are banded into rows (y-centers within 90px of the running row
    mean), rows are read top-to-bottom, and each row left-to-right — or
    right-to-left when mode == "rtl". Returns {bubble_id: rank}.
    """
    ROW_TOL = 90
    centers = []
    for bid, (x1, y1, x2, y2) in bbox_dict.items():
        centers.append((bid, (x1 + x2) / 2.0, (y1 + y2) / 2.0))
    centers.sort(key=lambda c: c[2])
    bands = []
    for entry in centers:
        target = None
        for band in bands:
            if abs(entry[2] - band["cy"]) <= ROW_TOL:
                target = band
                break
        if target is None:
            bands.append({"cy": entry[2], "items": [entry]})
        else:
            target["items"].append(entry)
            # Keep the band center as the running mean of its members.
            target["cy"] = float(np.mean([e[2] for e in target["items"]]))
    bands.sort(key=lambda band: band["cy"])
    ranked = []
    for band in bands:
        band["items"].sort(key=lambda e: e[1], reverse=(mode == "rtl"))
        ranked.extend(e[0] for e in band["items"])
    return {bid: rank for rank, bid in enumerate(ranked, start=1)}
def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
    """Serialize per-bubble geometry to `filepath` as pretty-printed JSON.

    Each bubble id maps to its outer box, reading order, raw quads and their
    boxes, the union of the text quads, and tight per-line boxes (clamped to
    `image_shape`). Written UTF-8, non-ASCII preserved.
    """
    out = {}
    for bid, bb in bbox_dict.items():
        x1, y1, x2, y2 = bb
        quads = quads_dict.get(bid, [])
        idxs = indices_dict.get(bid, [])
        qboxes = [quad_bbox(q) for q in quads]
        text_union = boxes_union_xyxy(qboxes)
        line_boxes_xyxy = build_line_boxes_from_indices(idxs, ocr, image_shape=image_shape)
        line_union_xyxy = boxes_union_xyxy(line_boxes_xyxy)
        line_union_area = bbox_area_xyxy(line_union_xyxy)
        out[str(bid)] = {
            "x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
            "reading_order": int(reading_map.get(bid, bid)),
            "quad_bboxes": [
                {"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
                for b in qboxes
            ],
            "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
            "text_bbox": xyxy_to_xywh(text_union),
            "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
            "line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
            "line_union_area": int(line_union_area),
        }
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2, ensure_ascii=False)
# ============================================================
# MAIN PIPELINE
# ============================================================
def translate_manga_text(
    image_path="001-page.png",
    source_lang="en",
    target_lang="ca",
    confidence_threshold=0.03,
    min_text_length=1,
    gap_px="auto",
    filter_sound_effects=True,
    quality_threshold=0.62,
    export_to_file="output.txt",
    export_bubbles_to="bubbles.json",
    reading_mode="ltr",
    debug=True,
    use_enhanced_ocr=True
):
    """OCR a manga page, group text into bubbles, translate, and export.

    Pipeline: Apple Vision OCR (optionally multi-pass with a re-scan of
    likely-missed small-text regions) -> confidence/noise/SFX/title
    filtering -> quad splitting and normalization -> token grouping ->
    bubble merge/split heuristics -> re-read of low-quality bubbles ->
    translation via GoogleTranslator -> exports.

    `gap_px` may be "auto" to scale the clustering gap to the page width;
    `reading_mode` is "ltr" or "rtl". Returns None; results are written to
    `export_to_file`, `export_bubbles_to`, stdout, and (when `debug`)
    debug_clusters.png.

    FIX: the table divider was previously `"" * 120`, which evaluates to an
    empty string, so every separator line printed and exported was blank.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"❌ Cannot load image: {image_path}")
        return
    resolved_gap = auto_gap(image_path) if gap_px == "auto" else float(gap_px)
    print("Loading OCR engines...")
    # Use enhanced detector
    if use_enhanced_ocr:
        detector = ImprovedMacVisionDetector(source_lang=source_lang)
        print("🚀 Using Enhanced Multi-Pass OCR")
    else:
        detector = MacVisionDetector(source_lang=source_lang)
    print("Running detection OCR (Apple Vision)...")
    raw = detector.read(image_path)
    print(f"Raw detections: {len(raw)}")
    # Secondary pass for missed regions
    if use_enhanced_ocr:
        existing_quads = [r[0] for r in raw]
        missed_regions = detect_small_text_regions(image, existing_quads)
        if missed_regions:
            print(f"🔍 Found {len(missed_regions)} potentially missed text regions")
            # Re-run OCR on missed regions with higher upscaling
            for region in missed_regions:
                x1, y1, x2, y2 = region
                # Add padding
                pad = 10
                x1 = max(0, x1 - pad)
                y1 = max(0, y1 - pad)
                x2 = min(image.shape[1], x2 + pad)
                y2 = min(image.shape[0], y2 + pad)
                crop = image[y1:y2, x1:x2]
                if crop.size > 0:
                    # Aggressive upscaling for small text
                    upscaled = cv2.resize(crop, None, fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
                    region_results = detector.run_vision_ocr(upscaled)
                    # Scale back and offset coordinates
                    for quad, text, conf in region_results:
                        scaled_quad = [[int(p[0]/4.0 + x1), int(p[1]/4.0 + y1)] for p in quad]
                        raw.append((scaled_quad, text, conf))
            print(f"📝 Total detections after missed region scan: {len(raw)}")
    filtered = []
    skipped = 0
    ih, iw = image.shape[:2]
    for bbox, text, conf in raw:
        t = normalize_text(text)
        qb = quad_bbox(bbox)
        if conf < confidence_threshold:
            skipped += 1; continue
        if len(t) < min_text_length:
            skipped += 1; continue
        if is_noise_text(t):
            skipped += 1; continue
        if filter_sound_effects and is_sound_effect(t):
            skipped += 1; continue
        if is_title_text(t):
            skipped += 1; continue
        # Text in the top band is likely a header/title: keep only short or
        # high-confidence detections there.
        if qb[1] < int(ih * TOP_BAND_RATIO):
            if conf < 0.70 and len(t) >= 5:
                skipped += 1; continue
        filtered.append((bbox, t, conf))
    print(f"Kept: {len(filtered)} | Skipped: {skipped}")
    if not filtered:
        print("⚠️ No text after filtering.")
        return
    # 1) split obvious wide OCR merges
    filtered, splits_made = split_wide_ocr_items(image, filtered)
    if splits_made > 0:
        print(f"✂️ Split {splits_made} wide OCR lines across column gaps.")
    # 2) split giant bridge quads
    filtered, bridge_splits = split_abnormal_bridge_quads(image, filtered)
    if bridge_splits > 0:
        print(f"🧩 Split {bridge_splits} abnormal bridge OCR quad(s).")
    # 3) shrink quads to tighter text footprint
    filtered = normalize_ocr_quads(filtered)
    bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
        filtered, image.shape, gap_px=resolved_gap, bbox_padding=1
    )
    # merge accidental sibling fragments
    bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_close_bubbles_by_line_height(
        bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered
    )
    # Apply page-specific fixes
    page_identifier = os.path.basename(image_path)
    bubbles, bubble_boxes, bubble_quads, bubble_indices = apply_page_specific_fixes(
        bubbles, bubble_boxes, bubble_quads, bubble_indices,
        filtered, image, page_identifier
    )
    new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
    next_bid = max(bubbles.keys()) + 1 if bubbles else 1
    splits_performed = []
    # Cascade of split heuristics per bubble; the first that fires wins.
    for bid in list(bubbles.keys()):
        box = bubble_boxes[bid]
        bubble_split = None
        if is_vertical_text_like(bubble_indices[bid], filtered):
            vgap_split = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered, factor=1.7, min_gap=18)
            if vgap_split:
                bubble_split = vgap_split
                splits_performed.append(f"BOX#{bid} (vertical-stack y-gap split)")
        if bubble_split is None:
            split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
            if split_result:
                _, _, split_x = split_result
                left_idxs, right_idxs = [], []
                for idx in bubble_indices[bid]:
                    cx, cy = quad_center(filtered[idx][0])
                    if cx < split_x:
                        left_idxs.append(idx)
                    else:
                        right_idxs.append(idx)
                if left_idxs and right_idxs:
                    bubble_split = (left_idxs, right_idxs)
                    splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
            elif len(bubble_quads[bid]) >= 4:
                col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
                if col_split:
                    l, r = col_split
                    if l and r:
                        bubble_split = (l, r)
                        splits_performed.append(f"BOX#{bid} ({len(l)} quads | {len(r)} quads)")
        if bubble_split is None:
            col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
            if col_split:
                l, r = col_split
                if l and r:
                    bubble_split = (l, r)
                    splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(l)} | {len(r)} quads)")
        if bubble_split is None:
            nested_split = split_nested_or_side_by_side(bubble_indices[bid], filtered)
            if nested_split:
                l, r = nested_split
                if l and r:
                    bubble_split = (l, r)
                    splits_performed.append(f"BOX#{bid} (nested/side-by-side forced split)")
        if bubble_split is None:
            row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
            if row_split:
                t, b = row_split
                if t and b:
                    bubble_split = (t, b)
                    splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(t)} | {len(b)} quads)")
        if bubble_split is None:
            gy = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered, factor=1.9, min_gap=22)
            if gy:
                a, b = gy
                bubble_split = (a, b)
                splits_performed.append(f"BOX#{bid} (large vertical-gap split)")
        if bubble_split:
            # First part keeps the old id, second part gets a fresh one.
            part1_idxs, part2_idxs = bubble_split
            new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered)
            ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs])
            new_bubble_boxes[bid] = (max(0, ub_1[0]-2), max(0, ub_1[1]-2), min(iw-1, ub_1[2]+2), min(ih-1, ub_1[3]+2))
            new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs]
            new_bubble_indices[bid] = part1_idxs
            new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered)
            ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs])
            new_bubble_boxes[next_bid] = (max(0, ub_2[0]-2), max(0, ub_2[1]-2), min(iw-1, ub_2[2]+2), min(ih-1, ub_2[3]+2))
            new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs]
            new_bubble_indices[next_bid] = part2_idxs
            next_bid += 1
        else:
            new_bubbles[bid] = bubbles[bid]
            new_bubble_boxes[bid] = bubble_boxes[bid]
            new_bubble_quads[bid] = bubble_quads[bid]
            new_bubble_indices[bid] = bubble_indices[bid]
    if splits_performed:
        print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}")
        for split_info in splits_performed:
            print(f" ✓ Split {split_info}")
    bubbles = new_bubbles
    bubble_boxes = new_bubble_boxes
    bubble_quads = new_bubble_quads
    bubble_indices = new_bubble_indices
    translator = GoogleTranslator(source=source_lang, target=target_lang)
    clean_lines: Dict[int, str] = {}
    sources_used: Dict[int, str] = {}
    for bid, lines in bubbles.items():
        base_txt = normalize_text(" ".join(lines))
        base_sc = ocr_candidate_score(base_txt)
        txt = base_txt
        src_used = "vision-base"
        # Low-scoring bubbles get a second, more expensive OCR pass; keep
        # the re-read only if it is meaningfully better.
        if base_sc < quality_threshold:
            rr_txt, rr_sc, rr_src = reread_bubble_with_vision(
                image_bgr=image,
                bbox_xyxy=bubble_boxes[bid],
                vision_detector=detector,
                upscale=3.0,
                pad=24
            )
            if rr_txt and rr_sc > base_sc + 0.04:
                txt = rr_txt
                src_used = rr_src
        # Hard-coded corrections for known recurring OCR misreads.
        txt = txt.replace(" BOMPORTA", " IMPORTA")
        txt = txt.replace(" TESTO ", " ESTO ")
        txt = txt.replace(" MIVERDAD", " MI VERDAD")
        clean_lines[bid] = apply_glossary(normalize_text(txt))
        sources_used[bid] = src_used
    reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
    if debug:
        save_debug_clusters(
            image_path=image_path,
            ocr=filtered,
            bubble_boxes=bubble_boxes,
            bubble_indices=bubble_indices,
            clean_lines=clean_lines,
            out_path="debug_clusters.png"
        )
    # FIX: was `"" * 120` (empty string repeated), which produced blank
    # separator lines in stdout and the exported file.
    divider = "=" * 120
    out_lines = ["BUBBLE|ORDER|OCR_SOURCE|ORIGINAL|TRANSLATED|FLAGS", divider]
    print(divider)
    print(f"{'BUBBLE':<8} {'ORDER':<6} {'SOURCE':<12} {'ORIGINAL':<40} {'TRANSLATED':<40} FLAGS")
    print(divider)
    translated_count = 0
    for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
        src_txt = clean_lines[bid].strip()
        if not src_txt:
            continue
        flags = []
        try:
            tgt = translator.translate(src_txt) or ""
        except Exception as e:
            tgt = f"[Translation error: {e}]"
            flags.append("TRANSLATION_ERROR")
        tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
        src_u = src_txt.upper()
        src_engine = sources_used.get(bid, "unknown")
        out_lines.append(
            f"#{bid}|{reading_map.get(bid, bid)}|{src_engine}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
        )
        print(
            f"#{bid:<7} {reading_map.get(bid, bid):<6} {src_engine:<12} "
            f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}"
        )
        translated_count += 1
    out_lines.append(divider)
    out_lines.append(f"✅ Done! {translated_count} bubble(s) translated, {skipped} detection(s) skipped.")
    with open(export_to_file, "w", encoding="utf-8") as f:
        f.write("\n".join(out_lines))
    export_bubbles(
        export_bubbles_to,
        bbox_dict=bubble_boxes,
        quads_dict=bubble_quads,
        indices_dict=bubble_indices,
        ocr=filtered,
        reading_map=reading_map,
        image_shape=image.shape
    )
    print(divider)
    print(f"Saved: {export_to_file}")
    print(f"Saved: {export_bubbles_to}")
    if debug:
        print("Saved: debug_clusters.png")
if __name__ == "__main__":
    # Script entry point: translate a single page with the settings below.
    translate_manga_text(
        image_path="16.jpg",
        source_lang="english",
        target_lang="ca",
        confidence_threshold=0.03,  # Lower threshold for better detection
        min_text_length=1,
        gap_px="auto",
        filter_sound_effects=True,
        quality_threshold=0.62,
        export_to_file="output.txt",
        export_bubbles_to="bubbles.json",
        reading_mode="ltr",  # "ltr" (left-to-right) or "rtl" (right-to-left)
        debug=True,
        use_enhanced_ocr=True  # Enable enhanced multi-pass OCR
    )