From b6b0df47740b648b7fabc785e8d90f31ece7cba9 Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola <guillem.hernandez.sola@gmail.com>
Date: Wed, 22 Apr 2026 10:51:57 +0200
Subject: [PATCH] Generalize manga-translator text filtering and box splitting; update pipeline; remove requirements

---
 .gitignore             |    4 +
 manga-translator.py    | 2895 +++++++++++++++++++---------------------
 pipeline-translator.py |  197 ++-
 requirements           |   79 --
 4 files changed, 1543 insertions(+), 1632 deletions(-)
 delete mode 100644 requirements

diff --git a/.gitignore b/.gitignore
index 646c941..b40dbe3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,10 @@
 
 .venv311/
 
+Spy_x_Family_076/
+
+Dandadan_059/
+
 # Icon must end with two \r
 Icon
 
diff --git a/manga-translator.py b/manga-translator.py
index 5610251..fc53f22 100644
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -21,35 +21,8 @@ warnings.filterwarnings("ignore", category=UserWarning)
 # ============================================================
 # CONFIG
 # ============================================================
-GLOSSARY = {
-    "ANYA": "ANYA",
-    "STARLIGHT ANYA": "STARLIGHT ANYA",
-    "MR. HENDERSON": "MR. HENDERSON",
-    "HENDERSON": "HENDERSON",
-    "STELLA STAR": "STELLA STAR",
-}
-
-SOUND_EFFECT_PATTERNS = [
-    r"^b+i+p+$", r"^sha+$", r"^ha+$", r"^ah+$",
-    r"^ugh+$", r"^bam+$", r"^pow+$", r"^boom+$", r"^bang+$",
-    r"^Grr+$", r"^grrp+$", r"^fshoo+$", r"^fwuip+$",
-    r"^crash+$", r"^thud+$", r"^zip+$", r"^swoosh+$", r"^chirp+$"
-]
-
-TITLE_PATTERNS = [
-    r"^(chapter|episode|vol\.?|volume)\s*\d+$",
-    r"^by\s+.+$",
-]
-
-NOISE_PATTERNS = [
-    r"^[^a-zA-Z0-9\?!.¡¿]+$",
-    r"^BOX[#\s0-9A-Z\-]*$",
-    r"^[0-9]{1,3}\s*[Xx]\s*[0-9]{1,3}$",
-]
-
 TOP_BAND_RATIO = 0.08
 
-
 # ============================================================
 # HELPERS
 # ============================================================
@@ -66,14 +39,6 @@ def normalize_text(text: str) -> str:
     t = re.sub(r"\.{4,}", "...", t)
     return t.strip()
 
-
-def apply_glossary(text: str) -> str:
-    out = text or ""
-    for k in sorted(GLOSSARY.keys(), key=len, reverse=True):
-        out = re.sub(rf"\b{re.escape(k)}\b", GLOSSARY[k], out, flags=re.IGNORECASE)
-    return out
-
-
 def postprocess_translation_general(text: str) -> str:
     t = normalize_text(text)
     t = re.sub(r"\s{2,}", " ", t).strip()
@@ -81,63 +46,124 @@ def postprocess_translation_general(text: str) -> str:
     t = re.sub(r"\.{4,}", "...", t)
     return t
 
-
 def fix_common_ocr_errors(text: str) -> str:
-    """Fix common OCR mistakes in manga text"""
     result = text
-    
-    # Apply context-aware fixes
-    # Fix "O" to "0" only if surrounded by numbers
     result = re.sub(r'(\d)O(\d)', r'\g<1>0\g<2>', result)
     result = re.sub(r'(\d)O([^a-zA-Z])', r'\g<1>0\g<2>', result)
-    
-    # Fix common character confusions
     result = result.replace('|', 'I')
     result = result.replace('`', "'")
-    
     return result
 
-
-def is_sound_effect(text: str) -> bool:
-    cleaned = re.sub(r"[^a-z]", "", (text or "").strip().lower())
-    return any(re.fullmatch(p, cleaned, re.IGNORECASE) for p in SOUND_EFFECT_PATTERNS)
-
-
-def is_title_text(text: str) -> bool:
-    t = (text or "").strip().lower()
-    return any(re.fullmatch(p, t, re.IGNORECASE) for p in TITLE_PATTERNS)
-
-
-def looks_like_box_tag(t: str) -> bool:
-    s = re.sub(r"[^A-Z0-9#]", "", (t or "").upper())
-    if re.fullmatch(r"[BEF]?[O0D]X#?\d{0,3}", s):
-        return True
-    if re.fullmatch(r"B[O0D]X\d{0,3}", s):
-        return True
-    return False
-
-
-def is_noise_text(text: str) -> bool:
-    t = (text or "").strip()
-
-    if re.fullmatch(r"[\?\!\.]+", t):
+def is_valid_language(text: str, source_lang: str) -> bool:
+    if not text:
+        return False
+    clean_text = re.sub(r'[^\w]', '', text)
+    if not clean_text:
         return False
 
-    if len(t) == 1 and t.isalpha():
+    lang = source_lang.lower()
+
+    if lang in ['en', 'english', 'es', 'spanish', 'fr', 'french',
+                'it', 'italian', 'ca', 'catalan', 'de', 'german']:
+        foreign_chars = len(re.findall(
+            r'[\u0600-\u06FF\u0750-\u077F\u3040-\u30FF'
+            r'\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\u1100-\u11FF]',
+            clean_text
+        ))
+        if foreign_chars > 0:
+            return False
+        latin_chars = len(re.findall(r'[a-zA-ZÀ-ÿ]', clean_text))
+        total = len(clean_text)
+        if total <= 3:
+            return latin_chars >= 1
+        if total <= 6:
+            return (latin_chars / total) >= 0.55
+        return (latin_chars / total) >= 0.45
+
+    elif lang in ['ja', 'japanese']:
+        ja_chars = len(re.findall(r'[\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF]', clean_text))
+        if len(clean_text) <= 3:
+            return ja_chars >= 1
+        return (ja_chars / len(clean_text)) >= 0.4
+
+    elif lang in ['ko', 'korean']:
+        ko_chars = len(re.findall(r'[\uAC00-\uD7AF\u1100-\u11FF]', clean_text))
+        if len(clean_text) <= 3:
+            return ko_chars >= 1
+        return (ko_chars / len(clean_text)) >= 0.4
+
+    elif lang in ['zh', 'chinese']:
+        zh_chars = len(re.findall(r'[\u4E00-\u9FFF\u3400-\u4DBF]', clean_text))
+        if len(clean_text) <= 3:
+            return zh_chars >= 1
+        return (zh_chars / len(clean_text)) >= 0.4
+
+    return True
+
+
+_NOISE_TOKENS = {  # tokens is_meaningful_text rejects; compared against the upper-cased, stripped text
+    'P', 'F', 'N', 'M', 'X', 'Z', 'Q',
+    'FN', 'PF', 'NM', 'XZ', 'FSHOO', 'GRRP',
+}
+
+_MANGA_INTERJECTIONS = {  # whitelist: is_meaningful_text returns True for these before any other check
+    'HUH', 'HUH?', 'HUH??', 'HUH?!',
+    'OH', 'OH!', 'OOH', 'OOH!',
+    'AH', 'AH!', 'UH', 'UH...',
+    'HEY', 'HEY!',
+    'EH', 'EH?',
+    'WOW', 'WOW!',
+    'YES', 'NO', 'NO!',
+    'RUN', 'GO', 'GO!',
+    'STOP', 'WAIT',
+    'WHAT', 'WHAT?', 'WHAT?!',
+    'WHY', 'WHY?',
+    'HOW', 'HOW?',
+    'OK', 'OK!', 'OKAY',
+    'EEEEP', 'EEEP',
+    'OMIGOSH',
+    'HMM', 'HMM...',
+    'TSK', 'TCH',
+    'GRRR','I','A',  # single letters listed here bypass the min_alpha_chars check
+    'FWUP', 'FWAP',
+    'SHIVER',
+    'RRRING',
+    'MORNING', 'MORNING.',
+}
+
+def is_meaningful_text(text: str, source_lang: str, min_alpha_chars: int = 2) -> bool:
+    if not text:
+        return False
+    t = text.strip()
+    t_upper = t.upper()
+    t_alpha_only = re.sub(r'[^A-Za-zÀ-ÿ]', '', t_upper)
+    if t_upper in _MANGA_INTERJECTIONS or t_alpha_only in _MANGA_INTERJECTIONS:
+        return True
+
+    alpha_count = sum(c.isalpha() for c in t)
+    if alpha_count < min_alpha_chars:
+        return False
+    if t_upper in _NOISE_TOKENS:
         return False
 
-    if any(re.fullmatch(p, t) for p in NOISE_PATTERNS):
-        return True
-    if looks_like_box_tag(t):
-        return True
+    lang = source_lang.lower()
+    if lang in ['en', 'english', 'es', 'spanish', 'fr', 'french',
+                'it', 'italian', 'ca', 'catalan', 'de', 'german']:
+        non_alpha = sum(not c.isalpha() for c in t)
+        if len(t) > 0 and (non_alpha / len(t)) > 0.60:
+            return False
 
-    if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
-        return True
+    if len(t) >= 3 and len(set(t_upper)) == 1:
+        return False
 
-    symbol_ratio = sum(1 for c in t if not c.isalnum() and not c.isspace()) / max(1, len(t))
-    if len(t) <= 6 and symbol_ratio > 0.60:
-        return True
-    return False
+    if lang in ['en', 'english', 'es', 'spanish', 'fr', 'french',
+                'it', 'italian', 'ca', 'catalan', 'de', 'german']:
+        if len(t) > 4:
+            vowels = len(re.findall(r'[AEIOUaeiouÀ-ÿ]', t))
+            if vowels == 0:
+                return False
+
+    return True
 
 
 def quad_bbox(quad):
@@ -145,12 +171,10 @@ def quad_bbox(quad):
     ys = [p[1] for p in quad]
     return (int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys)))
 
-
 def quad_center(quad):
     x1, y1, x2, y2 = quad_bbox(quad)
     return ((x1 + x2) / 2.0, (y1 + y2) / 2.0)
 
-
 def boxes_union_xyxy(boxes):
     boxes = [b for b in boxes if b is not None]
     if not boxes:
@@ -162,20 +186,17 @@ def boxes_union_xyxy(boxes):
         int(max(b[3] for b in boxes)),
     )
 
-
 def bbox_area_xyxy(b):
     if b is None:
         return 0
     return int(max(0, b[2] - b[0]) * max(0, b[3] - b[1]))
 
-
 def xyxy_to_xywh(b):
     if b is None:
         return None
     x1, y1, x2, y2 = b
     return {"x": int(x1), "y": int(y1), "w": int(max(0, x2 - x1)), "h": int(max(0, y2 - y1))}
 
-
 def overlap_or_near(a, b, gap=0):
     ax1, ay1, ax2, ay2 = a
     bx1, by1, bx2, by2 = b
@@ -183,6 +204,35 @@ def overlap_or_near(a, b, gap=0):
     gap_y = max(0, max(ay1, by1) - min(ay2, by2))
     return gap_x <= gap and gap_y <= gap
 
+def boxes_iou(a, b):
+    """Return intersection area / union area of two (x1, y1, x2, y2) boxes; 0.0 when they do not overlap."""
+    ax1, ay1, ax2, ay2 = a
+    bx1, by1, bx2, by2 = b
+    ix1 = max(ax1, bx1)  # intersection rectangle corners
+    iy1 = max(ay1, by1)
+    ix2 = min(ax2, bx2)
+    iy2 = min(ay2, by2)
+    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)  # clamped to 0 for disjoint boxes
+    if inter == 0:
+        return 0.0
+    area_a = max(0, ax2 - ax1) * max(0, ay2 - ay1)
+    area_b = max(0, bx2 - bx1) * max(0, by2 - by1)
+    return inter / max(1, area_a + area_b - inter)  # max(1, ...) keeps the divisor non-zero
+
+def boxes_overlap_ratio(a, b):
+    """Return intersection area / area of the SMALLER of two (x1, y1, x2, y2) boxes; 0.0 when disjoint."""
+    ax1, ay1, ax2, ay2 = a
+    bx1, by1, bx2, by2 = b
+    ix1 = max(ax1, bx1)  # intersection rectangle corners
+    iy1 = max(ay1, by1)
+    ix2 = min(ax2, bx2)
+    iy2 = min(ay2, by2)
+    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)  # clamped to 0 for disjoint boxes
+    if inter == 0:
+        return 0.0
+    area_a = max(0, ax2 - ax1) * max(0, ay2 - ay1)
+    area_b = max(0, bx2 - bx1) * max(0, by2 - by1)
+    return inter / max(1, min(area_a, area_b))  # max(1, ...) keeps the divisor non-zero
 
 def ocr_candidate_score(text: str) -> float:
     if not text:
@@ -191,94 +241,66 @@ def ocr_candidate_score(text: str) -> float:
     n = len(t)
     if n == 0:
         return 0.0
-
     alpha = sum(c.isalpha() for c in t) / n
     spaces = sum(c.isspace() for c in t) / n
     punct_ok = sum(c in ".,!?'-:;()[]\"¡¿" for c in t) / n
     bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"¡¿]", t)) / n
-
     penalty = 0.0
     if re.search(r"\b[A-Z]\b", t):
         penalty += 0.05
     if re.search(r"[0-9]{2,}", t):
         penalty += 0.08
-    if re.search(r"(..)\1\1", t):
-        penalty += 0.08
-
     score = (0.62 * alpha) + (0.10 * spaces) + (0.20 * punct_ok) - (0.45 * bad) - penalty
     return max(0.0, min(1.0, score))
 
+def quad_is_horizontal(quad, ratio_threshold=1.5) -> bool:  # True when bbox width/height >= ratio_threshold
+    x1, y1, x2, y2 = quad_bbox(quad)
+    w = max(1, x2 - x1)  # at least 1 so a degenerate bbox cannot divide by zero
+    h = max(1, y2 - y1)
+    return (w / h) >= ratio_threshold
+
+def quad_is_vertical(quad, ratio_threshold=1.5) -> bool:  # True when bbox height/width >= ratio_threshold
+    x1, y1, x2, y2 = quad_bbox(quad)
+    w = max(1, x2 - x1)  # at least 1 so a degenerate bbox cannot divide by zero
+    h = max(1, y2 - y1)
+    return (h / w) >= ratio_threshold
+
 
 # ============================================================
 # ENHANCED IMAGE PREPROCESSING
 # ============================================================
 def enhance_image_for_ocr(image_bgr, upscale_factor=2.5):
-    """Enhanced preprocessing for better OCR results"""
-    
-    # Upscale first
     h, w = image_bgr.shape[:2]
-    new_w = int(w * upscale_factor)
-    new_h = int(h * upscale_factor)
-    upscaled = cv2.resize(image_bgr, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
-    
-    # Convert to grayscale
+    upscaled = cv2.resize(image_bgr, (int(w * upscale_factor), int(h * upscale_factor)),
+                          interpolation=cv2.INTER_CUBIC)
     gray = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY)
-    
-    # Denoise
     denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21)
-    
-    # Increase contrast with CLAHE
     clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
     enhanced = clahe.apply(denoised)
-    
-    # Sharpen
-    kernel_sharpen = np.array([[-1,-1,-1],
-                               [-1, 9,-1],
-                               [-1,-1,-1]])
+    kernel_sharpen = np.array([[-1,-1,-1], [-1, 9,-1], [-1,-1,-1]])
     sharpened = cv2.filter2D(enhanced, -1, kernel_sharpen)
-    
-    # Adaptive thresholding for clean text
     binary = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 11, 2)
-    
-    # Morphological operations to clean up
     kernel = np.ones((2, 2), np.uint8)
     cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
-    
-    # Convert back to BGR for Vision API
     return cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
 
-
 def detect_small_text_regions(image_bgr, existing_quads):
-    """Detect small text regions that might have been missed"""
     gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
-    
-    # Create mask of existing detections
     mask = np.zeros(gray.shape, dtype=np.uint8)
     for quad in existing_quads:
         pts = np.array(quad, dtype=np.int32)
         cv2.fillPoly(mask, [pts], 255)
-    
-    # Invert mask to find undetected regions
     mask_inv = cv2.bitwise_not(mask)
-    
-    # Find text-like regions
     _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
     binary_masked = cv2.bitwise_and(binary, binary, mask=mask_inv)
-    
-    # Find contours in undetected regions
     contours, _ = cv2.findContours(binary_masked, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    
-    # Filter for text-like contours
     text_regions = []
     for contour in contours:
         x, y, w, h = cv2.boundingRect(contour)
         area = w * h
-        
-        # Filter by size and aspect ratio
         if 50 < area < 5000 and 0.1 < h/max(w, 1) < 10:
             text_regions.append((x, y, x+w, y+h))
-    
     return text_regions
 
 
@@ -286,281 +308,598 @@ def detect_small_text_regions(image_bgr, existing_quads):
 # SPEECH BUBBLE DETECTION
 # ============================================================
 def detect_speech_bubbles(image_bgr: np.ndarray) -> List[np.ndarray]:
-    """Detect speech bubble contours for box splitting"""
     gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
-    
-    # Apply adaptive thresholding
     thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, 11, 2)
-    
-    # Find contours
     contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    
-    # Filter contours by area
-    bubble_contours = []
-    for contour in contours:
-        area = cv2.contourArea(contour)
-        if area > 500:  # Minimum bubble area
-            bubble_contours.append(contour)
-    
-    return bubble_contours
+    return [c for c in contours if cv2.contourArea(c) > 500]
 
-
-def is_quad_in_bubble(quad_bbox_xyxy: Tuple[int, int, int, int], 
-                      bubble_contour: np.ndarray, 
-                      tolerance: int = 5) -> bool:
-    """Check if a quad (text box) is inside a speech bubble"""
+def is_quad_in_bubble(quad_bbox_xyxy, bubble_contour, tolerance=5):
     x1, y1, x2, y2 = quad_bbox_xyxy
-    cx = (x1 + x2) // 2
-    cy = (y1 + y2) // 2
-    
-    # Check if center point is inside contour
-    result = cv2.pointPolygonTest(bubble_contour, (float(cx), float(cy)), False)
-    
-    return result >= -tolerance
+    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2  # test the bbox centre against the bubble contour
+    return cv2.pointPolygonTest(bubble_contour, (float(cx), float(cy)), True) >= -tolerance  # measureDist=True: signed px distance; with False the result is only -1/0/+1, so this was always True
 
-
-def split_indices_by_bubble(indices: List[int], 
-                            ocr: List[Tuple], 
-                            bubble_contours: List[np.ndarray]) -> List[List[int]]:
-    """Split indices into groups based on bubble membership"""
+def split_indices_by_bubble(indices, ocr, bubble_contours):
     if not indices:
         return []
-    
-    # Group indices by which bubble they belong to
     bubble_groups = {}
     outside_group = []
-    
     for idx in indices:
         bbox = quad_bbox(ocr[idx][0])
-        found_bubble = False
-        
-        for bubble_idx, bubble in enumerate(bubble_contours):
+        found = False
+        for bidx, bubble in enumerate(bubble_contours):
             if is_quad_in_bubble(bbox, bubble):
-                if bubble_idx not in bubble_groups:
-                    bubble_groups[bubble_idx] = []
-                bubble_groups[bubble_idx].append(idx)
-                found_bubble = True
+                bubble_groups.setdefault(bidx, []).append(idx)
+                found = True
                 break
-        
-        if not found_bubble:
+        if not found:
             outside_group.append(idx)
-    
-    # Create result list
     result = list(bubble_groups.values())
-    
-    # Add outside quads as separate groups
     if outside_group:
         result.append(outside_group)
-    
     return result
 
-
-def check_vertical_alignment_split(indices: List[int], 
-                                   ocr: List[Tuple], 
-                                   threshold: int = 20) -> List[List[int]]:
-    """Split indices that are vertically separated"""
+def check_vertical_alignment_split(indices, ocr, threshold=20):
     if len(indices) <= 1:
         return [indices]
-    
-    # Sort by y-coordinate
-    items = [(idx, quad_bbox(ocr[idx][0])) for idx in indices]
-    items.sort(key=lambda x: x[1][1])
-    
-    groups = []
-    current_group = [items[0][0]]
-    
+    items = sorted([(idx, quad_bbox(ocr[idx][0])) for idx in indices], key=lambda x: x[1][1])
+    groups, current_group = [], [items[0][0]]
     for i in range(1, len(items)):
-        prev_bbox = items[i-1][1]
-        curr_bbox = items[i][1]
-        
-        # Check vertical gap
-        gap = curr_bbox[1] - prev_bbox[3]
-        
-        if gap > threshold:
-            # Start new group
+        if items[i][1][1] - items[i-1][1][3] > threshold:
             groups.append(current_group)
             current_group = [items[i][0]]
         else:
             current_group.append(items[i][0])
-    
     if current_group:
         groups.append(current_group)
-    
     return groups
 
 
 # ============================================================
-# BOX FIXING FUNCTIONS
+# QUAD SIZE VALIDATION AND SPLITTING
 # ============================================================
-def apply_page_specific_fixes(bubbles: Dict[int, List[str]],
-                              bubble_boxes: Dict[int, Tuple],
-                              bubble_quads: Dict[int, List],
-                              bubble_indices: Dict[int, List[int]],
-                              ocr: List[Tuple],
-                              image_bgr: np.ndarray,
-                              page_identifier: str) -> Tuple[Dict, Dict, Dict, Dict]:
-    """Apply page-specific fixes to bubble detection issues"""
-    
-    # Detect speech bubbles for splitting logic
+def is_quad_oversized(quad, median_height, width_threshold=8.0):  # True for quads much wider than the typical line height
+    x1, y1, x2, y2 = quad_bbox(quad)
+    w, h = x2 - x1, max(1, y2 - y1)  # h is at least 1 so w / h cannot divide by zero
+    return w > median_height * width_threshold or w / h > 12.0  # too wide in absolute terms, or aspect ratio above 12:1
+
+def split_oversized_quad_by_content(image_bgr, quad, text, conf, median_height):  # returns [(quad, text, conf)]: two entries if split, else the input
+    x1, y1, x2, y2 = quad_bbox(quad)
+    w, h = x2 - x1, max(1, y2 - y1)
+    pad = 2  # extra rows taken above and below the bbox
+    roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad),
+                    max(0,x1):min(image_bgr.shape[1],x2)]  # crop clamped to the image bounds
+    if roi.size == 0:
+        return [(quad, text, conf)]
+    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)  # inverted Otsu binarization
+    v_proj = np.sum(binary, axis=0)  # one sum per pixel column of the ROI
+    gap_threshold = h * 255 * 0.20  # a column below 20% of h * 255 counts as part of a gap
+    gaps, in_gap, gap_start = [], False, 0
+    for x in range(len(v_proj)):
+        if v_proj[x] < gap_threshold:
+            if not in_gap: gap_start, in_gap = x, True  # a run of gap columns starts here
+        else:
+            if in_gap:  # run ended; a run still open at the right edge is never recorded
+                gw = x - gap_start
+                if gw >= max(int(median_height * 0.8), 15):  # keep only runs at least this wide (px)
+                    gaps.append((gap_start + gw // 2, gw))  # (centre column, width)
+                in_gap = False
+    if not gaps:
+        return [(quad, text, conf)]
+    gaps.sort(key=lambda g: g[1], reverse=True)  # widest gap first
+    split_x_abs = max(0, x1) + gaps[0][0]  # ROI column 0 corresponds to image x = max(0, x1)
+    if ' ' in text:
+        char_w = w / max(1, len(text))  # average width per character across the quad
+        split_idx = int((split_x_abs - x1) / max(1e-6, char_w))
+        spaces = [i for i, c in enumerate(text) if c == ' ']
+        if spaces:
+            split_idx = min(spaces, key=lambda i: abs(i - split_idx))  # snap to the nearest space
+        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+    else:
+        split_idx = int(len(text) * (split_x_abs - x1) / w)  # proportional cut when there is no space
+        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+    if tl and tr:  # split only when both halves keep some text
+        return [([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf),
+                ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]  # both halves reuse conf
+    return [(quad, text, conf)]
+
+def validate_and_split_oversized_quads(image_bgr, filtered_ocr):  # returns (new list of (quad, text, conf), number of quads split)
+    if not filtered_ocr:
+        return filtered_ocr, 0
+    heights = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr]
+    median_height = float(np.median(heights)) if heights else 14.0  # median bbox height; 14.0 is the fallback for an empty list
+    result, splits_made = [], 0
+    for quad, text, conf in filtered_ocr:
+        if is_quad_oversized(quad, median_height, 8.0):
+            sr = split_oversized_quad_by_content(image_bgr, quad, text, conf, median_height)
+            if len(sr) > 1:  # more than one entry means the quad was actually split
+                result.extend(sr); splits_made += 1
+            else:
+                result.append((quad, text, conf))
+        else:
+            result.append((quad, text, conf))
+    return result, splits_made
+
+
+# ============================================================
+# HORIZONTAL GAP DETECTION AT QUAD LEVEL
+# ============================================================
+def detect_horizontal_gap_in_group(indices, ocr, med_h, gap_factor=2.5):
+    """
+    Find the widest horizontal gap between the x-sorted quads of one group. Returns
+    (left_indices, right_indices) when that gap exceeds med_h * gap_factor, else
+    None. Added for merged column groups (e.g. BOX#8 in debug_clusters_016).
+    """
+    if len(indices) < 2:
+        return None
+    items = sorted(indices, key=lambda i: quad_center(ocr[i][0])[0])  # order by bbox centre x
+    boxes = [quad_bbox(ocr[i][0]) for i in items]
+    gap_threshold = med_h * gap_factor
+    best_gap, best_split = 0.0, None
+    for k in range(len(items) - 1):
+        gap = boxes[k + 1][0] - boxes[k][2]  # next box's left edge minus this box's right edge
+        if gap > gap_threshold and gap > best_gap:
+            best_gap, best_split = gap, k
+    if best_split is None:
+        return None
+    left_group = [items[i] for i in range(best_split + 1)]  # everything up to and including position k
+    right_group = [items[i] for i in range(best_split + 1, len(items))]
+    if not left_group or not right_group:
+        return None
+    return (left_group, right_group)
+
+
+def orientation_compatible(idx_a, idx_b, ocr):
+    """
+    Return False when one quad is tall/narrow (w/h < 0.6) and the other is wide
+    (w/h > 2.0), True otherwise. Added for BOX#1-type problems in debug_clusters_015.
+    """
+    ba = quad_bbox(ocr[idx_a][0])
+    bb = quad_bbox(ocr[idx_b][0])
+    wa, ha = max(1, ba[2]-ba[0]), max(1, ba[3]-ba[1])  # clamp to 1 so the ratios below cannot divide by zero
+    wb, hb = max(1, bb[2]-bb[0]), max(1, bb[3]-bb[1])
+    ra, rb = wa/ha, wb/hb  # width/height aspect ratio of each quad
+    if (ra < 0.6 and rb > 2.0) or (rb < 0.6 and ra > 2.0):
+        return False
+    return True
+
+
+# ============================================================
+# WIDE QUAD COLUMN SPLIT — pre-grouping
+# ============================================================
+def split_wide_quad_by_column_gap(image_bgr, quad, text, conf, med_h,
+                                   min_gap_factor=1.8):
+    """
+    Split one OCR quad that spans two text columns (BOX#6-type problem) at the
+    widest near-empty run of columns in its binarized pixel projection. Returns a
+    list of (quad, text, conf): two entries on a split, else the input unchanged.
+    Uses a lower ink threshold (12%) than split_oversized_quad_by_content (20%).
+    """
+    x1, y1, x2, y2 = quad_bbox(quad)
+    w, h = x2 - x1, max(1, y2 - y1)
+
+    # Only attempt if the quad is wide enough to plausibly span two columns
+    if w < med_h * 3.0:
+        return [(quad, text, conf)]
+
+    pad = 2  # extra rows taken above and below the bbox
+    roi = image_bgr[max(0, y1-pad):min(image_bgr.shape[0], y2+pad),
+                    max(0, x1):min(image_bgr.shape[1], x2)]  # crop clamped to the image bounds
+    if roi.size == 0:
+        return [(quad, text, conf)]
+
+    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)  # inverted Otsu binarization
+    v_proj = np.sum(binary, axis=0)  # one sum per pixel column of the ROI
+
+    # Threshold: column gap must be nearly empty
+    gap_threshold = h * 255 * 0.12
+    min_gap_px = max(int(med_h * min_gap_factor), 10)  # minimum gap width in pixels
+
+    gaps, in_gap, gap_start = [], False, 0
+    for x in range(len(v_proj)):
+        if v_proj[x] < gap_threshold:
+            if not in_gap: gap_start, in_gap = x, True  # a run of gap columns starts here
+        else:
+            if in_gap:  # run ended; a run still open at the right edge is never recorded
+                gw = x - gap_start
+                if gw >= min_gap_px:
+                    gaps.append((gap_start + gw // 2, gw))  # (centre column, width)
+                in_gap = False
+
+    if not gaps:
+        return [(quad, text, conf)]
+
+    # Use the widest gap as the split point
+    gaps.sort(key=lambda g: g[1], reverse=True)
+    split_x_rel = gaps[0][0]  # column index inside the ROI
+    split_x_abs = max(0, x1) + split_x_rel  # ROI column 0 is image x = max(0, x1), not x1
+
+    # Ensure the split produces two non-trivial halves
+    if split_x_abs - x1 < med_h or x2 - split_x_abs < med_h:
+        return [(quad, text, conf)]
+
+    if ' ' in text:
+        char_w = w / max(1, len(text))  # average width per character across the quad
+        split_idx = int((split_x_abs - x1) / max(1e-6, char_w))  # offset measured from the quad's own left edge
+        spaces = [i for i, c in enumerate(text) if c == ' ']
+        if spaces:
+            split_idx = min(spaces, key=lambda i: abs(i - split_idx))  # snap to the nearest space
+        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+    else:
+        split_idx = int(len(text) * (split_x_abs - x1) / w)  # proportional cut when there is no space
+        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+
+    if tl and tr:  # split only when both halves keep some text
+        return [([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf),
+                ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]
+    return [(quad, text, conf)]
+
+
+def apply_column_gap_splits(image_bgr, ocr_list, med_h):
+    """
+    Apply split_wide_quad_by_column_gap to every (quad, text, conf) in ocr_list and
+    return (new_list, number_of_quads_split). Intended to run BEFORE grouping.
+    """
+    result, splits_made = [], 0
+    for quad, text, conf in ocr_list:
+        parts = split_wide_quad_by_column_gap(image_bgr, quad, text, conf, med_h)
+        if len(parts) > 1:  # more than one part means this quad was split
+            splits_made += 1
+        result.extend(parts)
+    if splits_made:  # report only when at least one quad was split
+        print(f"📐 Column-gap split: {splits_made} wide quad(s) split before grouping")
+    return result, splits_made
+
+
+# ============================================================
+# GENERALIZED BOX FIXING FUNCTIONS
+# ============================================================
+def detect_and_split_multi_bubble_boxes(bubble_boxes, bubble_indices, bubble_quads,
+                                        bubbles, ocr, image_bgr):
+    all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))]
+    med_h = float(np.median(all_h)) if all_h else 14.0
     bubble_contours = detect_speech_bubbles(image_bgr)
-    
-    fixes_applied = []
-    
-    # PAGE 15 FIXES
-    if "15" in page_identifier:
-        # Fix: Merge Box 12 and Box 16 into one box
-        if 12 in bubbles and 16 in bubbles:
-            # Merge indices
-            merged_indices = sorted(set(bubble_indices[12] + bubble_indices[16]))
-            
-            # Rebuild merged box
-            bubbles[12] = build_lines_from_indices(merged_indices, ocr)
-            bubble_boxes[12] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in merged_indices])
-            bubble_quads[12] = [ocr[i][0] for i in merged_indices]
-            bubble_indices[12] = merged_indices
-            
-            # Remove box 16
-            del bubbles[16]
-            del bubble_boxes[16]
-            del bubble_quads[16]
-            del bubble_indices[16]
-            
-            fixes_applied.append("Page 15: Merged BOX#12 and BOX#16")
-    
-    # PAGE 16 FIXES
-    if "16" in page_identifier:
-        next_bid = max(bubbles.keys()) + 1 if bubbles else 100
-        
-        # Fix Box 15: Split quads outside bubble
-        if 15 in bubbles:
-            split_groups = split_indices_by_bubble(bubble_indices[15], ocr, bubble_contours)
-            
-            if len(split_groups) > 1:
-                # Keep main group in BOX#15
-                bubbles[15] = build_lines_from_indices(split_groups[0], ocr)
-                bubble_boxes[15] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
-                bubble_quads[15] = [ocr[i][0] for i in split_groups[0]]
-                bubble_indices[15] = split_groups[0]
-                
-                # Create new boxes for other groups
-                for group in split_groups[1:]:
-                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
-                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
-                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
-                    bubble_indices[next_bid] = group
-                    next_bid += 1
-                
-                fixes_applied.append(f"Page 16: Split BOX#15 into {len(split_groups)} parts")
-        
-        # Fix Box 8: Split bubble vs outside quads
-        if 8 in bubbles:
-            split_groups = split_indices_by_bubble(bubble_indices[8], ocr, bubble_contours)
-            
-            if len(split_groups) > 1:
-                bubbles[8] = build_lines_from_indices(split_groups[0], ocr)
-                bubble_boxes[8] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
-                bubble_quads[8] = [ocr[i][0] for i in split_groups[0]]
-                bubble_indices[8] = split_groups[0]
-                
-                for group in split_groups[1:]:
-                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
-                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
-                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
-                    bubble_indices[next_bid] = group
-                    next_bid += 1
-                
-                fixes_applied.append(f"Page 16: Split BOX#8 into {len(split_groups)} parts")
-        
-        # Fix Box 18: Split into 2 separate boxes
-        if 18 in bubbles:
-            # Try bubble-based split first
-            split_groups = split_indices_by_bubble(bubble_indices[18], ocr, bubble_contours)
-            
-            if len(split_groups) == 1:
-                # If bubble detection doesn't work, try vertical alignment
-                split_groups = check_vertical_alignment_split(bubble_indices[18], ocr, threshold=30)
-            
-            if len(split_groups) > 1:
-                bubbles[18] = build_lines_from_indices(split_groups[0], ocr)
-                bubble_boxes[18] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
-                bubble_quads[18] = [ocr[i][0] for i in split_groups[0]]
-                bubble_indices[18] = split_groups[0]
-                
-                for group in split_groups[1:]:
-                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
-                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
-                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
-                    bubble_indices[next_bid] = group
-                    next_bid += 1
-                
-                fixes_applied.append(f"Page 16: Split BOX#18 into {len(split_groups)} parts")
-    
-    # PAGE 19 FIXES
-    if "19" in page_identifier:
-        next_bid = max(bubbles.keys()) + 1 if bubbles else 100
-        
-        # Fix Box 5: Split into 4 different boxes
-        if 5 in bubbles:
-            # First split by bubble
-            split_groups = split_indices_by_bubble(bubble_indices[5], ocr, bubble_contours)
-            
-            # Then split each group by vertical alignment
-            final_groups = []
+
+    new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {}
+    next_bid = 1
+    splits_made = []
+
+    for bid, indices in bubble_indices.items():
+        if len(indices) < 2:
+            new_bubbles[next_bid] = bubbles[bid]
+            new_boxes[next_bid] = bubble_boxes[bid]
+            new_quads[next_bid] = bubble_quads[bid]
+            new_indices[next_bid] = indices
+            next_bid += 1
+            continue
+
+        split_groups = split_indices_by_bubble(indices, ocr, bubble_contours)
+        if len(split_groups) > 1:
             for group in split_groups:
-                vertical_splits = check_vertical_alignment_split(group, ocr, threshold=25)
-                final_groups.extend(vertical_splits)
-            
-            if len(final_groups) > 1:
-                bubbles[5] = build_lines_from_indices(final_groups[0], ocr)
-                bubble_boxes[5] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in final_groups[0]])
-                bubble_quads[5] = [ocr[i][0] for i in final_groups[0]]
-                bubble_indices[5] = final_groups[0]
-                
-                for group in final_groups[1:]:
-                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
-                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
-                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
-                    bubble_indices[next_bid] = group
+                if group:
+                    new_bubbles[next_bid] = build_lines_from_indices(group, ocr)
+                    new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
+                    new_quads[next_bid] = [ocr[i][0] for i in group]
+                    new_indices[next_bid] = group
                     next_bid += 1
-                
-                fixes_applied.append(f"Page 19: Split BOX#5 into {len(final_groups)} parts")
-        
-        # Fix Box 11: Split into 2 boxes
-        if 11 in bubbles:
-            split_groups = split_indices_by_bubble(bubble_indices[11], ocr, bubble_contours)
-            
-            if len(split_groups) > 1:
-                bubbles[11] = build_lines_from_indices(split_groups[0], ocr)
-                bubble_boxes[11] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in split_groups[0]])
-                bubble_quads[11] = [ocr[i][0] for i in split_groups[0]]
-                bubble_indices[11] = split_groups[0]
-                
-                for group in split_groups[1:]:
-                    bubbles[next_bid] = build_lines_from_indices(group, ocr)
-                    bubble_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
-                    bubble_quads[next_bid] = [ocr[i][0] for i in group]
-                    bubble_indices[next_bid] = group
+            splits_made.append(f"BOX#{bid} → {len(split_groups)} bubbles")
+            continue
+
+        vertical_splits = check_vertical_alignment_split(indices, ocr, threshold=int(med_h * 2.0))
+        if len(vertical_splits) > 1:
+            for group in vertical_splits:
+                if group:
+                    new_bubbles[next_bid] = build_lines_from_indices(group, ocr)
+                    new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
+                    new_quads[next_bid] = [ocr[i][0] for i in group]
+                    new_indices[next_bid] = group
                     next_bid += 1
-                
-                fixes_applied.append(f"Page 19: Split BOX#11 into {len(split_groups)} parts")
-    
-    # Print fixes applied
-    if fixes_applied:
-        print(f"\n🔧 Page-specific fixes applied:")
-        for fix in fixes_applied:
-            print(f"   ✓ {fix}")
-    
+            splits_made.append(f"BOX#{bid} → {len(vertical_splits)} vertical groups")
+            continue
+
+        box = bubble_boxes[bid]
+        x1, y1, x2, y2 = box
+        if (x2 - x1) > med_h * 10:
+            x_centers = [quad_center(ocr[i][0])[0] for i in indices]
+            x_median = np.median(x_centers)
+            left_group = [i for i in indices if quad_center(ocr[i][0])[0] < x_median]
+            right_group = [i for i in indices if quad_center(ocr[i][0])[0] >= x_median]
+            if left_group and right_group:
+                left_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in left_group])
+                right_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in right_group])
+                if right_box[0] - left_box[2] > med_h * 1.5:
+                    for grp in [left_group, right_group]:
+                        new_bubbles[next_bid] = build_lines_from_indices(grp, ocr)
+                        new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp])
+                        new_quads[next_bid] = [ocr[i][0] for i in grp]
+                        new_indices[next_bid] = grp
+                        next_bid += 1
+                    splits_made.append(f"BOX#{bid} → 2 horizontal panels")
+                    continue
+
+        new_bubbles[next_bid] = bubbles[bid]
+        new_boxes[next_bid] = bubble_boxes[bid]
+        new_quads[next_bid] = bubble_quads[bid]
+        new_indices[next_bid] = indices
+        next_bid += 1
+
+    if splits_made:
+        print(f"\n🔧 Split {len(splits_made)} multi-bubble box(es):")
+        for s in splits_made: print(f"   ✓ {s}")
+
+    return new_bubbles, new_boxes, new_quads, new_indices
+
+
+def detect_and_merge_fragmented_bubbles(bubble_boxes, bubble_indices, bubble_quads,
+                                        bubbles, ocr, image_bgr):
+    """Merge boxes whose centers lie inside the same detected speech-bubble contour.
+
+    Boxes are paired when both centers fall in one contour and sit within 3x / 6x
+    the median OCR quad height horizontally / vertically; pairs are then folded
+    transitively. Returns (bubbles, boxes, quads, indices) renumbered from 1, or
+    the inputs unchanged when no pair qualifies.
+    """
+    all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))]
+    med_h = float(np.median(all_h)) if all_h else 14.0
+    bubble_contours = detect_speech_bubbles(image_bgr)
+    bids = list(bubble_boxes.keys())
+    to_merge = []
+    for i in range(len(bids)):
+        for j in range(i + 1, len(bids)):
+            bid_i, bid_j = bids[i], bids[j]
+            box_i, box_j = bubble_boxes[bid_i], bubble_boxes[bid_j]
+            cx_i, cy_i = (box_i[0] + box_i[2]) / 2.0, (box_i[1] + box_i[3]) / 2.0
+            cx_j, cy_j = (box_j[0] + box_j[2]) / 2.0, (box_j[1] + box_j[3]) / 2.0
+            in_same_bubble = any(
+                cv2.pointPolygonTest(c, (cx_i, cy_i), False) >= 0 and
+                cv2.pointPolygonTest(c, (cx_j, cy_j), False) >= 0
+                for c in bubble_contours
+            )
+            if in_same_bubble and abs(cx_i - cx_j) < med_h * 3.0 and abs(cy_i - cy_j) < med_h * 6.0:
+                to_merge.append((bid_i, bid_j) if cy_i < cy_j else (bid_j, bid_i))
+
+    if not to_merge:
+        return bubbles, bubble_boxes, bubble_quads, bubble_indices
+
+    print(f"\n🔗 Merging {len(to_merge)} fragmented bubble(s):")
+    # A pair may bridge two existing groups; fold all of them into the first one so
+    # that no box lands in two groups (which would duplicate its OCR lines).
+    merge_groups = []
+    for top, bottom in to_merge:
+        touched = [g for g in merge_groups if top in g or bottom in g]
+        if not touched:
+            merge_groups.append({top, bottom})
+            continue
+        touched[0].update({top, bottom}, *touched[1:])
+        merge_groups = [g for g in merge_groups if not any(g is t for t in touched[1:])]
+
+    new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {}
+    merged_bids, next_bid = set(), 1
+    for merge_set in merge_groups:
+        merge_list = sorted(merge_set)
+        print(f"   ✓ Merging: {', '.join(f'#{b}' for b in merge_list)}")
+        all_indices = sorted(set(idx for b in merge_list for idx in bubble_indices[b]))
+        merged_bids.update(merge_list)
+        new_bubbles[next_bid] = build_lines_from_indices(all_indices, ocr)
+        new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices])
+        new_quads[next_bid] = [ocr[i][0] for i in all_indices]
+        new_indices[next_bid] = all_indices
+        next_bid += 1
+
+    for bid in bids:
+        if bid not in merged_bids:
+            new_bubbles[next_bid] = bubbles[bid]
+            new_boxes[next_bid] = bubble_boxes[bid]
+            new_quads[next_bid] = bubble_quads[bid]
+            new_indices[next_bid] = bubble_indices[bid]
+            next_bid += 1
+
+    return new_bubbles, new_boxes, new_quads, new_indices
+
+
+def merge_boxes_by_proximity_and_overlap(bubble_boxes, bubble_indices, bubble_quads,
+                                          bubbles, ocr, med_h):
+    """
+    Merge boxes that are stacked closely and sit in the same column.
+
+    Meant for fragments of one speech bubble that the contour detector missed
+    (e.g. dashed outlines). Boxes are visited in ascending id order; every box
+    not yet absorbed acts as an anchor and absorbs each later box whose vertical
+    gap to it is at most 1.5 * med_h and whose horizontal overlap is at least
+    35% of the narrower box's width. Both axes are checked strictly so that
+    adjacent but distinct bubbles are not merged.
+
+    med_h scales the vertical threshold (the caller passes a median text height).
+    Returns (bubbles, bubble_boxes, bubble_quads, bubble_indices) renumbered
+    from 1 with merged groups first, or the inputs unchanged when nothing
+    merges.
+    """
+    def _stacked_in_same_column(a, b):
+        # Vertical gap between the two boxes (0 when they overlap in y).
+        gap_y = max(0, max(a[1], b[1]) - min(a[3], b[3]))
+        # Horizontal intersection, measured against the narrower box.
+        shared_w = max(0, min(a[2], b[2]) - max(a[0], b[0]))
+        narrower = max(1, min(a[2] - a[0], b[2] - b[0]))
+        return gap_y <= med_h * 1.5 and shared_w / narrower >= 0.35
+
+    ordered = sorted(bubble_boxes.keys())
+    groups: Dict[int, List[int]] = {}
+    absorbed = set()
+
+    # Only the anchor's own box is compared; the growing union is not re-tested,
+    # so a box absorbed by one anchor never pulls in further neighbours itself.
+    for pos, anchor in enumerate(ordered):
+        if anchor in absorbed:
+            continue
+        for other in ordered[pos + 1:]:
+            if other in absorbed:
+                continue
+            if _stacked_in_same_column(bubble_boxes[anchor], bubble_boxes[other]):
+                # The first hit creates the group, seeded with the anchor itself.
+                groups.setdefault(anchor, [anchor]).append(other)
+                absorbed.add(other)
+
+    if not groups:
+        return bubbles, bubble_boxes, bubble_quads, bubble_indices
+
+    print(f"\n🔀 Proximity+overlap merge: {len(groups)} group(s):")
+    new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {}
+    consumed = set()
+    next_bid = 1
+
+    # Merged groups come first in the renumbered output.
+    for members in groups.values():
+        members = sorted(set(members))
+        print(f"   ✓ Merging: {', '.join(f'#{b}' for b in members)}")
+        # Union of the members' OCR indices, de-duplicated and sorted.
+        merged = sorted({idx for b in members for idx in bubble_indices[b]})
+        new_bubbles[next_bid] = build_lines_from_indices(merged, ocr)
+        new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in merged])
+        new_quads[next_bid] = [ocr[i][0] for i in merged]
+        new_indices[next_bid] = merged
+        next_bid += 1
+        consumed.update(members)
+
+    # Boxes that took part in no merge are carried over unchanged.
+    for bid in ordered:
+        if bid in consumed:
+            continue
+        new_bubbles[next_bid] = bubbles[bid]
+        new_boxes[next_bid] = bubble_boxes[bid]
+        new_quads[next_bid] = bubble_quads[bid]
+        new_indices[next_bid] = bubble_indices[bid]
+        next_bid += 1
+
+    return new_bubbles, new_boxes, new_quads, new_indices
+
+
+def auto_fix_bubble_detection(bubble_boxes, bubble_indices, bubble_quads,
+                               bubbles, ocr, image_bgr):
+    """Run the split pass and both merge passes; return the regrouped bubble dicts."""
+    print("\n🔍 Running automatic bubble detection fixes...")
+    heights = [max(1, bb[3] - bb[1]) for bb in (quad_bbox(item[0]) for item in ocr)]
+    med_h = float(np.median(heights)) if heights else 14.0
+
+    for fix_pass, extra in ((detect_and_split_multi_bubble_boxes, image_bgr),
+                            (detect_and_merge_fragmented_bubbles, image_bgr),
+                            # Second pass: catch fragments missed by contour detection.
+                            (merge_boxes_by_proximity_and_overlap, med_h)):
+        bubbles, bubble_boxes, bubble_quads, bubble_indices = fix_pass(
+            bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, extra)
     return bubbles, bubble_boxes, bubble_quads, bubble_indices
 
 
+def remove_nested_boxes(bubble_boxes, bubble_indices, bubble_quads, bubbles,
+                        overlap_threshold=0.50):
+    """Drop the smaller box of every pair that overlaps heavily or shares OCR indices.
+
+    A pair conflicts when boxes_overlap_ratio() exceeds overlap_threshold or both
+    boxes reference a common OCR index; the smaller-area box is removed (the later
+    one on ties). The four dicts are mutated in place and returned.
+    """
+    def _area(b):
+        return max(0, b[2] - b[0]) * max(0, b[3] - b[1])
+
+    order, doomed = list(bubble_boxes), set()
+    for pos, keeper in enumerate(order):
+        if keeper in doomed:
+            continue
+        keeper_box = bubble_boxes[keeper]
+        keeper_area = _area(keeper_box)
+        for rival in order[pos + 1:]:
+            if rival in doomed:
+                continue
+            rival_box = bubble_boxes[rival]
+            common = set(bubble_indices[keeper]).intersection(bubble_indices[rival])
+            ratio = boxes_overlap_ratio(keeper_box, rival_box)
+            if not (ratio > overlap_threshold or common):
+                continue
+            if keeper_area < _area(rival_box):
+                doomed.add(keeper)
+                print(f"   🗑️  Removing BOX#{keeper} (overlaps BOX#{rival})")
+                break
+            doomed.add(rival)
+            print(f"   🗑️  Removing BOX#{rival} (overlaps BOX#{keeper})")
+
+    if doomed:
+        print(f"\n🧹 Removed {len(doomed)} overlapping/nested box(es)")
+        for table in (bubble_boxes, bubble_indices, bubble_quads, bubbles):
+            for bid in doomed:
+                table.pop(bid, None)
+    return bubbles, bubble_boxes, bubble_quads, bubble_indices
+
+
+def enforce_max_box_size(bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr,
+                         max_width_ratio=0.6, max_height_ratio=0.5, image_shape=None):
+    """Split boxes that exceed a fraction of the page size into smaller boxes.
+
+    A box wider than max_width_ratio * image width or taller than
+    max_height_ratio * image height is split by columns (aggressive thresholds)
+    or, failing that, by rows; boxes that cannot be split are kept as they are.
+    Output dicts are renumbered from 1. With image_shape=None the inputs are
+    returned untouched.
+    """
+    if image_shape is None:
+        return bubbles, bubble_boxes, bubble_quads, bubble_indices
+    page_h, page_w = image_shape[:2]
+    width_cap, height_cap = page_w * max_width_ratio, page_h * max_height_ratio
+    out_bubbles, out_boxes, out_quads, out_indices = {}, {}, {}, {}
+    report = []
+
+    def _emit(lines, box, quads, idxs):
+        slot = len(out_bubbles) + 1
+        out_bubbles[slot], out_boxes[slot] = lines, box
+        out_quads[slot], out_indices[slot] = quads, idxs
+
+    for bid, box in bubble_boxes.items():
+        x1, y1, x2, y2 = box
+        w, h = x2 - x1, y2 - y1
+        parts = None
+        if w > width_cap or h > height_cap:
+            parts = split_bubble_if_multiple_columns(
+                bubble_indices[bid], ocr, bid=bid, use_aggressive_thresholds=True)
+            if not parts:
+                parts = split_bubble_if_multiple_rows(bubble_indices[bid], ocr, bid=bid)
+        if not parts:
+            _emit(bubbles[bid], box, bubble_quads[bid], bubble_indices[bid])
+            continue
+        for grp in parts:
+            _emit(build_lines_from_indices(grp, ocr),
+                  boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]),
+                  [ocr[i][0] for i in grp], grp)
+        report.append(f"BOX#{bid} (oversized: {w}x{h}px)")
+
+    if report:
+        print(f"\n📏 Split {len(report)} oversized box(es):")
+        for entry in report:
+            print(f"   ✓ {entry}")
+    return out_bubbles, out_boxes, out_quads, out_indices
+
+
+def should_merge_groups(group1_indices, group2_indices, ocr, median_height,
+                        max_vertical_gap=None):
+    """Return True when two OCR index groups are aligned in x and close in y.
+
+    Box centers must be within 1.8x median_height horizontally and the vertical
+    gap must not exceed max_vertical_gap (default 2.5x median_height)."""
+    gap_limit = median_height * 2.5 if max_vertical_gap is None else max_vertical_gap
+    first, second = (boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp])
+                     for grp in (group1_indices, group2_indices))
+    if first is None or second is None:
+        return False
+    if abs((first[0] + first[2]) / 2.0 - (second[0] + second[2]) / 2.0) > median_height * 1.8:
+        return False
+    return max(0, max(first[1], second[1]) - min(first[3], second[3])) <= gap_limit
+
+
 # ============================================================
 # ENHANCED OCR ENGINE
 # ============================================================
 class ImprovedMacVisionDetector:
     def __init__(self, source_lang="en"):
         lang_key = source_lang.lower().strip()
-        
         lang_map = {
             "en": "en-US", "english": "en-US",
             "es": "es-ES", "spanish": "es-ES",
@@ -572,213 +911,122 @@ class ImprovedMacVisionDetector:
             "ko": "ko-KR", "korean": "ko-KR",
             "zh": "zh-Hans", "chinese": "zh-Hans"
         }
-        
-        apple_lang = lang_map.get(lang_key, "en-US")
-        self.langs = [apple_lang]
+        self.langs = [lang_map.get(lang_key, "en-US")]
         print(f"⚡ Using Enhanced Apple Vision OCR (Language: {self.langs[0]})")
-    
+
     def preprocess_variants(self, image_bgr):
-        """Generate multiple preprocessing variants"""
-        variants = []
-        
-        # Variant 1: Enhanced standard
-        variants.append(("enhanced", enhance_image_for_ocr(image_bgr, upscale_factor=2.5)))
-        
-        # Variant 2: High contrast
+        variants = [("enhanced", enhance_image_for_ocr(image_bgr, upscale_factor=2.5))]
         gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
-        _, high_contrast = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-        upscaled_hc = cv2.resize(high_contrast, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
-        variants.append(("high_contrast", cv2.cvtColor(upscaled_hc, cv2.COLOR_GRAY2BGR)))
-        
-        # Variant 3: Bilateral filter (preserves edges)
-        bilateral = cv2.bilateralFilter(image_bgr, 9, 75, 75)
-        upscaled_bil = cv2.resize(bilateral, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
-        variants.append(("bilateral", upscaled_bil))
-        
-        # Variant 4: Inverted (for white text on black)
-        inverted = cv2.bitwise_not(image_bgr)
-        upscaled_inv = cv2.resize(inverted, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
-        variants.append(("inverted", upscaled_inv))
-        
-        # Variant 5: Original upscaled
-        upscaled_orig = cv2.resize(image_bgr, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)
-        variants.append(("original", upscaled_orig))
-        
+        _, hc = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        variants.append(("high_contrast", cv2.cvtColor(
+            cv2.resize(hc, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC),
+            cv2.COLOR_GRAY2BGR)))
+        variants.append(("bilateral", cv2.resize(
+            cv2.bilateralFilter(image_bgr, 9, 75, 75),
+            None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)))
+        variants.append(("inverted", cv2.resize(
+            cv2.bitwise_not(image_bgr),
+            None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)))
+        variants.append(("original", cv2.resize(
+            image_bgr, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC)))
         return variants
-    
+
     def run_vision_ocr(self, image_bgr):
-        """Run Vision OCR on a single image"""
         if image_bgr is None or image_bgr.size == 0:
             return []
-        
         ih, iw = image_bgr.shape[:2]
-        
         success, buffer = cv2.imencode('.png', image_bgr)
         if not success:
             return []
-        
         ns_data = NSData.dataWithBytes_length_(buffer.tobytes(), len(buffer.tobytes()))
         handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(ns_data, None)
         results = []
-        
+
         def completion_handler(request, error):
-            if error:
-                return
-            
-            for observation in request.results():
-                candidate = observation.topCandidates_(1)[0]
-                text = candidate.string()
-                confidence = candidate.confidence()
-                
-                bbox = observation.boundingBox()
+            if error: return
+            for obs in request.results():
+                candidate = obs.topCandidates_(1)[0]
+                text, confidence = candidate.string(), candidate.confidence()
+                bbox = obs.boundingBox()
                 x = bbox.origin.x * iw
-                y_bottom_left = bbox.origin.y * ih
+                y_bl = bbox.origin.y * ih
                 w = bbox.size.width * iw
                 h = bbox.size.height * ih
-                
-                y = ih - y_bottom_left - h
-                
-                quad = [
-                    [int(x), int(y)],
-                    [int(x + w), int(y)],
-                    [int(x + w), int(y + h)],
-                    [int(x), int(y + h)]
-                ]
-                
+                y = ih - y_bl - h
+                quad = [[int(x),int(y)],[int(x+w),int(y)],
+                        [int(x+w),int(y+h)],[int(x),int(y+h)]]
                 results.append((quad, text, confidence))
-        
-        request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
-        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
-        request.setUsesLanguageCorrection_(False)  # Disable for manga
-        request.setRecognitionLanguages_(self.langs)
-        request.setAutomaticallyDetectsLanguage_(True)
-        
-        handler.performRequests_error_([request], None)
+
+        req = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
+        req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
+        req.setUsesLanguageCorrection_(False)
+        req.setRecognitionLanguages_(self.langs)
+        req.setAutomaticallyDetectsLanguage_(True)
+        handler.performRequests_error_([req], None)
         return results
-    
+
     def merge_multi_pass_results(self, all_results, original_shape):
-        """Merge results from multiple preprocessing passes"""
         if not all_results:
             return []
-        
-        # Scale factor to normalize coordinates back to original
         scale_factor = 2.5
-        
-        # Normalize all quads to original image coordinates
-        normalized_results = []
+        normalized = []
         for variant_name, results in all_results:
             for quad, text, conf in results:
-                # Scale quad back to original size
-                scaled_quad = [[int(p[0] / scale_factor), int(p[1] / scale_factor)] for p in quad]
-                normalized_results.append((scaled_quad, text, conf, variant_name))
-        
-        # Group similar detections (same location, similar text)
+                sq = [[int(p[0]/scale_factor), int(p[1]/scale_factor)] for p in quad]
+                normalized.append((sq, text, conf, variant_name))
+
         def quads_overlap(q1, q2, threshold=0.5):
-            b1 = quad_bbox(q1)
-            b2 = quad_bbox(q2)
-            
-            # Calculate IoU
-            x1 = max(b1[0], b2[0])
-            y1 = max(b1[1], b2[1])
-            x2 = min(b1[2], b2[2])
-            y2 = min(b1[3], b2[3])
-            
-            if x2 < x1 or y2 < y1:
-                return False
-            
-            intersection = (x2 - x1) * (y2 - y1)
-            area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
-            area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
-            union = area1 + area2 - intersection
-            
-            iou = intersection / max(union, 1)
-            return iou > threshold
-        
-        # Cluster overlapping detections
-        clusters = []
-        used = set()
-        
-        for i, (quad1, text1, conf1, var1) in enumerate(normalized_results):
-            if i in used:
-                continue
-            
-            cluster = [(quad1, text1, conf1, var1)]
+            b1, b2 = quad_bbox(q1), quad_bbox(q2)
+            x1, y1 = max(b1[0],b2[0]), max(b1[1],b2[1])
+            x2, y2 = min(b1[2],b2[2]), min(b1[3],b2[3])
+            if x2 < x1 or y2 < y1: return False
+            inter = (x2-x1)*(y2-y1)
+            union = (b1[2]-b1[0])*(b1[3]-b1[1]) + (b2[2]-b2[0])*(b2[3]-b2[1]) - inter
+            return inter / max(union, 1) > threshold
+
+        clusters, used = [], set()
+        for i, (q1, t1, c1, v1) in enumerate(normalized):
+            if i in used: continue
+            cluster = [(q1, t1, c1, v1)]
             used.add(i)
-            
-            for j, (quad2, text2, conf2, var2) in enumerate(normalized_results):
-                if j in used or i == j:
-                    continue
-                
-                if quads_overlap(quad1, quad2, threshold=0.5):
-                    cluster.append((quad2, text2, conf2, var2))
+            for j, (q2, t2, c2, v2) in enumerate(normalized):
+                if j in used or i == j: continue
+                if quads_overlap(q1, q2):
+                    cluster.append((q2, t2, c2, v2))
                     used.add(j)
-            
             clusters.append(cluster)
-        
-        # Vote on best result per cluster
+
         final_results = []
         for cluster in clusters:
-            # Sort by confidence
             cluster.sort(key=lambda x: x[2], reverse=True)
-            
-            # Take highest confidence result
-            best_quad, best_text, best_conf, best_var = cluster[0]
-            
-            # If multiple variants agree on text, boost confidence
+            best_quad, best_text, best_conf, _ = cluster[0]
             text_votes = {}
             for _, text, conf, _ in cluster:
-                normalized = normalize_text(text)
-                if normalized:
-                    text_votes[normalized] = text_votes.get(normalized, 0) + conf
-            
+                n = normalize_text(text)
+                if n: text_votes[n] = text_votes.get(n, 0) + conf
             if text_votes:
-                best_voted_text = max(text_votes.items(), key=lambda x: x[1])[0]
-                if best_voted_text != normalize_text(best_text):
-                    # Use voted text if it has more support
-                    best_text = best_voted_text
-            
-            # Apply OCR error fixes
-            best_text = fix_common_ocr_errors(best_text)
-            
-            final_results.append((best_quad, best_text, best_conf))
-        
+                voted = max(text_votes.items(), key=lambda x: x[1])[0]
+                if voted != normalize_text(best_text):
+                    best_text = voted
+            final_results.append((best_quad, fix_common_ocr_errors(best_text), best_conf))
         return final_results
-    
+
     def read(self, image_path_or_array):
-        """Enhanced multi-pass OCR"""
-        if isinstance(image_path_or_array, str):
-            img = cv2.imread(image_path_or_array)
-        else:
-            img = image_path_or_array
-        
+        img = cv2.imread(image_path_or_array) if isinstance(image_path_or_array, str) \
+              else image_path_or_array
         if img is None or img.size == 0:
             return []
-        
-        original_shape = img.shape
-        
-        # Generate preprocessing variants
         variants = self.preprocess_variants(img)
-        
-        # Run OCR on each variant
         all_results = []
-        for variant_name, variant_img in variants:
-            results = self.run_vision_ocr(variant_img)
-            if results:
-                all_results.append((variant_name, results))
-        
-        # Merge and vote on results
-        final_results = self.merge_multi_pass_results(all_results, original_shape)
-        
-        return final_results
+        for vname, vimg in variants:
+            r = self.run_vision_ocr(vimg)
+            if r: all_results.append((vname, r))
+        return self.merge_multi_pass_results(all_results, img.shape)
 
 
-# ============================================================
-# ORIGINAL OCR ENGINE (Fallback)
-# ============================================================
 class MacVisionDetector:
     def __init__(self, source_lang="en"):
         lang_key = source_lang.lower().strip()
-
         lang_map = {
             "en": "en-US", "english": "en-US",
             "es": "es-ES", "spanish": "es-ES",
@@ -790,621 +1038,410 @@ class MacVisionDetector:
             "ko": "ko-KR", "korean": "ko-KR",
             "zh": "zh-Hans", "chinese": "zh-Hans"
         }
-
-        apple_lang = lang_map.get(lang_key, "en-US")
-        self.langs = [apple_lang]
+        self.langs = [lang_map.get(lang_key, "en-US")]  # keys missing from lang_map fall back to "en-US"
         print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
 
     def read(self, image_path_or_array):
-        if isinstance(image_path_or_array, str):
-            img = cv2.imread(image_path_or_array)
-        else:
-            img = image_path_or_array
-
+        img = cv2.imread(image_path_or_array) if isinstance(image_path_or_array, str) \
+              else image_path_or_array  # a str is loaded with cv2.imread; anything else is used as the image itself
         if img is None or img.size == 0:
             return []
-
         ih, iw = img.shape[:2]
-
         success, buffer = cv2.imencode('.png', img)
-        if not success:
-            return []
-
+        if not success: return []  # PNG encoding failed, nothing to hand to Vision
         ns_data = NSData.dataWithBytes_length_(buffer.tobytes(), len(buffer.tobytes()))
         handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(ns_data, None)
         results = []
 
         def completion_handler(request, error):
-            if error:
-                print(f"Vision API Error: {error}")
-                return
-
-            for observation in request.results():
-                candidate = observation.topCandidates_(1)[0]
-                text = candidate.string()
-                confidence = candidate.confidence()
-
-                bbox = observation.boundingBox()
+            if error: return  # NOTE(review): the error is now dropped silently; the removed code printed "Vision API Error: ..." before returning
+            for obs in request.results():  # one observation per recognised text region
+                candidate = obs.topCandidates_(1)[0]  # only the first of the top candidates is used
+                text, confidence = candidate.string(), candidate.confidence()
+                bbox = obs.boundingBox()  # scaled by iw/ih below, so presumably normalised to [0, 1] - confirm against Vision docs
                 x = bbox.origin.x * iw
-                y_bottom_left = bbox.origin.y * ih
+                y_bl = bbox.origin.y * ih  # presumably measured from the bottom edge (the removed name was y_bottom_left) - confirm
                 w = bbox.size.width * iw
                 h = bbox.size.height * ih
-
-                y = ih - y_bottom_left - h
-
-                quad = [
-                    [int(x), int(y)],
-                    [int(x + w), int(y)],
-                    [int(x + w), int(y + h)],
-                    [int(x), int(y + h)]
-                ]
-
+                y = ih - y_bl - h  # flip the y axis so y is the distance from the top of the image
+                quad = [[int(x),int(y)],[int(x+w),int(y)],  # corner order: top-left, top-right,
+                        [int(x+w),int(y+h)],[int(x),int(y+h)]]  # bottom-right, bottom-left
                 results.append((quad, text, confidence))
 
-        request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
-        request.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
-        request.setUsesLanguageCorrection_(True)
-        request.setRecognitionLanguages_(self.langs)
-
-        handler.performRequests_error_([request], None)
+        req = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler)
+        req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
+        req.setUsesLanguageCorrection_(True)
+        req.setRecognitionLanguages_(self.langs)  # single-entry list chosen in __init__
+        req.setAutomaticallyDetectsLanguage_(True)  # NOTE(review): newly added call; presumably needs a recent Vision framework - confirm availability on target macOS
+        handler.performRequests_error_([req], None)  # NOTE(review): return value is not checked; results is returned right after, so the handler must have run by then
         return results
 
-
-# ============================================================
-# SPLITTERS + QUAD NORMALIZATION
-# ============================================================
-def estimate_char_capacity_width(text_len, med_h, k=0.72):
-    return max(18.0, text_len * med_h * k)
-
-
-def shrink_ocr_quad_to_text(quad, text, med_h):
-    x1, y1, x2, y2 = quad_bbox(quad)
-    w = max(1, x2 - x1)
-    h = max(1, y2 - y1)
-
-    t = (text or "").strip()
-    n = max(1, len(t.replace(" ", "")))
-    exp_w = estimate_char_capacity_width(n, med_h, k=0.62)
-    max_w = max(exp_w * 1.35, h * 1.15)
-
-    if w <= max_w:
-        return quad
-
-    cx = (x1 + x2) / 2.0
-    nw = int(round(max_w))
-    nx1 = int(round(cx - nw / 2))
-    nx2 = int(round(cx + nw / 2))
-
-    return [[nx1, y1], [nx2, y1], [nx2, y2], [nx1, y2]]
-
-
-def normalize_ocr_quads(filtered_ocr):
-    if not filtered_ocr:
-        return filtered_ocr
-
-    hs = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr]
-    med_h = float(np.median(hs)) if hs else 14.0
-
-    out = []
-    for quad, text, conf in filtered_ocr:
-        nq = shrink_ocr_quad_to_text(quad, text, med_h)
-        out.append((nq, text, conf))
-    return out
-
-
-def split_abnormal_bridge_quads(image_bgr, filtered_ocr):
-    if not filtered_ocr:
-        return filtered_ocr, 0
-
-    hs = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr]
-    med_h = float(np.median(hs)) if hs else 14.0
-
-    out = []
-    splits = 0
-
-    for quad, text, conf in filtered_ocr:
-        x1, y1, x2, y2 = quad_bbox(quad)
-        w = max(1, x2 - x1)
-        h = max(1, y2 - y1)
-
-        if w > med_h * 11.0 and " " in text and len(text) >= 14:
-            roi = image_bgr[max(0, y1):min(image_bgr.shape[0], y2), max(0, x1):min(image_bgr.shape[1], x2)]
-            if roi.size > 0:
-                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
-                _, inv = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-                proj = np.sum(inv, axis=0)
-
-                s = int(w * 0.18)
-                e = int(w * 0.82)
-                if e > s:
-                    segment = proj[s:e]
-                    valley_rel = int(np.argmin(segment))
-                    valley_x = s + valley_rel
-
-                    low = float(segment[valley_rel])
-                    meanv = float(np.mean(segment))
-                    if low < meanv * 0.52:
-                        split_x = x1 + valley_x
-
-                        char_w = w / max(1, len(text))
-                        split_idx = int((split_x - x1) / max(1e-6, char_w))
-                        spaces = [i for i, c in enumerate(text) if c == " "]
-                        if spaces:
-                            split_idx = min(spaces, key=lambda i: abs(i - split_idx))
-
-                        left_t = text[:split_idx].strip()
-                        right_t = text[split_idx:].strip()
-
-                        if left_t and right_t:
-                            ql = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
-                            qr = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
-                            out.append((ql, left_t, conf))
-                            out.append((qr, right_t, conf))
-                            splits += 1
-                            continue
-
-        out.append((quad, text, conf))
-
-    return out, splits
-
-
-def split_wide_ocr_items(image_bgr, filtered_ocr):
-    new_filtered = []
-    splits_made = 0
-
-    for item in filtered_ocr:
-        quad, text, conf = item
-        x1, y1, x2, y2 = quad_bbox(quad)
-        w = x2 - x1
-        h = max(1, y2 - y1)
-
-        if w > h * 2.5 and len(text) > 5 and ' ' in text:
-            pad = 2
-            roi_y1 = max(0, y1 - pad)
-            roi_y2 = min(image_bgr.shape[0], y2 + pad)
-            roi_x1 = max(0, x1)
-            roi_x2 = min(image_bgr.shape[1], x2)
-
-            roi = image_bgr[roi_y1:roi_y2, roi_x1:roi_x2]
-            if roi.size > 0:
-                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
-                _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-                proj = np.sum(thresh, axis=0)
-
-                start_x = int(w * 0.20)
-                end_x = int(w * 0.80)
-
-                if start_x < end_x:
-                    char_w = w / max(1, len(text))
-                    min_gap_width = max(int(char_w * 2.5), int(h * 0.75))
-
-                    gap_threshold = h * 255 * 0.15
-                    gap_mask = proj < gap_threshold
-
-                    best_gap_start = -1
-                    best_gap_len = 0
-                    current_gap_start = -1
-                    current_gap_len = 0
-
-                    for x_rel in range(start_x, end_x):
-                        if gap_mask[x_rel]:
-                            if current_gap_len == 0:
-                                current_gap_start = x_rel
-                            current_gap_len += 1
-                        else:
-                            if current_gap_len > best_gap_len:
-                                best_gap_len = current_gap_len
-                                best_gap_start = current_gap_start
-                            current_gap_len = 0
-
-                    if current_gap_len > best_gap_len:
-                        best_gap_len = current_gap_len
-                        best_gap_start = current_gap_start
-
-                    if best_gap_len >= min_gap_width:
-                        split_x = roi_x1 + best_gap_start + (best_gap_len // 2)
-
-                        split_idx = int((split_x - x1) / max(1e-6, char_w))
-                        spaces = [i for i, c in enumerate(text) if c == ' ']
-                        if spaces:
-                            best_space = min(spaces, key=lambda i: abs(i - split_idx))
-                            if abs(best_space - split_idx) < len(text) * 0.35:
-                                split_idx = best_space
-
-                        text_left = text[:split_idx].strip()
-                        text_right = text[split_idx:].strip()
-
-                        if text_left and text_right:
-                            quad_left = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
-                            quad_right = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
-                            new_filtered.append((quad_left, text_left, conf))
-                            new_filtered.append((quad_right, text_right, conf))
-                            splits_made += 1
-                            continue
-
-        new_filtered.append(item)
-
-    return new_filtered, splits_made
-
-
-def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
-    x1, y1, x2, y2 = bbox_xyxy
-    w = x2 - x1
-    h = y2 - y1
-
-    if bubble_quads is not None and len(bubble_quads) < 4:
-        return None
-
-    if w < 50 or h < 50:
-        return None
-
-    roi = image_bgr[y1:y2, x1:x2]
-    if roi.size == 0:
-        return None
-
-    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
-    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
-
-    vertical_projection = np.sum(thresh, axis=0)
-
-    search_start = int(w * 0.25)
-    search_end = int(w * 0.75)
-
-    if search_start >= search_end:
-        return None
-
-    peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start
-    peak_val = vertical_projection[peak_x_relative]
-
-    threshold_val = h * 255 * 0.25
-    significant_peaks = []
-
-    for x_rel in range(search_start, search_end):
-        if vertical_projection[x_rel] > threshold_val:
-            significant_peaks.append((x_rel, vertical_projection[x_rel]))
-
-    if len(significant_peaks) > 1:
-        min_proj_val = np.min(vertical_projection[search_start:search_end])
-        min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start
-
-        if min_proj_val < threshold_val * 0.6:
-            split_x_absolute = x1 + min_proj_idx
-            box_left = (x1, y1, split_x_absolute, y2)
-            box_right = (split_x_absolute, y1, x2, y2)
-            return box_left, box_right, split_x_absolute
-
-    if peak_val > (h * 255 * 0.40):
-        split_x_absolute = x1 + peak_x_relative
-        box_left = (x1, y1, split_x_absolute, y2)
-        box_right = (split_x_absolute, y1, x2, y2)
-        return box_left, box_right, split_x_absolute
-
-    return None
-
-
 def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False):
-    if len(indices) < 2:
-        return None
-
+    if len(indices) < 2: return None  # a single box cannot form two columns
     boxes = [quad_bbox(ocr[i][0]) for i in indices]
-    sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][0])
+    hs = [max(1, b[3] - b[1]) for b in boxes]  # box heights, floored at 1
+    med_h = float(np.median(hs)) if hs else 12.0
+    xs = [(b[0] + b[2]) / 2.0 for b in boxes]  # horizontal centre of each box
+    xs_sorted = sorted(xs)
 
-    gaps = []
-    current_max_x = sorted_items[0][1][2]
+    gap_thresh = max(med_h * 1.2, 18) if use_aggressive_thresholds else max(med_h * 1.5, 22)  # aggressive mode accepts smaller gaps
+    best_gap_idx, best_gap_size = None, 0.0
 
-    for i in range(1, len(sorted_items)):
-        idx, b = sorted_items[i]
-        x1 = b[0]
-        gap = x1 - current_max_x
-        gaps.append((i, gap, current_max_x, x1))
-        current_max_x = max(current_max_x, b[2])
+    for i in range(len(xs_sorted) - 1):  # look for the widest centre-to-centre gap above the threshold
+        gap = xs_sorted[i + 1] - xs_sorted[i]  # distance between neighbouring centres, not between box edges
+        if gap > gap_thresh and gap > best_gap_size:
+            best_gap_size, best_gap_idx = gap, i
 
-    if not gaps:
-        return None
+    if best_gap_idx is None: return None  # no gap exceeded the threshold
+    split_x = (xs_sorted[best_gap_idx] + xs_sorted[best_gap_idx + 1]) / 2.0  # cut halfway across the chosen gap
 
-    max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
+    left_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 < split_x]  # centres left of the cut
+    right_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 >= split_x]  # centres on or right of the cut
 
-    hs = [b[3] - b[1] for b in boxes]
-    med_h = float(np.median(hs)) if hs else 15.0
-
-    if use_aggressive_thresholds:
-        threshold1 = 60.0
-        threshold2 = med_h * 1.0
-        min_gap = 20.0
-    else:
-        threshold1 = 90.0
-        threshold2 = med_h * 1.5
-        min_gap = 25.0
-
-    if max_gap_size > threshold1 or (max_gap_size > threshold2 and max_gap_size > min_gap):
-        split_idx = max_gap_idx
-        left_indices = [item[0] for item in sorted_items[:split_idx]]
-        right_indices = [item[0] for item in sorted_items[split_idx:]]
-
-        if len(left_indices) < 1 or len(right_indices) < 1:
-            return None
-
-        return left_indices, right_indices
-
-    return None
+    if not left_idxs or not right_idxs: return None
+    return (left_idxs, right_idxs)  # both lists keep the order of `indices`; the bid argument is not used in this body
 
 
 def split_bubble_if_multiple_rows(indices, ocr, bid=None):
-    if len(indices) < 2:
-        return None
-
+    if len(indices) < 2: return None  # a single box cannot form two rows
     boxes = [quad_bbox(ocr[i][0]) for i in indices]
-    sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1])
+    hs = [max(1, b[3] - b[1]) for b in boxes]  # box heights, floored at 1
+    med_h = float(np.median(hs)) if hs else 12.0
+    ys = [(b[1] + b[3]) / 2.0 for b in boxes]  # vertical centre of each box
+    ys_sorted = sorted(ys)
 
-    gaps = []
-    current_max_y = sorted_items[0][1][3]
+    gap_thresh = max(med_h * 2.0, 30)  # centre-to-centre gap must exceed two median heights and 30 px
+    best_gap_idx, best_gap_size = None, 0.0
 
-    for i in range(1, len(sorted_items)):
-        idx, b = sorted_items[i]
-        y1 = b[1]
-        gap = y1 - current_max_y
-        gaps.append((i, gap, current_max_y, y1))
-        current_max_y = max(current_max_y, b[3])
+    for i in range(len(ys_sorted) - 1):  # look for the largest centre-to-centre gap above the threshold
+        gap = ys_sorted[i + 1] - ys_sorted[i]  # distance between neighbouring centres, not between box edges
+        if gap > gap_thresh and gap > best_gap_size:
+            best_gap_size, best_gap_idx = gap, i
 
-    if not gaps:
-        return None
+    if best_gap_idx is None: return None  # no gap exceeded the threshold
+    split_y = (ys_sorted[best_gap_idx] + ys_sorted[best_gap_idx + 1]) / 2.0  # cut halfway across the chosen gap
 
-    max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
+    top_idxs    = [i for i in indices if (quad_bbox(ocr[i][0])[1] + quad_bbox(ocr[i][0])[3]) / 2.0 < split_y]  # centres above the cut
+    bottom_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[1] + quad_bbox(ocr[i][0])[3]) / 2.0 >= split_y]  # centres on or below the cut
 
-    hs = [b[3] - b[1] for b in boxes]
-    med_h = float(np.median(hs)) if hs else 15.0
-
-    threshold = med_h * 1.8
-    min_gap = 20.0
-
-    if max_gap_size > threshold and max_gap_size > min_gap:
-        split_idx = max_gap_idx
-        top_indices = [item[0] for item in sorted_items[:split_idx]]
-        bottom_indices = [item[0] for item in sorted_items[split_idx:]]
-
-        if len(top_indices) >= 1 and len(bottom_indices) >= 1:
-            return top_indices, bottom_indices
-
-    return None
-
-
-def is_vertical_text_like(indices, ocr):
-    if len(indices) < 2:
-        return False
-
-    bxs = [quad_bbox(ocr[i][0]) for i in indices]
-    ub = boxes_union_xyxy(bxs)
-    if ub is None:
-        return False
-
-    x1, y1, x2, y2 = ub
-    w = max(1, x2 - x1)
-    h = max(1, y2 - y1)
-
-    aspect = h / w
-    xcs = [((b[0] + b[2]) / 2.0) for b in bxs]
-    x_spread = float(np.std(xcs)) if len(xcs) > 1 else 0.0
-    med_h = float(np.median([max(1, b[3]-b[1]) for b in bxs]))
-
-    ys = sorted([((b[1] + b[3]) / 2.0) for b in bxs])
-    gaps = [ys[i+1] - ys[i] for i in range(len(ys)-1)] if len(ys) >= 2 else [0]
-    med_gap = float(np.median(gaps)) if gaps else 0.0
-
-    return (
-        aspect > 1.35 and
-        x_spread < max(10.0, med_h * 0.9) and
-        med_gap > max(6.0, med_h * 0.35)
-    )
+    if not top_idxs or not bottom_idxs: return None
+    return (top_idxs, bottom_idxs)  # both lists keep the order of `indices`; the bid argument is not used in this body
 
 
 def split_cluster_by_big_vertical_gap(indices, ocr, factor=1.9, min_gap=22):
-    if len(indices) < 2:
-        return None
+    if len(indices) < 2: return None  # nothing to split with fewer than two boxes
+    boxes = [quad_bbox(ocr[i][0]) for i in indices]  # used only for the height statistics below
+    hs = [max(1, b[3] - b[1]) for b in boxes]  # box heights, floored at 1
+    med_h = float(np.median(hs)) if hs else 12.0
 
-    items = []
-    for i in indices:
-        b = quad_bbox(ocr[i][0])
-        yc = (b[1] + b[3]) / 2.0
-        h = max(1.0, b[3] - b[1])
-        items.append((i, b, yc, h))
+    items = sorted([(i, quad_bbox(ocr[i][0])) for i in indices],  # (ocr index, bbox) pairs
+                   key=lambda x: (x[1][1] + x[1][3]) / 2.0)  # ordered top to bottom by vertical centre
+    gap_thresh = max(med_h * factor, min_gap)  # a gap must beat both the relative and the absolute limit
+    best_gap, best_split_idx = 0.0, None
 
-    items.sort(key=lambda t: t[2])
-    med_h = float(np.median([t[3] for t in items])) if items else 12.0
+    for k in range(len(items) - 1):
+        gap = items[k + 1][1][1] - items[k][1][3]  # edge-to-edge: top of the next box minus bottom of this one
+        if gap > gap_thresh and gap > best_gap:  # keep the largest qualifying gap
+            best_gap, best_split_idx = gap, k
 
-    best_k = -1
-    best_gap = -1
-    for k in range(len(items)-1):
-        y_top = items[k][1][3]
-        y_bot = items[k+1][1][1]
-        gap = y_bot - y_top
-        if gap > best_gap:
-            best_gap = gap
-            best_k = k
+    if best_split_idx is None: return None  # no gap exceeded the threshold
+    top_idxs    = [it[0] for it in items[:best_split_idx + 1]]  # everything up to and including item k
+    bottom_idxs = [it[0] for it in items[best_split_idx + 1:]]  # everything after item k
+    if not top_idxs or not bottom_idxs: return None
+    return (top_idxs, bottom_idxs)  # both lists are ordered by vertical centre
 
-    if best_k < 0:
-        return None
 
-    if best_gap > max(min_gap, med_h * factor):
-        a = [t[0] for t in items[:best_k+1]]
-        b = [t[0] for t in items[best_k+1:]]
-        if a and b:
-            return a, b
-    return None
+def is_vertical_text_like(indices, ocr):
+    if len(indices) < 2: return False  # fewer than two boxes is never treated as vertical text
+    boxes = [quad_bbox(ocr[i][0]) for i in indices]
+    med_h = float(np.median([max(1, b[3]-b[1]) for b in boxes]))  # median box height, each floored at 1
+    med_w = float(np.median([max(1, b[2]-b[0]) for b in boxes]))  # median box width, each floored at 1
+    if med_h < med_w * 1.2: return False  # the median box must be at least 1.2x taller than wide
+    xs = [(b[0]+b[2])/2.0 for b in boxes]  # horizontal centres
+    ys = [(b[1]+b[3])/2.0 for b in boxes]  # vertical centres
+    if (max(ys)-min(ys)) < (max(xs)-min(xs)) * 1.5: return False  # centres must spread at least 1.5x more in y than in x
+    return True
 
 
 def split_nested_or_side_by_side(indices, ocr):
-    if len(indices) < 2:
-        return None
+    if len(indices) < 2: return None  # nothing to split with fewer than two boxes
+    xs = sorted([(quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 for i in indices])  # sorted horizontal centres
+    mid_idx = len(xs) // 2  # at least 1 here because len(xs) >= 2
+    split_x = (xs[mid_idx - 1] + xs[mid_idx]) / 2.0  # cut midway between the two middle centres; no minimum separation is required
 
-    boxes = [quad_bbox(ocr[i][0]) for i in indices]
-    xcs = np.array([[(b[0] + b[2]) / 2.0] for b in boxes], dtype=np.float32)
+    left_idxs  = [i for i in indices if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 < split_x]  # centres left of the cut
+    right_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 >= split_x]  # centres on or right of the cut
 
-    c1 = float(np.min(xcs))
-    c2 = float(np.max(xcs))
-    if abs(c2 - c1) < 8:
-        return None
-
-    for _ in range(12):
-        g1, g2 = [], []
-        for idx, v in enumerate(xcs[:, 0]):
-            if abs(v - c1) <= abs(v - c2):
-                g1.append(idx)
-            else:
-                g2.append(idx)
-        if not g1 or not g2:
-            return None
-        new_c1 = float(np.mean([xcs[i, 0] for i in g1]))
-        new_c2 = float(np.mean([xcs[i, 0] for i in g2]))
-        if abs(new_c1 - c1) < 0.5 and abs(new_c2 - c2) < 0.5:
-            break
-        c1, c2 = new_c1, new_c2
-
-    left_group = g1 if c1 < c2 else g2
-    right_group = g2 if c1 < c2 else g1
-
-    left_idxs = [indices[i] for i in left_group]
-    right_idxs = [indices[i] for i in right_group]
-    if not left_idxs or not right_idxs:
-        return None
-
-    left_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in left_idxs])
-    right_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in right_idxs])
-
-    sep = right_box[0] - left_box[2]
-    if sep < -8:
-        return None
-
-    return left_idxs, right_idxs
+    if not left_idxs or not right_idxs: return None  # e.g. the two middle centres coincide, so nothing falls left of the cut
+    return (left_idxs, right_idxs)  # both lists keep the order of `indices`
 
 
-def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr):
-    bids = sorted(bubbles.keys())
-    used = set()
-    out_b, out_bb, out_bq, out_bi = {}, {}, {}, {}
-    nbid = 1
+def split_panel_box(image_bgr, box_xyxy, bubble_quads=None):
+    x1, y1, x2, y2 = box_xyxy
+    ih, iw = image_bgr.shape[:2]
+    x1, y1 = max(0, x1), max(0, y1)  # clamp the box to the image
+    x2, y2 = min(iw-1, x2), min(ih-1, y2)  # NOTE(review): clamped to iw-1/ih-1 yet used as exclusive slice ends below, so the last row/column is never included
+    if x2 <= x1 or y2 <= y1: return None  # box is empty after clamping
+    crop = image_bgr[y1:y2, x1:x2]
+    if crop.size == 0: return None
 
-    all_h = []
-    for i in range(len(ocr)):
-        b = quad_bbox(ocr[i][0])
-        all_h.append(max(1, b[3]-b[1]))
-    med_h = float(np.median(all_h)) if all_h else 14.0
+    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
+    edges = cv2.Canny(gray, 50, 150)
+    h_proj = np.sum(edges, axis=0)  # per-column sum of the Canny edge map (one value per x)
+    w = x2 - x1
+    if w < 100: return None  # boxes narrower than 100 px are not split
 
-    for i, a in enumerate(bids):
-        if a in used:
-            continue
-        used.add(a)
-        group = [a]
+    search_start = int(w * 0.35)  # only the middle 35%-65% of the width is searched
+    search_end   = int(w * 0.65)
+    if search_end <= search_start: return None
+    region = h_proj[search_start:search_end]
+    if len(region) == 0: return None
 
-        ax1, ay1, ax2, ay2 = bubble_boxes[a]
+    threshold  = np.percentile(region, 85)  # columns at or above the 85th percentile of edge strength are candidates
+    candidates = [x1 + search_start + rx for rx in range(len(region)) if region[rx] >= threshold]  # absolute x positions
+    if not candidates: return None
+    split_x = int(np.median(candidates))  # the split column is the median candidate position
 
-        for b in bids[i+1:]:
-            if b in used:
-                continue
-            bx1, by1, bx2, by2 = bubble_boxes[b]
+    if bubble_quads:  # when quads are supplied, each side of the split must hold at least one quad centre
+        left_count  = sum(1 for q in bubble_quads if quad_center(q)[0] < split_x)
+        right_count = len(bubble_quads) - left_count
+        if left_count == 0 or right_count == 0: return None
 
-            acx, acy = (ax1+ax2)/2.0, (ay1+ay2)/2.0
-            bcx, bcy = (bx1+bx2)/2.0, (by1+by2)/2.0
-            dx, dy = abs(acx-bcx), abs(acy-bcy)
-
-            near = dx < med_h * 10.0 and dy < med_h * 3.6
-            touching = overlap_or_near((ax1, ay1, ax2, ay2), (bx1, by1, bx2, by2), gap=int(med_h*1.25))
-
-            ua = boxes_union_xyxy([(ax1, ay1, ax2, ay2), (bx1, by1, bx2, by2)])
-            area_a = max(1, (ax2-ax1)*(ay2-ay1))
-            area_b = max(1, (bx2-bx1)*(by2-by1))
-            area_u = max(1, (ua[2]-ua[0])*(ua[3]-ua[1]))
-            compact_union = area_u < (area_a + area_b) * 1.65
-
-            if near and touching and compact_union:
-                group.append(b)
-                used.add(b)
-                ax1 = min(ax1, bx1); ay1 = min(ay1, by1); ax2 = max(ax2, bx2); ay2 = max(ay2, by2)
-
-        idxs = []
-        quads = []
-        for g in group:
-            idxs.extend(bubble_indices[g])
-            quads.extend(bubble_quads[g])
-
-        idxs = sorted(set(idxs))
-        ub = boxes_union_xyxy([quad_bbox(ocr[k][0]) for k in idxs])
-        if ub is None:
-            continue
-
-        out_b[nbid] = build_lines_from_indices(idxs, ocr)
-        out_bb[nbid] = ub
-        out_bq[nbid] = quads
-        out_bi[nbid] = idxs
-        nbid += 1
-
-    return out_b, out_bb, out_bq, out_bi
+    return (x1, x2, split_x)  # NOTE(review): the removed implementation returned (box_left, box_right, split_x); confirm callers expect (clamped x1, clamped x2, split_x)
 
 
 # ============================================================
-# PREPROCESS
+# MERGE CLOSE BUBBLES
+# ============================================================
+def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads,
+                                        bubble_indices, ocr):
+    """
+    Merges boxes that are spatially very close (within ~1.4× line height on
+    BOTH axes simultaneously). Strict dual-axis check prevents merging boxes
+    from adjacent but distinct bubbles — fixing the BOX#5+BOX#16 overlap problem.
+    """
+    if not bubbles:
+        return bubbles, bubble_boxes, bubble_quads, bubble_indices  # empty input is returned unchanged
+
+    all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))]  # heights of every OCR quad, not just those in bubbles
+    med_h = float(np.median(all_h)) if all_h else 14.0
+    merge_tol = max(8, med_h * 1.4)  # largest gap (px) still considered "close"
+
+    bids = sorted(bubble_boxes.keys())
+    merged_set, merge_map = set(), {}  # merged_set: ids absorbed into another bubble; merge_map: anchor id -> ids in its group
+
+    for i, bid_i in enumerate(bids):
+        if bid_i in merged_set: continue  # already absorbed by an earlier anchor
+        x1_i, y1_i, x2_i, y2_i = bubble_boxes[bid_i]  # the anchor box is read once and not grown as partners are added
+        wi = x2_i - x1_i
+
+        for j in range(i + 1, len(bids)):
+            bid_j = bids[j]
+            if bid_j in merged_set: continue
+            x1_j, y1_j, x2_j, y2_j = bubble_boxes[bid_j]
+            wj = x2_j - x1_j
+
+            gap_x = max(0, max(x1_i, x1_j) - min(x2_i, x2_j))  # horizontal distance between the boxes, 0 when they overlap in x
+            gap_y = max(0, max(y1_i, y1_j) - min(y2_i, y2_j))  # vertical distance between the boxes, 0 when they overlap in y
+
+            # Horizontal overlap ratio — must be significant to merge
+            h_ix1 = max(x1_i, x1_j)
+            h_ix2 = min(x2_i, x2_j)
+            h_overlap = max(0, h_ix2 - h_ix1)
+            h_overlap_ratio = h_overlap / max(1, min(wi, wj))  # overlap relative to the narrower box
+
+            # STRICT: both gap_x AND gap_y must be small, AND boxes must
+            # share meaningful horizontal overlap (same column).
+            # This prevents merging horizontally adjacent distinct bubbles.
+            if gap_x <= merge_tol and gap_y <= merge_tol and h_overlap_ratio >= 0.25:  # a ratio >= 0.25 implies x-overlap, so gap_x is 0 whenever this passes
+                if bid_i not in merge_map:
+                    merge_map[bid_i] = [bid_i]  # the group starts with the anchor itself
+                merge_map[bid_i].append(bid_j)
+                merged_set.add(bid_j)
+
+    if not merge_map:
+        return bubbles, bubble_boxes, bubble_quads, bubble_indices  # nothing merged: inputs returned as-is, ids not renumbered
+
+    new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {}
+    next_bid = 1  # output bubbles are renumbered consecutively from 1
+
+    for bid in bids:
+        if bid in merged_set: continue  # absorbed bubbles are emitted as part of their anchor
+        if bid in merge_map:
+            group = merge_map[bid]
+            all_indices = sorted(set(idx for b in group for idx in bubble_indices[b]))  # de-duplicated OCR indices of the whole group
+            new_bubbles[next_bid] = build_lines_from_indices(all_indices, ocr)
+            new_boxes[next_bid]   = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices])  # box recomputed from the OCR quads
+            new_quads[next_bid]   = [ocr[i][0] for i in all_indices]  # quads taken from ocr, not from the bubble_quads input
+            new_indices[next_bid] = all_indices
+        else:
+            new_bubbles[next_bid] = bubbles[bid]  # untouched bubble copied over under its new id
+            new_boxes[next_bid]   = bubble_boxes[bid]
+            new_quads[next_bid]   = bubble_quads[bid]
+            new_indices[next_bid] = bubble_indices[bid]
+        next_bid += 1
+
+    return new_bubbles, new_boxes, new_quads, new_indices
+
+
+# ============================================================
+# WIDE / BRIDGE QUAD SPLITTING
+# ============================================================
+def split_wide_ocr_items(image_bgr, ocr_list, width_factor=8.0):
+    if not ocr_list: return ocr_list, 0  # empty input: returned unchanged with zero splits
+    hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list]  # quad heights, floored at 1
+    med_h = float(np.median(hs)) if hs else 14.0
+    result, splits_made = [], 0
+
+    for quad, text, conf in ocr_list:
+        x1, y1, x2, y2 = quad_bbox(quad)
+        w = x2 - x1
+        if w > med_h * width_factor:  # only quads wider than width_factor x median height are examined
+            pad = 2  # extra rows above and below the quad
+            roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad),
+                            max(0,x1):min(image_bgr.shape[1],x2)]
+            if roi.size > 0:
+                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+                _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+                v_proj = np.sum(binary, axis=0)  # per-column sum of the inverted Otsu mask
+                gap_threshold = roi.shape[0] * 255 * 0.15  # 15% of the largest possible column sum
+                gaps, in_gap, gap_start = [], False, 0
+                for x in range(len(v_proj)):  # a gap still open at the right edge of the ROI is never recorded
+                    if v_proj[x] < gap_threshold:
+                        if not in_gap: gap_start, in_gap = x, True
+                    else:
+                        if in_gap:
+                            gw = x - gap_start
+                            if gw >= max(int(med_h * 0.6), 12):  # ignore gaps narrower than max(0.6 x median height, 12 px)
+                                gaps.append((gap_start + gw // 2, gw))  # (gap centre relative to the ROI, gap width)
+                            in_gap = False
+                if gaps:
+                    gaps.sort(key=lambda g: g[1], reverse=True)  # widest gap first
+                    split_x_abs = max(0, x1) + gaps[0][0]  # ROI-relative centre converted to image x
+                    if ' ' in text:
+                        char_w = w / max(1, len(text))  # average width per character
+                        split_idx = int((split_x_abs - x1) / max(1e-6, char_w))
+                        spaces = [i for i, c in enumerate(text) if c == ' ']
+                        if spaces: split_idx = min(spaces, key=lambda i: abs(i - split_idx))  # snap to the nearest space
+                        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+                    else:
+                        split_idx = int(len(text) * (split_x_abs - x1) / w)  # no spaces: cut the text in proportion to the split position
+                        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+                    if tl and tr:  # split only when both halves carry text; both halves reuse conf
+                        result.extend([([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf),
+                                        ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)])
+                        splits_made += 1
+                        continue
+        result.append((quad, text, conf))  # not split: the item is passed through unchanged
+    return result, splits_made
+
+
+def split_abnormal_bridge_quads(image_bgr, ocr_list, aspect_ratio_threshold=6.0):
+    if not ocr_list: return ocr_list, 0  # empty input: returned unchanged with zero splits
+    hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list]  # quad heights, floored at 1
+    med_h = float(np.median(hs)) if hs else 14.0  # used only for the minimum gap width below
+    result, splits_made = [], 0
+
+    for quad, text, conf in ocr_list:
+        x1, y1, x2, y2 = quad_bbox(quad)
+        w, h = x2 - x1, max(1, y2 - y1)
+        if w / h > aspect_ratio_threshold:  # only quads whose width/height ratio exceeds the threshold are examined
+            pad = 2  # extra rows above and below the quad
+            roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad),
+                            max(0,x1):min(image_bgr.shape[1],x2)]
+            if roi.size > 0:
+                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+                _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+                v_proj = np.sum(binary, axis=0)  # per-column sum of the inverted Otsu mask
+                gap_threshold = h * 255 * 0.20  # NOTE(review): uses the bbox height h, while the ROI has up to 2*pad extra rows (split_wide_ocr_items uses roi.shape[0])
+                gaps, in_gap, gap_start = [], False, 0
+                for x in range(len(v_proj)):  # a gap still open at the right edge of the ROI is never recorded
+                    if v_proj[x] < gap_threshold:
+                        if not in_gap: gap_start, in_gap = x, True
+                    else:
+                        if in_gap:
+                            gw = x - gap_start
+                            if gw >= max(int(med_h * 0.8), 15):  # ignore gaps narrower than max(0.8 x median height, 15 px)
+                                gaps.append((gap_start + gw // 2, gw))  # (gap centre relative to the ROI, gap width)
+                            in_gap = False
+                if gaps:
+                    gaps.sort(key=lambda g: g[1], reverse=True)  # widest gap first
+                    split_x_abs = max(0, x1) + gaps[0][0]  # ROI-relative centre converted to image x
+                    if ' ' in text:
+                        char_w = w / max(1, len(text))  # average width per character
+                        split_idx = int((split_x_abs - x1) / max(1e-6, char_w))
+                        spaces = [i for i, c in enumerate(text) if c == ' ']
+                        if spaces: split_idx = min(spaces, key=lambda i: abs(i - split_idx))  # snap to the nearest space
+                        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+                    else:
+                        split_idx = int(len(text) * (split_x_abs - x1) / w)  # no spaces: cut the text in proportion to the split position
+                        tl, tr = text[:split_idx].strip(), text[split_idx:].strip()
+                    if tl and tr:  # split only when both halves carry text; both halves reuse conf
+                        result.extend([([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf),
+                                        ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)])
+                        splits_made += 1
+                        continue
+        result.append((quad, text, conf))  # not split: the item is passed through unchanged
+    return result, splits_made
+
+
+def normalize_ocr_quads(ocr_list, pad=3):
+    """Replace each quad with its axis-aligned bbox grown by pad px; top/left are clamped at 0."""
+    result = []
+    for quad, text, conf in ocr_list:
+        x1, y1, x2, y2 = quad_bbox(quad)
+        lx, ty = max(0, x1-pad), max(0, y1-pad)  # negative coords would wrap if later used as slice bounds
+        result.append(([[lx, ty], [x2+pad, ty], [x2+pad, y2+pad], [lx, y2+pad]], text, conf))
+    return result
+
+
+# ============================================================
+# VISION RE-READ
 # ============================================================
 def preprocess_variant(crop_bgr, mode):
     gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
-
-    if mode == "raw":
-        return gray
-    if mode == "clahe":
-        return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
+    if mode == "raw":        return gray  # plain grayscale; unrecognised modes end up with the same result
+    if mode == "clahe":      return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(gray)  # local contrast equalisation
     if mode == "adaptive":
-        den = cv2.GaussianBlur(gray, (3, 3), 0)
+        den = cv2.GaussianBlur(gray, (3,3), 0)  # light blur before thresholding
         return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11)
     if mode == "otsu":
-        den = cv2.GaussianBlur(gray, (3, 3), 0)
+        den = cv2.GaussianBlur(gray, (3,3), 0)  # light blur before thresholding
         _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
         return th
-    if mode == "invert":
-        return 255 - gray
+    if mode == "invert":     return 255 - gray  # negative of the grayscale crop
     if mode == "bilateral":
         den = cv2.bilateralFilter(gray, 7, 60, 60)
         _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
         return th
     if mode == "morph_open":
         _, th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-        k = np.ones((2, 2), np.uint8)
-        return cv2.morphologyEx(th, cv2.MORPH_OPEN, k)
-
+        return cv2.morphologyEx(th, cv2.MORPH_OPEN, np.ones((2,2), np.uint8))  # 2x2 opening of the Otsu mask
     return gray
 
 
 def rotate_image_keep_bounds(img, angle_deg):
     h, w = img.shape[:2]
-    c = (w / 2, h / 2)
+    c = (w/2, h/2)  # rotate about the image centre
     M = cv2.getRotationMatrix2D(c, angle_deg, 1.0)
-    cos = abs(M[0, 0]); sin = abs(M[0, 1])
-
-    new_w = int((h * sin) + (w * cos))
-    new_h = int((h * cos) + (w * sin))
-    M[0, 2] += (new_w / 2) - c[0]
-    M[1, 2] += (new_h / 2) - c[1]
-
+    cos, sin = abs(M[0,0]), abs(M[0,1])
+    new_w = int((h*sin) + (w*cos))  # canvas size that contains the whole rotated image
+    new_h = int((h*cos) + (w*sin))
+    M[0,2] += (new_w/2) - c[0]  # translate so the result is centred on the larger canvas
+    M[1,2] += (new_h/2) - c[1]
     return cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderValue=255)
 
 
 def rebuild_text_from_vision_result(res):
-    if not res:
-        return ""
-
+    if not res: return ""
     norm = []
     for bbox, txt, conf in res:
-        if not txt or not txt.strip():
-            continue
+        if not txt or not txt.strip(): continue  # skip blank detections
         b = quad_bbox(bbox)
-        xc = (b[0] + b[2]) / 2.0
-        yc = (b[1] + b[3]) / 2.0
-        h = max(1.0, b[3] - b[1])
-        norm.append((b, txt, conf, xc, yc, h))
-
-    if not norm:
-        return ""
-
-    med_h = float(np.median([x[5] for x in norm]))
+        norm.append((b, txt, conf, (b[0]+b[2])/2.0, (b[1]+b[3])/2.0, max(1.0, b[3]-b[1])))  # (bbox, text, conf, x-centre, y-centre, height)
+    if not norm: return ""
+    med_h   = float(np.median([x[5] for x in norm]))
     row_tol = max(6.0, med_h * 0.75)
-
     norm.sort(key=lambda z: z[4])
     rows = []
     for it in norm:
@@ -1413,58 +1450,41 @@ def rebuild_text_from_vision_result(res):
             if abs(it[4] - r["yc"]) <= row_tol:
                 r["m"].append(it)
                 r["yc"] = float(np.mean([k[4] for k in r["m"]]))
-                placed = True
-                break
-        if not placed:
-            rows.append({"yc": it[4], "m": [it]})
-
+                placed = True; break
+        if not placed: rows.append({"yc": it[4], "m": [it]})  # no row within row_tol: open a new one
     rows.sort(key=lambda r: r["yc"])
-    lines = []
-    for r in rows:
-        mem = sorted(r["m"], key=lambda z: z[3])
-        line = normalize_text(" ".join(x[1] for x in mem))
-        if line:
-            lines.append(line)
-
-    return normalize_text(" ".join(lines))
+    lines = [normalize_text(" ".join(x[1] for x in sorted(r["m"], key=lambda z: z[3]))) for r in rows]  # each row ordered by x-centre
+    return normalize_text(" ".join(filter(None, lines)))  # empty rows are dropped before joining
 
 
 def reread_bubble_with_vision(image_bgr, bbox_xyxy, vision_detector, upscale=3.0, pad=24):
     ih, iw = image_bgr.shape[:2]
     x1, y1, x2, y2 = bbox_xyxy
-    x1 = max(0, int(x1 - pad)); y1 = max(0, int(y1 - pad))
-    x2 = min(iw, int(x2 + pad)); y2 = min(ih, int(y2 + pad))
-
+    x1, y1 = max(0, int(x1-pad)), max(0, int(y1-pad))  # grow the box by pad, clamped to the image
+    x2, y2 = min(iw, int(x2+pad)), min(ih, int(y2+pad))
     crop = image_bgr[y1:y2, x1:x2]
-    if crop.size == 0:
-        return None, 0.0, "none"
+    if crop.size == 0: return None, 0.0, "none"
 
-    modes = ["raw", "clahe", "adaptive", "otsu", "invert", "bilateral", "morph_open"]
+    modes  = ["raw", "clahe", "adaptive", "otsu", "invert", "bilateral", "morph_open"]
     angles = [0.0, 1.5, -1.5]
-
     best_v_txt, best_v_sc = "", 0.0
-    up0 = cv2.resize(crop, (int(crop.shape[1] * upscale), int(crop.shape[0] * upscale)), interpolation=cv2.INTER_CUBIC)
+    up0 = cv2.resize(crop, (int(crop.shape[1]*upscale), int(crop.shape[0]*upscale)),
+                     interpolation=cv2.INTER_CUBIC)  # upscaled once, shared by every mode/angle variant
 
     for mode in modes:
-        proc = preprocess_variant(up0, mode)
+        proc  = preprocess_variant(up0, mode)
         proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR) if len(proc.shape) == 2 else proc
         for a in angles:
             rot = rotate_image_keep_bounds(proc3, a)
-            
-            # Use run_vision_ocr if available (enhanced detector)
-            if hasattr(vision_detector, 'run_vision_ocr'):
-                res = vision_detector.run_vision_ocr(rot)
-            else:
-                res = vision_detector.read(rot)
-            
+            res = (vision_detector.run_vision_ocr(rot)
+                   if hasattr(vision_detector, 'run_vision_ocr')
+                   else vision_detector.read(rot))  # prefer run_vision_ocr when the detector has it
             txt = rebuild_text_from_vision_result(res)
-            sc = ocr_candidate_score(txt)
+            sc  = ocr_candidate_score(txt)  # only a strictly higher score replaces the current best
             if sc > best_v_sc:
                 best_v_txt, best_v_sc = txt, sc
 
-    if best_v_txt:
-        return best_v_txt, best_v_sc, "vision-reread"
-
+    if best_v_txt: return best_v_txt, best_v_sc, "vision-reread"  # otherwise falls through to the "none" result
     return None, 0.0, "none"
 
 
@@ -1472,202 +1492,128 @@ def reread_bubble_with_vision(image_bgr, bbox_xyxy, vision_detector, upscale=3.0
 # LINES + BUBBLES
 # ============================================================
 def build_lines_from_indices(indices, ocr):
-    if not indices:
-        return []
+    if not indices: return []
     items = []
     for i in indices:
         b = quad_bbox(ocr[i][0])
-        xc = (b[0] + b[2]) / 2.0
-        yc = (b[1] + b[3]) / 2.0
-        h = max(1.0, b[3] - b[1])
-        items.append((i, b, xc, yc, h))
-
-    med_h = float(np.median([it[4] for it in items])) if items else 10.0
+        items.append((i, b, (b[0]+b[2])/2.0, (b[1]+b[3])/2.0, max(1.0, b[3]-b[1])))  # (ocr index, bbox, x-centre, y-centre, height)
+    med_h   = float(np.median([it[4] for it in items])) if items else 10.0
     row_tol = max(6.0, med_h * 0.75)
-
     items.sort(key=lambda x: x[3])
     rows = []
     for it in items:
-        i, b, xc, yc, h = it
         placed = False
         for r in rows:
-            if abs(yc - r["yc"]) <= row_tol:
-                r["m"].append((i, b, xc, yc))
-                r["yc"] = float(np.mean([k[3] for k in r["m"]]))
-                placed = True
-                break
-        if not placed:
-            rows.append({"yc": yc, "m": [(i, b, xc, yc)]})
-
-    rows.sort(key=lambda r: r["yc"])
-    lines = []
-    for r in rows:
-        mem = sorted(r["m"], key=lambda z: z[2])
-        txt = normalize_text(" ".join(ocr[i][1] for i, _, _, _ in mem))
-        if txt and not is_noise_text(txt):
-            lines.append(txt)
-    return lines
-
-
-def build_line_boxes_from_indices(indices, ocr, image_shape=None):
-    if not indices:
-        return []
-
-    items = []
-    for i in indices:
-        b = quad_bbox(ocr[i][0])
-        txt = normalize_text(ocr[i][1])
-        if is_noise_text(txt):
-            continue
-        xc = (b[0] + b[2]) / 2.0
-        yc = (b[1] + b[3]) / 2.0
-        h = max(1.0, b[3] - b[1])
-        items.append({"i": i, "b": b, "txt": txt, "xc": xc, "yc": yc, "h": h})
-
-    if not items:
-        return []
-
-    med_h = float(np.median([it["h"] for it in items]))
-    row_tol = max(6.0, med_h * 0.90)
-    gap_x_tol = max(8.0, med_h * 1.25)
-    pad = max(2, int(round(med_h * 0.14)))
-
-    rows = []
-    for it in sorted(items, key=lambda x: x["yc"]):
-        placed = False
-        for r in rows:
-            if abs(it["yc"] - r["yc"]) <= row_tol:
+            if abs(it[3] - r["yc"]) <= row_tol:
                 r["m"].append(it)
-                r["yc"] = float(np.mean([k["yc"] for k in r["m"]]))
-                placed = True
-                break
-        if not placed:
-            rows.append({"yc": it["yc"], "m": [it]})
-
+                r["yc"] = float(np.mean([k[3] for k in r["m"]]))  # row centre tracks the mean of its members
+                placed = True; break
+        if not placed: rows.append({"yc": it[3], "m": [it]})
     rows.sort(key=lambda r: r["yc"])
-    out_boxes = []
-
-    for r in rows:
-        mem = sorted(r["m"], key=lambda z: z["xc"])
-        if not mem:
-            continue
-
-        chunks = []
-        cur = [mem[0]]
-        for t in mem[1:]:
-            prev = cur[-1]["b"]
-            b = t["b"]
-            gap = b[0] - prev[2]
-            if gap <= gap_x_tol:
-                cur.append(t)
-            else:
-                chunks.append(cur)
-                cur = [t]
-        chunks.append(cur)
-
-        for ch in chunks:
-            ub = boxes_union_xyxy([x["b"] for x in ch])
-            if ub:
-                x1, y1, x2, y2 = ub
-                out_boxes.append((x1 - pad, y1 - int(round(pad * 1.2)), x2 + pad, y2 + int(round(pad * 0.9))))
-
-    if image_shape is not None:
-        ih, iw = image_shape[:2]
-        clamped = []
-        for b in out_boxes:
-            x1 = max(0, int(b[0])); y1 = max(0, int(b[1]))
-            x2 = min(iw - 1, int(b[2])); y2 = min(ih - 1, int(b[3]))
-            if x2 > x1 and y2 > y1:
-                clamped.append((x1, y1, x2, y2))
-        out_boxes = clamped
-
-    out_boxes.sort(key=lambda z: (z[1], z[0]))
-    return out_boxes
+    return [normalize_text(" ".join(ocr[i][1] for i,_,_,_,_ in sorted(r["m"], key=lambda z: z[2])))  # one string per row, left to right
+            for r in rows if r["m"]]  # NOTE(review): a row whose text normalises to "" is kept as "" - confirm callers tolerate that
 
 
 def auto_gap(image_path, base=18, ref_w=750):
     img = cv2.imread(image_path)
-    if img is None:
-        return base
-    return base * (img.shape[1] / ref_w)
+    return base * (img.shape[1] / ref_w) if img is not None else base  # base scaled by image width / ref_w; unreadable image returns base
 
 
-def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=1):
+def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, strict_mode=False):
+    """
+    Groups OCR quads into bubble candidates (gap_px and bbox_padding are accepted but not read here).
+
+    Generic protections applied:
+      - orientation_compatible(): prevents tall/narrow glyphs merging with wide text lines.
+      - Horizontal gap guard: prevents side-by-side column quads from merging.
+      - detect_horizontal_gap_in_group(): post-merge split for groups with large internal gaps.
+      - Orientation check in secondary merge pass.
+    """
     n = len(ocr)
-    if n == 0:
-        return {}, {}, {}, {}
+    if n == 0: return {}, {}, {}, {}
 
-    boxes = [quad_bbox(r[0]) for r in ocr]
+    boxes   = [quad_bbox(r[0]) for r in ocr]
     centers = [quad_center(r[0]) for r in ocr]
-    hs = [max(1.0, b[3] - b[1]) for b in boxes]
-    med_h = float(np.median(hs)) if hs else 12.0
-    dist_thresh = max(20.0, med_h * 1.8)
-    adaptive_gap_y = max(gap_px, med_h * 2.5)
+    hs      = [max(1.0, b[3]-b[1]) for b in boxes]
+    med_h   = float(np.median(hs)) if hs else 12.0
 
-    p = list(range(n))
+    max_vertical_gap      = med_h * 2.5 if not strict_mode else med_h * 2.0  # strict_mode tightens the allowed vertical gap
+    max_horizontal_offset = med_h * 1.8
 
-    def find(x):
-        while p[x] != x:
-            p[x] = p[p[x]]
-            x = p[x]
-        return x
+    sorted_indices = sorted(range(n), key=lambda i: (centers[i][1], centers[i][0]))  # by y-centre, then x-centre
+    groups, used   = [], set()
 
-    def unite(a, b):
-        p[find(a)] = find(b)
+    for i in sorted_indices:
+        if i in used: continue
+        current_group = [i]
+        used.add(i)
+        cx_i, cy_i = centers[i]
 
-    for i in range(n):
-        for j in range(i + 1, n):
-            ax1, ay1, ax2, ay2 = boxes[i]
-            bx1, by1, bx2, by2 = boxes[j]
-            gap_x = max(0, max(ax1, bx1) - min(ax2, bx2))
-            gap_y = max(0, max(ay1, by1) - min(ay2, by2))
+        for j in sorted_indices:
+            if j in used or j == i: continue
+            cx_j, cy_j = centers[j]
+            if cy_j <= cy_i: continue  # only candidates strictly below the seed quad
+            if abs(cx_i - cx_j) > max_horizontal_offset: continue
 
-            cx1, cy1 = centers[i]
-            cx2, cy2 = centers[j]
-            is_vertically_aligned = abs(cx1 - cx2) < (med_h * 1.5)
+            # Horizontal gap guard
+            gap_x = max(0, max(boxes[i][0], boxes[j][0]) - min(boxes[i][2], boxes[j][2]))
+            if gap_x > med_h * 1.5: continue
 
-            if gap_x == 0 and gap_y <= (med_h * 3.5):
-                unite(i, j); continue
+            # Orientation compatibility guard
+            if not orientation_compatible(i, j, ocr): continue
 
-            if is_vertically_aligned and gap_y <= (med_h * 3.2):
-                unite(i, j); continue
+            vertical_gap = boxes[j][1] - boxes[current_group[-1]][3]  # measured from the most recently added member
+            if vertical_gap <= max_vertical_gap:
+                current_group.append(j)
+                used.add(j)
+                cx_i = (cx_i + cx_j) / 2.0  # running x-centre moves halfway toward the new member
 
-            if gap_x <= gap_px and gap_y <= adaptive_gap_y:
-                unite(i, j); continue
+        if current_group:
+            groups.append(current_group)
 
-            d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5
-            if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 1.5:
-                unite(i, j)
+    # Secondary merge pass
+    merged_groups, used_groups = [], set()
+    for i, group1 in enumerate(groups):
+        if i in used_groups: continue
+        merged = list(group1)
+        used_groups.add(i)
+        for j, group2 in enumerate(groups):
+            if i == j or j in used_groups: continue
+            if should_merge_groups(merged, group2, ocr, med_h, max_vertical_gap):
+                compat = all(orientation_compatible(a, b, ocr)
+                             for a in merged for b in group2)
+                if compat:
+                    merged.extend(group2)
+                    used_groups.add(j)
+        merged_groups.append(sorted(merged, key=lambda idx: centers[idx][1]))
 
-    groups = {}
-    for i in range(n):
-        groups.setdefault(find(i), []).append(i)
+    # Horizontal gap split pass
+    final_groups = []
+    for group in merged_groups:
+        h_split = detect_horizontal_gap_in_group(group, ocr, med_h, gap_factor=2.5)
+        if h_split:
+            lg, rg = h_split
+            final_groups.append(sorted(lg, key=lambda idx: centers[idx][1]))
+            final_groups.append(sorted(rg, key=lambda idx: centers[idx][1]))
+        else:
+            final_groups.append(group)
 
-    sorted_groups = sorted(
-        groups.values(),
-        key=lambda idxs: (min(boxes[i][1] for i in idxs), min(boxes[i][0] for i in idxs))
-    )
+    final_groups.sort(key=lambda g: (min(centers[i][1] for i in g), min(centers[i][0] for i in g)))  # bubble ids follow this order
 
     bubbles, bubble_boxes, bubble_quads, bubble_indices = {}, {}, {}, {}
     ih, iw = image_shape[:2]
 
-    for bid, idxs in enumerate(sorted_groups, start=1):
-        idxs = sorted(idxs, key=lambda k: boxes[k][1])
+    for bid, idxs in enumerate(final_groups, start=1):
         lines = build_lines_from_indices(idxs, ocr)
         quads = [ocr[k][0] for k in idxs]
-        ub = boxes_union_xyxy([quad_bbox(q) for q in quads])
-        if ub is None:
-            continue
-
+        ub    = boxes_union_xyxy([quad_bbox(q) for q in quads])
+        if ub is None: continue
         x1, y1, x2, y2 = ub
-        adaptive_pad = max(1, int(round(med_h * 0.16)))
-        x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad)
-        x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad)
-
-        bubbles[bid] = lines
-        bubble_boxes[bid] = (x1, y1, x2, y2)
-        bubble_quads[bid] = quads
-        bubble_indices[bid] = idxs
+        ap = max(1, int(round(med_h * 0.16)))  # padding derived from median quad height
+        bubbles[bid]       = lines
+        bubble_boxes[bid]  = (max(0,x1-ap), max(0,y1-ap), min(iw-1,x2+ap), min(ih-1,y2+ap))  # padded union, clamped to the image
+        bubble_quads[bid]  = quads
+        bubble_indices[bid]= idxs
 
     return bubbles, bubble_boxes, bubble_quads, bubble_indices
 
@@ -1675,54 +1621,46 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=1):
 # ============================================================
 # DEBUG / EXPORT
 # ============================================================
-def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, clean_lines=None, out_path="debug_clusters.png"):
+def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices,
+                        clean_lines=None, out_path="debug_clusters.png"):
     img = cv2.imread(image_path)
-    if img is None:
-        return
+    if img is None: return  # unreadable image: nothing is written
 
     for bbox, txt, conf in ocr:
         pts = np.array(bbox, dtype=np.int32)
-        cv2.fillPoly(img, [pts], (255, 255, 255))
-        cv2.polylines(img, [pts], True, (180, 180, 180), 1)
+        cv2.fillPoly(img, [pts], (255,255,255))  # paint each OCR quad white
+        cv2.polylines(img, [pts], True, (180,180,180), 1)  # thin grey outline
 
     for bid, bb in bubble_boxes.items():
         x1, y1, x2, y2 = bb
-        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
-        cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
+        is_isolated = len(bubble_indices.get(bid, [])) == 1  # box backed by exactly one OCR quad
+        color     = (255,165,0) if is_isolated else (0,220,0)  # NOTE(review): OpenCV reads tuples as BGR, so (255,165,0) draws blue-ish, not orange - confirm intent
+        thickness = 3           if is_isolated else 2
+        cv2.rectangle(img, (x1,y1), (x2,y2), color, thickness)
+        label = f"BOX#{bid}" + (" (ISOLATED)" if is_isolated else "")
+        cv2.putText(img, label, (x1+2, max(15, y1+16)),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
 
         if clean_lines and bid in clean_lines:
-            text = clean_lines[bid]
+            text  = clean_lines[bid]
             words = text.split()
-            lines = []
-            cur = ""
+            lines, cur = [], ""
             for w in words:
-                if len(cur) + len(w) < 25:
-                    cur += w + " "
-                else:
-                    lines.append(cur.strip())
-                    cur = w + " "
-            if cur:
-                lines.append(cur.strip())
-
+                if len(cur) + len(w) < 25: cur += w + " "  # greedy wrap at roughly 25 characters
+                else: lines.append(cur.strip()); cur = w + " "
+            if cur: lines.append(cur.strip())
             y_text = y2 + 18
             for line in lines:
-                cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 3)
-                cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
+                cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 3)  # thick black pass drawn first, under the coloured text
+                cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1)
                 y_text += 18
 
     cv2.imwrite(out_path, img)
 
 
 def estimate_reading_order(bbox_dict, mode="ltr"):
-    items = []
-    for bid, (x1, y1, x2, y2) in bbox_dict.items():
-        cx = (x1 + x2) / 2.0
-        cy = (y1 + y2) / 2.0
-        items.append((bid, cx, cy))
-
+    items = [(bid, (bb[0]+bb[2])/2.0, (bb[1]+bb[3])/2.0) for bid, bb in bbox_dict.items()]  # (bubble id, x-centre, y-centre)
     items.sort(key=lambda t: t[2])
-
     rows, tol = [], 90
     for it in items:
         placed = False
@@ -1730,50 +1668,14 @@ def estimate_reading_order(bbox_dict, mode="ltr"):
             if abs(it[2] - r["cy"]) <= tol:
                 r["items"].append(it)
                 r["cy"] = float(np.mean([x[2] for x in r["items"]]))
-                placed = True
-                break
-        if not placed:
-            rows.append({"cy": it[2], "items": [it]})
-
+                placed = True; break
+        if not placed: rows.append({"cy": it[2], "items": [it]})  # no row within tol of this y-centre: open a new one
     rows.sort(key=lambda r: r["cy"])
     order = []
     for r in rows:
         r["items"].sort(key=lambda x: x[1], reverse=(mode == "rtl"))
         order.extend([z[0] for z in r["items"]])
-
-    return {bid: i + 1 for i, bid in enumerate(order)}
-
-
-def export_bubbles(filepath, bbox_dict, quads_dict, indices_dict, ocr, reading_map, image_shape):
-    out = {}
-    for bid, bb in bbox_dict.items():
-        x1, y1, x2, y2 = bb
-        quads = quads_dict.get(bid, [])
-        idxs = indices_dict.get(bid, [])
-
-        qboxes = [quad_bbox(q) for q in quads]
-        text_union = boxes_union_xyxy(qboxes)
-
-        line_boxes_xyxy = build_line_boxes_from_indices(idxs, ocr, image_shape=image_shape)
-        line_union_xyxy = boxes_union_xyxy(line_boxes_xyxy)
-        line_union_area = bbox_area_xyxy(line_union_xyxy)
-
-        out[str(bid)] = {
-            "x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1),
-            "reading_order": int(reading_map.get(bid, bid)),
-            "quad_bboxes": [
-                {"x": int(b[0]), "y": int(b[1]), "w": int(b[2] - b[0]), "h": int(b[3] - b[1])}
-                for b in qboxes
-            ],
-            "quads": [[[int(p[0]), int(p[1])] for p in q] for q in quads],
-            "text_bbox": xyxy_to_xywh(text_union),
-            "line_bboxes": [xyxy_to_xywh(lb) for lb in line_boxes_xyxy],
-            "line_union_bbox": xyxy_to_xywh(line_union_xyxy) if line_union_xyxy else None,
-            "line_union_area": int(line_union_area),
-        }
-
-    with open(filepath, "w", encoding="utf-8") as f:
-        json.dump(out, f, indent=2, ensure_ascii=False)
+    return {bid: i+1 for i, bid in enumerate(order)}  # bubble id -> 1-based reading position
 
 
 # ============================================================
@@ -1786,24 +1688,24 @@ def translate_manga_text(
     confidence_threshold=0.03,
     min_text_length=1,
     gap_px="auto",
-    filter_sound_effects=True,
     quality_threshold=0.62,
     export_to_file="output.txt",
     export_bubbles_to="bubbles.json",
     reading_mode="ltr",
     debug=True,
-    use_enhanced_ocr=True
+    use_enhanced_ocr=True,
+    strict_grouping=True,
+    max_box_width_ratio=0.6,
+    max_box_height_ratio=0.5,
+    auto_fix_bubbles=True
 ):
     image = cv2.imread(image_path)
     if image is None:
-        print(f"❌ Cannot load image: {image_path}")
-        return
+        print(f"❌ Cannot load image: {image_path}"); return
 
     resolved_gap = auto_gap(image_path) if gap_px == "auto" else float(gap_px)
-
     print("Loading OCR engines...")
-    
-    # Use enhanced detector
+
     if use_enhanced_ocr:
         detector = ImprovedMacVisionDetector(source_lang=source_lang)
         print("🚀 Using Enhanced Multi-Pass OCR")
@@ -1813,308 +1715,285 @@ def translate_manga_text(
     print("Running detection OCR (Apple Vision)...")
     raw = detector.read(image_path)
     print(f"Raw detections: {len(raw)}")
-    
-    # Secondary pass for missed regions
+
     if use_enhanced_ocr:
         existing_quads = [r[0] for r in raw]
         missed_regions = detect_small_text_regions(image, existing_quads)
-        
         if missed_regions:
             print(f"🔍 Found {len(missed_regions)} potentially missed text regions")
-            
-            # Re-run OCR on missed regions with higher upscaling
             for region in missed_regions:
                 x1, y1, x2, y2 = region
-                # Add padding
                 pad = 10
-                x1 = max(0, x1 - pad)
-                y1 = max(0, y1 - pad)
-                x2 = min(image.shape[1], x2 + pad)
-                y2 = min(image.shape[0], y2 + pad)
-                
+                x1, y1 = max(0, x1-pad), max(0, y1-pad)
+                x2, y2 = min(image.shape[1], x2+pad), min(image.shape[0], y2+pad)
                 crop = image[y1:y2, x1:x2]
                 if crop.size > 0:
-                    # Aggressive upscaling for small text
-                    upscaled = cv2.resize(crop, None, fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC)
-                    region_results = detector.run_vision_ocr(upscaled)
-                    
-                    # Scale back and offset coordinates
-                    for quad, text, conf in region_results:
-                        scaled_quad = [[int(p[0]/4.0 + x1), int(p[1]/4.0 + y1)] for p in quad]
-                        raw.append((scaled_quad, text, conf))
-            
+                    upscaled = cv2.resize(crop, None, fx=4.0, fy=4.0,
+                                          interpolation=cv2.INTER_CUBIC)
+                    for quad, text, conf in detector.run_vision_ocr(upscaled):
+                        raw.append(([[int(p[0]/4.0+x1), int(p[1]/4.0+y1)] for p in quad],
+                                    text, conf))
             print(f"📝 Total detections after missed region scan: {len(raw)}")
 
-    filtered = []
-    skipped = 0
+    filtered, skipped = [], 0
     ih, iw = image.shape[:2]
 
     for bbox, text, conf in raw:
-        t = normalize_text(text)
+        t  = normalize_text(text)
         qb = quad_bbox(bbox)
-
-        if conf < confidence_threshold:
+        if conf < confidence_threshold:           skipped += 1; continue
+        if len(t) < min_text_length:              skipped += 1; continue
+        if not is_valid_language(t, source_lang): skipped += 1; continue
+        if not is_meaningful_text(t, source_lang):skipped += 1; continue
+        if qb[1] < int(ih * TOP_BAND_RATIO) and conf < 0.70 and len(t) >= 5:
             skipped += 1; continue
-        if len(t) < min_text_length:
-            skipped += 1; continue
-        if is_noise_text(t):
-            skipped += 1; continue
-        if filter_sound_effects and is_sound_effect(t):
-            skipped += 1; continue
-        if is_title_text(t):
-            skipped += 1; continue
-        if qb[1] < int(ih * TOP_BAND_RATIO):
-            if conf < 0.70 and len(t) >= 5:
-                skipped += 1; continue
-
         filtered.append((bbox, t, conf))
 
     print(f"Kept: {len(filtered)} | Skipped: {skipped}")
     if not filtered:
-        print("⚠️ No text after filtering.")
-        return
+        print("⚠️ No text after filtering."); return
+
+    # ── Pre-grouping quad splits ──────────────────────────────────────────
+    filtered, oversized_splits = validate_and_split_oversized_quads(image, filtered)
+    if oversized_splits > 0:
+        print(f"📐 Split {oversized_splits} oversized quad(s) before grouping")
 
-    # 1) split obvious wide OCR merges
     filtered, splits_made = split_wide_ocr_items(image, filtered)
     if splits_made > 0:
         print(f"✂️  Split {splits_made} wide OCR lines across column gaps.")
 
-    # 2) split giant bridge quads
     filtered, bridge_splits = split_abnormal_bridge_quads(image, filtered)
     if bridge_splits > 0:
         print(f"🧩 Split {bridge_splits} abnormal bridge OCR quad(s).")
 
-    # 3) shrink quads to tighter text footprint
+    # ── Column-gap split: catches BOX#6 type wide quads spanning two columns ──
+    hs_pre  = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in filtered]
+    med_h_pre = float(np.median(hs_pre)) if hs_pre else 14.0
+    filtered, col_splits = apply_column_gap_splits(image, filtered, med_h_pre)
+    if col_splits > 0:
+        print(f"📐 Column-gap split: {col_splits} quad(s) split before grouping")
+
     filtered = normalize_ocr_quads(filtered)
 
-    bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
-        filtered, image.shape, gap_px=resolved_gap, bbox_padding=1
+    print("📊 Grouping quads vertically...")
+    bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens_vertical(
+        filtered, image.shape, gap_px=resolved_gap, bbox_padding=1, strict_mode=strict_grouping
+    )
+    print(f"   Created {len(bubbles)} initial box(es)")
+
+    if auto_fix_bubbles:
+        bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection(
+            bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image
+        )
+
+    bubbles, bubble_boxes, bubble_quads, bubble_indices = enforce_max_box_size(
+        bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered,
+        max_width_ratio=max_box_width_ratio,
+        max_height_ratio=max_box_height_ratio,
+        image_shape=image.shape
     )
 
-    # merge accidental sibling fragments
     bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_close_bubbles_by_line_height(
         bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered
     )
 
-    # Apply page-specific fixes
-    page_identifier = os.path.basename(image_path)
-    bubbles, bubble_boxes, bubble_quads, bubble_indices = apply_page_specific_fixes(
-        bubbles, bubble_boxes, bubble_quads, bubble_indices, 
-        filtered, image, page_identifier
-    )
-
     new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
-    next_bid = max(bubbles.keys()) + 1 if bubbles else 1
+    next_bid         = max(bubbles.keys()) + 1 if bubbles else 1
     splits_performed = []
 
     for bid in list(bubbles.keys()):
-        box = bubble_boxes[bid]
+        box          = bubble_boxes[bid]
         bubble_split = None
 
         if is_vertical_text_like(bubble_indices[bid], filtered):
-            vgap_split = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered, factor=1.7, min_gap=18)
-            if vgap_split:
-                bubble_split = vgap_split
-                splits_performed.append(f"BOX#{bid} (vertical-stack y-gap split)")
+            vgap = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered,
+                                                     factor=1.7, min_gap=18)
+            if vgap:
+                bubble_split = vgap
+                splits_performed.append(f"BOX#{bid} (vertical-stack y-gap)")
 
         if bubble_split is None:
-            split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
-            if split_result:
-                _, _, split_x = split_result
-                left_idxs, right_idxs = [], []
-                for idx in bubble_indices[bid]:
-                    cx, cy = quad_center(filtered[idx][0])
-                    if cx < split_x:
-                        left_idxs.append(idx)
-                    else:
-                        right_idxs.append(idx)
-
-                if left_idxs and right_idxs:
-                    bubble_split = (left_idxs, right_idxs)
-                    splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
+            sr = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
+            if sr:
+                _, _, split_x = sr
+                li = [idx for idx in bubble_indices[bid]
+                      if quad_center(filtered[idx][0])[0] < split_x]
+                ri = [idx for idx in bubble_indices[bid]
+                      if quad_center(filtered[idx][0])[0] >= split_x]
+                if li and ri:
+                    bubble_split = (li, ri)
+                    splits_performed.append(f"BOX#{bid} (panel border)")
                 elif len(bubble_quads[bid]) >= 4:
-                    col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
-                    if col_split:
-                        l, r = col_split
-                        if l and r:
-                            bubble_split = (l, r)
-                            splits_performed.append(f"BOX#{bid} ({len(l)} quads | {len(r)} quads)")
+                    cs = split_bubble_if_multiple_columns(bubble_indices[bid], filtered,
+                                                          bid=bid, use_aggressive_thresholds=True)
+                    if cs:
+                        bubble_split = cs
+                        splits_performed.append(f"BOX#{bid} (aggressive column)")
 
         if bubble_split is None:
-            col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
-            if col_split:
-                l, r = col_split
-                if l and r:
-                    bubble_split = (l, r)
-                    splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(l)} | {len(r)} quads)")
+            cs = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
+            if cs:
+                bubble_split = cs
+                splits_performed.append(f"BOX#{bid} (vertical column)")
 
         if bubble_split is None:
-            nested_split = split_nested_or_side_by_side(bubble_indices[bid], filtered)
-            if nested_split:
-                l, r = nested_split
-                if l and r:
-                    bubble_split = (l, r)
-                    splits_performed.append(f"BOX#{bid} (nested/side-by-side forced split)")
+            ns = split_nested_or_side_by_side(bubble_indices[bid], filtered)
+            if ns:
+                bubble_split = ns
+                splits_performed.append(f"BOX#{bid} (nested/side-by-side)")
 
         if bubble_split is None:
-            row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
-            if row_split:
-                t, b = row_split
-                if t and b:
-                    bubble_split = (t, b)
-                    splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(t)} | {len(b)} quads)")
+            rs = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
+            if rs:
+                bubble_split = rs
+                splits_performed.append(f"BOX#{bid} (horizontal row)")
 
         if bubble_split is None:
-            gy = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered, factor=1.9, min_gap=22)
+            gy = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered,
+                                                   factor=1.9, min_gap=22)
             if gy:
-                a, b = gy
-                bubble_split = (a, b)
-                splits_performed.append(f"BOX#{bid} (large vertical-gap split)")
+                bubble_split = gy
+                splits_performed.append(f"BOX#{bid} (large vertical-gap)")
 
         if bubble_split:
-            part1_idxs, part2_idxs = bubble_split
-
-            new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered)
-            ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs])
-            new_bubble_boxes[bid] = (max(0, ub_1[0]-2), max(0, ub_1[1]-2), min(iw-1, ub_1[2]+2), min(ih-1, ub_1[3]+2))
-            new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs]
-            new_bubble_indices[bid] = part1_idxs
-
-            new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered)
-            ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs])
-            new_bubble_boxes[next_bid] = (max(0, ub_2[0]-2), max(0, ub_2[1]-2), min(iw-1, ub_2[2]+2), min(ih-1, ub_2[3]+2))
-            new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs]
-            new_bubble_indices[next_bid] = part2_idxs
+            p1, p2 = bubble_split
+            for part_idxs, part_bid in [(p1, bid), (p2, next_bid)]:
+                ub = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part_idxs])
+                new_bubbles[part_bid]        = build_lines_from_indices(part_idxs, filtered)
+                new_bubble_boxes[part_bid]   = (max(0,ub[0]-2), max(0,ub[1]-2),
+                                                min(iw-1,ub[2]+2), min(ih-1,ub[3]+2))
+                new_bubble_quads[part_bid]   = [filtered[i][0] for i in part_idxs]
+                new_bubble_indices[part_bid] = part_idxs
             next_bid += 1
         else:
-            new_bubbles[bid] = bubbles[bid]
-            new_bubble_boxes[bid] = bubble_boxes[bid]
-            new_bubble_quads[bid] = bubble_quads[bid]
+            new_bubbles[bid]        = bubbles[bid]
+            new_bubble_boxes[bid]   = bubble_boxes[bid]
+            new_bubble_quads[bid]   = bubble_quads[bid]
             new_bubble_indices[bid] = bubble_indices[bid]
 
     if splits_performed:
-        print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}")
-        for split_info in splits_performed:
-            print(f"   ✓ Split {split_info}")
+        print(f"\n🔀 Splits detected: {len(splits_performed)}")
 
-    bubbles = new_bubbles
-    bubble_boxes = new_bubble_boxes
-    bubble_quads = new_bubble_quads
-    bubble_indices = new_bubble_indices
+    bubbles, bubble_boxes, bubble_quads, bubble_indices = remove_nested_boxes(
+        new_bubble_boxes, new_bubble_indices, new_bubble_quads, new_bubbles,
+        overlap_threshold=0.50
+    )
+    print(f"✅ Final box count: {len(bubbles)}")
 
-    translator = GoogleTranslator(source=source_lang, target=target_lang)
-
-    clean_lines: Dict[int, str] = {}
+    # ── OCR quality pass ──────────────────────────────────────────────────
+    translator    = GoogleTranslator(source=source_lang, target=target_lang)
+    clean_lines:  Dict[int, str] = {}
     sources_used: Dict[int, str] = {}
+    translations: Dict[int, str] = {}
 
     for bid, lines in bubbles.items():
         base_txt = normalize_text(" ".join(lines))
-        base_sc = ocr_candidate_score(base_txt)
-
-        txt = base_txt
-        src_used = "vision-base"
-
+        base_sc  = ocr_candidate_score(base_txt)
+        txt, src_used = base_txt, "vision-base"
         if base_sc < quality_threshold:
             rr_txt, rr_sc, rr_src = reread_bubble_with_vision(
-                image_bgr=image,
-                bbox_xyxy=bubble_boxes[bid],
-                vision_detector=detector,
-                upscale=3.0,
-                pad=24
-            )
-            if rr_txt and rr_sc > base_sc + 0.04:
-                txt = rr_txt
-                src_used = rr_src
-
-        txt = txt.replace(" BOMPORTA", " IMPORTA")
-        txt = txt.replace(" TESTO ", " ESTO ")
-        txt = txt.replace(" MIVERDAD", " MI VERDAD")
-
-        clean_lines[bid] = apply_glossary(normalize_text(txt))
+                image, bubble_boxes[bid], detector, upscale=3.0, pad=24)
+            if rr_txt and rr_sc > base_sc + 0.04 and is_valid_language(rr_txt, source_lang):
+                txt, src_used = rr_txt, rr_src
+        clean_lines[bid]  = normalize_text(txt)
         sources_used[bid] = src_used
 
     reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode)
 
+    # ── Single-pass translation cache ────────────────────────────────────
+    for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
+        src_txt = clean_lines[bid].strip()
+        if not src_txt: continue
+        if not is_valid_language(src_txt, source_lang): continue
+        if not is_meaningful_text(src_txt, source_lang): continue
+        try:
+            tgt = translator.translate(src_txt) or ""
+            tgt = postprocess_translation_general(tgt).upper()
+        except Exception as e:
+            tgt = f"[Error: {e}]"
+        translations[bid] = tgt
+
     if debug:
-        save_debug_clusters(
-            image_path=image_path,
-            ocr=filtered,
-            bubble_boxes=bubble_boxes,
-            bubble_indices=bubble_indices,
-            clean_lines=clean_lines,
-            out_path="debug_clusters.png"
-        )
+        save_debug_clusters(image_path, filtered, bubble_boxes, bubble_indices,
+                            clean_lines, "debug_clusters.png")
 
-    divider = "─" * 120
+    # ── Text output ───────────────────────────────────────────────────────
+    divider   = "─" * 120
     out_lines = ["BUBBLE|ORDER|OCR_SOURCE|ORIGINAL|TRANSLATED|FLAGS", divider]
-
-    print(divider)
-    print(f"{'BUBBLE':<8} {'ORDER':<6} {'SOURCE':<12} {'ORIGINAL':<40} {'TRANSLATED':<40} FLAGS")
-    print(divider)
+    print(divider + f"\n{'BUBBLE':<8} {'ORDER':<6} {'SOURCE':<12} "
+          f"{'ORIGINAL':<40} {'TRANSLATED':<40} FLAGS\n" + divider)
 
     translated_count = 0
     for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
         src_txt = clean_lines[bid].strip()
-        if not src_txt:
-            continue
+        if not src_txt: continue
+        if not is_valid_language(src_txt, source_lang): continue
+        if not is_meaningful_text(src_txt, source_lang): continue
 
-        flags = []
-        try:
-            tgt = translator.translate(src_txt) or ""
-        except Exception as e:
-            tgt = f"[Translation error: {e}]"
-            flags.append("TRANSLATION_ERROR")
-
-        tgt = apply_glossary(postprocess_translation_general(tgt)).upper()
-        src_u = src_txt.upper()
+        flags      = []
+        tgt        = translations.get(bid, "")
+        if not tgt: flags.append("NO_TRANSLATION")
+        src_u      = src_txt.upper()
         src_engine = sources_used.get(bid, "unknown")
 
-        out_lines.append(
-            f"#{bid}|{reading_map.get(bid, bid)}|{src_engine}|{src_u}|{tgt}|{','.join(flags) if flags else '-'}"
-        )
-
-        print(
-            f"#{bid:<7} {reading_map.get(bid, bid):<6} {src_engine:<12} "
-            f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}"
-        )
+        out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_engine}|{src_u}|{tgt}|"
+                         f"{','.join(flags) if flags else '-'}")
+        print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_engine:<12} "
+              f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}")
         translated_count += 1
 
-    out_lines.append(divider)
-    out_lines.append(f"✅ Done! {translated_count} bubble(s) translated, {skipped} detection(s) skipped.")
-
+    out_lines.append(divider + f"\n✅ Done! {translated_count} bubble(s) translated.")
     with open(export_to_file, "w", encoding="utf-8") as f:
         f.write("\n".join(out_lines))
 
-    export_bubbles(
-        export_bubbles_to,
-        bbox_dict=bubble_boxes,
-        quads_dict=bubble_quads,
-        indices_dict=bubble_indices,
-        ocr=filtered,
-        reading_map=reading_map,
-        image_shape=image.shape
-    )
+    # ── bubbles.json ──────────────────────────────────────────────────────
+    bubbles_payload = {}
+    for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)):
+        src_txt = clean_lines[bid].strip()
+        if not src_txt: continue
+        if not is_valid_language(src_txt, source_lang): continue
+        if not is_meaningful_text(src_txt, source_lang): continue
+        box = bubble_boxes.get(bid)
+        tgt = translations.get(bid, "")
+        bubbles_payload[str(bid)] = {
+            "order":      reading_map.get(bid, bid),
+            "ocr_source": sources_used.get(bid, "unknown"),
+            "original":   src_txt.upper(),
+            "translated": tgt,
+            "box": {
+                "x": box[0] if box else 0,
+                "y": box[1] if box else 0,
+                "w": (box[2]-box[0]) if box else 0,
+                "h": (box[3]-box[1]) if box else 0,
+            },
+            "lines": [line.upper() for line in bubbles.get(bid, [])],
+        }
 
-    print(divider)
-    print(f"Saved: {export_to_file}")
-    print(f"Saved: {export_bubbles_to}")
-    if debug:
-        print("Saved: debug_clusters.png")
+    with open(export_bubbles_to, "w", encoding="utf-8") as f:
+        json.dump(bubbles_payload, f, ensure_ascii=False, indent=2)
+
+    print(divider + f"\nSaved: {export_to_file}\nSaved: {export_bubbles_to}")
 
 
+# ============================================================
+# ENTRY POINT
+# ============================================================
 if __name__ == "__main__":
     translate_manga_text(
-        image_path="16.jpg",
+        image_path="17.jpg",
         source_lang="english",
         target_lang="ca",
-        confidence_threshold=0.03,  # Lower threshold for better detection
+        confidence_threshold=0.03,  # low threshold for better detection
         min_text_length=1,
         gap_px="auto",
-        filter_sound_effects=True,
         quality_threshold=0.62,
         export_to_file="output.txt",
         export_bubbles_to="bubbles.json",
-        reading_mode="ltr", #rtl or 
+        reading_mode="rtl",  # "rtl" or "ltr"; forwarded to estimate_reading_order(mode=...)
         debug=True,
-        use_enhanced_ocr=True  # Enable enhanced multi-pass OCR
-    )
\ No newline at end of file
+        use_enhanced_ocr=True,  # enable enhanced multi-pass OCR
+        strict_grouping=True,
+        max_box_width_ratio=0.6,
+        max_box_height_ratio=0.5,
+        auto_fix_bubbles=True  # keep these settings in sync with PIPELINE_CONFIG in pipeline-translator.py
+    )
diff --git a/pipeline-translator.py b/pipeline-translator.py
index c1b9e9d..a64ad6f 100644
--- a/pipeline-translator.py
+++ b/pipeline-translator.py
@@ -14,10 +14,32 @@ import argparse
 import importlib.util
 from pathlib import Path
 
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
+#  PIPELINE CONFIGURATION
+#  Single source of truth — mirrors the __main__ block in
+#  manga-translator.py so both entry points stay in sync.
+# ─────────────────────────────────────────────────────────────
+PIPELINE_CONFIG = {
+    "source_lang":          "english",  # source language handed to the translator
+    "target_lang":          "ca",       # target language handed to the translator
+    "confidence_threshold": 0.03,
+    "min_text_length":      1,
+    "gap_px":               "auto",
+    "quality_threshold":    0.62,       # bubbles whose OCR score is below this are re-read
+    "reading_mode":         "rtl",      # mode used to estimate the bubble reading order
+    "debug":                True,       # also writes debug_clusters.png for each page
+    "use_enhanced_ocr":     True,
+    "strict_grouping":      True,
+    "max_box_width_ratio":  0.6,
+    "max_box_height_ratio": 0.5,
+    "auto_fix_bubbles":     True,
+}
+
+
+# ─────────────────────────────────────────────────────────────
 #  DYNAMIC MODULE LOADER
-# ─────────────────────────────────────────────
-def load_module(name, filepath):
+# ─────────────────────────────────────────────────────────────
+def load_module(name: str, filepath: str):
     spec = importlib.util.spec_from_file_location(name, filepath)
     if spec is None or spec.loader is None:
         raise FileNotFoundError(f"Cannot load spec for {filepath}")
@@ -25,103 +47,188 @@ def load_module(name, filepath):
     spec.loader.exec_module(module)
     return module
 
-# ─────────────────────────────────────────────
+
+# ─────────────────────────────────────────────────────────────
 #  HELPERS
-# ─────────────────────────────────────────────
-def sorted_pages(chapter_dir):
+def sorted_pages(chapter_dir: Path):
+    """Return the chapter's image files in natural page order (2.jpg before 10.jpg)."""
+    import re
     exts = {".jpg", ".jpeg", ".png", ".webp"}
-    pages = [
-        p for p in Path(chapter_dir).iterdir()
-        if p.is_file() and p.suffix.lower() in exts
-    ]
-    return sorted(pages, key=lambda p: p.stem)
+    pages = [p for p in chapter_dir.iterdir() if p.is_file() and p.suffix.lower() in exts]
+    # Zero-pad every digit run so plain string ordering matches numeric page order.
+    return sorted(pages, key=lambda p: re.sub(r"\d+", lambda m: m.group().zfill(12), p.stem))
 
-def make_page_workdir(chapter_dir, page_stem):
-    workdir = Path(chapter_dir) / "translated" / page_stem
+
+def make_page_workdir(chapter_dir: Path, page_stem: str) -> Path:
+    workdir = chapter_dir / "translated" / page_stem  # per-page folder: <chapter_dir>/translated/<page_stem>
     workdir.mkdir(parents=True, exist_ok=True)
     return workdir
 
-# ─────────────────────────────────────────────
+
+def verify_translator_api(module) -> bool:
+    """
+    Check that the loaded module exposes translate_manga_text() and that
+    it accepts every key of PIPELINE_CONFIG as a keyword argument.
+
+    Prints one warning per unsupported key.  Returns True when all keys
+    are accepted (always the case if the function declares **kwargs) and
+    False when the function is missing, not callable, or rejects a key.
+    """
+    import inspect
+
+    fn = getattr(module, "translate_manga_text", None)
+    if not callable(fn):
+        print("❌ manga-translator.py does not expose translate_manga_text()")
+        return False
+
+    params = inspect.signature(fn).parameters
+    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
+        return True  # **kwargs swallows any key, nothing to compare
+
+    missing = [key for key in PIPELINE_CONFIG if key not in params]
+    for key in missing:
+        print(f"⚠️  PIPELINE_CONFIG key '{key}' not found in "
+              f"translate_manga_text() — update pipeline or translator.")
+    return not missing
+
+
+# ─────────────────────────────────────────────────────────────
 #  PER-PAGE PIPELINE
-# ─────────────────────────────────────────────
-def process_page(page_path, workdir, translator_module):
+# ─────────────────────────────────────────────────────────────
+def process_page(page_path: Path, workdir: Path, translator_module) -> bool:
+    """Translate one page from inside its workdir; return False if an exception was raised."""
     print(f"\n{'─' * 70}")
-    print(f"PAGE: {page_path.name}")
+    print(f"  PAGE : {page_path.name}")
     print(f"{'─' * 70}")
 
     orig_dir = os.getcwd()
     try:
-        # Isolate execution to the specific page's folder
+        # Run inside the page's workdir so debug images and output files land there.
         os.chdir(workdir)
 
         print("  ⏳ Extracting text and translating...")
-        
-        # 1) Translate using ONLY the required path arguments.
-        # This forces the function to use its own internal default variables 
-        # (like source_lang, target_lang, confidence_threshold) directly from manga-translator.py
-        translator_module.translate_manga_text(
-            image_path=str(page_path.resolve()),
-            export_to_file="output.txt",
-            export_bubbles_to="bubbles.json"
-        )
-        print("  ✅ Translation and OCR data saved successfully")
 
+        translator_module.translate_manga_text(
+            image_path       = str(page_path.resolve()),
+            export_to_file   = "output.txt",
+            export_bubbles_to= "bubbles.json",
+            **PIPELINE_CONFIG,          # ← all settings from the single config dict
+        )
+
+        # Claim success only when both expected outputs exist and are non-empty.
+        missing = [fname for fname in ("output.txt", "bubbles.json")
+                   if not (workdir / fname).exists() or (workdir / fname).stat().st_size == 0]
+        for fname in missing:
+            print(f"  ⚠️  {fname} is missing or empty after processing.")
+        if not missing:
+            print("  ✅ Translation and OCR data saved successfully")
         return True
 
     except Exception as e:
+        import traceback
         print(f"  ❌ Failed: {e}")
+        traceback.print_exc()
         return False
 
     finally:
         os.chdir(orig_dir)
 
-# ─────────────────────────────────────────────
+
+# ─────────────────────────────────────────────────────────────
 #  MAIN
-# ─────────────────────────────────────────────
+# ─────────────────────────────────────────────────────────────
 def main():
+    """Batch-translate the page images in a chapter directory (optionally a --start/--end range)."""
     parser = argparse.ArgumentParser(description="Manga Translation OCR Batch Pipeline")
-    parser.add_argument("chapter_dir", help="Path to the folder containing manga pages")
+    parser.add_argument(
+        "chapter_dir",
+        help="Path to the folder containing manga page images"
+    )
+    parser.add_argument(
+        "--start", type=int, default=1,
+        help="Start from this page number (1-based, default: 1)"
+    )
+    parser.add_argument(
+        "--end", type=int, default=None,
+        help="Stop after this page number inclusive (default: all)"
+    )
     args = parser.parse_args()
 
     chapter_dir = Path(args.chapter_dir).resolve()
+    if not chapter_dir.is_dir():
+        print(f"❌ Not a directory: {chapter_dir}")
+        sys.exit(1)
 
-    print("Loading translator module...")
-    script_dir = Path(__file__).parent
-    
+    # ── Load translator module ────────────────────────────────
+    script_dir  = Path(__file__).parent
+    module_path = script_dir / "manga-translator.py"
+
+    if not module_path.exists():
+        print(f"❌ manga-translator.py not found in {script_dir}")
+        sys.exit(1)
+
+    print(f"📦 Loading translator from: {module_path}")
     try:
-        translator = load_module("manga_translator", str(script_dir / "manga-translator.py"))
+        translator = load_module("manga_translator", str(module_path))
     except Exception as e:
         print(f"❌ Could not load manga-translator.py: {e}")
         sys.exit(1)
 
-    pages = sorted_pages(chapter_dir)
-    if not pages:
+    # ── API compatibility check ───────────────────────────────
+    if not verify_translator_api(translator):
+        print("❌ Aborting — fix the parameter mismatch above first.")
+        sys.exit(1)
+
+    # ── Discover pages ────────────────────────────────────────
+    all_pages = sorted_pages(chapter_dir)
+    if not all_pages:
         print(f"❌ No images found in: {chapter_dir}")
         sys.exit(1)
 
-    print(f"\n📖 Chapter : {chapter_dir.name}")
-    print(f"   Pages   : {len(pages)}")
-    print("   Note    : Using translation settings directly from manga-translator.py\n")
+    # Apply --start / --end (1-based, inclusive).  Both bounds are clamped to
+    # the real page range so a negative --end cannot wrap around as a slice index.
+    start_idx = max(0, args.start - 1)
+    end_idx   = len(all_pages) if args.end is None else max(0, min(args.end, len(all_pages)))
+    pages     = all_pages[start_idx:end_idx]
+
+    if not pages:
+        print(f"❌ No pages in range [{args.start}, {args.end}]")
+        sys.exit(1)
+
+    # ── Summary header ────────────────────────────────────────
+    print(f"\n{'═' * 70}")
+    print(f"  📖 Chapter    : {chapter_dir.name}")
+    print(f"  📄 Pages      : {len(pages)} "
+          f"(of {len(all_pages)} total, "
+          f"range {start_idx + 1}–{end_idx})")
+    print(f"  🌐 Lang       : {PIPELINE_CONFIG['source_lang']} → "
+          f"{PIPELINE_CONFIG['target_lang']}")
+    print(f"  📖 Read order : {PIPELINE_CONFIG['reading_mode'].upper()}")
+    print(f"  🔍 Enhanced   : {PIPELINE_CONFIG['use_enhanced_ocr']}")
+    print(f"{'═' * 70}\n")
 
     succeeded, failed = [], []
 
     for i, page_path in enumerate(pages, start=1):
-        print(f"[{i}/{len(pages)}] Processing...")
+        print(f"[{i}/{len(pages)}] {page_path.name}")
         workdir = make_page_workdir(chapter_dir, page_path.stem)
-        
+
         if process_page(page_path, workdir, translator):
             succeeded.append(page_path.name)
         else:
             failed.append(page_path.name)
 
+    # ── Final report ──────────────────────────────────────────
     print(f"\n{'═' * 70}")
-    print("PIPELINE COMPLETE")
-    print(f"✅ {len(succeeded)} page(s) succeeded")
+    print("  PIPELINE COMPLETE")
+    print(f"  ✅ {len(succeeded)} page(s) succeeded")
     if failed:
-        print(f"❌ {len(failed)} page(s) failed:")
-        for f in failed:
-            print(f"   • {f}")
+        print(f"  ❌ {len(failed)} page(s) failed:")
+        for name in failed:
+            print(f"     • {name}")
     print(f"{'═' * 70}\n")
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/requirements b/requirements
deleted file mode 100644
index 492c48d..0000000
--- a/requirements
+++ /dev/null
@@ -1,79 +0,0 @@
-aistudio-sdk==0.3.8
-annotated-doc==0.0.4
-annotated-types==0.7.0
-anyio==4.13.0
-bce-python-sdk==0.9.70
-beautifulsoup4==4.14.3
-certifi==2026.2.25
-chardet==7.4.3
-charset-normalizer==3.4.7
-click==8.3.2
-colorlog==6.10.1
-crc32c==2.8
-deep-translator==1.11.4
-easyocr==1.7.2
-filelock==3.28.0
-fsspec==2026.3.0
-future==1.0.0
-h11==0.16.0
-hf-xet==1.4.3
-httpcore==1.0.9
-httpx==0.28.1
-huggingface_hub==1.10.2
-idna==3.11
-ImageIO==2.37.3
-imagesize==2.0.0
-Jinja2==3.1.6
-lazy-loader==0.5
-markdown-it-py==4.0.0
-MarkupSafe==3.0.3
-mdurl==0.1.2
-modelscope==1.35.4
-mpmath==1.3.0
-networkx==3.6.1
-ninja==1.13.0
-numpy==1.26.4
-opencv-contrib-python==4.10.0.84
-opencv-python==4.11.0.86
-opencv-python-headless==4.11.0.86
-opt-einsum==3.3.0
-packaging==26.1
-paddleocr==3.4.1
-paddlepaddle==3.3.1
-paddlex==3.4.3
-pandas==3.0.2
-pillow==12.2.0
-prettytable==3.17.0
-protobuf==7.34.1
-psutil==7.2.2
-py-cpuinfo==9.0.0
-pyclipper==1.4.0
-pycryptodome==3.23.0
-pydantic==2.13.1
-pydantic_core==2.46.1
-Pygments==2.20.0
-pypdfium2==5.7.0
-python-bidi==0.6.7
-python-dateutil==2.9.0.post0
-PyYAML==6.0.2
-requests==2.33.1
-rich==15.0.0
-ruamel.yaml==0.19.1
-safetensors==0.7.0
-scikit-image==0.26.0
-scipy==1.17.1
-shapely==2.1.2
-shellingham==1.5.4
-six==1.17.0
-soupsieve==2.8.3
-sympy==1.14.0
-tifffile==2026.3.3
-torch==2.11.0
-torchvision==0.26.0
-tqdm==4.67.3
-typer==0.24.1
-typing-inspection==0.4.2
-typing_extensions==4.15.0
-ujson==5.12.0
-urllib3==2.6.3
-wcwidth==0.6.0