diff --git a/manga-translator.py b/manga-translator.py index fc53f22..941cc3d 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -60,16 +60,13 @@ def is_valid_language(text: str, source_lang: str) -> bool: clean_text = re.sub(r'[^\w]', '', text) if not clean_text: return False - lang = source_lang.lower() - if lang in ['en', 'english', 'es', 'spanish', 'fr', 'french', 'it', 'italian', 'ca', 'catalan', 'de', 'german']: foreign_chars = len(re.findall( r'[\u0600-\u06FF\u0750-\u077F\u3040-\u30FF' r'\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\u1100-\u11FF]', - clean_text - )) + clean_text)) if foreign_chars > 0: return False latin_chars = len(re.findall(r'[a-zA-ZÀ-ÿ]', clean_text)) @@ -79,25 +76,21 @@ def is_valid_language(text: str, source_lang: str) -> bool: if total <= 6: return (latin_chars / total) >= 0.55 return (latin_chars / total) >= 0.45 - elif lang in ['ja', 'japanese']: ja_chars = len(re.findall(r'[\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF]', clean_text)) if len(clean_text) <= 3: return ja_chars >= 1 return (ja_chars / len(clean_text)) >= 0.4 - elif lang in ['ko', 'korean']: ko_chars = len(re.findall(r'[\uAC00-\uD7AF\u1100-\u11FF]', clean_text)) if len(clean_text) <= 3: return ko_chars >= 1 return (ko_chars / len(clean_text)) >= 0.4 - elif lang in ['zh', 'chinese']: zh_chars = len(re.findall(r'[\u4E00-\u9FFF\u3400-\u4DBF]', clean_text)) if len(clean_text) <= 3: return zh_chars >= 1 return (zh_chars / len(clean_text)) >= 0.4 - return True @@ -122,6 +115,7 @@ _MANGA_INTERJECTIONS = { 'OK', 'OK!', 'OKAY', 'EEEEP', 'EEEP', 'OMIGOSH', + 'BECKY', 'BECKY!', 'HMM', 'HMM...', 'TSK', 'TCH', 'GRRR','I','A', @@ -139,30 +133,25 @@ def is_meaningful_text(text: str, source_lang: str, min_alpha_chars: int = 2) -> t_alpha_only = re.sub(r'[^A-Za-zÀ-ÿ]', '', t_upper) if t_upper in _MANGA_INTERJECTIONS or t_alpha_only in _MANGA_INTERJECTIONS: return True - alpha_count = sum(c.isalpha() for c in t) if alpha_count < min_alpha_chars: return False if t_upper in _NOISE_TOKENS: 
return False - lang = source_lang.lower() if lang in ['en', 'english', 'es', 'spanish', 'fr', 'french', 'it', 'italian', 'ca', 'catalan', 'de', 'german']: non_alpha = sum(not c.isalpha() for c in t) if len(t) > 0 and (non_alpha / len(t)) > 0.60: return False - if len(t) >= 3 and len(set(t_upper)) == 1: return False - if lang in ['en', 'english', 'es', 'spanish', 'fr', 'french', 'it', 'italian', 'ca', 'catalan', 'de', 'german']: if len(t) > 4: vowels = len(re.findall(r'[AEIOUaeiouÀ-ÿ]', t)) if vowels == 0: return False - return True @@ -195,7 +184,8 @@ def xyxy_to_xywh(b): if b is None: return None x1, y1, x2, y2 = b - return {"x": int(x1), "y": int(y1), "w": int(max(0, x2 - x1)), "h": int(max(0, y2 - y1))} + return {"x": int(x1), "y": int(y1), + "w": int(max(0, x2 - x1)), "h": int(max(0, y2 - y1))} def overlap_or_near(a, b, gap=0): ax1, ay1, ax2, ay2 = a @@ -205,13 +195,10 @@ def overlap_or_near(a, b, gap=0): return gap_x <= gap and gap_y <= gap def boxes_iou(a, b): - """Intersection over Union for two xyxy boxes.""" ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b - ix1 = max(ax1, bx1) - iy1 = max(ay1, by1) - ix2 = min(ax2, bx2) - iy2 = min(ay2, by2) + ix1, iy1 = max(ax1, bx1), max(ay1, by1) + ix2, iy2 = min(ax2, bx2), min(ay2, by2) inter = max(0, ix2 - ix1) * max(0, iy2 - iy1) if inter == 0: return 0.0 @@ -223,10 +210,8 @@ def boxes_overlap_ratio(a, b): """Ratio of intersection to the SMALLER box area.""" ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b - ix1 = max(ax1, bx1) - iy1 = max(ay1, by1) - ix2 = min(ax2, bx2) - iy2 = min(ay2, by2) + ix1, iy1 = max(ax1, bx1), max(ay1, by1) + ix2, iy2 = min(ax2, bx2), min(ay2, by2) inter = max(0, ix2 - ix1) * max(0, iy2 - iy1) if inter == 0: return 0.0 @@ -241,11 +226,11 @@ def ocr_candidate_score(text: str) -> float: n = len(t) if n == 0: return 0.0 - alpha = sum(c.isalpha() for c in t) / n - spaces = sum(c.isspace() for c in t) / n + alpha = sum(c.isalpha() for c in t) / n + spaces = sum(c.isspace() for c in t) / n punct_ok = 
sum(c in ".,!?'-:;()[]\"¡¿" for c in t) / n - bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"¡¿]", t)) / n - penalty = 0.0 + bad = len(re.findall(r"[^\w\s\.\,\!\?\-\'\:\;\(\)\[\]\"¡¿]", t)) / n + penalty = 0.0 if re.search(r"\b[A-Z]\b", t): penalty += 0.05 if re.search(r"[0-9]{2,}", t): @@ -255,15 +240,11 @@ def ocr_candidate_score(text: str) -> float: def quad_is_horizontal(quad, ratio_threshold=1.5) -> bool: x1, y1, x2, y2 = quad_bbox(quad) - w = max(1, x2 - x1) - h = max(1, y2 - y1) - return (w / h) >= ratio_threshold + return (max(1, x2 - x1) / max(1, y2 - y1)) >= ratio_threshold def quad_is_vertical(quad, ratio_threshold=1.5) -> bool: x1, y1, x2, y2 = quad_bbox(quad) - w = max(1, x2 - x1) - h = max(1, y2 - y1) - return (h / w) >= ratio_threshold + return (max(1, y2 - y1) / max(1, x2 - x1)) >= ratio_threshold # ============================================================ @@ -273,34 +254,35 @@ def enhance_image_for_ocr(image_bgr, upscale_factor=2.5): h, w = image_bgr.shape[:2] upscaled = cv2.resize(image_bgr, (int(w * upscale_factor), int(h * upscale_factor)), interpolation=cv2.INTER_CUBIC) - gray = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY) - denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21) - clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) + gray = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY) + denoised = cv2.fastNlMeansDenoising(gray, None, h=10, + templateWindowSize=7, searchWindowSize=21) + clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)) enhanced = clahe.apply(denoised) - kernel_sharpen = np.array([[-1,-1,-1], [-1, 9,-1], [-1,-1,-1]]) - sharpened = cv2.filter2D(enhanced, -1, kernel_sharpen) - binary = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, - cv2.THRESH_BINARY, 11, 2) - kernel = np.ones((2, 2), np.uint8) - cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) + sharpened = cv2.filter2D(enhanced, -1, + np.array([[-1,-1,-1],[-1,9,-1],[-1,-1,-1]])) 
+ binary = cv2.adaptiveThreshold(sharpened, 255, + cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, 11, 2) + cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, np.ones((2, 2), np.uint8)) return cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR) def detect_small_text_regions(image_bgr, existing_quads): - gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) - mask = np.zeros(gray.shape, dtype=np.uint8) + gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) + mask = np.zeros(gray.shape, dtype=np.uint8) for quad in existing_quads: - pts = np.array(quad, dtype=np.int32) - cv2.fillPoly(mask, [pts], 255) - mask_inv = cv2.bitwise_not(mask) - _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + cv2.fillPoly(mask, [np.array(quad, dtype=np.int32)], 255) + mask_inv = cv2.bitwise_not(mask) + _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) binary_masked = cv2.bitwise_and(binary, binary, mask=mask_inv) - contours, _ = cv2.findContours(binary_masked, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + contours, _ = cv2.findContours(binary_masked, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE) text_regions = [] for contour in contours: x, y, w, h = cv2.boundingRect(contour) area = w * h - if 50 < area < 5000 and 0.1 < h/max(w, 1) < 10: - text_regions.append((x, y, x+w, y+h)) + if 50 < area < 5000 and 0.1 < h / max(w, 1) < 10: + text_regions.append((x, y, x + w, y + h)) return text_regions @@ -308,7 +290,7 @@ def detect_small_text_regions(image_bgr, existing_quads): # SPEECH BUBBLE DETECTION # ============================================================ def detect_speech_bubbles(image_bgr: np.ndarray) -> List[np.ndarray]: - gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) + gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2) contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) @@ -322,10 +304,9 @@ def 
is_quad_in_bubble(quad_bbox_xyxy, bubble_contour, tolerance=5): def split_indices_by_bubble(indices, ocr, bubble_contours): if not indices: return [] - bubble_groups = {} - outside_group = [] + bubble_groups, outside_group = {}, [] for idx in indices: - bbox = quad_bbox(ocr[idx][0]) + bbox = quad_bbox(ocr[idx][0]) found = False for bidx, bubble in enumerate(bubble_contours): if is_quad_in_bubble(bbox, bubble): @@ -342,7 +323,8 @@ def split_indices_by_bubble(indices, ocr, bubble_contours): def check_vertical_alignment_split(indices, ocr, threshold=20): if len(indices) <= 1: return [indices] - items = sorted([(idx, quad_bbox(ocr[idx][0])) for idx in indices], key=lambda x: x[1][1]) + items = sorted([(idx, quad_bbox(ocr[idx][0])) for idx in indices], + key=lambda x: x[1][1]) groups, current_group = [], [items[0][0]] for i in range(1, len(items)): if items[i][1][1] - items[i-1][1][3] > threshold: @@ -366,9 +348,9 @@ def is_quad_oversized(quad, median_height, width_threshold=8.0): def split_oversized_quad_by_content(image_bgr, quad, text, conf, median_height): x1, y1, x2, y2 = quad_bbox(quad) w, h = x2 - x1, max(1, y2 - y1) - pad = 2 - roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad), - max(0,x1):min(image_bgr.shape[1],x2)] + pad = 2 + roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad), + max(0,x1):min(image_bgr.shape[1],x2)] if roi.size == 0: return [(quad, text, conf)] gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) @@ -390,15 +372,15 @@ def split_oversized_quad_by_content(image_bgr, quad, text, conf, median_height): gaps.sort(key=lambda g: g[1], reverse=True) split_x_abs = max(0, x1) + gaps[0][0] if ' ' in text: - char_w = w / max(1, len(text)) + char_w = w / max(1, len(text)) split_idx = int((split_x_abs - x1) / max(1e-6, char_w)) - spaces = [i for i, c in enumerate(text) if c == ' '] + spaces = [i for i, c in enumerate(text) if c == ' '] if spaces: split_idx = min(spaces, key=lambda i: abs(i - split_idx)) tl, tr = text[:split_idx].strip(), 
text[split_idx:].strip() else: split_idx = int(len(text) * (split_x_abs - x1) / w) - tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() if tl and tr: return [([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)] @@ -407,7 +389,7 @@ def split_oversized_quad_by_content(image_bgr, quad, text, conf, median_height): def validate_and_split_oversized_quads(image_bgr, filtered_ocr): if not filtered_ocr: return filtered_ocr, 0 - heights = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr] + heights = [max(1, quad_bbox(q)[3] - quad_bbox(q)[1]) for q, _, _ in filtered_ocr] median_height = float(np.median(heights)) if heights else 14.0 result, splits_made = [], 0 for quad, text, conf in filtered_ocr: @@ -426,15 +408,10 @@ def validate_and_split_oversized_quads(image_bgr, filtered_ocr): # HORIZONTAL GAP DETECTION AT QUAD LEVEL # ============================================================ def detect_horizontal_gap_in_group(indices, ocr, med_h, gap_factor=2.5): - """ - Detects a large horizontal gap between quads within a group and splits them. - Fixes cases like BOX#8 in debug_clusters_016 where two column groups - are incorrectly merged into one box. 
- """ if len(indices) < 2: return None - items = sorted(indices, key=lambda i: quad_center(ocr[i][0])[0]) - boxes = [quad_bbox(ocr[i][0]) for i in items] + items = sorted(indices, key=lambda i: quad_center(ocr[i][0])[0]) + boxes = [quad_bbox(ocr[i][0]) for i in items] gap_threshold = med_h * gap_factor best_gap, best_split = 0.0, None for k in range(len(items) - 1): @@ -443,23 +420,18 @@ def detect_horizontal_gap_in_group(indices, ocr, med_h, gap_factor=2.5): best_gap, best_split = gap, k if best_split is None: return None - left_group = [items[i] for i in range(best_split + 1)] + left_group = [items[i] for i in range(best_split + 1)] right_group = [items[i] for i in range(best_split + 1, len(items))] if not left_group or not right_group: return None return (left_group, right_group) - def orientation_compatible(idx_a, idx_b, ocr): - """ - Prevents merging a tall/narrow isolated glyph with wide horizontal text lines. - Fixes BOX#1 type problems in debug_clusters_015. - """ ba = quad_bbox(ocr[idx_a][0]) bb = quad_bbox(ocr[idx_b][0]) wa, ha = max(1, ba[2]-ba[0]), max(1, ba[3]-ba[1]) wb, hb = max(1, bb[2]-bb[0]), max(1, bb[3]-bb[1]) - ra, rb = wa/ha, wb/hb + ra, rb = wa / ha, wb / hb if (ra < 0.6 and rb > 2.0) or (rb < 0.6 and ra > 2.0): return False return True @@ -470,33 +442,20 @@ def orientation_compatible(idx_a, idx_b, ocr): # ============================================================ def split_wide_quad_by_column_gap(image_bgr, quad, text, conf, med_h, min_gap_factor=1.8): - """ - FIX for BOX#6 type problem: - Splits a single OCR quad that spans two distinct text columns by finding - the largest vertical gap in its pixel projection. More aggressive than - split_oversized_quad_by_content — targets column-level gaps specifically. 
- """ x1, y1, x2, y2 = quad_bbox(quad) w, h = x2 - x1, max(1, y2 - y1) - - # Only attempt if the quad is wide enough to plausibly span two columns if w < med_h * 3.0: return [(quad, text, conf)] - pad = 2 - roi = image_bgr[max(0, y1-pad):min(image_bgr.shape[0], y2+pad), - max(0, x1):min(image_bgr.shape[1], x2)] + roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad), + max(0,x1):min(image_bgr.shape[1],x2)] if roi.size == 0: return [(quad, text, conf)] - gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) - v_proj = np.sum(binary, axis=0) - - # Threshold: column gap must be nearly empty + v_proj = np.sum(binary, axis=0) gap_threshold = h * 255 * 0.12 - min_gap_px = max(int(med_h * min_gap_factor), 10) - + min_gap_px = max(int(med_h * min_gap_factor), 10) gaps, in_gap, gap_start = [], False, 0 for x in range(len(v_proj)): if v_proj[x] < gap_threshold: @@ -507,41 +466,29 @@ def split_wide_quad_by_column_gap(image_bgr, quad, text, conf, med_h, if gw >= min_gap_px: gaps.append((gap_start + gw // 2, gw)) in_gap = False - if not gaps: return [(quad, text, conf)] - - # Use the widest gap as the split point gaps.sort(key=lambda g: g[1], reverse=True) split_x_rel = gaps[0][0] split_x_abs = x1 + split_x_rel - - # Ensure the split produces two non-trivial halves if split_x_abs - x1 < med_h or x2 - split_x_abs < med_h: return [(quad, text, conf)] - if ' ' in text: - char_w = w / max(1, len(text)) + char_w = w / max(1, len(text)) split_idx = int(split_x_rel / max(1e-6, char_w)) - spaces = [i for i, c in enumerate(text) if c == ' '] + spaces = [i for i, c in enumerate(text) if c == ' '] if spaces: split_idx = min(spaces, key=lambda i: abs(i - split_idx)) tl, tr = text[:split_idx].strip(), text[split_idx:].strip() else: split_idx = int(len(text) * split_x_rel / w) - tl, tr = text[:split_idx].strip(), text[split_idx:].strip() - + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() if tl and tr: 
return [([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)] return [(quad, text, conf)] - def apply_column_gap_splits(image_bgr, ocr_list, med_h): - """ - Applies split_wide_quad_by_column_gap to every quad in the list. - Run this BEFORE grouping so column-spanning quads never seed bad groups. - """ result, splits_made = [], 0 for quad, text, conf in ocr_list: parts = split_wide_quad_by_column_gap(image_bgr, quad, text, conf, med_h) @@ -558,19 +505,18 @@ def apply_column_gap_splits(image_bgr, ocr_list, med_h): # ============================================================ def detect_and_split_multi_bubble_boxes(bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr): - all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))] - med_h = float(np.median(all_h)) if all_h else 14.0 + all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) + for i in range(len(ocr))] + med_h = float(np.median(all_h)) if all_h else 14.0 bubble_contours = detect_speech_bubbles(image_bgr) - new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {} - next_bid = 1 - splits_made = [] + next_bid, splits_made = 1, [] for bid, indices in bubble_indices.items(): if len(indices) < 2: new_bubbles[next_bid] = bubbles[bid] - new_boxes[next_bid] = bubble_boxes[bid] - new_quads[next_bid] = bubble_quads[bid] + new_boxes[next_bid] = bubble_boxes[bid] + new_quads[next_bid] = bubble_quads[bid] new_indices[next_bid] = indices next_bid += 1 continue @@ -580,20 +526,21 @@ def detect_and_split_multi_bubble_boxes(bubble_boxes, bubble_indices, bubble_qua for group in split_groups: if group: new_bubbles[next_bid] = build_lines_from_indices(group, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group]) - new_quads[next_bid] = [ocr[i][0] for i in group] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group]) + 
new_quads[next_bid] = [ocr[i][0] for i in group] new_indices[next_bid] = group next_bid += 1 splits_made.append(f"BOX#{bid} → {len(split_groups)} bubbles") continue - vertical_splits = check_vertical_alignment_split(indices, ocr, threshold=int(med_h * 2.0)) + vertical_splits = check_vertical_alignment_split(indices, ocr, + threshold=int(med_h * 2.0)) if len(vertical_splits) > 1: for group in vertical_splits: if group: new_bubbles[next_bid] = build_lines_from_indices(group, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group]) - new_quads[next_bid] = [ocr[i][0] for i in group] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group]) + new_quads[next_bid] = [ocr[i][0] for i in group] new_indices[next_bid] = group next_bid += 1 splits_made.append(f"BOX#{bid} → {len(vertical_splits)} vertical groups") @@ -603,42 +550,42 @@ def detect_and_split_multi_bubble_boxes(bubble_boxes, bubble_indices, bubble_qua x1, y1, x2, y2 = box if (x2 - x1) > med_h * 10: x_centers = [quad_center(ocr[i][0])[0] for i in indices] - x_median = np.median(x_centers) - left_group = [i for i in indices if quad_center(ocr[i][0])[0] < x_median] + x_median = np.median(x_centers) + left_group = [i for i in indices if quad_center(ocr[i][0])[0] < x_median] right_group = [i for i in indices if quad_center(ocr[i][0])[0] >= x_median] if left_group and right_group: - left_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in left_group]) + left_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in left_group]) right_box = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in right_group]) if right_box[0] - left_box[2] > med_h * 1.5: for grp in [left_group, right_group]: new_bubbles[next_bid] = build_lines_from_indices(grp, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]) - new_quads[next_bid] = [ocr[i][0] for i in grp] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]) + new_quads[next_bid] = 
[ocr[i][0] for i in grp] new_indices[next_bid] = grp next_bid += 1 splits_made.append(f"BOX#{bid} → 2 horizontal panels") continue new_bubbles[next_bid] = bubbles[bid] - new_boxes[next_bid] = bubble_boxes[bid] - new_quads[next_bid] = bubble_quads[bid] + new_boxes[next_bid] = bubble_boxes[bid] + new_quads[next_bid] = bubble_quads[bid] new_indices[next_bid] = indices next_bid += 1 if splits_made: print(f"\n🔧 Split {len(splits_made)} multi-bubble box(es):") for s in splits_made: print(f" ✓ {s}") - return new_bubbles, new_boxes, new_quads, new_indices def detect_and_merge_fragmented_bubbles(bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr): - all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))] - med_h = float(np.median(all_h)) if all_h else 14.0 + all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) + for i in range(len(ocr))] + med_h = float(np.median(all_h)) if all_h else 14.0 bubble_contours = detect_speech_bubbles(image_bgr) - bids = list(bubble_boxes.keys()) - to_merge = [] + bids = list(bubble_boxes.keys()) + to_merge = [] for i in range(len(bids)): for j in range(i + 1, len(bids)): @@ -648,13 +595,11 @@ def detect_and_merge_fragmented_bubbles(bubble_boxes, bubble_indices, bubble_qua cy_i = (box_i[1] + box_i[3]) / 2.0 cx_j = (box_j[0] + box_j[2]) / 2.0 cy_j = (box_j[1] + box_j[3]) / 2.0 - in_same_bubble = any( cv2.pointPolygonTest(c, (cx_i, cy_i), False) >= 0 and cv2.pointPolygonTest(c, (cx_j, cy_j), False) >= 0 for c in bubble_contours ) - if in_same_bubble: if abs(cx_i - cx_j) < med_h * 3.0 and abs(cy_i - cy_j) < med_h * 6.0: to_merge.append((bid_i, bid_j) if cy_i < cy_j else (bid_j, bid_i)) @@ -669,76 +614,69 @@ def detect_and_merge_fragmented_bubbles(bubble_boxes, bubble_indices, bubble_qua for key in merge_groups: if top in merge_groups[key] or bottom in merge_groups[key]: merge_groups[key].update({top, bottom}) - found = True - break + found = True; break if not found: 
merge_groups[len(merge_groups)] = {top, bottom} new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {} merged_bids, next_bid = set(), 1 - for merge_set in merge_groups.values(): - merge_list = sorted(merge_set) + merge_list = sorted(merge_set) print(f" ✓ Merging: {', '.join(f'#{b}' for b in merge_list)}") all_indices = sorted(set(idx for b in merge_list for idx in bubble_indices[b])) for b in merge_list: merged_bids.add(b) new_bubbles[next_bid] = build_lines_from_indices(all_indices, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices]) - new_quads[next_bid] = [ocr[i][0] for i in all_indices] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices]) + new_quads[next_bid] = [ocr[i][0] for i in all_indices] new_indices[next_bid] = all_indices next_bid += 1 - for bid in bids: if bid not in merged_bids: new_bubbles[next_bid] = bubbles[bid] - new_boxes[next_bid] = bubble_boxes[bid] - new_quads[next_bid] = bubble_quads[bid] + new_boxes[next_bid] = bubble_boxes[bid] + new_quads[next_bid] = bubble_quads[bid] new_indices[next_bid] = bubble_indices[bid] next_bid += 1 - return new_bubbles, new_boxes, new_quads, new_indices def merge_boxes_by_proximity_and_overlap(bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, med_h): """ - FIX for BOX#2+BOX#14 and BOX#7+BOX#18 type problems: - Merges boxes whose bounding rectangles are very close vertically AND - share significant horizontal overlap — indicating they belong to the - same speech bubble that the contour detector missed (e.g. dashed outlines). + Merges boxes that are vertically close AND share significant horizontal overlap. - Unlike merge_close_bubbles_by_line_height, this checks BOTH axes strictly - to avoid merging boxes from adjacent but distinct bubbles. + Single-quad boxes participate fully — no isolation treatment. 
+ This fixes BOX#2+#16, BOX#8+#21, BOX#9+#22 type problems where a + single-line detection sits directly above/below a multi-line box in the + same speech bubble. + + Merge criteria (both must be true): + 1. Vertical gap ≤ 1.5 × med_h + 2. Horizontal overlap ratio ≥ 0.35 """ bids = sorted(bubble_boxes.keys()) - merge_map: Dict[int, List[int]] = {} - merged_into: Dict[int, int] = {} + merge_map: Dict[int, List[int]] = {} + merged_into: Dict[int, int] = {} for i, bid_i in enumerate(bids): if bid_i in merged_into: continue box_i = bubble_boxes[bid_i] - wi = box_i[2] - box_i[0] + wi = max(1, box_i[2] - box_i[0]) for j in range(i + 1, len(bids)): bid_j = bids[j] if bid_j in merged_into: continue box_j = bubble_boxes[bid_j] - wj = box_j[2] - box_j[0] + wj = max(1, box_j[2] - box_j[0]) - # Vertical gap between the two boxes vert_gap = max(0, max(box_i[1], box_j[1]) - min(box_i[3], box_j[3])) - - # Horizontal overlap ratio (intersection / min width) - h_ix1 = max(box_i[0], box_j[0]) - h_ix2 = min(box_i[2], box_j[2]) - h_overlap = max(0, h_ix2 - h_ix1) + h_ix1 = max(box_i[0], box_j[0]) + h_ix2 = min(box_i[2], box_j[2]) + h_overlap = max(0, h_ix2 - h_ix1) h_overlap_ratio = h_overlap / max(1, min(wi, wj)) - # Merge only when: - # 1. Vertical gap is small (boxes are stacked closely) - # 2. 
Horizontal overlap is significant (same column) if vert_gap <= med_h * 1.5 and h_overlap_ratio >= 0.35: root = merged_into.get(bid_i, bid_i) merge_map.setdefault(root, [root]) @@ -758,8 +696,8 @@ def merge_boxes_by_proximity_and_overlap(bubble_boxes, bubble_indices, bubble_qu print(f" ✓ Merging: {', '.join(f'#{b}' for b in group_unique)}") all_indices = sorted(set(idx for b in group_unique for idx in bubble_indices[b])) new_bubbles[next_bid] = build_lines_from_indices(all_indices, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices]) - new_quads[next_bid] = [ocr[i][0] for i in all_indices] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices]) + new_quads[next_bid] = [ocr[i][0] for i in all_indices] new_indices[next_bid] = all_indices next_bid += 1 processed.update(group_unique) @@ -767,8 +705,8 @@ def merge_boxes_by_proximity_and_overlap(bubble_boxes, bubble_indices, bubble_qu for bid in bids: if bid not in processed: new_bubbles[next_bid] = bubbles[bid] - new_boxes[next_bid] = bubble_boxes[bid] - new_quads[next_bid] = bubble_quads[bid] + new_boxes[next_bid] = bubble_boxes[bid] + new_quads[next_bid] = bubble_quads[bid] new_indices[next_bid] = bubble_indices[bid] next_bid += 1 @@ -777,40 +715,55 @@ def merge_boxes_by_proximity_and_overlap(bubble_boxes, bubble_indices, bubble_qu def auto_fix_bubble_detection(bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr): + """ + Full fix pipeline: + 1. Split boxes that span multiple speech bubbles. + 2. Merge fragments detected inside the same contour. + 3. Merge fragments missed by contour detection (proximity+overlap) — pass 1. + 4. Second proximity pass — catches chains resolved after pass 1. 
+ """ print("\n🔍 Running automatic bubble detection fixes...") - all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))] + all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) + for i in range(len(ocr))] med_h = float(np.median(all_h)) if all_h else 14.0 - bubbles, bubble_boxes, bubble_quads, bubble_indices = detect_and_split_multi_bubble_boxes( - bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr) - bubbles, bubble_boxes, bubble_quads, bubble_indices = detect_and_merge_fragmented_bubbles( - bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr) - # Second pass: catch fragments missed by contour detection (dashed bubbles, etc.) - bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_boxes_by_proximity_and_overlap( - bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, med_h) + bubbles, bubble_boxes, bubble_quads, bubble_indices = \ + detect_and_split_multi_bubble_boxes( + bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr) + + bubbles, bubble_boxes, bubble_quads, bubble_indices = \ + detect_and_merge_fragmented_bubbles( + bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, image_bgr) + + # Pass 1 + bubbles, bubble_boxes, bubble_quads, bubble_indices = \ + merge_boxes_by_proximity_and_overlap( + bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, med_h) + + # Pass 2 — catches chains only visible after pass 1 + bubbles, bubble_boxes, bubble_quads, bubble_indices = \ + merge_boxes_by_proximity_and_overlap( + bubble_boxes, bubble_indices, bubble_quads, bubbles, ocr, med_h) + return bubbles, bubble_boxes, bubble_quads, bubble_indices def remove_nested_boxes(bubble_boxes, bubble_indices, bubble_quads, bubbles, overlap_threshold=0.50): - bids = list(bubble_boxes.keys()) + bids = list(bubble_boxes.keys()) to_remove = set() - for i in range(len(bids)): bid_i = bids[i] if bid_i in to_remove: continue - box_i = bubble_boxes[bid_i] + box_i = 
bubble_boxes[bid_i] area_i = max(0, box_i[2]-box_i[0]) * max(0, box_i[3]-box_i[1]) - for j in range(i + 1, len(bids)): bid_j = bids[j] if bid_j in to_remove: continue - box_j = bubble_boxes[bid_j] + box_j = bubble_boxes[bid_j] area_j = max(0, box_j[2]-box_j[0]) * max(0, box_j[3]-box_j[1]) - - shared = set(bubble_indices[bid_i]).intersection(bubble_indices[bid_j]) + shared = set(bubble_indices[bid_i]).intersection(bubble_indices[bid_j]) overlap = boxes_overlap_ratio(box_i, box_j) - if overlap > overlap_threshold or len(shared) > 0: if area_i >= area_j: to_remove.add(bid_j) @@ -819,7 +772,6 @@ def remove_nested_boxes(bubble_boxes, bubble_indices, bubble_quads, bubbles, to_remove.add(bid_i) print(f" 🗑️ Removing BOX#{bid_i} (overlaps BOX#{bid_j})") break - if to_remove: print(f"\n🧹 Removed {len(to_remove)} overlapping/nested box(es)") for bid in to_remove: @@ -827,7 +779,6 @@ def remove_nested_boxes(bubble_boxes, bubble_indices, bubble_quads, bubbles, bubble_indices.pop(bid, None) bubble_quads.pop(bid, None) bubbles.pop(bid, None) - return bubbles, bubble_boxes, bubble_quads, bubble_indices @@ -844,14 +795,14 @@ def enforce_max_box_size(bubble_boxes, bubble_indices, bubble_quads, bubbles, oc x1, y1, x2, y2 = box w, h = x2 - x1, y2 - y1 if w > max_width or h > max_height: - indices = bubble_indices[bid] + indices = bubble_indices[bid] col_split = split_bubble_if_multiple_columns(indices, ocr, bid=bid, - use_aggressive_thresholds=True) + use_aggressive_thresholds=True) if col_split: for grp in col_split: new_bubbles[next_bid] = build_lines_from_indices(grp, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]) - new_quads[next_bid] = [ocr[i][0] for i in grp] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]) + new_quads[next_bid] = [ocr[i][0] for i in grp] new_indices[next_bid] = grp next_bid += 1 splits_made.append(f"BOX#{bid} (oversized: {w}x{h}px)") @@ -860,15 +811,15 @@ def enforce_max_box_size(bubble_boxes, 
bubble_indices, bubble_quads, bubbles, oc if row_split: for grp in row_split: new_bubbles[next_bid] = build_lines_from_indices(grp, ocr) - new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]) - new_quads[next_bid] = [ocr[i][0] for i in grp] + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in grp]) + new_quads[next_bid] = [ocr[i][0] for i in grp] new_indices[next_bid] = grp next_bid += 1 splits_made.append(f"BOX#{bid} (oversized: {w}x{h}px)") continue new_bubbles[next_bid] = bubbles[bid] - new_boxes[next_bid] = box - new_quads[next_bid] = bubble_quads[bid] + new_boxes[next_bid] = box + new_quads[next_bid] = bubble_quads[bid] new_indices[next_bid] = bubble_indices[bid] next_bid += 1 @@ -918,17 +869,19 @@ class ImprovedMacVisionDetector: variants = [("enhanced", enhance_image_for_ocr(image_bgr, upscale_factor=2.5))] gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) _, hc = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) - variants.append(("high_contrast", cv2.cvtColor( - cv2.resize(hc, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC), - cv2.COLOR_GRAY2BGR))) - variants.append(("bilateral", cv2.resize( - cv2.bilateralFilter(image_bgr, 9, 75, 75), - None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC))) - variants.append(("inverted", cv2.resize( - cv2.bitwise_not(image_bgr), - None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC))) - variants.append(("original", cv2.resize( - image_bgr, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC))) + variants.append(("high_contrast", + cv2.cvtColor(cv2.resize(hc, None, fx=2.5, fy=2.5, + interpolation=cv2.INTER_CUBIC), + cv2.COLOR_GRAY2BGR))) + variants.append(("bilateral", + cv2.resize(cv2.bilateralFilter(image_bgr, 9, 75, 75), + None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC))) + variants.append(("inverted", + cv2.resize(cv2.bitwise_not(image_bgr), + None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC))) + variants.append(("original", + cv2.resize(image_bgr, 
None, fx=2.5, fy=2.5, + interpolation=cv2.INTER_CUBIC))) return variants def run_vision_ocr(self, image_bgr): @@ -945,17 +898,17 @@ class ImprovedMacVisionDetector: def completion_handler(request, error): if error: return for obs in request.results(): - candidate = obs.topCandidates_(1)[0] - text, confidence = candidate.string(), candidate.confidence() - bbox = obs.boundingBox() - x = bbox.origin.x * iw - y_bl = bbox.origin.y * ih - w = bbox.size.width * iw - h = bbox.size.height * ih - y = ih - y_bl - h + candidate = obs.topCandidates_(1)[0] + text, conf = candidate.string(), candidate.confidence() + bbox = obs.boundingBox() + x = bbox.origin.x * iw + y_bl = bbox.origin.y * ih + w = bbox.size.width * iw + h = bbox.size.height * ih + y = ih - y_bl - h quad = [[int(x),int(y)],[int(x+w),int(y)], [int(x+w),int(y+h)],[int(x),int(y+h)]] - results.append((quad, text, confidence)) + results.append((quad, text, conf)) req = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler) req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate) @@ -969,7 +922,7 @@ class ImprovedMacVisionDetector: if not all_results: return [] scale_factor = 2.5 - normalized = [] + normalized = [] for variant_name, results in all_results: for quad, text, conf in results: sq = [[int(p[0]/scale_factor), int(p[1]/scale_factor)] for p in quad] @@ -981,7 +934,8 @@ class ImprovedMacVisionDetector: x2, y2 = min(b1[2],b2[2]), min(b1[3],b2[3]) if x2 < x1 or y2 < y1: return False inter = (x2-x1)*(y2-y1) - union = (b1[2]-b1[0])*(b1[3]-b1[1]) + (b2[2]-b2[0])*(b2[3]-b2[1]) - inter + union = ((b1[2]-b1[0])*(b1[3]-b1[1]) + + (b2[2]-b2[0])*(b2[3]-b2[1]) - inter) return inter / max(union, 1) > threshold clusters, used = [], set() @@ -1016,7 +970,7 @@ class ImprovedMacVisionDetector: else image_path_or_array if img is None or img.size == 0: return [] - variants = self.preprocess_variants(img) + variants = self.preprocess_variants(img) all_results = [] for vname, vimg in 
variants: r = self.run_vision_ocr(vimg) @@ -1056,17 +1010,17 @@ class MacVisionDetector: def completion_handler(request, error): if error: return for obs in request.results(): - candidate = obs.topCandidates_(1)[0] - text, confidence = candidate.string(), candidate.confidence() - bbox = obs.boundingBox() - x = bbox.origin.x * iw - y_bl = bbox.origin.y * ih - w = bbox.size.width * iw - h = bbox.size.height * ih - y = ih - y_bl - h + candidate = obs.topCandidates_(1)[0] + text, conf = candidate.string(), candidate.confidence() + bbox = obs.boundingBox() + x = bbox.origin.x * iw + y_bl = bbox.origin.y * ih + w = bbox.size.width * iw + h = bbox.size.height * ih + y = ih - y_bl - h quad = [[int(x),int(y)],[int(x+w),int(y)], [int(x+w),int(y+h)],[int(x),int(y+h)]] - results.append((quad, text, confidence)) + results.append((quad, text, conf)) req = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(completion_handler) req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate) @@ -1076,79 +1030,74 @@ class MacVisionDetector: handler.performRequests_error_([req], None) return results -def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False): + +# ============================================================ +# COLUMN / ROW SPLITTING +# ============================================================ +def split_bubble_if_multiple_columns(indices, ocr, bid=None, + use_aggressive_thresholds=False): if len(indices) < 2: return None - boxes = [quad_bbox(ocr[i][0]) for i in indices] - hs = [max(1, b[3] - b[1]) for b in boxes] - med_h = float(np.median(hs)) if hs else 12.0 - xs = [(b[0] + b[2]) / 2.0 for b in boxes] + boxes = [quad_bbox(ocr[i][0]) for i in indices] + hs = [max(1, b[3]-b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 12.0 + xs = [(b[0]+b[2])/2.0 for b in boxes] xs_sorted = sorted(xs) - - gap_thresh = max(med_h * 1.2, 18) if use_aggressive_thresholds else max(med_h * 1.5, 22) + gap_thresh = 
max(med_h*1.2, 18) if use_aggressive_thresholds else max(med_h*1.5, 22) best_gap_idx, best_gap_size = None, 0.0 - for i in range(len(xs_sorted) - 1): - gap = xs_sorted[i + 1] - xs_sorted[i] + gap = xs_sorted[i+1] - xs_sorted[i] if gap > gap_thresh and gap > best_gap_size: best_gap_size, best_gap_idx = gap, i - if best_gap_idx is None: return None - split_x = (xs_sorted[best_gap_idx] + xs_sorted[best_gap_idx + 1]) / 2.0 - - left_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 < split_x] - right_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 >= split_x] - + split_x = (xs_sorted[best_gap_idx] + xs_sorted[best_gap_idx+1]) / 2.0 + left_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 < split_x] + right_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 >= split_x] if not left_idxs or not right_idxs: return None return (left_idxs, right_idxs) - def split_bubble_if_multiple_rows(indices, ocr, bid=None): if len(indices) < 2: return None - boxes = [quad_bbox(ocr[i][0]) for i in indices] - hs = [max(1, b[3] - b[1]) for b in boxes] - med_h = float(np.median(hs)) if hs else 12.0 - ys = [(b[1] + b[3]) / 2.0 for b in boxes] + boxes = [quad_bbox(ocr[i][0]) for i in indices] + hs = [max(1, b[3]-b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 12.0 + ys = [(b[1]+b[3])/2.0 for b in boxes] ys_sorted = sorted(ys) - gap_thresh = max(med_h * 2.0, 30) best_gap_idx, best_gap_size = None, 0.0 - for i in range(len(ys_sorted) - 1): - gap = ys_sorted[i + 1] - ys_sorted[i] + gap = ys_sorted[i+1] - ys_sorted[i] if gap > gap_thresh and gap > best_gap_size: best_gap_size, best_gap_idx = gap, i - if best_gap_idx is None: return None - split_y = (ys_sorted[best_gap_idx] + ys_sorted[best_gap_idx + 1]) / 2.0 - - top_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[1] + quad_bbox(ocr[i][0])[3]) / 2.0 < split_y] - bottom_idxs = [i for 
i in indices if (quad_bbox(ocr[i][0])[1] + quad_bbox(ocr[i][0])[3]) / 2.0 >= split_y] - - if not top_idxs or not bottom_idxs: return None - return (top_idxs, bottom_idxs) + split_y = (ys_sorted[best_gap_idx] + ys_sorted[best_gap_idx+1]) / 2.0 + top_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[1]+quad_bbox(ocr[i][0])[3])/2.0 < split_y] + bot_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[1]+quad_bbox(ocr[i][0])[3])/2.0 >= split_y] + if not top_idxs or not bot_idxs: return None + return (top_idxs, bot_idxs) def split_cluster_by_big_vertical_gap(indices, ocr, factor=1.9, min_gap=22): if len(indices) < 2: return None - boxes = [quad_bbox(ocr[i][0]) for i in indices] - hs = [max(1, b[3] - b[1]) for b in boxes] - med_h = float(np.median(hs)) if hs else 12.0 - - items = sorted([(i, quad_bbox(ocr[i][0])) for i in indices], - key=lambda x: (x[1][1] + x[1][3]) / 2.0) + boxes = [quad_bbox(ocr[i][0]) for i in indices] + hs = [max(1, b[3]-b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 12.0 + items = sorted([(i, quad_bbox(ocr[i][0])) for i in indices], + key=lambda x: (x[1][1]+x[1][3])/2.0) gap_thresh = max(med_h * factor, min_gap) best_gap, best_split_idx = 0.0, None - for k in range(len(items) - 1): - gap = items[k + 1][1][1] - items[k][1][3] + gap = items[k+1][1][1] - items[k][1][3] if gap > gap_thresh and gap > best_gap: best_gap, best_split_idx = gap, k - if best_split_idx is None: return None - top_idxs = [it[0] for it in items[:best_split_idx + 1]] - bottom_idxs = [it[0] for it in items[best_split_idx + 1:]] - if not top_idxs or not bottom_idxs: return None - return (top_idxs, bottom_idxs) + top_idxs = [it[0] for it in items[:best_split_idx+1]] + bot_idxs = [it[0] for it in items[best_split_idx+1:]] + if not top_idxs or not bot_idxs: return None + return (top_idxs, bot_idxs) def is_vertical_text_like(indices, ocr): @@ -1165,13 +1114,14 @@ def is_vertical_text_like(indices, ocr): def split_nested_or_side_by_side(indices, ocr): if 
len(indices) < 2: return None - xs = sorted([(quad_bbox(ocr[i][0])[0] + quad_bbox(ocr[i][0])[2]) / 2.0 for i in indices]) + xs = sorted([(quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 + for i in indices]) mid_idx = len(xs) // 2 - split_x = (xs[mid_idx - 1] + xs[mid_idx]) / 2.0 - - left_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 < split_x] - right_idxs = [i for i in indices if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 >= split_x] - + split_x = (xs[mid_idx-1] + xs[mid_idx]) / 2.0 + left_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 < split_x] + right_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 >= split_x] if not left_idxs or not right_idxs: return None return (left_idxs, right_idxs) @@ -1184,29 +1134,25 @@ def split_panel_box(image_bgr, box_xyxy, bubble_quads=None): if x2 <= x1 or y2 <= y1: return None crop = image_bgr[y1:y2, x1:x2] if crop.size == 0: return None - - gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) - edges = cv2.Canny(gray, 50, 150) + gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) + edges = cv2.Canny(gray, 50, 150) h_proj = np.sum(edges, axis=0) - w = x2 - x1 + w = x2 - x1 if w < 100: return None - search_start = int(w * 0.35) search_end = int(w * 0.65) if search_end <= search_start: return None region = h_proj[search_start:search_end] if len(region) == 0: return None - threshold = np.percentile(region, 85) - candidates = [x1 + search_start + rx for rx in range(len(region)) if region[rx] >= threshold] + candidates = [x1 + search_start + rx + for rx in range(len(region)) if region[rx] >= threshold] if not candidates: return None split_x = int(np.median(candidates)) - if bubble_quads: - left_count = sum(1 for q in bubble_quads if quad_center(q)[0] < split_x) - right_count = len(bubble_quads) - left_count - if left_count == 0 or right_count == 0: return None - + lc = sum(1 for q in bubble_quads if quad_center(q)[0] < 
split_x) + rc = len(bubble_quads) - lc + if lc == 0 or rc == 0: return None return (x1, x2, split_x) @@ -1216,15 +1162,19 @@ def split_panel_box(image_bgr, box_xyxy, bubble_quads=None): def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr): """ - Merges boxes that are spatially very close (within ~1.4× line height on - BOTH axes simultaneously). Strict dual-axis check prevents merging boxes - from adjacent but distinct bubbles — fixing the BOX#5+BOX#16 overlap problem. + Merges boxes that are spatially very close on BOTH axes AND share + meaningful horizontal overlap (same column). + + Single-quad boxes participate fully — no special isolation treatment. + The h_overlap_ratio >= 0.25 guard prevents merging horizontally + adjacent distinct bubbles. """ if not bubbles: return bubbles, bubble_boxes, bubble_quads, bubble_indices - all_h = [max(1, quad_bbox(ocr[i][0])[3] - quad_bbox(ocr[i][0])[1]) for i in range(len(ocr))] - med_h = float(np.median(all_h)) if all_h else 14.0 + all_h = [max(1, quad_bbox(ocr[i][0])[3]-quad_bbox(ocr[i][0])[1]) + for i in range(len(ocr))] + med_h = float(np.median(all_h)) if all_h else 14.0 merge_tol = max(8, med_h * 1.4) bids = sorted(bubble_boxes.keys()) @@ -1233,26 +1183,22 @@ def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, for i, bid_i in enumerate(bids): if bid_i in merged_set: continue x1_i, y1_i, x2_i, y2_i = bubble_boxes[bid_i] - wi = x2_i - x1_i + wi = max(1, x2_i - x1_i) for j in range(i + 1, len(bids)): bid_j = bids[j] if bid_j in merged_set: continue x1_j, y1_j, x2_j, y2_j = bubble_boxes[bid_j] - wj = x2_j - x1_j + wj = max(1, x2_j - x1_j) gap_x = max(0, max(x1_i, x1_j) - min(x2_i, x2_j)) gap_y = max(0, max(y1_i, y1_j) - min(y2_i, y2_j)) - # Horizontal overlap ratio — must be significant to merge - h_ix1 = max(x1_i, x1_j) - h_ix2 = min(x2_i, x2_j) - h_overlap = max(0, h_ix2 - h_ix1) + h_ix1 = max(x1_i, x1_j) + h_ix2 = min(x2_i, x2_j) + h_overlap = max(0, 
h_ix2 - h_ix1) h_overlap_ratio = h_overlap / max(1, min(wi, wj)) - # STRICT: both gap_x AND gap_y must be small, AND boxes must - # share meaningful horizontal overlap (same column). - # This prevents merging horizontally adjacent distinct bubbles. if gap_x <= merge_tol and gap_y <= merge_tol and h_overlap_ratio >= 0.25: if bid_i not in merge_map: merge_map[bid_i] = [bid_i] @@ -1264,11 +1210,10 @@ def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {} next_bid = 1 - for bid in bids: if bid in merged_set: continue if bid in merge_map: - group = merge_map[bid] + group = merge_map[bid] all_indices = sorted(set(idx for b in group for idx in bubble_indices[b])) new_bubbles[next_bid] = build_lines_from_indices(all_indices, ocr) new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices]) @@ -1289,7 +1234,7 @@ def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, # ============================================================ def split_wide_ocr_items(image_bgr, ocr_list, width_factor=8.0): if not ocr_list: return ocr_list, 0 - hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list] + hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list] med_h = float(np.median(hs)) if hs else 14.0 result, splits_made = [], 0 @@ -1302,8 +1247,9 @@ def split_wide_ocr_items(image_bgr, ocr_list, width_factor=8.0): max(0,x1):min(image_bgr.shape[1],x2)] if roi.size > 0: gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) - _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) - v_proj = np.sum(binary, axis=0) + _, binary = cv2.threshold(gray, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + v_proj = np.sum(binary, axis=0) gap_threshold = roi.shape[0] * 255 * 0.15 gaps, in_gap, gap_start = [], False, 0 for x in range(len(v_proj)): @@ -1319,17 +1265,19 @@ def split_wide_ocr_items(image_bgr, ocr_list, 
width_factor=8.0): gaps.sort(key=lambda g: g[1], reverse=True) split_x_abs = max(0, x1) + gaps[0][0] if ' ' in text: - char_w = w / max(1, len(text)) + char_w = w / max(1, len(text)) split_idx = int((split_x_abs - x1) / max(1e-6, char_w)) - spaces = [i for i, c in enumerate(text) if c == ' '] - if spaces: split_idx = min(spaces, key=lambda i: abs(i - split_idx)) + spaces = [i for i, c in enumerate(text) if c == ' '] + if spaces: + split_idx = min(spaces, key=lambda i: abs(i - split_idx)) tl, tr = text[:split_idx].strip(), text[split_idx:].strip() else: split_idx = int(len(text) * (split_x_abs - x1) / w) - tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() if tl and tr: - result.extend([([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), - ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]) + result.extend([ + ([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), + ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]) splits_made += 1 continue result.append((quad, text, conf)) @@ -1338,7 +1286,7 @@ def split_wide_ocr_items(image_bgr, ocr_list, width_factor=8.0): def split_abnormal_bridge_quads(image_bgr, ocr_list, aspect_ratio_threshold=6.0): if not ocr_list: return ocr_list, 0 - hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list] + hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list] med_h = float(np.median(hs)) if hs else 14.0 result, splits_made = [], 0 @@ -1351,8 +1299,9 @@ def split_abnormal_bridge_quads(image_bgr, ocr_list, aspect_ratio_threshold=6.0) max(0,x1):min(image_bgr.shape[1],x2)] if roi.size > 0: gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) - _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) - v_proj = np.sum(binary, axis=0) + _, binary = cv2.threshold(gray, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + v_proj = np.sum(binary, axis=0) gap_threshold = h * 255 
* 0.20 gaps, in_gap, gap_start = [], False, 0 for x in range(len(v_proj)): @@ -1368,17 +1317,19 @@ def split_abnormal_bridge_quads(image_bgr, ocr_list, aspect_ratio_threshold=6.0) gaps.sort(key=lambda g: g[1], reverse=True) split_x_abs = max(0, x1) + gaps[0][0] if ' ' in text: - char_w = w / max(1, len(text)) + char_w = w / max(1, len(text)) split_idx = int((split_x_abs - x1) / max(1e-6, char_w)) - spaces = [i for i, c in enumerate(text) if c == ' '] - if spaces: split_idx = min(spaces, key=lambda i: abs(i - split_idx)) + spaces = [i for i, c in enumerate(text) if c == ' '] + if spaces: + split_idx = min(spaces, key=lambda i: abs(i - split_idx)) tl, tr = text[:split_idx].strip(), text[split_idx:].strip() else: split_idx = int(len(text) * (split_x_abs - x1) / w) - tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() if tl and tr: - result.extend([([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), - ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]) + result.extend([ + ([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), + ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]) splits_made += 1 continue result.append((quad, text, conf)) @@ -1389,8 +1340,8 @@ def normalize_ocr_quads(ocr_list): result = [] for quad, text, conf in ocr_list: x1, y1, x2, y2 = quad_bbox(quad) - pad = 3 - new_quad = [[x1-pad, y1-pad], [x2+pad, y1-pad], [x2+pad, y2+pad], [x1-pad, y2+pad]] + pad = 3 + new_quad = [[x1-pad,y1-pad],[x2+pad,y1-pad],[x2+pad,y2+pad],[x1-pad,y2+pad]] result.append((new_quad, text, conf)) return result @@ -1401,10 +1352,12 @@ def normalize_ocr_quads(ocr_list): def preprocess_variant(crop_bgr, mode): gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY) if mode == "raw": return gray - if mode == "clahe": return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(gray) + if mode == "clahe": return cv2.createCLAHE(clipLimit=2.0, + 
tileGridSize=(8,8)).apply(gray) if mode == "adaptive": den = cv2.GaussianBlur(gray, (3,3), 0) - return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11) + return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, 35, 11) if mode == "otsu": den = cv2.GaussianBlur(gray, (3,3), 0) _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) @@ -1422,14 +1375,15 @@ def preprocess_variant(crop_bgr, mode): def rotate_image_keep_bounds(img, angle_deg): h, w = img.shape[:2] - c = (w/2, h/2) - M = cv2.getRotationMatrix2D(c, angle_deg, 1.0) + c = (w/2, h/2) + M = cv2.getRotationMatrix2D(c, angle_deg, 1.0) cos, sin = abs(M[0,0]), abs(M[0,1]) new_w = int((h*sin) + (w*cos)) new_h = int((h*cos) + (w*sin)) M[0,2] += (new_w/2) - c[0] M[1,2] += (new_h/2) - c[1] - return cv2.warpAffine(img, M, (new_w, new_h), flags=cv2.INTER_CUBIC, borderValue=255) + return cv2.warpAffine(img, M, (new_w, new_h), + flags=cv2.INTER_CUBIC, borderValue=255) def rebuild_text_from_vision_result(res): @@ -1438,7 +1392,8 @@ def rebuild_text_from_vision_result(res): for bbox, txt, conf in res: if not txt or not txt.strip(): continue b = quad_bbox(bbox) - norm.append((b, txt, conf, (b[0]+b[2])/2.0, (b[1]+b[3])/2.0, max(1.0, b[3]-b[1]))) + norm.append((b, txt, conf, + (b[0]+b[2])/2.0, (b[1]+b[3])/2.0, max(1.0, b[3]-b[1]))) if not norm: return "" med_h = float(np.median([x[5] for x in norm])) row_tol = max(6.0, med_h * 0.75) @@ -1453,22 +1408,25 @@ def rebuild_text_from_vision_result(res): placed = True; break if not placed: rows.append({"yc": it[4], "m": [it]}) rows.sort(key=lambda r: r["yc"]) - lines = [normalize_text(" ".join(x[1] for x in sorted(r["m"], key=lambda z: z[3]))) for r in rows] + lines = [normalize_text(" ".join(x[1] for x in sorted(r["m"], key=lambda z: z[3]))) + for r in rows] return normalize_text(" ".join(filter(None, lines))) -def reread_bubble_with_vision(image_bgr, bbox_xyxy, vision_detector, 
upscale=3.0, pad=24): +def reread_bubble_with_vision(image_bgr, bbox_xyxy, vision_detector, + upscale=3.0, pad=24): ih, iw = image_bgr.shape[:2] x1, y1, x2, y2 = bbox_xyxy x1, y1 = max(0, int(x1-pad)), max(0, int(y1-pad)) x2, y2 = min(iw, int(x2+pad)), min(ih, int(y2+pad)) - crop = image_bgr[y1:y2, x1:x2] + crop = image_bgr[y1:y2, x1:x2] if crop.size == 0: return None, 0.0, "none" modes = ["raw", "clahe", "adaptive", "otsu", "invert", "bilateral", "morph_open"] angles = [0.0, 1.5, -1.5] best_v_txt, best_v_sc = "", 0.0 - up0 = cv2.resize(crop, (int(crop.shape[1]*upscale), int(crop.shape[0]*upscale)), + up0 = cv2.resize(crop, + (int(crop.shape[1]*upscale), int(crop.shape[0]*upscale)), interpolation=cv2.INTER_CUBIC) for mode in modes: @@ -1510,7 +1468,9 @@ def build_lines_from_indices(indices, ocr): placed = True; break if not placed: rows.append({"yc": it[3], "m": [it]}) rows.sort(key=lambda r: r["yc"]) - return [normalize_text(" ".join(ocr[i][1] for i,_,_,_,_ in sorted(r["m"], key=lambda z: z[2]))) + return [normalize_text( + " ".join(ocr[i][1] + for i, _, _, _, _ in sorted(r["m"], key=lambda z: z[2]))) for r in rows if r["m"]] @@ -1519,16 +1479,8 @@ def auto_gap(image_path, base=18, ref_w=750): return base * (img.shape[1] / ref_w) if img is not None else base -def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, strict_mode=False): - """ - Groups OCR quads into bubble candidates. - - Generic protections applied: - - orientation_compatible(): prevents tall/narrow glyphs merging with wide text lines. - - Horizontal gap guard: prevents side-by-side column quads from merging. - - detect_horizontal_gap_in_group(): post-merge split for groups with large internal gaps. - - Orientation check in secondary merge pass. 
- """ +def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, + strict_mode=False): n = len(ocr) if n == 0: return {}, {}, {}, {} @@ -1547,12 +1499,12 @@ def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, strict_mo if i in used: continue current_group = [i] used.add(i) - cx_i, cy_i = centers[i] + cx_i = centers[i][0] for j in sorted_indices: if j in used or j == i: continue cx_j, cy_j = centers[j] - if cy_j <= cy_i: continue + if cy_j <= centers[i][1]: continue if abs(cx_i - cx_j) > max_horizontal_offset: continue # Horizontal gap guard @@ -1598,7 +1550,8 @@ def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, strict_mo else: final_groups.append(group) - final_groups.sort(key=lambda g: (min(centers[i][1] for i in g), min(centers[i][0] for i in g))) + final_groups.sort(key=lambda g: (min(centers[i][1] for i in g), + min(centers[i][0] for i in g))) bubbles, bubble_boxes, bubble_quads, bubble_indices = {}, {}, {}, {} ih, iw = image_shape[:2] @@ -1610,35 +1563,99 @@ def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, strict_mo if ub is None: continue x1, y1, x2, y2 = ub ap = max(1, int(round(med_h * 0.16))) - bubbles[bid] = lines - bubble_boxes[bid] = (max(0,x1-ap), max(0,y1-ap), min(iw-1,x2+ap), min(ih-1,y2+ap)) - bubble_quads[bid] = quads - bubble_indices[bid]= idxs + bubbles[bid] = lines + bubble_boxes[bid] = (max(0,x1-ap), max(0,y1-ap), + min(iw-1,x2+ap), min(ih-1,y2+ap)) + bubble_quads[bid] = quads + bubble_indices[bid] = idxs return bubbles, bubble_boxes, bubble_quads, bubble_indices +# ============================================================ +# SPLIT HELPER — centralises all split strategies +# ============================================================ +def _split_bubble_if_needed(bid, bubble_indices, bubble_quads, bubble_boxes, + filtered, image, iw, ih): + """ + Attempts all split strategies in priority order. + Returns ((part1_indices, part2_indices), reason_str) or (None, None). 
+ + BOX#18 fix: split_cluster_by_big_vertical_gap factor lowered to 1.4 + so the gap between the top speech bubble and the bottom cluster triggers. + """ + indices = bubble_indices[bid] + box = bubble_boxes[bid] + + # 1. Vertical-stack gap (sensitive — catches top-vs-bottom cluster) + if is_vertical_text_like(indices, filtered): + vgap = split_cluster_by_big_vertical_gap(indices, filtered, + factor=1.4, min_gap=18) + if vgap: + return vgap, "vertical-stack y-gap" + + # 2. Panel border + sr = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) + if sr: + _, _, split_x = sr + li = [idx for idx in indices if quad_center(filtered[idx][0])[0] < split_x] + ri = [idx for idx in indices if quad_center(filtered[idx][0])[0] >= split_x] + if li and ri: + return (li, ri), "panel border" + elif len(bubble_quads[bid]) >= 4: + cs = split_bubble_if_multiple_columns(indices, filtered, bid=bid, + use_aggressive_thresholds=True) + if cs: + return cs, "aggressive column" + + # 3. Column gap + cs = split_bubble_if_multiple_columns(indices, filtered, bid=bid) + if cs: + return cs, "vertical column" + + # 4. Nested / side-by-side + ns = split_nested_or_side_by_side(indices, filtered) + if ns: + return ns, "nested/side-by-side" + + # 5. Row split + rs = split_bubble_if_multiple_rows(indices, filtered, bid=bid) + if rs: + return rs, "horizontal row" + + # 6. Large vertical gap (general, less sensitive) + gy = split_cluster_by_big_vertical_gap(indices, filtered, factor=1.9, min_gap=22) + if gy: + return gy, "large vertical-gap" + + return None, None + + # ============================================================ # DEBUG / EXPORT # ============================================================ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, clean_lines=None, out_path="debug_clusters.png"): + """ + Draws all detected boxes. + Single-quad boxes are drawn in orange for visibility but are NOT + labelled as (ISOLATED) — they participate fully in merge passes. 
+ """ img = cv2.imread(image_path) if img is None: return for bbox, txt, conf in ocr: pts = np.array(bbox, dtype=np.int32) - cv2.fillPoly(img, [pts], (255,255,255)) - cv2.polylines(img, [pts], True, (180,180,180), 1) + cv2.fillPoly(img, [pts], (255, 255, 255)) + cv2.polylines(img, [pts], True, (180, 180, 180), 1) for bid, bb in bubble_boxes.items(): x1, y1, x2, y2 = bb - is_isolated = len(bubble_indices.get(bid, [])) == 1 - color = (255,165,0) if is_isolated else (0,220,0) - thickness = 3 if is_isolated else 2 - cv2.rectangle(img, (x1,y1), (x2,y2), color, thickness) - label = f"BOX#{bid}" + (" (ISOLATED)" if is_isolated else "") - cv2.putText(img, label, (x1+2, max(15, y1+16)), + n_quads = len(bubble_indices.get(bid, [])) + color = (255, 165, 0) if n_quads == 1 else (0, 220, 0) + thickness = 3 if n_quads == 1 else 2 + cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness) + cv2.putText(img, f"BOX#{bid}", (x1+2, max(15, y1+16)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) if clean_lines and bid in clean_lines: @@ -1651,15 +1668,18 @@ def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, if cur: lines.append(cur.strip()) y_text = y2 + 18 for line in lines: - cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 3) - cv2.putText(img, line, (x1, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1) + cv2.putText(img, line, (x1, y_text), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 3) + cv2.putText(img, line, (x1, y_text), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1) y_text += 18 cv2.imwrite(out_path, img) def estimate_reading_order(bbox_dict, mode="ltr"): - items = [(bid, (bb[0]+bb[2])/2.0, (bb[1]+bb[3])/2.0) for bid, bb in bbox_dict.items()] + items = [(bid, (bb[0]+bb[2])/2.0, (bb[1]+bb[3])/2.0) + for bid, bb in bbox_dict.items()] items.sort(key=lambda t: t[2]) rows, tol = [], 90 for it in items: @@ -1704,6 +1724,7 @@ def translate_manga_text( print(f"❌ Cannot load image: {image_path}"); return resolved_gap = auto_gap(image_path) 
if gap_px == "auto" else float(gap_px) + ih, iw = image.shape[:2] print("Loading OCR engines...") if use_enhanced_ocr: @@ -1722,29 +1743,28 @@ def translate_manga_text( if missed_regions: print(f"🔍 Found {len(missed_regions)} potentially missed text regions") for region in missed_regions: - x1, y1, x2, y2 = region + rx1, ry1, rx2, ry2 = region pad = 10 - x1, y1 = max(0, x1-pad), max(0, y1-pad) - x2, y2 = min(image.shape[1], x2+pad), min(image.shape[0], y2+pad) - crop = image[y1:y2, x1:x2] + rx1, ry1 = max(0, rx1-pad), max(0, ry1-pad) + rx2, ry2 = min(iw, rx2+pad), min(ih, ry2+pad) + crop = image[ry1:ry2, rx1:rx2] if crop.size > 0: upscaled = cv2.resize(crop, None, fx=4.0, fy=4.0, interpolation=cv2.INTER_CUBIC) for quad, text, conf in detector.run_vision_ocr(upscaled): - raw.append(([[int(p[0]/4.0+x1), int(p[1]/4.0+y1)] for p in quad], - text, conf)) + raw.append(([[int(p[0]/4.0+rx1), int(p[1]/4.0+ry1)] + for p in quad], text, conf)) print(f"📝 Total detections after missed region scan: {len(raw)}") + # ── Filtering ───────────────────────────────────────────────────────── filtered, skipped = [], 0 - ih, iw = image.shape[:2] - for bbox, text, conf in raw: t = normalize_text(text) qb = quad_bbox(bbox) - if conf < confidence_threshold: skipped += 1; continue - if len(t) < min_text_length: skipped += 1; continue - if not is_valid_language(t, source_lang): skipped += 1; continue - if not is_meaningful_text(t, source_lang):skipped += 1; continue + if conf < confidence_threshold: skipped += 1; continue + if len(t) < min_text_length: skipped += 1; continue + if not is_valid_language(t, source_lang): skipped += 1; continue + if not is_meaningful_text(t, source_lang): skipped += 1; continue if qb[1] < int(ih * TOP_BAND_RATIO) and conf < 0.70 and len(t) >= 5: skipped += 1; continue filtered.append((bbox, t, conf)) @@ -1758,105 +1778,56 @@ def translate_manga_text( if oversized_splits > 0: print(f"📐 Split {oversized_splits} oversized quad(s) before grouping") - filtered, 
splits_made = split_wide_ocr_items(image, filtered) - if splits_made > 0: - print(f"✂️ Split {splits_made} wide OCR lines across column gaps.") + filtered, wide_splits = split_wide_ocr_items(image, filtered) + if wide_splits > 0: + print(f"✂️ Split {wide_splits} wide OCR lines across column gaps.") filtered, bridge_splits = split_abnormal_bridge_quads(image, filtered) if bridge_splits > 0: print(f"🧩 Split {bridge_splits} abnormal bridge OCR quad(s).") - # ── Column-gap split: catches BOX#6 type wide quads spanning two columns ── - hs_pre = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in filtered] + # Column-gap split: catches wide quads spanning two columns (BOX#6 type) + hs_pre = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in filtered] med_h_pre = float(np.median(hs_pre)) if hs_pre else 14.0 filtered, col_splits = apply_column_gap_splits(image, filtered, med_h_pre) - if col_splits > 0: - print(f"📐 Column-gap split: {col_splits} quad(s) split before grouping") filtered = normalize_ocr_quads(filtered) + # ── Grouping ────────────────────────────────────────────────────────── print("📊 Grouping quads vertically...") bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens_vertical( - filtered, image.shape, gap_px=resolved_gap, bbox_padding=1, strict_mode=strict_grouping - ) + filtered, image.shape, gap_px=resolved_gap, + bbox_padding=1, strict_mode=strict_grouping) print(f" Created {len(bubbles)} initial box(es)") + # ── Auto-fix (split + merge) ────────────────────────────────────────── if auto_fix_bubbles: bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection( - bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image - ) + bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image) + # ── Enforce max box size ────────────────────────────────────────────── bubbles, bubble_boxes, bubble_quads, bubble_indices = enforce_max_box_size( bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, 
max_width_ratio=max_box_width_ratio, max_height_ratio=max_box_height_ratio, - image_shape=image.shape - ) + image_shape=image.shape) + # ── Close-proximity merge ───────────────────────────────────────────── bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_close_bubbles_by_line_height( - bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered - ) + bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered) + # ── Per-bubble split pass ───────────────────────────────────────────── new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {} next_bid = max(bubbles.keys()) + 1 if bubbles else 1 splits_performed = [] for bid in list(bubbles.keys()): - box = bubble_boxes[bid] - bubble_split = None + split_result, split_reason = _split_bubble_if_needed( + bid, bubble_indices, bubble_quads, bubble_boxes, filtered, image, iw, ih) - if is_vertical_text_like(bubble_indices[bid], filtered): - vgap = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered, - factor=1.7, min_gap=18) - if vgap: - bubble_split = vgap - splits_performed.append(f"BOX#{bid} (vertical-stack y-gap)") - - if bubble_split is None: - sr = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) - if sr: - _, _, split_x = sr - li = [idx for idx in bubble_indices[bid] - if quad_center(filtered[idx][0])[0] < split_x] - ri = [idx for idx in bubble_indices[bid] - if quad_center(filtered[idx][0])[0] >= split_x] - if li and ri: - bubble_split = (li, ri) - splits_performed.append(f"BOX#{bid} (panel border)") - elif len(bubble_quads[bid]) >= 4: - cs = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, - bid=bid, use_aggressive_thresholds=True) - if cs: - bubble_split = cs - splits_performed.append(f"BOX#{bid} (aggressive column)") - - if bubble_split is None: - cs = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid) - if cs: - bubble_split = cs - splits_performed.append(f"BOX#{bid} (vertical column)") - - if bubble_split is 
None: - ns = split_nested_or_side_by_side(bubble_indices[bid], filtered) - if ns: - bubble_split = ns - splits_performed.append(f"BOX#{bid} (nested/side-by-side)") - - if bubble_split is None: - rs = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid) - if rs: - bubble_split = rs - splits_performed.append(f"BOX#{bid} (horizontal row)") - - if bubble_split is None: - gy = split_cluster_by_big_vertical_gap(bubble_indices[bid], filtered, - factor=1.9, min_gap=22) - if gy: - bubble_split = gy - splits_performed.append(f"BOX#{bid} (large vertical-gap)") - - if bubble_split: - p1, p2 = bubble_split + if split_result: + p1, p2 = split_result + splits_performed.append(f"BOX#{bid} ({split_reason})") for part_idxs, part_bid in [(p1, bid), (p2, next_bid)]: ub = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part_idxs]) new_bubbles[part_bid] = build_lines_from_indices(part_idxs, filtered) @@ -1873,11 +1844,12 @@ def translate_manga_text( if splits_performed: print(f"\n🔀 Splits detected: {len(splits_performed)}") + for s in splits_performed: print(f" ✓ {s}") + # ── Remove nested / duplicate boxes ────────────────────────────────── bubbles, bubble_boxes, bubble_quads, bubble_indices = remove_nested_boxes( new_bubble_boxes, new_bubble_indices, new_bubble_quads, new_bubbles, - overlap_threshold=0.50 - ) + overlap_threshold=0.50) print(f"✅ Final box count: {len(bubbles)}") # ── OCR quality pass ────────────────────────────────────────────────── @@ -1900,7 +1872,7 @@ def translate_manga_text( reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode) - # ── Single-pass translation cache ──────────────────────────────────── + # ── Translation ─────────────────────────────────────────────────────── for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)): src_txt = clean_lines[bid].strip() if not src_txt: continue @@ -1936,8 +1908,9 @@ def translate_manga_text( src_u = src_txt.upper() src_engine = sources_used.get(bid, "unknown") - 
out_lines.append(f"#{bid}|{reading_map.get(bid,bid)}|{src_engine}|{src_u}|{tgt}|" - f"{','.join(flags) if flags else '-'}") + out_lines.append( + f"#{bid}|{reading_map.get(bid,bid)}|{src_engine}|{src_u}|{tgt}|" + f"{','.join(flags) if flags else '-'}") print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_engine:<12} " f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}") translated_count += 1 @@ -1980,7 +1953,7 @@ def translate_manga_text( # ============================================================ if __name__ == "__main__": translate_manga_text( - image_path="17.jpg", + image_path="19.png", source_lang="english", target_lang="ca", confidence_threshold=0.03, @@ -1997,3 +1970,917 @@ if __name__ == "__main__": max_box_height_ratio=0.5, auto_fix_bubbles=True ) + +def split_bubble_if_multiple_rows(indices, ocr, bid=None): + if len(indices) < 2: return None + boxes = [quad_bbox(ocr[i][0]) for i in indices] + hs = [max(1, b[3]-b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 12.0 + ys = [(b[1]+b[3])/2.0 for b in boxes] + ys_sorted = sorted(ys) + gap_thresh = max(med_h * 2.0, 30) + best_gap_idx, best_gap_size = None, 0.0 + for i in range(len(ys_sorted) - 1): + gap = ys_sorted[i+1] - ys_sorted[i] + if gap > gap_thresh and gap > best_gap_size: + best_gap_size, best_gap_idx = gap, i + if best_gap_idx is None: return None + split_y = (ys_sorted[best_gap_idx] + ys_sorted[best_gap_idx+1]) / 2.0 + top_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[1]+quad_bbox(ocr[i][0])[3])/2.0 < split_y] + bot_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[1]+quad_bbox(ocr[i][0])[3])/2.0 >= split_y] + if not top_idxs or not bot_idxs: return None + return (top_idxs, bot_idxs) + + +def split_cluster_by_big_vertical_gap(indices, ocr, factor=1.9, min_gap=22): + if len(indices) < 2: return None + boxes = [quad_bbox(ocr[i][0]) for i in indices] + hs = [max(1, b[3]-b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 12.0 + items = 
sorted([(i, quad_bbox(ocr[i][0])) for i in indices], + key=lambda x: (x[1][1]+x[1][3])/2.0) + gap_thresh = max(med_h * factor, min_gap) + best_gap, best_split_idx = 0.0, None + for k in range(len(items) - 1): + gap = items[k+1][1][1] - items[k][1][3] + if gap > gap_thresh and gap > best_gap: + best_gap, best_split_idx = gap, k + if best_split_idx is None: return None + top_idxs = [it[0] for it in items[:best_split_idx+1]] + bot_idxs = [it[0] for it in items[best_split_idx+1:]] + if not top_idxs or not bot_idxs: return None + return (top_idxs, bot_idxs) + + +def is_vertical_text_like(indices, ocr): + if len(indices) < 2: return False + boxes = [quad_bbox(ocr[i][0]) for i in indices] + med_h = float(np.median([max(1, b[3]-b[1]) for b in boxes])) + med_w = float(np.median([max(1, b[2]-b[0]) for b in boxes])) + if med_h < med_w * 1.2: return False + xs = [(b[0]+b[2])/2.0 for b in boxes] + ys = [(b[1]+b[3])/2.0 for b in boxes] + if (max(ys)-min(ys)) < (max(xs)-min(xs)) * 1.5: return False + return True + + +def split_nested_or_side_by_side(indices, ocr): + if len(indices) < 2: return None + xs = sorted([(quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 + for i in indices]) + mid_idx = len(xs) // 2 + split_x = (xs[mid_idx-1] + xs[mid_idx]) / 2.0 + left_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 < split_x] + right_idxs = [i for i in indices + if (quad_bbox(ocr[i][0])[0]+quad_bbox(ocr[i][0])[2])/2.0 >= split_x] + if not left_idxs or not right_idxs: return None + return (left_idxs, right_idxs) + + +def split_panel_box(image_bgr, box_xyxy, bubble_quads=None): + x1, y1, x2, y2 = box_xyxy + ih, iw = image_bgr.shape[:2] + x1, y1 = max(0, x1), max(0, y1) + x2, y2 = min(iw-1, x2), min(ih-1, y2) + if x2 <= x1 or y2 <= y1: return None + crop = image_bgr[y1:y2, x1:x2] + if crop.size == 0: return None + gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) + edges = cv2.Canny(gray, 50, 150) + h_proj = np.sum(edges, axis=0) + w = x2 - x1 + if w < 
100: return None + search_start = int(w * 0.35) + search_end = int(w * 0.65) + if search_end <= search_start: return None + region = h_proj[search_start:search_end] + if len(region) == 0: return None + threshold = np.percentile(region, 85) + candidates = [x1 + search_start + rx + for rx in range(len(region)) if region[rx] >= threshold] + if not candidates: return None + split_x = int(np.median(candidates)) + if bubble_quads: + lc = sum(1 for q in bubble_quads if quad_center(q)[0] < split_x) + rc = len(bubble_quads) - lc + if lc == 0 or rc == 0: return None + return (x1, x2, split_x) + + +# ============================================================ +# MERGE CLOSE BUBBLES +# ============================================================ +def merge_close_bubbles_by_line_height(bubbles, bubble_boxes, bubble_quads, + bubble_indices, ocr): + """ + Merges boxes that are spatially very close on BOTH axes AND share + meaningful horizontal overlap (same column). + + Single-quad boxes participate fully — no special isolation treatment. + The h_overlap_ratio >= 0.25 guard prevents merging horizontally + adjacent distinct bubbles. 
+ """ + if not bubbles: + return bubbles, bubble_boxes, bubble_quads, bubble_indices + + all_h = [max(1, quad_bbox(ocr[i][0])[3]-quad_bbox(ocr[i][0])[1]) + for i in range(len(ocr))] + med_h = float(np.median(all_h)) if all_h else 14.0 + merge_tol = max(8, med_h * 1.4) + + bids = sorted(bubble_boxes.keys()) + merged_set, merge_map = set(), {} + + for i, bid_i in enumerate(bids): + if bid_i in merged_set: continue + x1_i, y1_i, x2_i, y2_i = bubble_boxes[bid_i] + wi = max(1, x2_i - x1_i) + + for j in range(i + 1, len(bids)): + bid_j = bids[j] + if bid_j in merged_set: continue + x1_j, y1_j, x2_j, y2_j = bubble_boxes[bid_j] + wj = max(1, x2_j - x1_j) + + gap_x = max(0, max(x1_i, x1_j) - min(x2_i, x2_j)) + gap_y = max(0, max(y1_i, y1_j) - min(y2_i, y2_j)) + + h_ix1 = max(x1_i, x1_j) + h_ix2 = min(x2_i, x2_j) + h_overlap = max(0, h_ix2 - h_ix1) + h_overlap_ratio = h_overlap / max(1, min(wi, wj)) + + if gap_x <= merge_tol and gap_y <= merge_tol and h_overlap_ratio >= 0.25: + if bid_i not in merge_map: + merge_map[bid_i] = [bid_i] + merge_map[bid_i].append(bid_j) + merged_set.add(bid_j) + + if not merge_map: + return bubbles, bubble_boxes, bubble_quads, bubble_indices + + new_bubbles, new_boxes, new_quads, new_indices = {}, {}, {}, {} + next_bid = 1 + for bid in bids: + if bid in merged_set: continue + if bid in merge_map: + group = merge_map[bid] + all_indices = sorted(set(idx for b in group for idx in bubble_indices[b])) + new_bubbles[next_bid] = build_lines_from_indices(all_indices, ocr) + new_boxes[next_bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in all_indices]) + new_quads[next_bid] = [ocr[i][0] for i in all_indices] + new_indices[next_bid] = all_indices + else: + new_bubbles[next_bid] = bubbles[bid] + new_boxes[next_bid] = bubble_boxes[bid] + new_quads[next_bid] = bubble_quads[bid] + new_indices[next_bid] = bubble_indices[bid] + next_bid += 1 + + return new_bubbles, new_boxes, new_quads, new_indices + + +# 
============================================================ +# WIDE / BRIDGE QUAD SPLITTING +# ============================================================ +def split_wide_ocr_items(image_bgr, ocr_list, width_factor=8.0): + if not ocr_list: return ocr_list, 0 + hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list] + med_h = float(np.median(hs)) if hs else 14.0 + result, splits_made = [], 0 + + for quad, text, conf in ocr_list: + x1, y1, x2, y2 = quad_bbox(quad) + w = x2 - x1 + if w > med_h * width_factor: + pad = 2 + roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad), + max(0,x1):min(image_bgr.shape[1],x2)] + if roi.size > 0: + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + _, binary = cv2.threshold(gray, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + v_proj = np.sum(binary, axis=0) + gap_threshold = roi.shape[0] * 255 * 0.15 + gaps, in_gap, gap_start = [], False, 0 + for x in range(len(v_proj)): + if v_proj[x] < gap_threshold: + if not in_gap: gap_start, in_gap = x, True + else: + if in_gap: + gw = x - gap_start + if gw >= max(int(med_h * 0.6), 12): + gaps.append((gap_start + gw // 2, gw)) + in_gap = False + if gaps: + gaps.sort(key=lambda g: g[1], reverse=True) + split_x_abs = max(0, x1) + gaps[0][0] + if ' ' in text: + char_w = w / max(1, len(text)) + split_idx = int((split_x_abs - x1) / max(1e-6, char_w)) + spaces = [i for i, c in enumerate(text) if c == ' '] + if spaces: + split_idx = min(spaces, key=lambda i: abs(i - split_idx)) + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + else: + split_idx = int(len(text) * (split_x_abs - x1) / w) + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + if tl and tr: + result.extend([ + ([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), + ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]) + splits_made += 1 + continue + result.append((quad, text, conf)) + return result, splits_made + + +def split_abnormal_bridge_quads(image_bgr, 
ocr_list, aspect_ratio_threshold=6.0): + if not ocr_list: return ocr_list, 0 + hs = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in ocr_list] + med_h = float(np.median(hs)) if hs else 14.0 + result, splits_made = [], 0 + + for quad, text, conf in ocr_list: + x1, y1, x2, y2 = quad_bbox(quad) + w, h = x2 - x1, max(1, y2 - y1) + if w / h > aspect_ratio_threshold: + pad = 2 + roi = image_bgr[max(0,y1-pad):min(image_bgr.shape[0],y2+pad), + max(0,x1):min(image_bgr.shape[1],x2)] + if roi.size > 0: + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + _, binary = cv2.threshold(gray, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + v_proj = np.sum(binary, axis=0) + gap_threshold = h * 255 * 0.20 + gaps, in_gap, gap_start = [], False, 0 + for x in range(len(v_proj)): + if v_proj[x] < gap_threshold: + if not in_gap: gap_start, in_gap = x, True + else: + if in_gap: + gw = x - gap_start + if gw >= max(int(med_h * 0.8), 15): + gaps.append((gap_start + gw // 2, gw)) + in_gap = False + if gaps: + gaps.sort(key=lambda g: g[1], reverse=True) + split_x_abs = max(0, x1) + gaps[0][0] + if ' ' in text: + char_w = w / max(1, len(text)) + split_idx = int((split_x_abs - x1) / max(1e-6, char_w)) + spaces = [i for i, c in enumerate(text) if c == ' '] + if spaces: + split_idx = min(spaces, key=lambda i: abs(i - split_idx)) + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + else: + split_idx = int(len(text) * (split_x_abs - x1) / w) + tl, tr = text[:split_idx].strip(), text[split_idx:].strip() + if tl and tr: + result.extend([ + ([[x1,y1],[split_x_abs,y1],[split_x_abs,y2],[x1,y2]], tl, conf), + ([[split_x_abs,y1],[x2,y1],[x2,y2],[split_x_abs,y2]], tr, conf)]) + splits_made += 1 + continue + result.append((quad, text, conf)) + return result, splits_made + + +def normalize_ocr_quads(ocr_list): + result = [] + for quad, text, conf in ocr_list: + x1, y1, x2, y2 = quad_bbox(quad) + pad = 3 + new_quad = [[x1-pad,y1-pad],[x2+pad,y1-pad],[x2+pad,y2+pad],[x1-pad,y2+pad]] + 
result.append((new_quad, text, conf))
+    return result
+
+
+# ============================================================
+# VISION RE-READ
+# ============================================================
+def preprocess_variant(crop_bgr, mode):
+    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
+    if mode == "raw": return gray
+    if mode == "clahe": return cv2.createCLAHE(clipLimit=2.0,
+                                               tileGridSize=(8,8)).apply(gray)
+    if mode == "adaptive":
+        den = cv2.GaussianBlur(gray, (3,3), 0)
+        return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                     cv2.THRESH_BINARY, 35, 11)
+    if mode == "otsu":
+        den = cv2.GaussianBlur(gray, (3,3), 0)
+        _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        return th
+    if mode == "invert": return 255 - gray
+    if mode == "bilateral":
+        den = cv2.bilateralFilter(gray, 7, 60, 60)
+        _, th = cv2.threshold(den, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        return th
+    if mode == "morph_open":
+        _, th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        return cv2.morphologyEx(th, cv2.MORPH_OPEN, np.ones((2,2), np.uint8))
+    return gray
+
+
+def rotate_image_keep_bounds(img, angle_deg):
+    h, w = img.shape[:2]
+    c = (w/2, h/2)
+    M = cv2.getRotationMatrix2D(c, angle_deg, 1.0)
+    cos, sin = abs(M[0,0]), abs(M[0,1])
+    new_w = int((h*sin) + (w*cos))
+    new_h = int((h*cos) + (w*sin))
+    M[0,2] += (new_w/2) - c[0]
+    M[1,2] += (new_h/2) - c[1]
+    return cv2.warpAffine(img, M, (new_w, new_h),
+                          flags=cv2.INTER_CUBIC, borderValue=(255, 255, 255))
+
+
+def rebuild_text_from_vision_result(res):
+    if not res: return ""
+    norm = []
+    for bbox, txt, conf in res:
+        if not txt or not txt.strip(): continue
+        b = quad_bbox(bbox)
+        norm.append((b, txt, conf,
+                     (b[0]+b[2])/2.0, (b[1]+b[3])/2.0, max(1.0, b[3]-b[1])))
+    if not norm: return ""
+    med_h = float(np.median([x[5] for x in norm]))
+    row_tol = max(6.0, med_h * 0.75)
+    norm.sort(key=lambda z: z[4])
+    rows = []
+    for it in norm:
+        placed = False
+        for r in rows:
+            if 
abs(it[4] - r["yc"]) <= row_tol: + r["m"].append(it) + r["yc"] = float(np.mean([k[4] for k in r["m"]])) + placed = True; break + if not placed: rows.append({"yc": it[4], "m": [it]}) + rows.sort(key=lambda r: r["yc"]) + lines = [normalize_text(" ".join(x[1] for x in sorted(r["m"], key=lambda z: z[3]))) + for r in rows] + return normalize_text(" ".join(filter(None, lines))) + + +def reread_bubble_with_vision(image_bgr, bbox_xyxy, vision_detector, + upscale=3.0, pad=24): + ih, iw = image_bgr.shape[:2] + x1, y1, x2, y2 = bbox_xyxy + x1, y1 = max(0, int(x1-pad)), max(0, int(y1-pad)) + x2, y2 = min(iw, int(x2+pad)), min(ih, int(y2+pad)) + crop = image_bgr[y1:y2, x1:x2] + if crop.size == 0: return None, 0.0, "none" + + modes = ["raw", "clahe", "adaptive", "otsu", "invert", "bilateral", "morph_open"] + angles = [0.0, 1.5, -1.5] + best_v_txt, best_v_sc = "", 0.0 + up0 = cv2.resize(crop, + (int(crop.shape[1]*upscale), int(crop.shape[0]*upscale)), + interpolation=cv2.INTER_CUBIC) + + for mode in modes: + proc = preprocess_variant(up0, mode) + proc3 = cv2.cvtColor(proc, cv2.COLOR_GRAY2BGR) if len(proc.shape) == 2 else proc + for a in angles: + rot = rotate_image_keep_bounds(proc3, a) + res = (vision_detector.run_vision_ocr(rot) + if hasattr(vision_detector, 'run_vision_ocr') + else vision_detector.read(rot)) + txt = rebuild_text_from_vision_result(res) + sc = ocr_candidate_score(txt) + if sc > best_v_sc: + best_v_txt, best_v_sc = txt, sc + + if best_v_txt: return best_v_txt, best_v_sc, "vision-reread" + return None, 0.0, "none" + + +# ============================================================ +# LINES + BUBBLES +# ============================================================ +def build_lines_from_indices(indices, ocr): + if not indices: return [] + items = [] + for i in indices: + b = quad_bbox(ocr[i][0]) + items.append((i, b, (b[0]+b[2])/2.0, (b[1]+b[3])/2.0, max(1.0, b[3]-b[1]))) + med_h = float(np.median([it[4] for it in items])) if items else 10.0 + row_tol = max(6.0, 
med_h * 0.75) + items.sort(key=lambda x: x[3]) + rows = [] + for it in items: + placed = False + for r in rows: + if abs(it[3] - r["yc"]) <= row_tol: + r["m"].append(it) + r["yc"] = float(np.mean([k[3] for k in r["m"]])) + placed = True; break + if not placed: rows.append({"yc": it[3], "m": [it]}) + rows.sort(key=lambda r: r["yc"]) + return [normalize_text( + " ".join(ocr[i][1] + for i, _, _, _, _ in sorted(r["m"], key=lambda z: z[2]))) + for r in rows if r["m"]] + + +def auto_gap(image_path, base=18, ref_w=750): + img = cv2.imread(image_path) + return base * (img.shape[1] / ref_w) if img is not None else base + + +def group_tokens_vertical(ocr, image_shape, gap_px=18, bbox_padding=1, + strict_mode=False): + n = len(ocr) + if n == 0: return {}, {}, {}, {} + + boxes = [quad_bbox(r[0]) for r in ocr] + centers = [quad_center(r[0]) for r in ocr] + hs = [max(1.0, b[3]-b[1]) for b in boxes] + med_h = float(np.median(hs)) if hs else 12.0 + + max_vertical_gap = med_h * 2.5 if not strict_mode else med_h * 2.0 + max_horizontal_offset = med_h * 1.8 + + sorted_indices = sorted(range(n), key=lambda i: (centers[i][1], centers[i][0])) + groups, used = [], set() + + for i in sorted_indices: + if i in used: continue + current_group = [i] + used.add(i) + cx_i = centers[i][0] + + for j in sorted_indices: + if j in used or j == i: continue + cx_j, cy_j = centers[j] + if cy_j <= centers[i][1]: continue + if abs(cx_i - cx_j) > max_horizontal_offset: continue + + # Horizontal gap guard + gap_x = max(0, max(boxes[i][0], boxes[j][0]) - min(boxes[i][2], boxes[j][2])) + if gap_x > med_h * 1.5: continue + + # Orientation compatibility guard + if not orientation_compatible(i, j, ocr): continue + + vertical_gap = boxes[j][1] - boxes[current_group[-1]][3] + if vertical_gap <= max_vertical_gap: + current_group.append(j) + used.add(j) + cx_i = (cx_i + cx_j) / 2.0 + + if current_group: + groups.append(current_group) + + # Secondary merge pass + merged_groups, used_groups = [], set() + for i, group1 
in enumerate(groups): + if i in used_groups: continue + merged = list(group1) + used_groups.add(i) + for j, group2 in enumerate(groups): + if i == j or j in used_groups: continue + if should_merge_groups(merged, group2, ocr, med_h, max_vertical_gap): + compat = all(orientation_compatible(a, b, ocr) + for a in merged for b in group2) + if compat: + merged.extend(group2) + used_groups.add(j) + merged_groups.append(sorted(merged, key=lambda idx: centers[idx][1])) + + # Horizontal gap split pass + final_groups = [] + for group in merged_groups: + h_split = detect_horizontal_gap_in_group(group, ocr, med_h, gap_factor=2.5) + if h_split: + lg, rg = h_split + final_groups.append(sorted(lg, key=lambda idx: centers[idx][1])) + final_groups.append(sorted(rg, key=lambda idx: centers[idx][1])) + else: + final_groups.append(group) + + final_groups.sort(key=lambda g: (min(centers[i][1] for i in g), + min(centers[i][0] for i in g))) + + bubbles, bubble_boxes, bubble_quads, bubble_indices = {}, {}, {}, {} + ih, iw = image_shape[:2] + + for bid, idxs in enumerate(final_groups, start=1): + lines = build_lines_from_indices(idxs, ocr) + quads = [ocr[k][0] for k in idxs] + ub = boxes_union_xyxy([quad_bbox(q) for q in quads]) + if ub is None: continue + x1, y1, x2, y2 = ub + ap = max(1, int(round(med_h * 0.16))) + bubbles[bid] = lines + bubble_boxes[bid] = (max(0,x1-ap), max(0,y1-ap), + min(iw-1,x2+ap), min(ih-1,y2+ap)) + bubble_quads[bid] = quads + bubble_indices[bid] = idxs + + return bubbles, bubble_boxes, bubble_quads, bubble_indices + + +# ============================================================ +# SPLIT HELPER — centralises all split strategies +# ============================================================ +def _split_bubble_if_needed(bid, bubble_indices, bubble_quads, bubble_boxes, + filtered, image, iw, ih): + """ + Attempts all split strategies in priority order. + Returns ((part1_indices, part2_indices), reason_str) or (None, None). 
+ + BOX#18 fix: split_cluster_by_big_vertical_gap factor lowered to 1.4 + so the gap between the top speech bubble and the bottom cluster triggers. + """ + indices = bubble_indices[bid] + box = bubble_boxes[bid] + + # 1. Vertical-stack gap (sensitive — catches top-vs-bottom cluster) + if is_vertical_text_like(indices, filtered): + vgap = split_cluster_by_big_vertical_gap(indices, filtered, + factor=1.4, min_gap=18) + if vgap: + return vgap, "vertical-stack y-gap" + + # 2. Panel border + sr = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) + if sr: + _, _, split_x = sr + li = [idx for idx in indices if quad_center(filtered[idx][0])[0] < split_x] + ri = [idx for idx in indices if quad_center(filtered[idx][0])[0] >= split_x] + if li and ri: + return (li, ri), "panel border" + elif len(bubble_quads[bid]) >= 4: + cs = split_bubble_if_multiple_columns(indices, filtered, bid=bid, + use_aggressive_thresholds=True) + if cs: + return cs, "aggressive column" + + # 3. Column gap + cs = split_bubble_if_multiple_columns(indices, filtered, bid=bid) + if cs: + return cs, "vertical column" + + # 4. Nested / side-by-side + ns = split_nested_or_side_by_side(indices, filtered) + if ns: + return ns, "nested/side-by-side" + + # 5. Row split + rs = split_bubble_if_multiple_rows(indices, filtered, bid=bid) + if rs: + return rs, "horizontal row" + + # 6. Large vertical gap (general, less sensitive) + gy = split_cluster_by_big_vertical_gap(indices, filtered, factor=1.9, min_gap=22) + if gy: + return gy, "large vertical-gap" + + return None, None + + +# ============================================================ +# DEBUG / EXPORT +# ============================================================ +def save_debug_clusters(image_path, ocr, bubble_boxes, bubble_indices, + clean_lines=None, out_path="debug_clusters.png"): + """ + Draws all detected boxes. + Single-quad boxes are drawn in orange for visibility but are NOT + labelled as (ISOLATED) — they participate fully in merge passes. 
+ """ + img = cv2.imread(image_path) + if img is None: return + + for bbox, txt, conf in ocr: + pts = np.array(bbox, dtype=np.int32) + cv2.fillPoly(img, [pts], (255, 255, 255)) + cv2.polylines(img, [pts], True, (180, 180, 180), 1) + + for bid, bb in bubble_boxes.items(): + x1, y1, x2, y2 = bb + n_quads = len(bubble_indices.get(bid, [])) + color = (255, 165, 0) if n_quads == 1 else (0, 220, 0) + thickness = 3 if n_quads == 1 else 2 + cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness) + cv2.putText(img, f"BOX#{bid}", (x1+2, max(15, y1+16)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + + if clean_lines and bid in clean_lines: + text = clean_lines[bid] + words = text.split() + lines, cur = [], "" + for w in words: + if len(cur) + len(w) < 25: cur += w + " " + else: lines.append(cur.strip()); cur = w + " " + if cur: lines.append(cur.strip()) + y_text = y2 + 18 + for line in lines: + cv2.putText(img, line, (x1, y_text), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 3) + cv2.putText(img, line, (x1, y_text), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1) + y_text += 18 + + cv2.imwrite(out_path, img) + + +def estimate_reading_order(bbox_dict, mode="ltr"): + items = [(bid, (bb[0]+bb[2])/2.0, (bb[1]+bb[3])/2.0) + for bid, bb in bbox_dict.items()] + items.sort(key=lambda t: t[2]) + rows, tol = [], 90 + for it in items: + placed = False + for r in rows: + if abs(it[2] - r["cy"]) <= tol: + r["items"].append(it) + r["cy"] = float(np.mean([x[2] for x in r["items"]])) + placed = True; break + if not placed: rows.append({"cy": it[2], "items": [it]}) + rows.sort(key=lambda r: r["cy"]) + order = [] + for r in rows: + r["items"].sort(key=lambda x: x[1], reverse=(mode == "rtl")) + order.extend([z[0] for z in r["items"]]) + return {bid: i+1 for i, bid in enumerate(order)} + + +# ============================================================ +# MAIN PIPELINE +# ============================================================ +def translate_manga_text( + image_path="001-page.png", + 
source_lang="en", + target_lang="ca", + confidence_threshold=0.03, + min_text_length=1, + gap_px="auto", + quality_threshold=0.62, + export_to_file="output.txt", + export_bubbles_to="bubbles.json", + reading_mode="ltr", + debug=True, + use_enhanced_ocr=True, + strict_grouping=True, + max_box_width_ratio=0.6, + max_box_height_ratio=0.5, + auto_fix_bubbles=True +): + image = cv2.imread(image_path) + if image is None: + print(f"❌ Cannot load image: {image_path}"); return + + resolved_gap = auto_gap(image_path) if gap_px == "auto" else float(gap_px) + ih, iw = image.shape[:2] + print("Loading OCR engines...") + + if use_enhanced_ocr: + detector = ImprovedMacVisionDetector(source_lang=source_lang) + print("🚀 Using Enhanced Multi-Pass OCR") + else: + detector = MacVisionDetector(source_lang=source_lang) + + print("Running detection OCR (Apple Vision)...") + raw = detector.read(image_path) + print(f"Raw detections: {len(raw)}") + + if use_enhanced_ocr: + existing_quads = [r[0] for r in raw] + missed_regions = detect_small_text_regions(image, existing_quads) + if missed_regions: + print(f"🔍 Found {len(missed_regions)} potentially missed text regions") + for region in missed_regions: + rx1, ry1, rx2, ry2 = region + pad = 10 + rx1, ry1 = max(0, rx1-pad), max(0, ry1-pad) + rx2, ry2 = min(iw, rx2+pad), min(ih, ry2+pad) + crop = image[ry1:ry2, rx1:rx2] + if crop.size > 0: + upscaled = cv2.resize(crop, None, fx=4.0, fy=4.0, + interpolation=cv2.INTER_CUBIC) + for quad, text, conf in detector.run_vision_ocr(upscaled): + raw.append(([[int(p[0]/4.0+rx1), int(p[1]/4.0+ry1)] + for p in quad], text, conf)) + print(f"📝 Total detections after missed region scan: {len(raw)}") + + # ── Filtering ───────────────────────────────────────────────────────── + filtered, skipped = [], 0 + for bbox, text, conf in raw: + t = normalize_text(text) + qb = quad_bbox(bbox) + if conf < confidence_threshold: skipped += 1; continue + if len(t) < min_text_length: skipped += 1; continue + if not 
is_valid_language(t, source_lang): skipped += 1; continue + if not is_meaningful_text(t, source_lang): skipped += 1; continue + if qb[1] < int(ih * TOP_BAND_RATIO) and conf < 0.70 and len(t) >= 5: + skipped += 1; continue + filtered.append((bbox, t, conf)) + + print(f"Kept: {len(filtered)} | Skipped: {skipped}") + if not filtered: + print("⚠️ No text after filtering."); return + + # ── Pre-grouping quad splits ────────────────────────────────────────── + filtered, oversized_splits = validate_and_split_oversized_quads(image, filtered) + if oversized_splits > 0: + print(f"📐 Split {oversized_splits} oversized quad(s) before grouping") + + filtered, wide_splits = split_wide_ocr_items(image, filtered) + if wide_splits > 0: + print(f"✂️ Split {wide_splits} wide OCR lines across column gaps.") + + filtered, bridge_splits = split_abnormal_bridge_quads(image, filtered) + if bridge_splits > 0: + print(f"🧩 Split {bridge_splits} abnormal bridge OCR quad(s).") + + # Column-gap split: catches wide quads spanning two columns (BOX#6 type) + hs_pre = [max(1, quad_bbox(q)[3]-quad_bbox(q)[1]) for q, _, _ in filtered] + med_h_pre = float(np.median(hs_pre)) if hs_pre else 14.0 + filtered, col_splits = apply_column_gap_splits(image, filtered, med_h_pre) + + filtered = normalize_ocr_quads(filtered) + + # ── Grouping ────────────────────────────────────────────────────────── + print("📊 Grouping quads vertically...") + bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens_vertical( + filtered, image.shape, gap_px=resolved_gap, + bbox_padding=1, strict_mode=strict_grouping) + print(f" Created {len(bubbles)} initial box(es)") + + # ── Auto-fix (split + merge) ────────────────────────────────────────── + if auto_fix_bubbles: + bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection( + bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image) + + # ── Enforce max box size ────────────────────────────────────────────── + bubbles, bubble_boxes, 
bubble_quads, bubble_indices = enforce_max_box_size( + bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, + max_width_ratio=max_box_width_ratio, + max_height_ratio=max_box_height_ratio, + image_shape=image.shape) + + # ── Close-proximity merge ───────────────────────────────────────────── + bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_close_bubbles_by_line_height( + bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered) + + # ── Per-bubble split pass ───────────────────────────────────────────── + new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {} + next_bid = max(bubbles.keys()) + 1 if bubbles else 1 + splits_performed = [] + + for bid in list(bubbles.keys()): + split_result, split_reason = _split_bubble_if_needed( + bid, bubble_indices, bubble_quads, bubble_boxes, filtered, image, iw, ih) + + if split_result: + p1, p2 = split_result + splits_performed.append(f"BOX#{bid} ({split_reason})") + for part_idxs, part_bid in [(p1, bid), (p2, next_bid)]: + ub = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part_idxs]) + new_bubbles[part_bid] = build_lines_from_indices(part_idxs, filtered) + new_bubble_boxes[part_bid] = (max(0,ub[0]-2), max(0,ub[1]-2), + min(iw-1,ub[2]+2), min(ih-1,ub[3]+2)) + new_bubble_quads[part_bid] = [filtered[i][0] for i in part_idxs] + new_bubble_indices[part_bid] = part_idxs + next_bid += 1 + else: + new_bubbles[bid] = bubbles[bid] + new_bubble_boxes[bid] = bubble_boxes[bid] + new_bubble_quads[bid] = bubble_quads[bid] + new_bubble_indices[bid] = bubble_indices[bid] + + if splits_performed: + print(f"\n🔀 Splits detected: {len(splits_performed)}") + for s in splits_performed: print(f" ✓ {s}") + + # ── Remove nested / duplicate boxes ────────────────────────────────── + bubbles, bubble_boxes, bubble_quads, bubble_indices = remove_nested_boxes( + new_bubble_boxes, new_bubble_indices, new_bubble_quads, new_bubbles, + overlap_threshold=0.50) + print(f"✅ Final box count: 
{len(bubbles)}") + + # ── OCR quality pass ────────────────────────────────────────────────── + translator = GoogleTranslator(source=source_lang, target=target_lang) + clean_lines: Dict[int, str] = {} + sources_used: Dict[int, str] = {} + translations: Dict[int, str] = {} + + for bid, lines in bubbles.items(): + base_txt = normalize_text(" ".join(lines)) + base_sc = ocr_candidate_score(base_txt) + txt, src_used = base_txt, "vision-base" + if base_sc < quality_threshold: + rr_txt, rr_sc, rr_src = reread_bubble_with_vision( + image, bubble_boxes[bid], detector, upscale=3.0, pad=24) + if rr_txt and rr_sc > base_sc + 0.04 and is_valid_language(rr_txt, source_lang): + txt, src_used = rr_txt, rr_src + clean_lines[bid] = normalize_text(txt) + sources_used[bid] = src_used + + reading_map = estimate_reading_order(bubble_boxes, mode=reading_mode) + + # ── Translation ─────────────────────────────────────────────────────── + for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)): + src_txt = clean_lines[bid].strip() + if not src_txt: continue + if not is_valid_language(src_txt, source_lang): continue + if not is_meaningful_text(src_txt, source_lang): continue + try: + tgt = translator.translate(src_txt) or "" + tgt = postprocess_translation_general(tgt).upper() + except Exception as e: + tgt = f"[Error: {e}]" + translations[bid] = tgt + + if debug: + save_debug_clusters(image_path, filtered, bubble_boxes, bubble_indices, + clean_lines, "debug_clusters.png") + + # ── Text output ─────────────────────────────────────────────────────── + divider = "─" * 120 + out_lines = ["BUBBLE|ORDER|OCR_SOURCE|ORIGINAL|TRANSLATED|FLAGS", divider] + print(divider + f"\n{'BUBBLE':<8} {'ORDER':<6} {'SOURCE':<12} " + f"{'ORIGINAL':<40} {'TRANSLATED':<40} FLAGS\n" + divider) + + translated_count = 0 + for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)): + src_txt = clean_lines[bid].strip() + if not src_txt: continue + if not is_valid_language(src_txt, 
source_lang): continue + if not is_meaningful_text(src_txt, source_lang): continue + + flags = [] + tgt = translations.get(bid, "") + if not tgt: flags.append("NO_TRANSLATION") + src_u = src_txt.upper() + src_engine = sources_used.get(bid, "unknown") + + out_lines.append( + f"#{bid}|{reading_map.get(bid,bid)}|{src_engine}|{src_u}|{tgt}|" + f"{','.join(flags) if flags else '-'}") + print(f"#{bid:<7} {reading_map.get(bid,bid):<6} {src_engine:<12} " + f"{src_u[:40]:<40} {tgt[:40]:<40} {','.join(flags) if flags else '-'}") + translated_count += 1 + + out_lines.append(divider + f"\n✅ Done! {translated_count} bubble(s) translated.") + with open(export_to_file, "w", encoding="utf-8") as f: + f.write("\n".join(out_lines)) + + # ── bubbles.json ────────────────────────────────────────────────────── + bubbles_payload = {} + for bid in sorted(clean_lines.keys(), key=lambda x: reading_map.get(x, x)): + src_txt = clean_lines[bid].strip() + if not src_txt: continue + if not is_valid_language(src_txt, source_lang): continue + if not is_meaningful_text(src_txt, source_lang): continue + box = bubble_boxes.get(bid) + tgt = translations.get(bid, "") + bubbles_payload[str(bid)] = { + "order": reading_map.get(bid, bid), + "ocr_source": sources_used.get(bid, "unknown"), + "original": src_txt.upper(), + "translated": tgt, + "box": { + "x": box[0] if box else 0, + "y": box[1] if box else 0, + "w": (box[2]-box[0]) if box else 0, + "h": (box[3]-box[1]) if box else 0, + }, + "lines": [line.upper() for line in bubbles.get(bid, [])], + } + + with open(export_bubbles_to, "w", encoding="utf-8") as f: + json.dump(bubbles_payload, f, ensure_ascii=False, indent=2) + + print(divider + f"\nSaved: {export_to_file}\nSaved: {export_bubbles_to}") + + +# ============================================================ +# ENTRY POINT +# ============================================================ +if __name__ == "__main__": + translate_manga_text( + image_path="19.png", + source_lang="english", + 
target_lang="ca", + confidence_threshold=0.03, + min_text_length=1, + gap_px="auto", + quality_threshold=0.62, + export_to_file="output.txt", + export_bubbles_to="bubbles.json", + reading_mode="rtl", + debug=True, + use_enhanced_ocr=True, + strict_grouping=True, + max_box_width_ratio=0.6, + max_box_height_ratio=0.5, + auto_fix_bubbles=True + ) \ No newline at end of file