Everything

2026-04-22 16:27:56 +02:00
parent b730037a06
commit d77db83cfe
13 changed files with 2 additions and 4259 deletions
--- a/older-code/analyze_box5.py
+++ b/older-code/analyze_box5.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import json
+import numpy as np
+
+# Import functions from manga-translator.py
+import importlib.util
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)
+
+image_path = '004.png'
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)
+
+# Load current bubbles to see what box 5 contains
+with open('bubbles.json') as f:
+    bubbles_data = json.load(f)
+    box5_data = bubbles_data['5']
+    box5_bounds = (box5_data['x'], box5_data['y'], box5_data['x'] + box5_data['w'], box5_data['y'] + box5_data['h'])
+    print(f'Box 5 bounds (xyxy): {box5_bounds}')
+    print()
+
+# Print all detections sorted by position
+print('All raw detections:')
+for i, (bbox, text, conf) in enumerate(sorted(raw, key=lambda x: (mt.quad_bbox(x[0])[1], mt.quad_bbox(x[0])[0]))):
+    b = mt.quad_bbox(bbox)
+    t_norm = mt.normalize_text(text)
+    print(f'{i:2d}. [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] conf={conf:.2f} text="{t_norm}"')
+    
+    # Check if this overlaps with box 5
+    b5_x1, b5_y1, b5_x2, b5_y2 = box5_bounds
+    if not (b[2] < b5_x1 or b[0] > b5_x2 or b[3] < b5_y1 or b[1] > b5_y2):
+        print(f'  ^ OVERLAPS with Box 5!')
--- a/older-code/analyze_box7_split.py
+++ b/older-code/analyze_box7_split.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import json
+import numpy as np
+import importlib.util
+
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)
+
+image_path = '004.png'
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)
+image = cv2.imread(image_path)
+
+# Filter as the pipeline does
+filtered = []
+for bbox, text, conf in raw:
+    t = mt.normalize_text(text)
+    qb = mt.quad_bbox(bbox)
+    
+    if conf < 0.12:
+        continue
+    if len(t) < 1:
+        continue
+    if mt.is_noise_text(t):
+        continue
+    if mt.is_sound_effect(t):
+        continue
+    if mt.is_title_text(t):
+        continue
+    
+    filtered.append((bbox, t, conf))
+
+# Run grouping
+bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
+    filtered, image.shape, gap_px=18, bbox_padding=3
+)
+
+print("=== BUBBLE 7 & 8 ANALYSIS ===\n")
+print("Current Bubble 7 (right side content):")
+for bid in [7]:
+    if bid in bubble_indices:
+        box = bubble_boxes[bid]
+        print(f"  Box: {box}")
+        print(f"  Indices: {bubble_indices[bid]}")
+        indices = bubble_indices[bid]
+        boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
+        min_x = min(b[0] for b in boxes)
+        max_x = max(b[2] for b in boxes)
+        print(f"  X range: {min_x} - {max_x}")
+        for idx in indices:
+            b = mt.quad_bbox(filtered[idx][0])
+            print(f"    {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")
+
+print("\nCurrent Bubble 8 (left side content):")
+for bid in [8]:
+    if bid in bubble_indices:
+        box = bubble_boxes[bid]
+        print(f"  Box: {box}")
+        print(f"  Indices: {bubble_indices[bid]}")
+        indices = bubble_indices[bid]
+        boxes = [mt.quad_bbox(filtered[i][0]) for i in indices]
+        min_x = min(b[0] for b in boxes)
+        max_x = max(b[2] for b in boxes)
+        print(f"  X range: {min_x} - {max_x}")
+        for idx in indices:
+            b = mt.quad_bbox(filtered[idx][0])
+            print(f"    {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")
+
+# Check the horizontal gap between them
+print("\n=== GAP ANALYSIS ===")
+if 7 in bubble_indices and 8 in bubble_indices:
+    boxes7 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[7]]
+    boxes8 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[8]]
+    
+    max_x7 = max(b[2] for b in boxes7)
+    min_x8 = min(b[0] for b in boxes8)
+    
+    print(f"Bubble 7 max X: {max_x7}")
+    print(f"Bubble 8 min X: {min_x8}")
+    print(f"Horizontal gap: {min_x8 - max_x7}")
+    
+    # Check Y overlap
+    min_y7 = min(b[1] for b in boxes7)
+    max_y7 = max(b[3] for b in boxes7)
+    min_y8 = min(b[1] for b in boxes8)
+    max_y8 = max(b[3] for b in boxes8)
+    
+    print(f"\nBubble 7 Y range: {min_y7} - {max_y7}")
+    print(f"Bubble 8 Y range: {min_y8} - {max_y8}")
+    print(f"Y overlap: {max(0, min(max_y7, max_y8) - max(min_y7, min_y8))} pixels")
--- a/older-code/analyze_grouping.py
+++ b/older-code/analyze_grouping.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import json
+import numpy as np
+import importlib.util
+
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)
+
+image_path = '004.png'
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)
+image = cv2.imread(image_path)
+
+# Filter as the pipeline does
+filtered = []
+for bbox, text, conf in raw:
+    t = mt.normalize_text(text)
+    qb = mt.quad_bbox(bbox)
+    
+    if conf < 0.12:
+        continue
+    if len(t) < 1:
+        continue
+    if mt.is_noise_text(t):
+        continue
+    if mt.is_sound_effect(t):
+        continue
+    if mt.is_title_text(t):
+        continue
+    
+    filtered.append((bbox, t, conf))
+
+print(f"Filtered {len(filtered)} detections")
+
+# Now run grouping
+bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
+    filtered, image.shape, gap_px=18, bbox_padding=3
+)
+
+# Box 5 reference region (xyxy); defined for reference but not used below — every bubble is printed
+box5_region = (378, 570, 536, 753)
+
+print("\n=== BUBBLES ===")
+for bid, box in bubble_boxes.items():
+    print(f"Bubble {bid}: {box}")
+    print(f"  Indices: {bubble_indices[bid]}")
+    print(f"  Detections:")
+    for idx in bubble_indices[bid]:
+        b = mt.quad_bbox(filtered[idx][0])
+        print(f"    {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
--- a/older-code/check_box7.py
+++ b/older-code/check_box7.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import json
+import numpy as np
+import importlib.util
+
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)
+
+image_path = '004.png'
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)
+image = cv2.imread(image_path)
+
+# Filter as the pipeline does
+filtered = []
+for bbox, text, conf in raw:
+    t = mt.normalize_text(text)
+    qb = mt.quad_bbox(bbox)
+    
+    if conf < 0.12:
+        continue
+    if len(t) < 1:
+        continue
+    if mt.is_noise_text(t):
+        continue
+    if mt.is_sound_effect(t):
+        continue
+    if mt.is_title_text(t):
+        continue
+    
+    filtered.append((bbox, t, conf))
+
+# Now run grouping
+bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
+    filtered, image.shape, gap_px=18, bbox_padding=3
+)
+
+# Check current bubbles.json for reference
+with open('bubbles.json') as f:
+    old_bubbles = json.load(f)
+
+print("=== BOX 5 ===")
+print(f"Old bounds (from bubbles.json): x={old_bubbles['5']['x']}, y={old_bubbles['5']['y']}, w={old_bubbles['5']['w']}, h={old_bubbles['5']['h']}")
+print(f"  (xyxy): ({old_bubbles['5']['x']}, {old_bubbles['5']['y']}, {old_bubbles['5']['x'] + old_bubbles['5']['w']}, {old_bubbles['5']['y'] + old_bubbles['5']['h']})")
+
+# Find bubble at that location in current grouping
+for bid, box in bubble_boxes.items():
+    if box[0] == 371 and box[1] == 563:  # New box 5 location
+        print(f"Current bubble {bid}: {box}")
+        print(f"  Detections: {bubble_indices[bid]}")
+        for idx in bubble_indices[bid]:
+            b = mt.quad_bbox(filtered[idx][0])
+            print(f"    {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
+
+print("\n=== BOX 7 ===")
+print(f"Old bounds (from bubbles.json): x={old_bubbles['7']['x']}, y={old_bubbles['7']['y']}, w={old_bubbles['7']['w']}, h={old_bubbles['7']['h']}")
+print(f"  (xyxy): ({old_bubbles['7']['x']}, {old_bubbles['7']['y']}, {old_bubbles['7']['x'] + old_bubbles['7']['w']}, {old_bubbles['7']['y'] + old_bubbles['7']['h']})")
+
+# Find corresponding bubble
+for bid, box in bubble_boxes.items():
+    x1, y1, x2, y2 = box
+    # Check if this overlaps with old box 7
+    old_x1, old_y1 = old_bubbles['7']['x'], old_bubbles['7']['y']
+    old_x2 = old_x1 + old_bubbles['7']['w']
+    old_y2 = old_y1 + old_bubbles['7']['h']
+    
+    if not (x2 < old_x1 or x1 > old_x2 or y2 < old_y1 or y1 > old_y2):
+        print(f"Current bubble {bid}: {box}")
+        print(f"  Detections: {bubble_indices[bid]}")
+        for idx in bubble_indices[bid]:
+            b = mt.quad_bbox(filtered[idx][0])
+            print(f"    {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
--- a/older-code/check_grouping_logic.py
+++ b/older-code/check_grouping_logic.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import numpy as np
+import importlib.util
+
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)
+
+image_path = '004.png'
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)
+image = cv2.imread(image_path)
+
+# Filter
+filtered = []
+for bbox, text, conf in raw:
+    t = mt.normalize_text(text)
+    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
+        continue
+    filtered.append((bbox, t, conf))
+
+# Get the indices we're interested in (left and right bubbles)
+left_indices = [41, 42, 43, 44, 45, 46]  # LET, GO, OFF, ME, AL-, REA-
+right_indices = [47, 48, 49, 50, 51, 52, 53, 54]  # DON'T, WORRY!, HARUKO, ...
+
+print("=== CHECKING GROUPING CONDITIONS ===\n")
+
+# Check if they would be united in group_tokens
+boxes_left = [mt.quad_bbox(filtered[i][0]) for i in left_indices]
+boxes_right = [mt.quad_bbox(filtered[i][0]) for i in right_indices]
+
+# Check overlap_or_near
+print("Checking overlap_or_near with gap=18:")
+for li, bi in enumerate(left_indices):
+    for ri, bj in enumerate(right_indices):
+        b_left = boxes_left[li]
+        b_right = boxes_right[ri]
+        gap_x = max(0, max(b_left[0], b_right[0]) - min(b_left[2], b_right[2]))
+        gap_y = max(0, max(b_left[1], b_right[1]) - min(b_left[3], b_right[3]))
+        overlaps = gap_x <= 18 and gap_y <= 18
+        if overlaps:
+            print(f"  {bi} and {bj} overlap/near: gap_x={gap_x}, gap_y={gap_y}")
+
+# Distance check: derive the distance threshold from the median box height
+hs = [max(1.0, b[3] - b[1]) for b in [*boxes_left, *boxes_right]]
+med_h = float(np.median(hs)) if hs else 12.0
+dist_thresh = max(20.0, med_h * 2.2)
+
+print(f"\nMedian height: {med_h}")
+print(f"Distance threshold: {dist_thresh}")
+
+print("\nChecking distance check:")
+for li, bi in enumerate(left_indices[:1]):  # Just check first from each
+    for ri, bj in enumerate(right_indices[:1]):
+        b_left = boxes_left[li]
+        b_right = boxes_right[ri]
+        cx_left = (b_left[0] + b_left[2]) / 2.0
+        cy_left = (b_left[1] + b_left[3]) / 2.0
+        cx_right = (b_right[0] + b_right[2]) / 2.0
+        cy_right = (b_right[1] + b_right[3]) / 2.0
+        d = ((cx_left - cx_right) ** 2 + (cy_left - cy_right) ** 2) ** 0.5
+        within_dist = d <= dist_thresh
+        within_y = abs(cy_left - cy_right) <= med_h * 3.0
+        print(f"  {bi} to {bj}: distance={d:.1f}, within_dist={within_dist}, within_y_tol={within_y}")
--- a/older-code/debug_split_phase.py
+++ b/older-code/debug_split_phase.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+# Debug script to see what bubbles are produced after splitting
+
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import json
+import numpy as np
+import importlib.util
+
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)
+
+image_path = '004.png'
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)
+image = cv2.imread(image_path)
+
+# Full filtering as pipeline does
+filtered = []
+skipped = 0
+ih, iw = image.shape[:2]
+
+for bbox, text, conf in raw:
+    t = mt.normalize_text(text)
+    qb = mt.quad_bbox(bbox)
+    
+    if conf < 0.12:
+        skipped += 1
+        continue
+    if len(t) < 1:
+        skipped += 1
+        continue
+    if mt.is_noise_text(t):
+        skipped += 1
+        continue
+    if mt.is_sound_effect(t):
+        skipped += 1
+        continue
+    if mt.is_title_text(t):
+        skipped += 1
+        continue
+    if qb[1] < int(ih * mt.TOP_BAND_RATIO):
+        if conf < 0.70 and len(t) >= 5:
+            skipped += 1
+            continue
+
+    filtered.append((bbox, t, conf))
+
+resolved_gap = mt.auto_gap(image_path)
+bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
+    filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
+)
+
+print("=== AFTER GROUPING ===")
+print(f"Bubbles dict keys: {sorted(bubbles.keys())}")
+for bid in [7, 8]:
+    if bid in bubbles:
+        print(f"\nBubble {bid}:")
+        print(f"  Box: {bubble_boxes[bid]}")
+        print(f"  Indices ({len(bubble_indices[bid])}): {bubble_indices[bid]}")
+        print(f"  Quads ({len(bubble_quads[bid])})")
+
+# Now simulate the split logic
+new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
+next_bid = max(bubbles.keys()) + 1 if bubbles else 1
+splits_performed = []
+
+for bid in list(bubbles.keys()):
+    box = bubble_boxes[bid]
+    bubble_split = None
+    
+    # Try split
+    split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
+    if split_result:
+        box_left, box_right, split_x = split_result
+        # ... split logic ...
+        bubble_split = "panel_split"
+    
+    if bubble_split is None:
+        col_split = mt.split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
+        if col_split:
+            bubble_split = "column_split"
+    
+    if bubble_split:
+        splits_performed.append(f"Bubble {bid}: {bubble_split}")
+        # Don't actually split here, just mark it
+    else:
+        # No split
+        new_bubbles[bid] = bubbles[bid]
+        new_bubble_boxes[bid] = bubble_boxes[bid]
+        new_bubble_quads[bid] = bubble_quads[bid]
+        new_bubble_indices[bid] = bubble_indices[bid]
+
+print("\n=== AFTER SPLIT LOGIC ===")
+print(f"Splits detected: {len(splits_performed)}")
+for s in splits_performed:
+    print(f"  {s}")
+    
+print(f"\nBubbles dict keys: {sorted(new_bubbles.keys())}")
+for bid in [7, 8]:
+    if bid in new_bubbles:
+        print(f"\nBubble {bid}:")
+        print(f"  Box: {new_bubble_boxes[bid]}")
+        print(f"  Indices ({len(new_bubble_indices[bid])}): {new_bubble_indices[bid][:3]}...")
--- a/older-code/patch_manga_translator.py
+++ b/older-code/patch_manga_translator.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+from pathlib import Path
+
+TARGET = Path("manga-translator.py")
+
+def cut_after_first_entrypoint(text: str) -> str:
+    """
+    Keep only first full __main__ block and remove duplicated tail if present.
+    """
+    m = re.search(r'(?m)^if __name__ == "__main__":\s*$', text)
+    if not m:
+        return text
+
+    start = m.start()
+    # Keep entrypoint block plus indented lines after it
+    lines = text[start:].splitlines(True)
+    keep = []
+    keep.append(lines[0])  # if __name__...
+    i = 1
+    while i < len(lines):
+        ln = lines[i]
+        if ln.strip() == "":
+            keep.append(ln)
+            i += 1
+            continue
+        # if dedented back to col 0 => end of block
+        if not ln.startswith((" ", "\t")):
+            break
+        keep.append(ln)
+        i += 1
+
+    cleaned = text[:start] + "".join(keep)
+    return cleaned
+
+def replace_bad_vars(text: str) -> str:
+    text = text.replace(
+        "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr, image_bgr)",
+        "merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)"
+    )
+    text = text.replace(
+        "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, ocr)",
+        "reattach_orphan_short_tokens(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered)"
+    )
+    return text
+
+def ensure_autofix_chain(text: str) -> str:
+    old = (
+        "    # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
+        "    if auto_fix_bubbles:\n"
+        "        bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
+    )
+    new = (
+        "    # ── Auto-fix (split + merge) ──────────────────────────────────────────\n"
+        "    if auto_fix_bubbles:\n"
+        "        bubbles, bubble_boxes, bubble_quads, bubble_indices = auto_fix_bubble_detection(\n"
+        "            bubble_boxes, bubble_indices, bubble_quads, bubbles, filtered, image)\n"
+        "        bubbles, bubble_boxes, bubble_quads, bubble_indices = merge_micro_boxes_relaxed(\n"
+        "            bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered, image)\n"
+    )
+    return text.replace(old, new)
+
+def ensure_split_commit(text: str) -> str:
+    marker = "    # ── Remove nested / duplicate boxes ──────────────────────────────────\n"
+    if marker not in text:
+        return text
+
+    if "bubbles = new_bubbles" in text:
+        return text
+
+    inject = (
+        "    bubbles = new_bubbles\n"
+        "    bubble_boxes = new_bubble_boxes\n"
+        "    bubble_quads = new_bubble_quads\n"
+        "    bubble_indices = new_bubble_indices\n\n"
+    )
+    return text.replace(marker, inject + marker)
+
+def ensure_rescue_pipeline(text: str) -> str:
+    anchor = '    print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
+    if anchor not in text:
+        return text
+
+    if "rescue_name_and_short_tokens(raw" in text:
+        return text
+
+    block = (
+        '    print(f"Kept: {len(filtered)} | Skipped: {skipped}")\n'
+        '    # Protect short dialogue tokens confidence\n'
+        '    tmp = []\n'
+        '    for bbox, t, conf in filtered:\n'
+        '        tmp.append((bbox, t, maybe_conf_floor_for_protected(t, conf, floor=0.40)))\n'
+        '    filtered = tmp\n'
+        '    # Rescue names/short tokens dropped by strict filters\n'
+        '    rescued = rescue_name_and_short_tokens(raw, min_conf=0.20)\n'
+        '    filtered = merge_rescued_items(filtered, rescued, iou_threshold=0.55)\n'
+    )
+    return text.replace(anchor, block)
+
+def main():
+    if not TARGET.exists():
+        raise FileNotFoundError(f"Not found: {TARGET}")
+
+    src = TARGET.read_text(encoding="utf-8")
+    out = src
+
+    out = cut_after_first_entrypoint(out)
+    out = replace_bad_vars(out)
+    out = ensure_autofix_chain(out)
+    out = ensure_split_commit(out)
+    out = ensure_rescue_pipeline(out)
+
+    TARGET.write_text(out, encoding="utf-8")
+    print("✅ Patched manga-translator.py")
+
+if __name__ == "__main__":
+    main()
--- a/older-code/regenerate_debug.py
+++ b/older-code/regenerate_debug.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Regenerate debug_clusters.png with the new split bubbles.json
+"""
+
+import json
+import cv2
+import numpy as np
+
+def quad_bbox(quad):
+    """Convert quad to bounding box"""
+    xs = [p[0] for p in quad]
+    ys = [p[1] for p in quad]
+    return (min(xs), min(ys), max(xs), max(ys))
+
+def save_debug_clusters_from_json(
+    image_path="004.png",
+    bubbles_path="bubbles.json",
+    out_path="debug_clusters.png"
+):
+    img = cv2.imread(image_path)
+    if img is None:
+        print(f"❌ Cannot load image: {image_path}")
+        return
+    
+    # Load bubbles.json
+    with open(bubbles_path, "r", encoding="utf-8") as f:
+        bubbles_data = json.load(f)
+    
+    # Draw all quad polygons in white (erasing original text)
+    for bid_str, bubble_info in bubbles_data.items():
+        for quad in bubble_info.get("quads", []):
+            pts = np.array(quad, dtype=np.int32)
+            cv2.fillPoly(img, [pts], (255, 255, 255))
+            cv2.polylines(img, [pts], True, (180, 180, 180), 1)
+    
+    # Draw bounding boxes with labels
+    for bid_str, bubble_info in bubbles_data.items():
+        bid = int(bid_str)
+        x = bubble_info["x"]
+        y = bubble_info["y"]
+        w = bubble_info["w"]
+        h = bubble_info["h"]
+        x2 = x + w
+        y2 = y + h
+        
+        cv2.rectangle(img, (x, y), (x2, y2), (0, 220, 0), 2)
+        cv2.putText(img, f"BOX#{bid}", (x + 2, max(15, y + 16)),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
+    
+    cv2.imwrite(out_path, img)
+    print(f"✅ Saved: {out_path}")
+
+if __name__ == "__main__":
+    save_debug_clusters_from_json()
--- a/older-code/split_bubbles.py
+++ b/older-code/split_bubbles.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Helper script to split bubbles with multiple separate text regions.
+Run this to split Box 2 and Box 7 (as labelled in debug_clusters.png): reads bubbles_original.json, writes bubbles_split.json
+"""
+
+import json
+import numpy as np
+from typing import List, Tuple, Dict
+
+def quad_bbox(quad):
+    """Convert quad to bounding box"""
+    xs = [p[0] for p in quad]
+    ys = [p[1] for p in quad]
+    return (min(xs), min(ys), max(xs), max(ys))
+
+def boxes_union_xyxy(boxes):
+    """Union of multiple boxes"""
+    boxes = [b for b in boxes if b is not None]
+    if not boxes:
+        return None
+    return (
+        int(min(b[0] for b in boxes)),
+        int(min(b[1] for b in boxes)),
+        int(max(b[2] for b in boxes)),
+        int(max(b[3] for b in boxes)),
+    )
+
+def xyxy_to_xywh(bbox):
+    """Convert xyxy format to xywh"""
+    if bbox is None:
+        return None
+    x1, y1, x2, y2 = bbox
+    return {"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1)}
+
+def bbox_area_xyxy(b):
+    """Calculate area of a bounding box in xyxy format"""
+    if b is None:
+        return 0
+    x1, y1, x2, y2 = b
+    return (x2 - x1) * (y2 - y1)
+
+def split_bubble_by_vertical_gap(bubble_id: int, bubble_data: Dict, filtered_indices_map: Dict):
+    """
+    Attempt to split a bubble by detecting a significant horizontal gap (between quad x-centers) separating columns of text.
+    Returns: (left_indices, right_indices, gap_size) or None if no split
+    """
+    quad_bboxes = bubble_data['quad_bboxes']
+    quads = bubble_data['quads']
+    
+    if len(quads) < 2:
+        return None
+    
+    # Get x-coordinates with original indices
+    x_coords = []
+    for i, quad in enumerate(quads):
+        bbox = quad_bbox(quad)
+        x_center = (bbox[0] + bbox[2]) / 2.0
+        x_coords.append((i, x_center, bbox))
+    
+    # Sort by x-coordinate
+    x_coords_sorted = sorted(x_coords, key=lambda t: t[1])
+    
+    # Find the largest gap between consecutive x positions
+    max_gap = 0
+    split_pos = -1
+    
+    for i in range(len(x_coords_sorted) - 1):
+        gap = x_coords_sorted[i + 1][1] - x_coords_sorted[i][1]
+        if gap > max_gap:
+            max_gap = gap
+            split_pos = i
+    
+    # If gap is large enough, split
+    min_gap_threshold = 80  # pixels
+    if split_pos != -1 and max_gap > min_gap_threshold:
+        # Get ORIGINAL indices for left and right
+        left_indices = [x_coords_sorted[j][0] for j in range(split_pos + 1)]
+        right_indices = [x_coords_sorted[j][0] for j in range(split_pos + 1, len(x_coords_sorted))]
+        
+        return (left_indices, right_indices, max_gap)
+    
+    return None
+
+def split_bubbles_in_json(input_file="bubbles.json", output_file="bubbles_split.json", bubble_ids_to_split=[2, 7]):
+    """Split specified bubbles in the JSON file"""
+    
+    with open(input_file, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    
+    new_data = {}
+    next_bid = max(int(k) for k in data.keys()) + 1
+    
+    for bid_str, bubble_data in data.items():
+        bid = int(bid_str)
+        
+        if bid not in bubble_ids_to_split:
+            # Keep original
+            new_data[bid_str] = bubble_data
+            continue
+        
+        # Try to split
+        split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})
+        
+        if split_result:
+            left_indices, right_indices, gap_size = split_result
+            
+            print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
+            print(f"   Left indices: {left_indices}")
+            print(f"   Right indices: {right_indices}")
+            
+            # Create left bubble - keep the original bubble ID
+            left_quads = [bubble_data['quads'][i] for i in left_indices]
+            left_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in left_indices]
+            left_bbox = boxes_union_xyxy([quad_bbox(q) for q in left_quads])
+            left_bbox_padded = (
+                max(0, left_bbox[0] - 3),
+                max(0, left_bbox[1] - 3),
+                left_bbox[2] + 3,
+                left_bbox[3] + 3
+            )
+            
+            print(f"   Left bbox: {left_bbox} -> padded: {left_bbox_padded}")
+            
+            new_data[str(bid)] = {
+                "x": left_bbox_padded[0],
+                "y": left_bbox_padded[1],
+                "w": left_bbox_padded[2] - left_bbox_padded[0],
+                "h": left_bbox_padded[3] - left_bbox_padded[1],
+                "reading_order": bubble_data.get("reading_order", bid),
+                "quad_bboxes": left_quad_bboxes,
+                "quads": left_quads,
+                "text_bbox": xyxy_to_xywh(left_bbox),
+                "line_bboxes": [],
+                "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads])),
+                "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads]))),
+            }
+            
+            # Create right bubble - with new ID
+            right_quads = [bubble_data['quads'][i] for i in right_indices]
+            right_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in right_indices]
+            right_bbox = boxes_union_xyxy([quad_bbox(q) for q in right_quads])
+            right_bbox_padded = (
+                max(0, right_bbox[0] - 3),
+                max(0, right_bbox[1] - 3),
+                right_bbox[2] + 3,
+                right_bbox[3] + 3
+            )
+            
+            print(f"   Right bbox: {right_bbox} -> padded: {right_bbox_padded}")
+            
+            new_data[str(next_bid)] = {
+                "x": right_bbox_padded[0],
+                "y": right_bbox_padded[1],
+                "w": right_bbox_padded[2] - right_bbox_padded[0],
+                "h": right_bbox_padded[3] - right_bbox_padded[1],
+                "reading_order": bubble_data.get("reading_order", next_bid),
+                "quad_bboxes": right_quad_bboxes,
+                "quads": right_quads,
+                "text_bbox": xyxy_to_xywh(right_bbox),
+                "line_bboxes": [],
+                "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads])),
+                "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads]))),
+            }
+            
+            next_bid += 1
+        else:
+            # No split needed
+            new_data[bid_str] = bubble_data
+    
+    # Write output
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(new_data, f, indent=2, ensure_ascii=False)
+    
+    print(f"\n✅ Saved to {output_file}")
+
+if __name__ == "__main__":
+    split_bubbles_in_json(
+        input_file="bubbles_original.json",  # Always read from original
+        output_file="bubbles_split.json",
+        bubble_ids_to_split=[2, 7]
+    )
--- a/older-code/split_final.py
+++ b/older-code/split_final.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON
+"""
+
+import json
+import copy
+
+def quad_bbox(quad):
+    xs = [p[0] for p in quad]
+    ys = [p[1] for p in quad]
+    return (min(xs), min(ys), max(xs), max(ys))
+
+def boxes_union_xyxy(boxes):
+    boxes = [b for b in boxes if b is not None]
+    if not boxes:
+        return None
+    return (
+        int(min(b[0] for b in boxes)),
+        int(min(b[1] for b in boxes)),
+        int(max(b[2] for b in boxes)),
+        int(max(b[3] for b in boxes)),
+    )
+
+def xyxy_to_xywh(bbox):
+    if bbox is None:
+        return None
+    x1, y1, x2, y2 = bbox
+    return {"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1)}
+
+def bbox_area_xyxy(b):
+    if b is None:
+        return 0
+    x1, y1, x2, y2 = b
+    return (x2 - x1) * (y2 - y1)
+
+# Load original
+with open("bubbles_original.json", "r", encoding="utf-8") as f:
+    original = json.load(f)
+
+new_data = {}
+
+# Copy all non-split bubbles
+for bid_str, bubble_data in original.items():
+    bid = int(bid_str)
+    if bid not in [2, 7]:
+        new_data[bid_str] = copy.deepcopy(bubble_data)
+
+# Split Box 2
+print("🔀 Splitting Box 2...")
+box2_data = original["2"]
+left_indices_2 = [10, 1, 2, 4, 8, 0, 3, 6, 11, 12]
+right_indices_2 = [5, 7, 9]
+
+# Left part keeps ID 2
+left_quads_2 = [box2_data['quads'][i] for i in left_indices_2]
+left_quad_bboxes_2 = [box2_data['quad_bboxes'][i] for i in left_indices_2]
+left_bbox_2 = boxes_union_xyxy([quad_bbox(q) for q in left_quads_2])
+left_bbox_2_padded = (max(0, left_bbox_2[0]-3), max(0, left_bbox_2[1]-3), left_bbox_2[2]+3, left_bbox_2[3]+3)
+
+new_data["2"] = {
+    "x": left_bbox_2_padded[0],
+    "y": left_bbox_2_padded[1],
+    "w": left_bbox_2_padded[2] - left_bbox_2_padded[0],
+    "h": left_bbox_2_padded[3] - left_bbox_2_padded[1],
+    "reading_order": box2_data.get("reading_order", 2),
+    "quad_bboxes": left_quad_bboxes_2,
+    "quads": [[list(p) for p in quad] for quad in left_quads_2],  # Explicit list conversion
+    "text_bbox": xyxy_to_xywh(left_bbox_2),
+    "line_bboxes": [],
+    "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads_2])),
+    "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads_2]))),
+}
+print(f"  Left: y={new_data['2']['y']}, h={new_data['2']['h']}, quads={len(left_quads_2)}")
+
+# Right part gets new ID 8
+right_quads_2 = [box2_data['quads'][i] for i in right_indices_2]
+right_quad_bboxes_2 = [box2_data['quad_bboxes'][i] for i in right_indices_2]
+right_bbox_2 = boxes_union_xyxy([quad_bbox(q) for q in right_quads_2])
+right_bbox_2_padded = (max(0, right_bbox_2[0]-3), max(0, right_bbox_2[1]-3), right_bbox_2[2]+3, right_bbox_2[3]+3)
+
+new_data["8"] = {
+    "x": right_bbox_2_padded[0],
+    "y": right_bbox_2_padded[1],
+    "w": right_bbox_2_padded[2] - right_bbox_2_padded[0],
+    "h": right_bbox_2_padded[3] - right_bbox_2_padded[1],
+    "reading_order": box2_data.get("reading_order", 8),
+    "quad_bboxes": right_quad_bboxes_2,
+    "quads": [[list(p) for p in quad] for quad in right_quads_2],  # Explicit list conversion
+    "text_bbox": xyxy_to_xywh(right_bbox_2),
+    "line_bboxes": [],
+    "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads_2])),
+    "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads_2]))),
+}
+print(f"  Right: y={new_data['8']['y']}, h={new_data['8']['h']}, quads={len(right_quads_2)}")
+
+# Split Box 7
+print("\n🔀 Splitting Box 7...")
+box7_data = original["7"]
+left_indices_7 = [8, 13, 4, 11, 2, 6]
+right_indices_7 = [0, 5, 1, 3, 7, 10, 12, 9]
+
+# Left part keeps ID 7
+left_quads_7 = [box7_data['quads'][i] for i in left_indices_7]
+left_quad_bboxes_7 = [box7_data['quad_bboxes'][i] for i in left_indices_7]
+left_bbox_7 = boxes_union_xyxy([quad_bbox(q) for q in left_quads_7])
+left_bbox_7_padded = (max(0, left_bbox_7[0]-3), max(0, left_bbox_7[1]-3), left_bbox_7[2]+3, left_bbox_7[3]+3)
+
+new_data["7"] = {
+    "x": left_bbox_7_padded[0],
+    "y": left_bbox_7_padded[1],
+    "w": left_bbox_7_padded[2] - left_bbox_7_padded[0],
+    "h": left_bbox_7_padded[3] - left_bbox_7_padded[1],
+    "reading_order": box7_data.get("reading_order", 7),
+    "quad_bboxes": left_quad_bboxes_7,
+    "quads": [[list(p) for p in quad] for quad in left_quads_7],  # Explicit list conversion
+    "text_bbox": xyxy_to_xywh(left_bbox_7),
+    "line_bboxes": [],
+    "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads_7])),
+    "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads_7]))),
+}
+print(f"  Left: y={new_data['7']['y']}, h={new_data['7']['h']}, quads={len(left_quads_7)}")
+
+# Right part gets new ID 9
+right_quads_7 = [box7_data['quads'][i] for i in right_indices_7]
+right_quad_bboxes_7 = [box7_data['quad_bboxes'][i] for i in right_indices_7]
+right_bbox_7 = boxes_union_xyxy([quad_bbox(q) for q in right_quads_7])
+right_bbox_7_padded = (max(0, right_bbox_7[0]-3), max(0, right_bbox_7[1]-3), right_bbox_7[2]+3, right_bbox_7[3]+3)
+
+new_data["9"] = {
+    "x": right_bbox_7_padded[0],
+    "y": right_bbox_7_padded[1],
+    "w": right_bbox_7_padded[2] - right_bbox_7_padded[0],
+    "h": right_bbox_7_padded[3] - right_bbox_7_padded[1],
+    "reading_order": box7_data.get("reading_order", 9),
+    "quad_bboxes": right_quad_bboxes_7,
+    "quads": [[list(p) for p in quad] for quad in right_quads_7],  # Explicit list conversion
+    "text_bbox": xyxy_to_xywh(right_bbox_7),
+    "line_bboxes": [],
+    "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads_7])),
+    "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads_7]))),
+}
+print(f"  Right: y={new_data['9']['y']}, h={new_data['9']['h']}, quads={len(right_quads_7)}")
+
+# Sort by ID for output
+new_data_sorted = {}
+for bid in sorted([int(k) for k in new_data.keys()]):
+    new_data_sorted[str(bid)] = new_data[str(bid)]
+
+with open("bubbles.json", "w", encoding="utf-8") as f:
+    json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)
+
+print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")
--- a/older-code/test_panel_split.py
+++ b/older-code/test_panel_split.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+import sys
+sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')
+
+import cv2
+import numpy as np
+import importlib.util
+
+spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")  # load by file path: a hyphenated file name cannot be named in an import statement; the absolute path is machine-specific
+mt = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(mt)  # executes manga-translator.py's top-level code to populate mt
+
+image_path = '004.png'  # relative path, resolved against the current working directory
+detector = mt.MacVisionDetector(source_lang='en')
+raw = detector.read(image_path)  # iterated below as (bbox, text, conf) triples
+image = cv2.imread(image_path)  # NOTE: cv2.imread returns None when the file cannot be read; image.shape below would then fail
+
+# Full filtering: keep only the detections that pass the confidence/text checks below
+filtered = []
+for bbox, text, conf in raw:
+    t = mt.normalize_text(text)
+    qb = mt.quad_bbox(bbox)  # only qb[1] is used below; presumably (x1, y1, x2, y2) - TODO confirm
+    
+    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):  # drop low-confidence, empty, or helper-flagged tokens
+        continue
+    if qb[1] < int(image.shape[0] * mt.TOP_BAND_RATIO):  # qb[1] is above the cutoff row image.shape[0] * TOP_BAND_RATIO
+        if conf < 0.70 and len(t) >= 5:  # in that band, texts of 5+ characters need conf >= 0.70
+            continue
+    
+    filtered.append((bbox, t, conf))  # stores the normalized text, not the raw detector text
+
+# Group the filtered tokens; the last three results are indexed by bubble id further down
+bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
+    filtered, image.shape, gap_px=mt.auto_gap(image_path), bbox_padding=3
+)  # bubble_indices[bid] holds positions into `filtered`; `bubbles` is not used afterwards in this script
+
+print("=== TESTING PANEL SPLIT ON BUBBLE 7 ===\n")
+
+bid = 7  # hard-coded bubble under test - NOTE(review): nothing checks that this id exists in the grouping
+box = bubble_boxes[bid]
+print(f"Bubble {bid} box: {box}")
+print(f"Bubble {bid} quads: {len(bubble_quads[bid])}")
+print(f"Bubble {bid} indices: {len(bubble_indices[bid])}")
+
+# Test split_panel_box: a truthy result is unpacked below as (left box, right box, split x)
+split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])
+
+if split_result:
+    box_left, box_right, split_x = split_result
+    print(f"\n✓ Panel split detected!")
+    print(f"  Split X: {split_x}")
+    print(f"  Left box: {box_left}")
+    print(f"  Right box: {box_right}")
+    
+    # Simulate index split: assign each token index to a side by its quad center x
+    left_idxs, right_idxs = [], []
+    for idx in bubble_indices[bid]:
+        cx, cy = mt.quad_center(filtered[idx][0])  # cy is unused
+        if cx < split_x:
+            left_idxs.append(idx)
+        else:  # centers exactly at split_x go to the right side
+            right_idxs.append(idx)
+    
+    print(f"\n  Left indices ({len(left_idxs)}): {left_idxs}")
+    print(f"  Right indices ({len(right_idxs)}): {right_idxs}")
+    
+    if left_idxs and right_idxs:  # the split counts as valid only when both sides are non-empty
+        print(f"\n✓ Split is valid (both sides have content)")
+    else:
+        print(f"\n✗ Split is invalid (one side is empty)")
+else:
+    print(f"\n✗ No panel split detected")
+    print(f"  Threshold would be: quads >= 10? {len(bubble_quads[bid]) >= 10}")  # NOTE(review): 10/50/50 are evaluated only here; confirm they match split_panel_box's own thresholds
+    print(f"  Width >= 50? {box[2] - box[0] >= 50}")  # treats box as (x1, y1, x2, y2) - TODO confirm
+    print(f"  Height >= 50? {box[3] - box[1] >= 50}")