Added new renderer

2026-04-21 18:53:34 +02:00
parent bd475d8f01
commit dfa52f54eb
2 changed files with 277 additions and 267 deletions
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -3,13 +3,16 @@
 """
 manga-renderer.py
-Inputs:  001.jpg  +  bubbles.json  +  output.txt
+Inputs:  001.jpg  +  bubbles.json  +  output_001.txt
-Output:  translated_page.png
+Output:  translated_page_001.png
 Strategy:
-  1. For every bubble, white-fill all its OCR quads  (erases original text cleanly)
+  1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
-  2. Render the translated text centered inside the bubble bounding box
+  2. Detect the original font size from the OCR bounding boxes.
-  3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
+  3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
  4. Render the translated text centered inside the bubble bounding box.
  5. Uses uniform line heights to prevent accent collisions.
  6. Adds a white stroke to the text to cover any residual original characters.
 """
 import json
@@ -17,43 +20,40 @@ import textwrap
 import cv2
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
-from typing import Dict, List, Tuple, Optional, Set
+from typing import Dict, List, Tuple, Optional, Set, Any
 # ============================================================
 # CONFIG  — edit these paths to match your setup
 # ============================================================
-IMAGE_PATH        = "004.png"
+IMAGE_PATH        = "003.jpg"
 BUBBLES_PATH      = "bubbles.json"
-TRANSLATIONS_PATH = "output_004.txt"
+TRANSLATIONS_PATH = "output_003.txt"
-OUTPUT_PATH       = "translated_page_004.png"
+OUTPUT_PATH       = "translated_page_003.png"
 # Font candidates — first one that loads wins
 FONT_CANDIDATES = [
    "fonts/ComicNeue-Bold.ttf",
    # Mac fallbacks
    "/System/Library/Fonts/Supplemental/Comic Sans MS Bold.ttf",
    "/System/Library/Fonts/Supplemental/Arial Bold.ttf",
    # Windows fallbacks
    "C:\\Windows\\Fonts\\comicbd.ttf",
    "C:\\Windows\\Fonts\\arialbd.ttf",
    # Linux fallbacks
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
 ]
-FONT_SIZE     = 24
+DEFAULT_FONT_SIZE = 24
-MIN_FONT_SIZE = 18
+MIN_FONT_SIZE     = 12
 QUAD_PAD          = 4    # extra pixels added around each quad before white-fill
 # ============================================================
 # SKIP LIST
 # ── Add any bubble IDs you do NOT want rendered here.
 # ── The quads will still be erased (white-filled) but no
 # ── translated text will be drawn inside them.
 # ──
 # ── Examples of why you'd skip a bubble:
 # ──   • Sound effects  (BURP, BAM, POW …)
 # ──   • Untranslatable single characters
 # ──   • Bubbles with bad OCR you want to fix manually later
 # ──   • Narrator boxes you want to leave in the source language
 # ============================================================
 SKIP_BUBBLE_IDS: Set[int] = {
-    # 8,    # BURP BURP — sound effect
+    # Add any bubble IDs you do NOT want rendered here.
    # 2,    # example: bad OCR, fix manually
 }
 # ============================================================
 # FONT LOADER
 # ============================================================
@@ -69,20 +69,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
            continue
    return None
-
+def resolve_font_path() -> str:
-def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
+    """Return the path for the first working candidate."""
    """Return (path, font) for the first working candidate."""
    for candidate in FONT_CANDIDATES:
-        font = load_font(candidate, FONT_SIZE)
+        if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
        if font is not None:
            print(f"   ✅ Font: {candidate}")
-            return candidate, font
+            return candidate
    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback")
-    return "", ImageFont.load_default()
+    return ""
 # ============================================================
-# PARSE output.txt  →  {bid: translated_string}
+# PARSERS
 # ============================================================
 def parse_translations(filepath: str) -> Dict[int, str]:
    """
@@ -107,41 +104,21 @@ def parse_translations(filepath: str) -> Dict[int, str]:
                continue
    return translations
 # ============================================================
 # PARSE bubbles.json  →  bubble_boxes, quads_per_bubble
 # ============================================================
 def parse_bubbles(filepath: str):
    """
-    Returns:
+    Returns the full JSON data.
        bubble_boxes     : {bid: (x1, y1, x2, y2)}
        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
-
+    return data
    bubble_boxes     = {}
    quads_per_bubble = {}
    for key, val in data.items():
        bid = int(key)
        x1 = val["x"];       y1 = val["y"]
        x2 = x1 + val["w"]; y2 = y1 + val["h"]
        bubble_boxes[bid] = (x1, y1, x2, y2)
        quads_per_bubble[bid] = val.get("quads", [])
    return bubble_boxes, quads_per_bubble
 # ============================================================
 # ERASE  — white-fill every OCR quad (with small padding)
 # ============================================================
 def erase_quads(
    image_bgr,
-    quads_per_bubble: Dict[int, List],
+    bubbles_data: Dict[str, dict],
-    translations: Dict[int, str],   # ← NEW: only erase what we'll render
+    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
 ):
@@ -149,7 +126,6 @@ def erase_quads(
    White-fills OCR quads ONLY for bubbles that:
      - have a translation in output.txt  AND
      - are NOT in SKIP_BUBBLE_IDS
    Everything else is left completely untouched.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
@@ -157,15 +133,11 @@ def erase_quads(
    erased_count  = 0
    skipped_count = 0
-    for bid, quads in quads_per_bubble.items():
+    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)
        quads = val.get("quads", [])
-        # ignore if explicitly skipped
+        if bid in skip_ids or bid not in translations:
        if bid in skip_ids:
            skipped_count += 1
            continue
        # ignore if no translation exists (deleted from output.txt)
        if bid not in translations:
            skipped_count += 1
            continue
@@ -186,227 +158,206 @@ def erase_quads(
    print(f"   Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
 # ============================================================
 # DYNAMIC TEXT FITTING
 # ============================================================
 def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
    """Calculates the original font size based on the OCR bounding boxes."""
    line_bboxes = bubble_data.get("line_bboxes", [])
    if not line_bboxes:
        return fallback_size
-# ============================================================
+    heights = [box["h"] for box in line_bboxes]
-# FONT SIZING  +  TEXT WRAP
+    median_h = int(np.median(heights))
-# ============================================================
+    
-def fit_text(
+    estimated_size = int(median_h * 0.85)
    return max(MIN_FONT_SIZE, min(estimated_size, 60))
 def fit_text_dynamically(
    text: str, 
    box_w: int,
    box_h: int,
    font_path: str, 
-    max_size: int = FONT_SIZE,
+    max_w: int, 
-    min_size: int = MIN_FONT_SIZE
+    max_h: int, 
-) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
+    target_font_size: int
 ) -> Tuple[List[str], Any, int, int]:
    """
-    Returns (fitted_size, font, wrapped_lines) — largest size where
+    Wraps text and scales down font size if it exceeds the bubble dimensions.
-    the text block fits inside box_w × box_h.
+    Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
    """
-    for size in range(max_size, min_size - 1, -1):
+    font_size = target_font_size
        font = load_font(font_path, size) if font_path else None
        if font is None:
            return min_size, ImageFont.load_default(), [text]
-        chars_per_line = max(1, int(box_w / (size * 0.62)))
+    if not font_path:
-        wrapped        = textwrap.fill(text, width=chars_per_line)
+        font = ImageFont.load_default()
-        lines          = wrapped.split("\n")
+        char_w = 6
-        total_h        = (size + 8) * len(lines)
+        chars_per_line = max(1, int(max_w / char_w))
        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
        return wrapped_lines, font, 4, 10
-        if total_h <= box_h - 8:
+    while font_size >= MIN_FONT_SIZE:
-            return size, font, lines
+        font = load_font(font_path, font_size)
    # Nothing fit — use minimum size
    font = load_font(font_path, min_size) if font_path else None
        if font is None:
            font = ImageFont.load_default()
-    chars_per_line = max(1, int(box_w / (min_size * 0.62)))
+            return [text], font, 4, 10
    lines = textwrap.fill(text, width=chars_per_line).split("\n")
    return min_size, font, lines
        char_bbox = font.getbbox("A")
        char_w = (char_bbox[2] - char_bbox[0]) or 10
        chars_per_line = max(1, int((max_w * 0.95) / char_w))
-# ============================================================
+        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
 # COLOR HELPERS
 # ============================================================
 def sample_bg_color(
    image_bgr,
    x1: int, y1: int,
    x2: int, y2: int
 ) -> Tuple[int, int, int]:
    """Sample four corners of a bubble to estimate background color (R, G, B)."""
    ih, iw = image_bgr.shape[:2]
    samples = []
    for sx, sy in [(x1+4, y1+4), (x2-4, y1+4), (x1+4, y2-4), (x2-4, y2-4)]:
        sx = max(0, min(iw-1, sx)); sy = max(0, min(ih-1, sy))
        b, g, r = image_bgr[sy, sx]
        samples.append((int(r), int(g), int(b)))
    return (
        int(np.median([s[0] for s in samples])),
        int(np.median([s[1] for s in samples])),
        int(np.median([s[2] for s in samples])),
    )
        # Use uniform font metrics for height instead of per-line bounding boxes
        line_spacing = max(2, int(font_size * 0.15))
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
        else:
            line_h = font_size
-def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
+        total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
    lum = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2]
    return (0, 0, 0) if lum > 128 else (255, 255, 255)
        max_line_w = 0
        for line in wrapped_lines:
            bbox = font.getbbox(line)
            lw = bbox[2] - bbox[0]
            max_line_w = max(max_line_w, lw)
-def safe_textbbox(
+        if max_line_w <= max_w and total_h <= max_h:
-    draw, pos, text, font
+            return wrapped_lines, font, line_spacing, font_size
 ) -> Tuple[int, int, int, int]:
    try:
        return draw.textbbox(pos, text, font=font)
    except Exception:
        size = getattr(font, "size", 12)
        return (
            pos[0], pos[1],
            pos[0] + int(len(text) * size * 0.6),
            pos[1] + int(size * 1.2)
        )
        font_size -= 2
    font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
    char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10)
    char_w = (char_bbox[2] - char_bbox[0]) or 6
    chars_per_line = max(1, int(max_w / char_w))
    wrapped_lines = textwrap.wrap(text, width=chars_per_line)
    return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE
 # ============================================================
 # RENDER
 # ============================================================
-def render_translations(
+def render_text(
    image_bgr,
-    bubble_boxes: Dict[int, Tuple],
+    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    skip_ids: Set[int],
    font_path: str,
-    font_size: int     = FONT_SIZE,
+    skip_ids: Set[int]
    bold_outline: bool = True,
    auto_color: bool   = True,
    output_path: str   = OUTPUT_PATH
 ):
    """
    Draws the translated text centered in the line_union_bbox of each bubble.
    Adds a white stroke (outline) to cover any residual original characters.
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)
-    rendered = 0
+    rendered_count = 0
    skipped  = 0
    missing  = 0
-    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
+    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)
-        # ── skip list check ────────────────────────────────────────
+        if bid in skip_ids or bid not in translations:
        if bid in skip_ids:
            print(f"   ⏭️  Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
            skipped += 1
            continue
-        text = translations.get(bid, "").strip()
+        text = translations[bid]
-        if not text:
+        
-            print(f"   ⚠️  Bubble #{bid:<3} — no translation found, left blank")
+        union_box = val.get("line_union_bbox")
-            missing += 1
+        if not union_box:
            union_box = val.get("text_bbox")
            if not union_box:
                continue
-        box_w = x2 - x1
+        bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
        box_h = y2 - y1
        if box_w < 10 or box_h < 10:
            continue
-        # ── fit font + wrap ────────────────────────────────────────
+        pad_x = int(bw * 0.1)
-        size, font, lines = fit_text(
+        pad_y = int(bh * 0.1)
-            text, box_w, box_h, font_path, max_size=font_size
+        bx -= pad_x // 2
        by -= pad_y // 2
        bw += pad_x
        bh += pad_y
        target_size = get_original_font_size(val)
        wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
        # Use uniform typographic line height for rendering
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
        else:
            line_h = final_size
        total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
        current_y = by + (bh - total_text_height) // 2
        outline_thickness = max(2, int(final_size * 0.10))
        for i, line in enumerate(wrapped_lines):
            if hasattr(font, 'getbbox'):
                bbox = font.getbbox(line)
                lw = bbox[2] - bbox[0]
            else:
                lw = len(line) * 6
            current_x = bx + (bw - lw) // 2
            draw.text(
                (current_x, current_y), 
                line, 
                fill=(0, 0, 0),
                font=font,
                stroke_width=outline_thickness,
                stroke_fill=(255, 255, 255)
            )
-        # ── colors ─────────────────────────────────────────────────
+            # Advance Y by the uniform line height + spacing
-        if auto_color:
+            current_y += line_h + line_spacing
            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
            fg = pick_fg_color(bg)
            ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
        else:
            fg, ol = (0, 0, 0), (255, 255, 255)
-        # ── vertical center ────────────────────────────────────────
+        rendered_count += 1
        line_h  = size + 8
        total_h = line_h * len(lines)
        y_cur   = y1 + max(4, (box_h - total_h) // 2)
        for line in lines:
            bb     = safe_textbbox(draw, (0, 0), line, font)
            line_w = bb[2] - bb[0]
            x_cur  = x1 + max(2, (box_w - line_w) // 2)
            if bold_outline:
                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    try:
                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
                    except Exception:
                        pass
            try:
                draw.text((x_cur, y_cur), line, font=font, fill=fg)
            except Exception as e:
                print(f"   ❌ Draw error bubble #{bid}: {e}")
            y_cur += line_h
        print(f"   ✅ Bubble #{bid:<3} — rendered  ({len(lines)} lines, size {size}px)")
        rendered += 1
    pil_img.save(output_path)
    print()
    print(f"{'─'*50}")
    print(f"  Rendered : {rendered}")
    print(f"  Skipped  : {skipped}  (SKIP_BUBBLE_IDS)")
    print(f"  No text  : {missing}  (not in output.txt)")
    print(f"{'─'*50}")
    print(f"✅ Saved → {output_path}")
    return pil_img
    print(f"   Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
 # ============================================================
 # MAIN
 # ============================================================
 def main():
-    print(f"📖 Loading image        : {IMAGE_PATH}")
+    print(f"Loading image: {IMAGE_PATH}")
-    image = cv2.imread(IMAGE_PATH)
+    image_bgr = cv2.imread(IMAGE_PATH)
-    if image is None:
+    if image_bgr is None:
-        print(f"❌ Cannot load: {IMAGE_PATH}"); return
+        print(f"❌ Error: Could not load {IMAGE_PATH}")
        return
-    print(f"📦 Loading bubbles      : {BUBBLES_PATH}")
+    print(f"Loading translations: {TRANSLATIONS_PATH}")
    bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
    print(f"   {len(bubble_boxes)} bubbles  |  "
          f"{sum(len(v) for v in quads_per_bubble.values())} quads total")
    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
    translations = parse_translations(TRANSLATIONS_PATH)
    print(f"   {len(translations)} translations found")
-    if SKIP_BUBBLE_IDS:
+    print(f"Loading bubble data: {BUBBLES_PATH}")
-        print(f"⏭️  Skip list            : bubbles {sorted(SKIP_BUBBLE_IDS)}")
+    bubbles_data = parse_bubbles(BUBBLES_PATH)
    else:
        print(f"⏭️  Skip list            : (empty — all bubbles will be rendered)")
-    print("🔤 Resolving font...")
+    print("Resolving font...")
-    font_path, _ = resolve_font()
+    font_path = resolve_font_path()
-    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
+    print("\n--- Step 1: Erasing original text ---")
-    clean_image = erase_quads(
+    erased_bgr = erase_quads(
-        image,
+        image_bgr=image_bgr,
-        quads_per_bubble,
+        bubbles_data=bubbles_data,
-        translations = translations,   # ← pass translations here
+        translations=translations,
-        skip_ids     = SKIP_BUBBLE_IDS,
+        skip_ids=SKIP_BUBBLE_IDS,
-        pad          = QUAD_PAD
+        pad=QUAD_PAD
    )
-    print("✍️  Rendering translated text...")
+    print("\n--- Step 2: Rendering translated text ---")
-    render_translations(
+    final_bgr = render_text(
-        image_bgr    = clean_image,
+        image_bgr=erased_bgr,
-        bubble_boxes = bubble_boxes,
+        bubbles_data=bubbles_data,
-        translations = translations,
+        translations=translations,
-        skip_ids     = SKIP_BUBBLE_IDS,
+        font_path=font_path,
-        font_path    = font_path,
+        skip_ids=SKIP_BUBBLE_IDS
        font_size    = FONT_SIZE,
        bold_outline = True,
        auto_color   = True,
        output_path  = OUTPUT_PATH
    )
    print(f"\nSaving final image to: {OUTPUT_PATH}")
    cv2.imwrite(OUTPUT_PATH, final_bgr)
    print("✅ Done!")
 if __name__ == "__main__":
    main()
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -301,6 +301,54 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
    return None
 def split_bubble_if_multiple_rows(indices, ocr, bid=None):
    """
    Splits a bubble horizontally (top/bottom) if there is a massive vertical gap
    between text lines, indicating two separate bubbles were merged.

    Args:
        indices: Indices into ``ocr`` for the entries that belong to this bubble.
        ocr: Indexable OCR results; ``ocr[i][0]`` is handed to ``quad_bbox``.
        bid: Bubble id. Not read anywhere in this function body.

    Returns:
        ``(top_indices, bottom_indices)`` -- two lists of indices, ordered
        top to bottom -- when the largest vertical gap exceeds both
        thresholds below; otherwise ``None`` (including when fewer than two
        indices are given).
    """
    if len(indices) < 2:
        return None
    # assumes quad_bbox returns (x1, y1, x2, y2): b[1] is used as the top edge
    # and b[3] as the bottom edge below -- TODO confirm against quad_bbox.
    boxes = [quad_bbox(ocr[i][0]) for i in indices]
    # Sort by Y-coordinate (top to bottom)
    sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1])
    gaps = []
    # Running maximum of the bottom edges seen so far, so a box that overlaps
    # an earlier, taller box yields a small or negative gap instead of a
    # spurious large one.
    current_max_y = sorted_items[0][1][3]
    for i in range(1, len(sorted_items)):
        idx, b = sorted_items[i]
        y1 = b[1]
        gap = y1 - current_max_y
        # i is the position in sorted_items of the first box BELOW this gap.
        gaps.append((i, gap, current_max_y, y1))
        current_max_y = max(current_max_y, b[3])
    # Defensive: with at least two indices the loop above always records a gap.
    if not gaps:
        return None
    # Find the largest vertical gap
    max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
    # Calculate median line height to define what a "large" gap is
    # (hs has one entry per box, so the 15.0 fallback is only a safeguard).
    hs = [b[3] - b[1] for b in boxes]
    med_h = float(np.median(hs)) if hs else 15.0
    # If the vertical gap is more than 2.5x the height of a text line, it's a split!
    threshold = med_h * 2.5
    min_gap = 40.0 # Absolute minimum pixel gap to prevent micro-splits
    # The gap must exceed BOTH the relative and the absolute threshold.
    if max_gap_size > threshold and max_gap_size > min_gap:
        split_idx = max_gap_idx
        top_indices = [item[0] for item in sorted_items[:split_idx]]
        bottom_indices = [item[0] for item in sorted_items[split_idx:]]
        # NOTE(review): split_idx is always in 1..len(sorted_items)-1, so both
        # lists are non-empty and this guard is always true. It does NOT stop
        # a single stray box from being split off -- confirm whether a larger
        # minimum group size was intended.
        if len(top_indices) >= 1 and len(bottom_indices) >= 1:
            return top_indices, bottom_indices
    return None
 # ============================================================
 # OCR ENGINES (Apple Native Vision)
 # ============================================================
@@ -886,6 +934,7 @@ def translate_manga_text(
        box = bubble_boxes[bid]
        bubble_split = None
        # 1. Panel border split
        split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
        if split_result:
            box_left, box_right, split_x = split_result
@@ -909,27 +958,37 @@ def translate_manga_text(
                        bubble_split = (left_idxs, right_idxs)
                        splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
        # 2. Check for vertical columns (left/right split)
        if bubble_split is None:
            col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
            if col_split:
                left_idxs, right_idxs = col_split
                if left_idxs and right_idxs:
                    bubble_split = (left_idxs, right_idxs)
-                    splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
+                    splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(left_idxs)} | {len(right_idxs)} quads)")
        # 3. Check for horizontal rows (top/bottom split)
        if bubble_split is None:
            row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
            if row_split:
                top_idxs, bottom_idxs = row_split
                if top_idxs and bottom_idxs:
                    bubble_split = (top_idxs, bottom_idxs)
                    splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(top_idxs)} | {len(bottom_idxs)} quads)")
        if bubble_split:
-            left_idxs, right_idxs = bubble_split
+            part1_idxs, part2_idxs = bubble_split
-            new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
+            new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered)
-            ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
+            ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs])
-            new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
+            new_bubble_boxes[bid] = (max(0, ub_1[0]-3), max(0, ub_1[1]-3), min(iw-1, ub_1[2]+3), min(ih-1, ub_1[3]+3))
-            new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
+            new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs]
-            new_bubble_indices[bid] = left_idxs
+            new_bubble_indices[bid] = part1_idxs
-            new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
+            new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered)
-            ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
+            ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs])
-            new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
+            new_bubble_boxes[next_bid] = (max(0, ub_2[0]-3), max(0, ub_2[1]-3), min(iw-1, ub_2[2]+3), min(ih-1, ub_2[3]+3))
-            new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs]
+            new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs]
-            new_bubble_indices[next_bid] = right_idxs
+            new_bubble_indices[next_bid] = part2_idxs
            next_bid += 1
        else:
            new_bubbles[bid] = bubbles[bid]
@@ -938,7 +997,7 @@ def translate_manga_text(
            new_bubble_indices[bid] = bubble_indices[bid]
    if splits_performed:
-        print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
+        print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}")
        for split_info in splits_performed:
            print(f"   ✓ Split {split_info}")
@@ -1049,8 +1108,8 @@ def translate_manga_text(
 if __name__ == "__main__":
    translate_manga_text(
-        image_path="004.png",
+        image_path="003.jpg",
-        source_lang="en",
+        source_lang="es",
        target_lang="ca",
        confidence_threshold=0.05,
        min_text_length=1,