Added new renderer

2026-04-21 18:53:34 +02:00
parent bd475d8f01
commit dfa52f54eb
2 changed files with 277 additions and 267 deletions
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -3,13 +3,16 @@
 """
 manga-renderer.py

-Inputs:  001.jpg  +  bubbles.json  +  output.txt
-Output:  translated_page.png
+Inputs:  001.jpg  +  bubbles.json  +  output_001.txt
+Output:  translated_page_001.png

 Strategy:
-  1. For every bubble, white-fill all its OCR quads  (erases original text cleanly)
-  2. Render the translated text centered inside the bubble bounding box
-  3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank)
+  1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
+  2. Detect the original font size from the OCR bounding boxes.
+  3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
+  4. Render the translated text centered inside the bubble bounding box.
+  5. Use uniform line heights to prevent accent collisions.
+  6. Add a white stroke to the text to cover any residual original characters.
 """

 import json
@@ -17,43 +20,40 @@ import textwrap
 import cv2
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
-from typing import Dict, List, Tuple, Optional, Set
+from typing import Dict, List, Tuple, Optional, Set, Any

 # ============================================================
 # CONFIG  — edit these paths to match your setup
 # ============================================================
-IMAGE_PATH        = "004.png"
+IMAGE_PATH        = "003.jpg"
 BUBBLES_PATH      = "bubbles.json"
-TRANSLATIONS_PATH = "output_004.txt"
-OUTPUT_PATH       = "translated_page_004.png"
+TRANSLATIONS_PATH = "output_003.txt"
+OUTPUT_PATH       = "translated_page_003.png"

 # Font candidates — first one that loads wins
 FONT_CANDIDATES = [
    "fonts/ComicNeue-Bold.ttf",
+    # Mac fallbacks
+    "/System/Library/Fonts/Supplemental/Comic Sans MS Bold.ttf",
+    "/System/Library/Fonts/Supplemental/Arial Bold.ttf",
+    # Windows fallbacks
+    "C:\\Windows\\Fonts\\comicbd.ttf",
+    "C:\\Windows\\Fonts\\arialbd.ttf",
+    # Linux fallbacks
+    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
 ]

-FONT_SIZE     = 24
-MIN_FONT_SIZE = 18
-QUAD_PAD      = 4    # extra pixels added around each quad before white-fill
+DEFAULT_FONT_SIZE = 24
+MIN_FONT_SIZE     = 12
+QUAD_PAD          = 4    # extra pixels added around each quad before white-fill

 # ============================================================
 # SKIP LIST
-# ── Add any bubble IDs you do NOT want rendered here.
-# ── The quads will still be erased (white-filled) but no
-# ── translated text will be drawn inside them.
-# ──
-# ── Examples of why you'd skip a bubble:
-# ──   • Sound effects  (BURP, BAM, POW …)
-# ──   • Untranslatable single characters
-# ──   • Bubbles with bad OCR you want to fix manually later
-# ──   • Narrator boxes you want to leave in the source language
 # ============================================================
 SKIP_BUBBLE_IDS: Set[int] = {
-    # 8,    # BURP BURP — sound effect
-    # 2,    # example: bad OCR, fix manually
+    # Add any bubble IDs you do NOT want rendered here.
 }

-
 # ============================================================
 # FONT LOADER
 # ============================================================
@@ -69,20 +69,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
            continue
    return None

-
-def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]:
-    """Return (path, font) for the first working candidate."""
+def resolve_font_path() -> str:
+    """Return the path for the first working candidate."""
    for candidate in FONT_CANDIDATES:
-        font = load_font(candidate, FONT_SIZE)
-        if font is not None:
+        if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
            print(f"   ✅ Font: {candidate}")
-            return candidate, font
+            return candidate
    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback")
-    return "", ImageFont.load_default()
-
+    return ""

 # ============================================================
-# PARSE output.txt  →  {bid: translated_string}
+# PARSERS
 # ============================================================
 def parse_translations(filepath: str) -> Dict[int, str]:
    """
@@ -107,41 +104,21 @@ def parse_translations(filepath: str) -> Dict[int, str]:
                continue
    return translations

-
-# ============================================================
-# PARSE bubbles.json  →  bubble_boxes, quads_per_bubble
-# ============================================================
 def parse_bubbles(filepath: str):
    """
-    Returns:
-        bubble_boxes     : {bid: (x1, y1, x2, y2)}
-        quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]}
+    Returns the full JSON data.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
-
-    bubble_boxes     = {}
-    quads_per_bubble = {}
-
-    for key, val in data.items():
-        bid = int(key)
-
-        x1 = val["x"];       y1 = val["y"]
-        x2 = x1 + val["w"]; y2 = y1 + val["h"]
-        bubble_boxes[bid] = (x1, y1, x2, y2)
-
-        quads_per_bubble[bid] = val.get("quads", [])
-
-    return bubble_boxes, quads_per_bubble
-
+    return data

 # ============================================================
 # ERASE  — white-fill every OCR quad (with small padding)
 # ============================================================
 def erase_quads(
    image_bgr,
-    quads_per_bubble: Dict[int, List],
-    translations: Dict[int, str],   # ← NEW: only erase what we'll render
+    bubbles_data: Dict[str, dict],
+    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
 ):
@@ -149,7 +126,6 @@ def erase_quads(
    White-fills OCR quads ONLY for bubbles that:
      - have a translation in output.txt  AND
      - are NOT in SKIP_BUBBLE_IDS
-    Everything else is left completely untouched.
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
@@ -157,15 +133,11 @@ def erase_quads(
    erased_count  = 0
    skipped_count = 0

-    for bid, quads in quads_per_bubble.items():
+    for bid_str, val in bubbles_data.items():
+        bid = int(bid_str)
+        quads = val.get("quads", [])

-        # ignore if explicitly skipped
-        if bid in skip_ids:
-            skipped_count += 1
-            continue
-
-        # ignore if no translation exists (deleted from output.txt)
-        if bid not in translations:
+        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue

@@ -186,227 +158,206 @@ def erase_quads(
    print(f"   Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result

-
 # ============================================================
-# FONT SIZING  +  TEXT WRAP
+# DYNAMIC TEXT FITTING
 # ============================================================
-def fit_text(
-    text: str,
-    box_w: int,
-    box_h: int,
-    font_path: str,
-    max_size: int = FONT_SIZE,
-    min_size: int = MIN_FONT_SIZE
-) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]:
+def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
+    """Calculates the original font size based on the OCR bounding boxes."""
+    line_bboxes = bubble_data.get("line_bboxes", [])
+    if not line_bboxes:
+        return fallback_size
+    
+    heights = [box["h"] for box in line_bboxes]
+    median_h = int(np.median(heights))
+    
+    estimated_size = int(median_h * 0.85)
+    return max(MIN_FONT_SIZE, min(estimated_size, 60))
+
+def fit_text_dynamically(
+    text: str, 
+    font_path: str, 
+    max_w: int, 
+    max_h: int, 
+    target_font_size: int
+) -> Tuple[List[str], Any, int, int]:
    """
-    Returns (fitted_size, font, wrapped_lines) — largest size where
-    the text block fits inside box_w × box_h.
+    Wraps text and scales down font size if it exceeds the bubble dimensions.
+    Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
    """
-    for size in range(max_size, min_size - 1, -1):
-        font = load_font(font_path, size) if font_path else None
-        if font is None:
-            return min_size, ImageFont.load_default(), [text]
-
-        chars_per_line = max(1, int(box_w / (size * 0.62)))
-        wrapped        = textwrap.fill(text, width=chars_per_line)
-        lines          = wrapped.split("\n")
-        total_h        = (size + 8) * len(lines)
-
-        if total_h <= box_h - 8:
-            return size, font, lines
-
-    # Nothing fit — use minimum size
-    font = load_font(font_path, min_size) if font_path else None
-    if font is None:
+    font_size = target_font_size
+    
+    if not font_path:
        font = ImageFont.load_default()
-    chars_per_line = max(1, int(box_w / (min_size * 0.62)))
-    lines = textwrap.fill(text, width=chars_per_line).split("\n")
-    return min_size, font, lines
-
-
-# ============================================================
-# COLOR HELPERS
-# ============================================================
-def sample_bg_color(
-    image_bgr,
-    x1: int, y1: int,
-    x2: int, y2: int
-) -> Tuple[int, int, int]:
-    """Sample four corners of a bubble to estimate background color (R, G, B)."""
-    ih, iw = image_bgr.shape[:2]
-    samples = []
-    for sx, sy in [(x1+4, y1+4), (x2-4, y1+4), (x1+4, y2-4), (x2-4, y2-4)]:
-        sx = max(0, min(iw-1, sx)); sy = max(0, min(ih-1, sy))
-        b, g, r = image_bgr[sy, sx]
-        samples.append((int(r), int(g), int(b)))
-    return (
-        int(np.median([s[0] for s in samples])),
-        int(np.median([s[1] for s in samples])),
-        int(np.median([s[2] for s in samples])),
-    )
-
-
-def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]:
-    lum = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2]
-    return (0, 0, 0) if lum > 128 else (255, 255, 255)
-
-
-def safe_textbbox(
-    draw, pos, text, font
-) -> Tuple[int, int, int, int]:
-    try:
-        return draw.textbbox(pos, text, font=font)
-    except Exception:
-        size = getattr(font, "size", 12)
-        return (
-            pos[0], pos[1],
-            pos[0] + int(len(text) * size * 0.6),
-            pos[1] + int(size * 1.2)
-        )
-
+        char_w = 6
+        chars_per_line = max(1, int(max_w / char_w))
+        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
+        return wrapped_lines, font, 4, 10
+    
+    while font_size >= MIN_FONT_SIZE:
+        font = load_font(font_path, font_size)
+        if font is None:
+            font = ImageFont.load_default()
+            return [text], font, 4, 10
+            
+        char_bbox = font.getbbox("A")
+        char_w = (char_bbox[2] - char_bbox[0]) or 10
+        chars_per_line = max(1, int((max_w * 0.95) / char_w))
+        
+        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
+        
+        # Use uniform font metrics for height instead of per-line bounding boxes
+        line_spacing = max(2, int(font_size * 0.15))
+        if hasattr(font, 'getmetrics'):
+            ascent, descent = font.getmetrics()
+            line_h = ascent + descent
+        else:
+            line_h = font_size
+            
+        total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
+        
+        max_line_w = 0
+        for line in wrapped_lines:
+            bbox = font.getbbox(line)
+            lw = bbox[2] - bbox[0]
+            max_line_w = max(max_line_w, lw)
+            
+        if max_line_w <= max_w and total_h <= max_h:
+            return wrapped_lines, font, line_spacing, font_size
+            
+        font_size -= 2
+        
+    font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default()
+    char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10)
+    char_w = (char_bbox[2] - char_bbox[0]) or 6
+    chars_per_line = max(1, int(max_w / char_w))
+    wrapped_lines = textwrap.wrap(text, width=chars_per_line)
+    
+    return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE

 # ============================================================
 # RENDER
 # ============================================================
-def render_translations(
+def render_text(
    image_bgr,
-    bubble_boxes: Dict[int, Tuple],
+    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
-    skip_ids: Set[int],
    font_path: str,
-    font_size: int     = FONT_SIZE,
-    bold_outline: bool = True,
-    auto_color: bool   = True,
-    output_path: str   = OUTPUT_PATH
+    skip_ids: Set[int]
 ):
+    """
+    Draws the translated text centered in each bubble's line_union_bbox (or text_bbox if absent).
+    Adds a white stroke (outline) to cover any residual original characters.
+    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
-    pil_img   = Image.fromarray(image_rgb)
-    draw      = ImageDraw.Draw(pil_img)
+    pil_img = Image.fromarray(image_rgb)
+    draw = ImageDraw.Draw(pil_img)

-    rendered = 0
-    skipped  = 0
-    missing  = 0
+    rendered_count = 0

-    for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()):
+    for bid_str, val in bubbles_data.items():
+        bid = int(bid_str)

-        # ── skip list check ────────────────────────────────────────
-        if bid in skip_ids:
-            print(f"   ⏭️  Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)")
-            skipped += 1
+        if bid in skip_ids or bid not in translations:
            continue

-        text = translations.get(bid, "").strip()
-        if not text:
-            print(f"   ⚠️  Bubble #{bid:<3} — no translation found, left blank")
-            missing += 1
-            continue
-
-        box_w = x2 - x1
-        box_h = y2 - y1
-        if box_w < 10 or box_h < 10:
-            continue
-
-        # ── fit font + wrap ────────────────────────────────────────
-        size, font, lines = fit_text(
-            text, box_w, box_h, font_path, max_size=font_size
-        )
-
-        # ── colors ─────────────────────────────────────────────────
-        if auto_color:
-            bg = sample_bg_color(image_bgr, x1, y1, x2, y2)
-            fg = pick_fg_color(bg)
-            ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
+        text = translations[bid]
+        
+        union_box = val.get("line_union_bbox")
+        if not union_box:
+            union_box = val.get("text_bbox")
+            if not union_box:
+                continue
+            
+        bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
+        
+        pad_x = int(bw * 0.1)
+        pad_y = int(bh * 0.1)
+        bx -= pad_x // 2
+        by -= pad_y // 2
+        bw += pad_x
+        bh += pad_y
+        
+        target_size = get_original_font_size(val)
+        wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
+        
+        # Use uniform typographic line height for rendering
+        if hasattr(font, 'getmetrics'):
+            ascent, descent = font.getmetrics()
+            line_h = ascent + descent
        else:
-            fg, ol = (0, 0, 0), (255, 255, 255)
+            line_h = final_size
+                
+        total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
+        
+        current_y = by + (bh - total_text_height) // 2
+        outline_thickness = max(2, int(final_size * 0.10))

-        # ── vertical center ────────────────────────────────────────
-        line_h  = size + 8
-        total_h = line_h * len(lines)
-        y_cur   = y1 + max(4, (box_h - total_h) // 2)
+        for i, line in enumerate(wrapped_lines):
+            if hasattr(font, 'getbbox'):
+                bbox = font.getbbox(line)
+                lw = bbox[2] - bbox[0]
+            else:
+                lw = len(line) * 6
+            
+            current_x = bx + (bw - lw) // 2
+            
+            draw.text(
+                (current_x, current_y), 
+                line, 
+                fill=(0, 0, 0),
+                font=font,
+                stroke_width=outline_thickness,
+                stroke_fill=(255, 255, 255)
+            )
+            
+            # Advance Y by the uniform line height + spacing
+            current_y += line_h + line_spacing

-        for line in lines:
-            bb     = safe_textbbox(draw, (0, 0), line, font)
-            line_w = bb[2] - bb[0]
-            x_cur  = x1 + max(2, (box_w - line_w) // 2)
-
-            if bold_outline:
-                for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
-                    try:
-                        draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol)
-                    except Exception:
-                        pass
-
-            try:
-                draw.text((x_cur, y_cur), line, font=font, fill=fg)
-            except Exception as e:
-                print(f"   ❌ Draw error bubble #{bid}: {e}")
-
-            y_cur += line_h
-
-        print(f"   ✅ Bubble #{bid:<3} — rendered  ({len(lines)} lines, size {size}px)")
-        rendered += 1
-
-    pil_img.save(output_path)
-
-    print()
-    print(f"{'─'*50}")
-    print(f"  Rendered : {rendered}")
-    print(f"  Skipped  : {skipped}  (SKIP_BUBBLE_IDS)")
-    print(f"  No text  : {missing}  (not in output.txt)")
-    print(f"{'─'*50}")
-    print(f"✅ Saved → {output_path}")
-
-    return pil_img
+        rendered_count += 1

+    print(f"   Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
+    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

 # ============================================================
 # MAIN
 # ============================================================
 def main():
-    print(f"📖 Loading image        : {IMAGE_PATH}")
-    image = cv2.imread(IMAGE_PATH)
-    if image is None:
-        print(f"❌ Cannot load: {IMAGE_PATH}"); return
+    print(f"Loading image: {IMAGE_PATH}")
+    image_bgr = cv2.imread(IMAGE_PATH)
+    if image_bgr is None:
+        print(f"❌ Error: Could not load {IMAGE_PATH}")
+        return

-    print(f"📦 Loading bubbles      : {BUBBLES_PATH}")
-    bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH)
-    print(f"   {len(bubble_boxes)} bubbles  |  "
-          f"{sum(len(v) for v in quads_per_bubble.values())} quads total")
-
-    print(f"🌐 Loading translations : {TRANSLATIONS_PATH}")
+    print(f"Loading translations: {TRANSLATIONS_PATH}")
    translations = parse_translations(TRANSLATIONS_PATH)
-    print(f"   {len(translations)} translations found")
+    
+    print(f"Loading bubble data: {BUBBLES_PATH}")
+    bubbles_data = parse_bubbles(BUBBLES_PATH)

-    if SKIP_BUBBLE_IDS:
-        print(f"⏭️  Skip list            : bubbles {sorted(SKIP_BUBBLE_IDS)}")
-    else:
-        print(f"⏭️  Skip list            : (empty — all bubbles will be rendered)")
+    print("Resolving font...")
+    font_path = resolve_font_path()

-    print("🔤 Resolving font...")
-    font_path, _ = resolve_font()
-
-    print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...")
-    clean_image = erase_quads(
-        image,
-        quads_per_bubble,
-        translations = translations,   # ← pass translations here
-        skip_ids     = SKIP_BUBBLE_IDS,
-        pad          = QUAD_PAD
+    print("\n--- Step 1: Erasing original text ---")
+    erased_bgr = erase_quads(
+        image_bgr=image_bgr,
+        bubbles_data=bubbles_data,
+        translations=translations,
+        skip_ids=SKIP_BUBBLE_IDS,
+        pad=QUAD_PAD
    )

-    print("✍️  Rendering translated text...")
-    render_translations(
-        image_bgr    = clean_image,
-        bubble_boxes = bubble_boxes,
-        translations = translations,
-        skip_ids     = SKIP_BUBBLE_IDS,
-        font_path    = font_path,
-        font_size    = FONT_SIZE,
-        bold_outline = True,
-        auto_color   = True,
-        output_path  = OUTPUT_PATH
+    print("\n--- Step 2: Rendering translated text ---")
+    final_bgr = render_text(
+        image_bgr=erased_bgr,
+        bubbles_data=bubbles_data,
+        translations=translations,
+        font_path=font_path,
+        skip_ids=SKIP_BUBBLE_IDS
    )

+    print(f"\nSaving final image to: {OUTPUT_PATH}")
+    cv2.imwrite(OUTPUT_PATH, final_bgr)
+    print("✅ Done!")

 if __name__ == "__main__":
-    main()
+    main()
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -301,6 +301,54 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
    return None


+def split_bubble_if_multiple_rows(indices, ocr, bid=None):
+    """
+    Splits a bubble horizontally (top/bottom) if there is a massive vertical gap
+    between text lines, indicating two separate bubbles were merged.
+    """
+    if len(indices) < 2:
+        return None
+        
+    boxes = [quad_bbox(ocr[i][0]) for i in indices]
+    # Sort by Y-coordinate (top to bottom)
+    sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1])
+    
+    gaps = []
+    current_max_y = sorted_items[0][1][3]
+    
+    for i in range(1, len(sorted_items)):
+        idx, b = sorted_items[i]
+        y1 = b[1]
+        gap = y1 - current_max_y
+        gaps.append((i, gap, current_max_y, y1))
+        current_max_y = max(current_max_y, b[3])
+    
+    if not gaps:
+        return None
+    
+    # Find the largest vertical gap
+    max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
+    
+    # Calculate median line height to define what a "large" gap is
+    hs = [b[3] - b[1] for b in boxes]
+    med_h = float(np.median(hs)) if hs else 15.0
+    
+    # Split only when the largest gap exceeds both 2.5x the median line height and min_gap.
+    threshold = med_h * 2.5
+    min_gap = 40.0 # Absolute minimum pixel gap to prevent micro-splits
+    
+    if max_gap_size > threshold and max_gap_size > min_gap:
+        split_idx = max_gap_idx
+        top_indices = [item[0] for item in sorted_items[:split_idx]]
+        bottom_indices = [item[0] for item in sorted_items[split_idx:]]
+        
+        # Both halves must be non-empty (a single-quad half is still accepted)
+        if len(top_indices) >= 1 and len(bottom_indices) >= 1:
+            return top_indices, bottom_indices
+    
+    return None
+
+
 # ============================================================
 # OCR ENGINES (Apple Native Vision)
 # ============================================================
@@ -886,6 +934,7 @@ def translate_manga_text(
        box = bubble_boxes[bid]
        bubble_split = None
        
+        # 1. Panel border split
        split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
        if split_result:
            box_left, box_right, split_x = split_result
@@ -909,27 +958,37 @@ def translate_manga_text(
                        bubble_split = (left_idxs, right_idxs)
                        splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
        
+        # 2. Check for vertical columns (left/right split)
        if bubble_split is None:
            col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
            if col_split:
                left_idxs, right_idxs = col_split
                if left_idxs and right_idxs:
                    bubble_split = (left_idxs, right_idxs)
-                    splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
+                    splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(left_idxs)} | {len(right_idxs)} quads)")
+        
+        # 3. Check for horizontal rows (top/bottom split)
+        if bubble_split is None:
+            row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
+            if row_split:
+                top_idxs, bottom_idxs = row_split
+                if top_idxs and bottom_idxs:
+                    bubble_split = (top_idxs, bottom_idxs)
+                    splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(top_idxs)} | {len(bottom_idxs)} quads)")
        
        if bubble_split:
-            left_idxs, right_idxs = bubble_split
-            new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
-            ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
-            new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
-            new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
-            new_bubble_indices[bid] = left_idxs
+            part1_idxs, part2_idxs = bubble_split
+            new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered)
+            ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs])
+            new_bubble_boxes[bid] = (max(0, ub_1[0]-3), max(0, ub_1[1]-3), min(iw-1, ub_1[2]+3), min(ih-1, ub_1[3]+3))
+            new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs]
+            new_bubble_indices[bid] = part1_idxs
            
-            new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
-            ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
-            new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
-            new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs]
-            new_bubble_indices[next_bid] = right_idxs
+            new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered)
+            ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs])
+            new_bubble_boxes[next_bid] = (max(0, ub_2[0]-3), max(0, ub_2[1]-3), min(iw-1, ub_2[2]+3), min(ih-1, ub_2[3]+3))
+            new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs]
+            new_bubble_indices[next_bid] = part2_idxs
            next_bid += 1
        else:
            new_bubbles[bid] = bubbles[bid]
@@ -938,7 +997,7 @@ def translate_manga_text(
            new_bubble_indices[bid] = bubble_indices[bid]
    
    if splits_performed:
-        print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
+        print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}")
        for split_info in splits_performed:
            print(f"   ✓ Split {split_info}")
            
@@ -1049,8 +1108,8 @@ def translate_manga_text(

 if __name__ == "__main__":
    translate_manga_text(
-        image_path="004.png",
-        source_lang="en",
+        image_path="003.jpg",
+        source_lang="es",
        target_lang="ca",
        confidence_threshold=0.05,
        min_text_length=1,