diff --git a/manga-renderer.py b/manga-renderer.py index 98cebaf..f108d9f 100644 --- a/manga-renderer.py +++ b/manga-renderer.py @@ -3,13 +3,16 @@ """ manga-renderer.py -Inputs: 001.jpg + bubbles.json + output.txt -Output: translated_page.png +Inputs: 001.jpg + bubbles.json + output_001.txt +Output: translated_page_001.png Strategy: - 1. For every bubble, white-fill all its OCR quads (erases original text cleanly) - 2. Render the translated text centered inside the bubble bounding box - 3. Bubbles in SKIP_BUBBLE_IDS are erased but NOT re-rendered (left blank) + 1. For every bubble, white-fill all its OCR quads (erases original text cleanly). + 2. Detect the original font size from the OCR bounding boxes. + 3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions. + 4. Render the translated text centered inside the bubble bounding box. + 5. Uses uniform line heights to prevent accent collisions. + 6. Adds a white stroke to the text to cover any residual original characters. """ import json @@ -17,43 +20,40 @@ import textwrap import cv2 import numpy as np from PIL import Image, ImageDraw, ImageFont -from typing import Dict, List, Tuple, Optional, Set +from typing import Dict, List, Tuple, Optional, Set, Any # ============================================================ # CONFIG — edit these paths to match your setup # ============================================================ -IMAGE_PATH = "004.png" +IMAGE_PATH = "003.jpg" BUBBLES_PATH = "bubbles.json" -TRANSLATIONS_PATH = "output_004.txt" -OUTPUT_PATH = "translated_page_004.png" +TRANSLATIONS_PATH = "output_003.txt" +OUTPUT_PATH = "translated_page_003.png" # Font candidates — first one that loads wins FONT_CANDIDATES = [ "fonts/ComicNeue-Bold.ttf", + # Mac fallbacks + "/System/Library/Fonts/Supplemental/Comic Sans MS Bold.ttf", + "/System/Library/Fonts/Supplemental/Arial Bold.ttf", + # Windows fallbacks + "C:\\Windows\\Fonts\\comicbd.ttf", + "C:\\Windows\\Fonts\\arialbd.ttf", + # Linux fallbacks + "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", ] -FONT_SIZE = 24 -MIN_FONT_SIZE = 18 -QUAD_PAD = 4 # extra pixels added around each quad before white-fill +DEFAULT_FONT_SIZE = 24 +MIN_FONT_SIZE = 12 +QUAD_PAD = 4 # extra pixels added around each quad before white-fill # ============================================================ # SKIP LIST -# ── Add any bubble IDs you do NOT want rendered here. -# ── The quads will still be erased (white-filled) but no -# ── translated text will be drawn inside them. -# ── -# ── Examples of why you'd skip a bubble: -# ── • Sound effects (BURP, BAM, POW …) -# ── • Untranslatable single characters -# ── • Bubbles with bad OCR you want to fix manually later -# ── • Narrator boxes you want to leave in the source language # ============================================================ SKIP_BUBBLE_IDS: Set[int] = { - # 8, # BURP BURP — sound effect - # 2, # example: bad OCR, fix manually + # Add any bubble IDs you do NOT want rendered here. } - # ============================================================ # FONT LOADER # ============================================================ @@ -69,20 +69,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]: continue return None - -def resolve_font() -> Tuple[str, ImageFont.FreeTypeFont]: - """Return (path, font) for the first working candidate.""" +def resolve_font_path() -> str: + """Return the path for the first working candidate.""" for candidate in FONT_CANDIDATES: - font = load_font(candidate, FONT_SIZE) - if font is not None: + if load_font(candidate, DEFAULT_FONT_SIZE) is not None: print(f" ✅ Font: {candidate}") - return candidate, font + return candidate print(" ⚠️ No TrueType font found — using Pillow bitmap fallback") - return "", ImageFont.load_default() - + return "" # ============================================================ -# PARSE output.txt → {bid: translated_string} +# PARSERS # ============================================================ def parse_translations(filepath: str) -> Dict[int, str]: """ @@ -107,41 +104,21 @@ def parse_translations(filepath: str) -> Dict[int, str]: continue return translations - -# ============================================================ -# PARSE bubbles.json → bubble_boxes, quads_per_bubble -# ============================================================ def parse_bubbles(filepath: str): """ - Returns: - bubble_boxes : {bid: (x1, y1, x2, y2)} - quads_per_bubble : {bid: [ [[x,y],[x,y],[x,y],[x,y]], ... ]} + Returns the full JSON data. """ with open(filepath, "r", encoding="utf-8") as f: data = json.load(f) - - bubble_boxes = {} - quads_per_bubble = {} - - for key, val in data.items(): - bid = int(key) - - x1 = val["x"]; y1 = val["y"] - x2 = x1 + val["w"]; y2 = y1 + val["h"] - bubble_boxes[bid] = (x1, y1, x2, y2) - - quads_per_bubble[bid] = val.get("quads", []) - - return bubble_boxes, quads_per_bubble - + return data # ============================================================ # ERASE — white-fill every OCR quad (with small padding) # ============================================================ def erase_quads( image_bgr, - quads_per_bubble: Dict[int, List], - translations: Dict[int, str], # ← NEW: only erase what we'll render + bubbles_data: Dict[str, dict], + translations: Dict[int, str], skip_ids: Set[int], pad: int = QUAD_PAD ): @@ -149,7 +126,6 @@ def erase_quads( White-fills OCR quads ONLY for bubbles that: - have a translation in output.txt AND - are NOT in SKIP_BUBBLE_IDS - Everything else is left completely untouched. """ ih, iw = image_bgr.shape[:2] result = image_bgr.copy() @@ -157,15 +133,11 @@ def erase_quads( erased_count = 0 skipped_count = 0 - for bid, quads in quads_per_bubble.items(): + for bid_str, val in bubbles_data.items(): + bid = int(bid_str) + quads = val.get("quads", []) - # ignore if explicitly skipped - if bid in skip_ids: - skipped_count += 1 - continue - - # ignore if no translation exists (deleted from output.txt) - if bid not in translations: + if bid in skip_ids or bid not in translations: skipped_count += 1 continue @@ -186,227 +158,206 @@ def erase_quads( print(f" Ignored: {skipped_count} bubbles (no translation or in skip list)") return result - # ============================================================ -# FONT SIZING + TEXT WRAP +# DYNAMIC TEXT FITTING # ============================================================ -def fit_text( - text: str, - box_w: int, - box_h: int, - font_path: str, - max_size: int = FONT_SIZE, - min_size: int = MIN_FONT_SIZE -) -> Tuple[int, ImageFont.FreeTypeFont, List[str]]: +def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int: + """Calculates the original font size based on the OCR bounding boxes.""" + line_bboxes = bubble_data.get("line_bboxes", []) + if not line_bboxes: + return fallback_size + + heights = [box["h"] for box in line_bboxes] + median_h = int(np.median(heights)) + + estimated_size = int(median_h * 0.85) + return max(MIN_FONT_SIZE, min(estimated_size, 60)) + +def fit_text_dynamically( + text: str, + font_path: str, + max_w: int, + max_h: int, + target_font_size: int +) -> Tuple[List[str], Any, int, int]: """ - Returns (fitted_size, font, wrapped_lines) — largest size where - the text block fits inside box_w × box_h. + Wraps text and scales down font size if it exceeds the bubble dimensions. + Returns: (wrapped_lines, font_object, line_spacing, final_font_size) """ - for size in range(max_size, min_size - 1, -1): - font = load_font(font_path, size) if font_path else None - if font is None: - return min_size, ImageFont.load_default(), [text] - - chars_per_line = max(1, int(box_w / (size * 0.62))) - wrapped = textwrap.fill(text, width=chars_per_line) - lines = wrapped.split("\n") - total_h = (size + 8) * len(lines) - - if total_h <= box_h - 8: - return size, font, lines - - # Nothing fit — use minimum size - font = load_font(font_path, min_size) if font_path else None - if font is None: + font_size = target_font_size + + if not font_path: font = ImageFont.load_default() - chars_per_line = max(1, int(box_w / (min_size * 0.62))) - lines = textwrap.fill(text, width=chars_per_line).split("\n") - return min_size, font, lines - - -# ============================================================ -# COLOR HELPERS -# ============================================================ -def sample_bg_color( - image_bgr, - x1: int, y1: int, - x2: int, y2: int -) -> Tuple[int, int, int]: - """Sample four corners of a bubble to estimate background color (R, G, B).""" - ih, iw = image_bgr.shape[:2] - samples = [] - for sx, sy in [(x1+4, y1+4), (x2-4, y1+4), (x1+4, y2-4), (x2-4, y2-4)]: - sx = max(0, min(iw-1, sx)); sy = max(0, min(ih-1, sy)) - b, g, r = image_bgr[sy, sx] - samples.append((int(r), int(g), int(b))) - return ( - int(np.median([s[0] for s in samples])), - int(np.median([s[1] for s in samples])), - int(np.median([s[2] for s in samples])), - ) - - -def pick_fg_color(bg: Tuple[int, int, int]) -> Tuple[int, int, int]: - lum = 0.299 * bg[0] + 0.587 * bg[1] + 0.114 * bg[2] - return (0, 0, 0) if lum > 128 else (255, 255, 255) - - -def safe_textbbox( - draw, pos, text, font -) -> Tuple[int, int, int, int]: - try: - return draw.textbbox(pos, text, font=font) - except Exception: - size = getattr(font, "size", 12) - return ( - pos[0], pos[1], - pos[0] + int(len(text) * size * 0.6), - pos[1] + int(size * 1.2) - ) - + char_w = 6 + chars_per_line = max(1, int(max_w / char_w)) + wrapped_lines = textwrap.wrap(text, width=chars_per_line) + return wrapped_lines, font, 4, 10 + + while font_size >= MIN_FONT_SIZE: + font = load_font(font_path, font_size) + if font is None: + font = ImageFont.load_default() + return [text], font, 4, 10 + + char_bbox = font.getbbox("A") + char_w = (char_bbox[2] - char_bbox[0]) or 10 + chars_per_line = max(1, int((max_w * 0.95) / char_w)) + + wrapped_lines = textwrap.wrap(text, width=chars_per_line) + + # Use uniform font metrics for height instead of per-line bounding boxes + line_spacing = max(2, int(font_size * 0.15)) + if hasattr(font, 'getmetrics'): + ascent, descent = font.getmetrics() + line_h = ascent + descent + else: + line_h = font_size + + total_h = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1)) + + max_line_w = 0 + for line in wrapped_lines: + bbox = font.getbbox(line) + lw = bbox[2] - bbox[0] + max_line_w = max(max_line_w, lw) + + if max_line_w <= max_w and total_h <= max_h: + return wrapped_lines, font, line_spacing, font_size + + font_size -= 2 + + font = load_font(font_path, MIN_FONT_SIZE) or ImageFont.load_default() + char_bbox = font.getbbox("A") if hasattr(font, 'getbbox') else (0,0,6,10) + char_w = (char_bbox[2] - char_bbox[0]) or 6 + chars_per_line = max(1, int(max_w / char_w)) + wrapped_lines = textwrap.wrap(text, width=chars_per_line) + + return wrapped_lines, font, max(2, int(MIN_FONT_SIZE * 0.15)), MIN_FONT_SIZE # ============================================================ # RENDER # ============================================================ -def render_translations( +def render_text( image_bgr, - bubble_boxes: Dict[int, Tuple], + bubbles_data: Dict[str, dict], translations: Dict[int, str], - skip_ids: Set[int], font_path: str, - font_size: int = FONT_SIZE, - bold_outline: bool = True, - auto_color: bool = True, - output_path: str = OUTPUT_PATH + skip_ids: Set[int] ): + """ + Draws the translated text centered in the line_union_bbox of each bubble. + Adds a white stroke (outline) to cover any residual original characters. + """ image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) - pil_img = Image.fromarray(image_rgb) - draw = ImageDraw.Draw(pil_img) + pil_img = Image.fromarray(image_rgb) + draw = ImageDraw.Draw(pil_img) - rendered = 0 - skipped = 0 - missing = 0 + rendered_count = 0 - for bid, (x1, y1, x2, y2) in sorted(bubble_boxes.items()): + for bid_str, val in bubbles_data.items(): + bid = int(bid_str) - # ── skip list check ──────────────────────────────────────── - if bid in skip_ids: - print(f" ⏭️ Bubble #{bid:<3} — skipped (in SKIP_BUBBLE_IDS)") - skipped += 1 + if bid in skip_ids or bid not in translations: continue - text = translations.get(bid, "").strip() - if not text: - print(f" ⚠️ Bubble #{bid:<3} — no translation found, left blank") - missing += 1 - continue - - box_w = x2 - x1 - box_h = y2 - y1 - if box_w < 10 or box_h < 10: - continue - - # ── fit font + wrap ──────────────────────────────────────── - size, font, lines = fit_text( - text, box_w, box_h, font_path, max_size=font_size - ) - - # ── colors ───────────────────────────────────────────────── - if auto_color: - bg = sample_bg_color(image_bgr, x1, y1, x2, y2) - fg = pick_fg_color(bg) - ol = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0) + text = translations[bid] + + union_box = val.get("line_union_bbox") + if not union_box: + union_box = val.get("text_bbox") + if not union_box: + continue + + bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"] + + pad_x = int(bw * 0.1) + pad_y = int(bh * 0.1) + bx -= pad_x // 2 + by -= pad_y // 2 + bw += pad_x + bh += pad_y + + target_size = get_original_font_size(val) + wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size) + + # Use uniform typographic line height for rendering + if hasattr(font, 'getmetrics'): + ascent, descent = font.getmetrics() + line_h = ascent + descent else: - fg, ol = (0, 0, 0), (255, 255, 255) + line_h = final_size + + total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1)) + + current_y = by + (bh - total_text_height) // 2 + outline_thickness = max(2, int(final_size * 0.10)) - # ── vertical center ──────────────────────────────────────── - line_h = size + 8 - total_h = line_h * len(lines) - y_cur = y1 + max(4, (box_h - total_h) // 2) + for i, line in enumerate(wrapped_lines): + if hasattr(font, 'getbbox'): + bbox = font.getbbox(line) + lw = bbox[2] - bbox[0] + else: + lw = len(line) * 6 + + current_x = bx + (bw - lw) // 2 + + draw.text( + (current_x, current_y), + line, + fill=(0, 0, 0), + font=font, + stroke_width=outline_thickness, + stroke_fill=(255, 255, 255) + ) + + # Advance Y by the uniform line height + spacing + current_y += line_h + line_spacing - for line in lines: - bb = safe_textbbox(draw, (0, 0), line, font) - line_w = bb[2] - bb[0] - x_cur = x1 + max(2, (box_w - line_w) // 2) - - if bold_outline: - for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]: - try: - draw.text((x_cur + dx, y_cur + dy), line, font=font, fill=ol) - except Exception: - pass - - try: - draw.text((x_cur, y_cur), line, font=font, fill=fg) - except Exception as e: - print(f" ❌ Draw error bubble #{bid}: {e}") - - y_cur += line_h - - print(f" ✅ Bubble #{bid:<3} — rendered ({len(lines)} lines, size {size}px)") - rendered += 1 - - pil_img.save(output_path) - - print() - print(f"{'─'*50}") - print(f" Rendered : {rendered}") - print(f" Skipped : {skipped} (SKIP_BUBBLE_IDS)") - print(f" No text : {missing} (not in output.txt)") - print(f"{'─'*50}") - print(f"✅ Saved → {output_path}") - - return pil_img + rendered_count += 1 + print(f" Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)") + return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) # ============================================================ # MAIN # ============================================================ def main(): - print(f"📖 Loading image : {IMAGE_PATH}") - image = cv2.imread(IMAGE_PATH) - if image is None: - print(f"❌ Cannot load: {IMAGE_PATH}"); return + print(f"Loading image: {IMAGE_PATH}") + image_bgr = cv2.imread(IMAGE_PATH) + if image_bgr is None: + print(f"❌ Error: Could not load {IMAGE_PATH}") + return - print(f"📦 Loading bubbles : {BUBBLES_PATH}") - bubble_boxes, quads_per_bubble = parse_bubbles(BUBBLES_PATH) - print(f" {len(bubble_boxes)} bubbles | " - f"{sum(len(v) for v in quads_per_bubble.values())} quads total") - - print(f"🌐 Loading translations : {TRANSLATIONS_PATH}") + print(f"Loading translations: {TRANSLATIONS_PATH}") translations = parse_translations(TRANSLATIONS_PATH) - print(f" {len(translations)} translations found") + + print(f"Loading bubble data: {BUBBLES_PATH}") + bubbles_data = parse_bubbles(BUBBLES_PATH) - if SKIP_BUBBLE_IDS: - print(f"⏭️ Skip list : bubbles {sorted(SKIP_BUBBLE_IDS)}") - else: - print(f"⏭️ Skip list : (empty — all bubbles will be rendered)") + print("Resolving font...") + font_path = resolve_font_path() - print("🔤 Resolving font...") - font_path, _ = resolve_font() - - print(f"🧹 Erasing original text (quad fill + pad={QUAD_PAD}px)...") - clean_image = erase_quads( - image, - quads_per_bubble, - translations = translations, # ← pass translations here - skip_ids = SKIP_BUBBLE_IDS, - pad = QUAD_PAD + print("\n--- Step 1: Erasing original text ---") + erased_bgr = erase_quads( + image_bgr=image_bgr, + bubbles_data=bubbles_data, + translations=translations, + skip_ids=SKIP_BUBBLE_IDS, + pad=QUAD_PAD ) - print("✍️ Rendering translated text...") - render_translations( - image_bgr = clean_image, - bubble_boxes = bubble_boxes, - translations = translations, - skip_ids = SKIP_BUBBLE_IDS, - font_path = font_path, - font_size = FONT_SIZE, - bold_outline = True, - auto_color = True, - output_path = OUTPUT_PATH + print("\n--- Step 2: Rendering translated text ---") + final_bgr = render_text( + image_bgr=erased_bgr, + bubbles_data=bubbles_data, + translations=translations, + font_path=font_path, + skip_ids=SKIP_BUBBLE_IDS ) + print(f"\nSaving final image to: {OUTPUT_PATH}") + cv2.imwrite(OUTPUT_PATH, final_bgr) + print("✅ Done!") if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/manga-translator.py b/manga-translator.py index 3997112..684c7c3 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -301,6 +301,54 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre return None +def split_bubble_if_multiple_rows(indices, ocr, bid=None): + """ + Splits a bubble horizontally (top/bottom) if there is a massive vertical gap + between text lines, indicating two separate bubbles were merged. + """ + if len(indices) < 2: + return None + + boxes = [quad_bbox(ocr[i][0]) for i in indices] + # Sort by Y-coordinate (top to bottom) + sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1]) + + gaps = [] + current_max_y = sorted_items[0][1][3] + + for i in range(1, len(sorted_items)): + idx, b = sorted_items[i] + y1 = b[1] + gap = y1 - current_max_y + gaps.append((i, gap, current_max_y, y1)) + current_max_y = max(current_max_y, b[3]) + + if not gaps: + return None + + # Find the largest vertical gap + max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1]) + + # Calculate median line height to define what a "large" gap is + hs = [b[3] - b[1] for b in boxes] + med_h = float(np.median(hs)) if hs else 15.0 + + # If the vertical gap is more than 2.5x the height of a text line, it's a split! + threshold = med_h * 2.5 + min_gap = 40.0 # Absolute minimum pixel gap to prevent micro-splits + + if max_gap_size > threshold and max_gap_size > min_gap: + split_idx = max_gap_idx + top_indices = [item[0] for item in sorted_items[:split_idx]] + bottom_indices = [item[0] for item in sorted_items[split_idx:]] + + # Ensure we don't just split off a single noise character + if len(top_indices) >= 1 and len(bottom_indices) >= 1: + return top_indices, bottom_indices + + return None + + # ============================================================ # OCR ENGINES (Apple Native Vision) # ============================================================ @@ -886,6 +934,7 @@ def translate_manga_text( box = bubble_boxes[bid] bubble_split = None + # 1. Panel border split split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) if split_result: box_left, box_right, split_x = split_result @@ -909,27 +958,37 @@ def translate_manga_text( bubble_split = (left_idxs, right_idxs) splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") + # 2. Check for vertical columns (left/right split) if bubble_split is None: col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid) if col_split: left_idxs, right_idxs = col_split if left_idxs and right_idxs: bubble_split = (left_idxs, right_idxs) - splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") + splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(left_idxs)} | {len(right_idxs)} quads)") + + # 3. Check for horizontal rows (top/bottom split) + if bubble_split is None: + row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid) + if row_split: + top_idxs, bottom_idxs = row_split + if top_idxs and bottom_idxs: + bubble_split = (top_idxs, bottom_idxs) + splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(top_idxs)} | {len(bottom_idxs)} quads)") if bubble_split: - left_idxs, right_idxs = bubble_split - new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered) - ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs]) - new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3)) - new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs] - new_bubble_indices[bid] = left_idxs + part1_idxs, part2_idxs = bubble_split + new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered) + ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs]) + new_bubble_boxes[bid] = (max(0, ub_1[0]-3), max(0, ub_1[1]-3), min(iw-1, ub_1[2]+3), min(ih-1, ub_1[3]+3)) + new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs] + new_bubble_indices[bid] = part1_idxs - new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered) - ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs]) - new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3)) - new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs] - new_bubble_indices[next_bid] = right_idxs + new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered) + ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs]) + new_bubble_boxes[next_bid] = (max(0, ub_2[0]-3), max(0, ub_2[1]-3), min(iw-1, ub_2[2]+3), min(ih-1, ub_2[3]+3)) + new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs] + new_bubble_indices[next_bid] = part2_idxs next_bid += 1 else: new_bubbles[bid] = bubbles[bid] @@ -938,7 +997,7 @@ def translate_manga_text( new_bubble_indices[bid] = bubble_indices[bid] if splits_performed: - print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}") + print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}") for split_info in splits_performed: print(f" ✓ Split {split_info}") @@ -1049,8 +1108,8 @@ def translate_manga_text( if __name__ == "__main__": translate_manga_text( - image_path="004.png", - source_lang="en", + image_path="003.jpg", + source_lang="es", target_lang="ca", confidence_threshold=0.05, min_text_length=1,