diff --git a/bubble-detection.jpg b/bubble-detection.jpg deleted file mode 100755 index 0c9637c..0000000 Binary files a/bubble-detection.jpg and /dev/null differ diff --git a/bubbles.json b/bubbles.json deleted file mode 100644 index e6adfd7..0000000 --- a/bubbles.json +++ /dev/null @@ -1,1490 +0,0 @@ -{ - "1": { - "x": 57, - "y": 106, - "w": 135, - "h": 115, - "x_tight": 68, - "y_tight": 115, - "w_tight": 113, - "h_tight": 97, - "quad_bboxes": [ - { - "x": 100, - "y": 116, - "w": 50, - "h": 24 - }, - { - "x": 80, - "y": 138, - "w": 88, - "h": 24 - }, - { - "x": 92, - "y": 160, - "w": 66, - "h": 26 - }, - { - "x": 69, - "y": 179, - "w": 111, - "h": 32 - } - ], - "quads": [ - [ - [ - 100, - 116 - ], - [ - 150, - 116 - ], - [ - 150, - 140 - ], - [ - 100, - 140 - ] - ], - [ - [ - 80, - 138 - ], - [ - 168, - 138 - ], - [ - 168, - 162 - ], - [ - 80, - 162 - ] - ], - [ - [ - 92, - 160 - ], - [ - 158, - 160 - ], - [ - 158, - 186 - ], - [ - 92, - 186 - ] - ], - [ - [ - 69, - 179 - ], - [ - 180, - 179 - ], - [ - 180, - 211 - ], - [ - 69, - 211 - ] - ] - ] - }, - "2": { - "x": 164, - "y": 0, - "w": 210, - "h": 127, - "x_tight": 181, - "y_tight": 9, - "w_tight": 176, - "h_tight": 108, - "quad_bboxes": [ - { - "x": 182, - "y": 10, - "w": 174, - "h": 56 - }, - { - "x": 233, - "y": 66, - "w": 56, - "h": 26 - }, - { - "x": 214, - "y": 92, - "w": 54, - "h": 24 - } - ], - "quads": [ - [ - [ - 182, - 10 - ], - [ - 356, - 10 - ], - [ - 356, - 66 - ], - [ - 182, - 66 - ] - ], - [ - [ - 233, - 66 - ], - [ - 289, - 66 - ], - [ - 289, - 92 - ], - [ - 233, - 92 - ] - ], - [ - [ - 214, - 92 - ], - [ - 268, - 92 - ], - [ - 268, - 116 - ], - [ - 214, - 116 - ] - ] - ] - }, - "3": { - "x": 540, - "y": 90, - "w": 116, - "h": 112, - "x_tight": 549, - "y_tight": 99, - "w_tight": 98, - "h_tight": 94, - "quad_bboxes": [ - { - "x": 558, - "y": 100, - "w": 88, - "h": 24 - }, - { - "x": 550, - "y": 122, - "w": 70, - "h": 24 - }, - { - "x": 558, - "y": 144, - "w": 88, - "h": 26 - }, - { - "x": 550, - "y": 168, - "w": 70, - "h": 24 - } - ], - "quads": [ - [ - [ - 558, - 100 - ], - [ - 646, - 100 - ], - [ - 646, - 124 - ], - [ - 558, - 124 - ] - ], - [ - [ - 550, - 122 - ], - [ - 620, - 122 - ], - [ - 620, - 146 - ], - [ - 550, - 146 - ] - ], - [ - [ - 558, - 144 - ], - [ - 646, - 144 - ], - [ - 646, - 170 - ], - [ - 558, - 170 - ] - ], - [ - [ - 550, - 168 - ], - [ - 620, - 168 - ], - [ - 620, - 192 - ], - [ - 550, - 192 - ] - ] - ] - }, - "4": { - "x": 251, - "y": 377, - "w": 134, - "h": 138, - "x_tight": 262, - "y_tight": 388, - "w_tight": 112, - "h_tight": 116, - "quad_bboxes": [ - { - "x": 274, - "y": 389, - "w": 86, - "h": 20 - }, - { - "x": 263, - "y": 407, - "w": 110, - "h": 20 - }, - { - "x": 279, - "y": 445, - "w": 78, - "h": 20 - }, - { - "x": 267, - "y": 465, - "w": 102, - "h": 20 - }, - { - "x": 279, - "y": 483, - "w": 76, - "h": 20 - } - ], - "quads": [ - [ - [ - 274, - 389 - ], - [ - 360, - 389 - ], - [ - 360, - 409 - ], - [ - 274, - 409 - ] - ], - [ - [ - 263, - 407 - ], - [ - 373, - 407 - ], - [ - 373, - 427 - ], - [ - 263, - 427 - ] - ], - [ - [ - 279, - 445 - ], - [ - 357, - 445 - ], - [ - 357, - 465 - ], - [ - 279, - 465 - ] - ], - [ - [ - 267, - 465 - ], - [ - 369, - 465 - ], - [ - 369, - 485 - ], - [ - 267, - 485 - ] - ], - [ - [ - 279, - 483 - ], - [ - 355, - 483 - ], - [ - 355, - 503 - ], - [ - 279, - 503 - ] - ] - ] - }, - "5": { - "x": 522, - "y": 468, - "w": 150, - "h": 98, - "x_tight": 534, - "y_tight": 476, - "w_tight": 126, - "h_tight": 82, - "quad_bboxes": [ - { - "x": 549, - "y": 477, - "w": 95, - "h": 26 - }, - { - "x": 546, - "y": 516, - "w": 104, - "h": 24 - }, - { - "x": 535, - "y": 537, - "w": 124, - "h": 20 - } - ], - "quads": [ - [ - [ - 549, - 477 - ], - [ - 644, - 477 - ], - [ - 644, - 503 - ], - [ - 549, - 503 - ] - ], - [ - [ - 546, - 516 - ], - [ - 650, - 516 - ], - [ - 650, - 540 - ], - [ - 546, - 540 - ] - ], - [ - [ - 535, - 537 - ], - [ - 659, - 537 - ], - [ - 659, - 557 - ], - [ - 535, - 557 - ] - ] - ] - }, - "6": { - "x": 44, - "y": 607, - "w": 96, - "h": 108, - "x_tight": 52, - "y_tight": 616, - "w_tight": 80, - "h_tight": 90, - "quad_bboxes": [ - { - "x": 79, - "y": 617, - "w": 48, - "h": 20 - }, - { - "x": 75, - "y": 635, - "w": 56, - "h": 20 - }, - { - "x": 53, - "y": 669, - "w": 68, - "h": 16 - }, - { - "x": 65, - "y": 687, - "w": 46, - "h": 18 - } - ], - "quads": [ - [ - [ - 79, - 617 - ], - [ - 127, - 617 - ], - [ - 127, - 637 - ], - [ - 79, - 637 - ] - ], - [ - [ - 75, - 635 - ], - [ - 131, - 635 - ], - [ - 131, - 655 - ], - [ - 75, - 655 - ] - ], - [ - [ - 53, - 669 - ], - [ - 121, - 669 - ], - [ - 121, - 685 - ], - [ - 53, - 685 - ] - ], - [ - [ - 65, - 687 - ], - [ - 111, - 687 - ], - [ - 111, - 705 - ], - [ - 65, - 705 - ] - ] - ] - }, - "7": { - "x": 72, - "y": 826, - "w": 78, - "h": 48, - "x_tight": 78, - "y_tight": 830, - "w_tight": 66, - "h_tight": 40, - "quad_bboxes": [ - { - "x": 79, - "y": 831, - "w": 64, - "h": 20 - }, - { - "x": 79, - "y": 849, - "w": 62, - "h": 20 - } - ], - "quads": [ - [ - [ - 79, - 831 - ], - [ - 143, - 831 - ], - [ - 143, - 851 - ], - [ - 79, - 851 - ] - ], - [ - [ - 79, - 849 - ], - [ - 141, - 849 - ], - [ - 141, - 869 - ], - [ - 79, - 869 - ] - ] - ] - }, - "8": { - "x": 192, - "y": 796, - "w": 102, - "h": 126, - "x_tight": 200, - "y_tight": 806, - "w_tight": 86, - "h_tight": 106, - "quad_bboxes": [ - { - "x": 205, - "y": 807, - "w": 76, - "h": 18 - }, - { - "x": 211, - "y": 843, - "w": 64, - "h": 16 - }, - { - "x": 209, - "y": 857, - "w": 68, - "h": 20 - }, - { - "x": 201, - "y": 875, - "w": 84, - "h": 18 - }, - { - "x": 203, - "y": 893, - "w": 70, - "h": 18 - } - ], - "quads": [ - [ - [ - 205, - 807 - ], - [ - 281, - 807 - ], - [ - 281, - 825 - ], - [ - 205, - 825 - ] - ], - [ - [ - 211, - 843 - ], - [ - 275, - 843 - ], - [ - 275, - 859 - ], - [ - 211, - 859 - ] - ], - [ - [ - 209, - 857 - ], - [ - 277, - 857 - ], - [ - 277, - 877 - ], - [ - 209, - 877 - ] - ], - [ - [ - 201, - 875 - ], - [ - 285, - 875 - ], - [ - 285, - 893 - ], - [ - 201, - 893 - ] - ], - [ - [ - 203, - 893 - ], - [ - 273, - 893 - ], - [ - 273, - 911 - ], - [ - 203, - 911 - ] - ] - ] - }, - "9": { - "x": 394, - "y": 817, - "w": 122, - "h": 88, - "x_tight": 404, - "y_tight": 824, - "w_tight": 102, - "h_tight": 74, - "quad_bboxes": [ - { - "x": 405, - "y": 825, - "w": 100, - "h": 20 - }, - { - "x": 419, - "y": 843, - "w": 74, - "h": 20 - }, - { - "x": 417, - "y": 863, - "w": 78, - "h": 16 - }, - { - "x": 409, - "y": 877, - "w": 94, - "h": 20 - } - ], - "quads": [ - [ - [ - 405, - 825 - ], - [ - 505, - 825 - ], - [ - 505, - 845 - ], - [ - 405, - 845 - ] - ], - [ - [ - 419, - 843 - ], - [ - 493, - 843 - ], - [ - 493, - 863 - ], - [ - 419, - 863 - ] - ], - [ - [ - 417, - 863 - ], - [ - 495, - 863 - ], - [ - 495, - 879 - ], - [ - 417, - 879 - ] - ], - [ - [ - 409, - 877 - ], - [ - 503, - 877 - ], - [ - 503, - 897 - ], - [ - 409, - 897 - ] - ] - ] - }, - "10": { - "x": 537, - "y": 775, - "w": 156, - "h": 206, - "x_tight": 550, - "y_tight": 792, - "w_tight": 130, - "h_tight": 172, - "quad_bboxes": [ - { - "x": 565, - "y": 793, - "w": 100, - "h": 18 - }, - { - "x": 569, - "y": 809, - "w": 92, - "h": 21 - }, - { - "x": 551, - "y": 827, - "w": 128, - "h": 20 - }, - { - "x": 613, - "y": 847, - "w": 48, - "h": 16 - }, - { - "x": 571, - "y": 861, - "w": 88, - "h": 18 - }, - { - "x": 561, - "y": 877, - "w": 106, - "h": 20 - }, - { - "x": 559, - "y": 911, - "w": 110, - "h": 20 - }, - { - "x": 577, - "y": 931, - "w": 74, - "h": 16 - }, - { - "x": 591, - "y": 947, - "w": 48, - "h": 16 - } - ], - "quads": [ - [ - [ - 565, - 793 - ], - [ - 665, - 793 - ], - [ - 665, - 811 - ], - [ - 565, - 811 - ] - ], - [ - [ - 569, - 809 - ], - [ - 661, - 809 - ], - [ - 661, - 830 - ], - [ - 569, - 830 - ] - ], - [ - [ - 551, - 827 - ], - [ - 679, - 827 - ], - [ - 679, - 847 - ], - [ - 551, - 847 - ] - ], - [ - [ - 613, - 847 - ], - [ - 661, - 847 - ], - [ - 661, - 863 - ], - [ - 613, - 863 - ] - ], - [ - [ - 571, - 861 - ], - [ - 659, - 861 - ], - [ - 659, - 879 - ], - [ - 571, - 879 - ] - ], - [ - [ - 561, - 877 - ], - [ - 667, - 877 - ], - [ - 667, - 897 - ], - [ - 561, - 897 - ] - ], - [ - [ - 559, - 911 - ], - [ - 669, - 911 - ], - [ - 669, - 931 - ], - [ - 559, - 931 - ] - ], - [ - [ - 577, - 931 - ], - [ - 651, - 931 - ], - [ - 651, - 947 - ], - [ - 577, - 947 - ] - ], - [ - [ - 591, - 947 - ], - [ - 639, - 947 - ], - [ - 639, - 963 - ], - [ - 591, - 963 - ] - ] - ] - }, - "11": { - "x": 28, - "y": 939, - "w": 144, - "h": 186, - "x_tight": 40, - "y_tight": 954, - "w_tight": 120, - "h_tight": 156, - "quad_bboxes": [ - { - "x": 71, - "y": 955, - "w": 60, - "h": 18 - }, - { - "x": 55, - "y": 971, - "w": 92, - "h": 20 - }, - { - "x": 53, - "y": 987, - "w": 96, - "h": 20 - }, - { - "x": 41, - "y": 1005, - "w": 118, - "h": 20 - }, - { - "x": 53, - "y": 1023, - "w": 94, - "h": 18 - }, - { - "x": 41, - "y": 1039, - "w": 118, - "h": 20 - }, - { - "x": 43, - "y": 1055, - "w": 114, - "h": 20 - }, - { - "x": 57, - "y": 1089, - "w": 84, - "h": 20 - } - ], - "quads": [ - [ - [ - 71, - 955 - ], - [ - 131, - 955 - ], - [ - 131, - 973 - ], - [ - 71, - 973 - ] - ], - [ - [ - 55, - 971 - ], - [ - 147, - 971 - ], - [ - 147, - 991 - ], - [ - 55, - 991 - ] - ], - [ - [ - 53, - 987 - ], - [ - 149, - 987 - ], - [ - 149, - 1007 - ], - [ - 53, - 1007 - ] - ], - [ - [ - 41, - 1005 - ], - [ - 159, - 1005 - ], - [ - 159, - 1025 - ], - [ - 41, - 1025 - ] - ], - [ - [ - 53, - 1023 - ], - [ - 147, - 1023 - ], - [ - 147, - 1041 - ], - [ - 53, - 1041 - ] - ], - [ - [ - 41, - 1039 - ], - [ - 159, - 1039 - ], - [ - 159, - 1059 - ], - [ - 41, - 1059 - ] - ], - [ - [ - 43, - 1055 - ], - [ - 157, - 1055 - ], - [ - 157, - 1075 - ], - [ - 43, - 1075 - ] - ], - [ - [ - 57, - 1089 - ], - [ - 141, - 1089 - ], - [ - 141, - 1109 - ], - [ - 57, - 1109 - ] - ] - ] - }, - "12": { - "x": 326, - "y": 928, - "w": 80, - "h": 102, - "x_tight": 332, - "y_tight": 936, - "w_tight": 68, - "h_tight": 86, - "quad_bboxes": [ - { - "x": 363, - "y": 937, - "w": 36, - "h": 16 - }, - { - "x": 335, - "y": 951, - "w": 60, - "h": 20 - }, - { - "x": 333, - "y": 969, - "w": 66, - "h": 18 - }, - { - "x": 337, - "y": 1005, - "w": 52, - "h": 16 - } - ], - "quads": [ - [ - [ - 363, - 937 - ], - [ - 399, - 937 - ], - [ - 399, - 953 - ], - [ - 363, - 953 - ] - ], - [ - [ - 335, - 951 - ], - [ - 395, - 951 - ], - [ - 395, - 971 - ], - [ - 335, - 971 - ] - ], - [ - [ - 333, - 969 - ], - [ - 399, - 969 - ], - [ - 399, - 987 - ], - [ - 333, - 987 - ] - ], - [ - [ - 337, - 1005 - ], - [ - 389, - 1005 - ], - [ - 389, - 1021 - ], - [ - 337, - 1021 - ] - ] - ] - } -} \ No newline at end of file diff --git a/manga-renderer.py b/manga-renderer.py index 68987f8..860ca62 100644 --- a/manga-renderer.py +++ b/manga-renderer.py @@ -2,19 +2,20 @@ manga-renderer.py ───────────────────────────────────────────────────────────────── Pipeline: - 1. Detect panel boundaries using border-line detection - 2. Split wide panels that contain internal vertical borders - 3. For each bubble: - a. Detect real bubble ellipse via flood-fill + contour - b. Assign bubble to its panel (max overlap) - c. Clip + nudge ellipse to stay inside panel bounds - d. White-fill the clipped rotated ellipse - e. Fit + centre translated text inside safe area + 1. Detect panel boundaries + 2. Assign bubble -> panel + 3. Detect/fallback bubble ellipse + 4. Clean original text region: + - OCR union mask (default) + - Hybrid mask fallback + - Ellipse mode optional + 5. Render translated text with ellipse-aware wrapping """ import os import math import json +import re import cv2 import numpy as np @@ -24,33 +25,44 @@ from PIL import Image, ImageDraw, ImageFont # ───────────────────────────────────────────── # CONSTANTS # ───────────────────────────────────────────── -DEFAULT_FONT_PATH = "fonts/ComicRelief-Regular.ttf" +DEFAULT_FONT_CANDIDATES = [ + "fonts/AnimeAce2_reg.ttf", + "fonts/WildWordsRoman.ttf", + "fonts/ComicRelief-Regular.ttf", + "fonts/NotoSans-Regular.ttf", +] DEFAULT_FONT_COLOR = (0, 0, 0) -WHITE = (255, 255, 255) +DEFAULT_STROKE_COLOR = (255, 255, 255) -MAX_FONT_SIZE = 22 -MIN_FONT_SIZE = 6 +MAX_FONT_SIZE = 20 +MIN_FONT_SIZE = 6 FONT_SIZE_STEP = 1 -TEXT_RATIO = 0.82 +TEXT_RATIO = 0.76 -FLOOD_TOLERANCE = 30 +FLOOD_TOLERANCE = 30 BORDER_SHRINK_PX = 4 MIN_PANEL_AREA_RATIO = 0.02 - -# How far the center can be nudged as a fraction -# of the semi-axis before we resort to shrinking MAX_NUDGE_RATIO = 0.30 -# Debug colors (BGR) -DBG_COLOR_PANEL = (200, 200, 0) -DBG_COLOR_DETECTED = (0, 200, 0) -DBG_COLOR_FILL = (0, 0, 255) -DBG_COLOR_SAFE = (255, 120, 0) -DBG_COLOR_CENTER = (255, 255, 0) -DBG_COLOR_SEED = (255, 0, 255) -DBG_COLOR_LABEL = (80, 80, 200) -DBG_THICKNESS = 2 -DBG_CENTER_R = 5 +# Cleaning mode: +# "ocr_union" -> precise cleanup from OCR quad boxes (recommended) +# "hybrid" -> rounded-rect + inner ellipse +# "ellipse" -> legacy large ellipse fill +CLEAN_MODE = "ocr_union" + +# OCR-union cleaning tuning +OCR_CLEAN_PAD_X = 12 +OCR_CLEAN_PAD_Y = 10 +OCR_CLEAN_MIN_W = 24 +OCR_CLEAN_MIN_H = 24 +OCR_CLEAN_CLOSE_KERNEL = 5 +OCR_CLEAN_DILATE = 1 + +# Hybrid cleanup mask tuning +CLEAN_MASK_RECT_SCALE_W = 1.08 +CLEAN_MASK_RECT_SCALE_H = 1.20 +CLEAN_MASK_ELLIPSE_SCALE = 0.84 +CLEAN_MASK_BLUR = 0 # ───────────────────────────────────────────── @@ -58,23 +70,46 @@ DBG_CENTER_R = 5 # ───────────────────────────────────────────── def parse_translations(translations_file): translations = {} + originals = {} + flags_map = {} + with open(translations_file, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line.startswith("#"): continue parts = line.split("|") - if len(parts) < 3: - continue + # Format: + # #ID|ORDER|ORIGINAL|TRANSLATED|FLAGS + # backward-compatible with older variants try: - bubble_id = int(parts[0].lstrip("#")) - translated = parts[2].strip() - if translated.startswith("["): - continue - translations[bubble_id] = translated - except ValueError: + bubble_id = int(parts[0].lstrip("#")) + except Exception: continue - return translations + + if len(parts) >= 5: + original = parts[2].strip() + translated = parts[3].strip() + flags = parts[4].strip() + elif len(parts) >= 4: + original = parts[2].strip() + translated = parts[3].strip() + flags = "-" + elif len(parts) >= 3: + original = "" + translated = parts[2].strip() + flags = "-" + else: + continue + + if translated.startswith("["): + continue + + translations[bubble_id] = translated + originals[bubble_id] = original + flags_map[bubble_id] = flags + + return translations, originals, flags_map def parse_bubbles(bubbles_file): @@ -83,16 +118,33 @@ def parse_bubbles(bubbles_file): return {int(k): v for k, v in raw.items()} +# ───────────────────────────────────────────── +# HELPERS +# ───────────────────────────────────────────── +def normalize_text(s): + t = s.upper().strip() + t = re.sub(r"[^\w]+", "", t) + return t + + +def is_sfx_like(text): + t = normalize_text(text) + if len(t) <= 8 and re.fullmatch(r"(SHA+|BIP+|BEEP+|HN+|AH+|OH+)", t): + return True + return False + + # ───────────────────────────────────────────── # FONT HELPERS # ───────────────────────────────────────────── -def load_font(font_path, size): - if font_path and os.path.exists(font_path): - try: - return ImageFont.truetype(font_path, size) - except Exception: - pass - return ImageFont.load_default() +def load_font_from_candidates(candidates, size): + for path in candidates: + if path and os.path.exists(path): + try: + return ImageFont.truetype(path, size), path + except Exception: + continue + return ImageFont.load_default(), "PIL_DEFAULT" def measure_text(draw, text, font): @@ -100,8 +152,18 @@ def measure_text(draw, text, font): return bbox[2] - bbox[0], bbox[3] - bbox[1] +def ellipse_line_max_width(y_offset, a, b): + if b <= 0: + return 0 + t = 1.0 - (y_offset * y_offset) / (b * b) + t = max(0.0, t) + return 2.0 * a * math.sqrt(t) + + def wrap_text(draw, text, font, max_width): - words, lines, current = text.split(), [], "" + words = text.split() + lines = [] + current = "" for word in words: test = (current + " " + word).strip() w, _ = measure_text(draw, test, font) @@ -112,91 +174,269 @@ def wrap_text(draw, text, font, max_width): current = word if current: lines.append(current) + if not lines: return [""], 0, 0 - heights, widths = [], [] - for line in lines: - w, h = measure_text(draw, line, font) + + widths, heights = [], [] + for ln in lines: + w, h = measure_text(draw, ln, font) widths.append(w) heights.append(h) - line_gap = max(heights[0] // 5, 2) if heights else 2 - total_height = sum(heights) + line_gap * (len(lines) - 1) - return lines, total_height, max(widths) if widths else 0 + + line_gap = max(heights[0] // 5, 2) if heights else 2 + total_h = sum(heights) + line_gap * (len(lines) - 1) + return lines, total_h, max(widths) if widths else 0 -def best_fit_font(draw, text, font_path, safe_w, safe_h): +def wrap_text_ellipse_aware(draw, text, font, safe_w, safe_h, tall_bubble=False): + target_w = safe_w * (0.85 if tall_bubble else 1.0) + lines, total_h, _ = wrap_text(draw, text, font, target_w) + if not lines: + return lines, total_h + + heights = [] + for ln in lines: + _, h = measure_text(draw, ln, font) + heights.append(h) + + line_gap = max(heights[0] // 5, 2) if heights else 2 + if tall_bubble: + line_gap += 1 + + block_h = sum(heights) + line_gap * (len(lines) - 1) + if block_h > safe_h: + return lines, block_h + + a = target_w / 2.0 + b = safe_h / 2.0 + + words = text.split() + refined = [] + cursor_y = -block_h / 2.0 + current = "" + idx_h = 0 + + for word in words: + h_line = heights[min(idx_h, len(heights) - 1)] if heights else 12 + y_mid = cursor_y + h_line / 2.0 + row_max = ellipse_line_max_width(y_mid, a, b) * 0.95 + row_max = max(20, row_max) + + candidate = (current + " " + word).strip() + w, _ = measure_text(draw, candidate, font) + + if (w <= row_max) or (not current): + current = candidate + else: + refined.append(current) + cursor_y += h_line + line_gap + idx_h += 1 + current = word + + if current: + refined.append(current) + + hs = [] + for ln in refined: + _, h = measure_text(draw, ln, font) + hs.append(h) + + total = sum(hs) + (max(hs[0] // 5, 2) + (1 if tall_bubble else 0)) * (len(refined) - 1) if hs else 0 + return refined, total + + +def best_fit_font(draw, text, font_candidates, safe_w, safe_h, tall_bubble=False): for size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -FONT_SIZE_STEP): - font = load_font(font_path, size) - lines, total_h, max_lw = wrap_text(draw, text, font, safe_w) + font, path_used = load_font_from_candidates(font_candidates, size) + lines, total_h = wrap_text_ellipse_aware(draw, text, font, safe_w, safe_h, tall_bubble=tall_bubble) + + max_lw = 0 + for ln in lines: + lw, _ = measure_text(draw, ln, font) + max_lw = max(max_lw, lw) + if total_h <= safe_h and max_lw <= safe_w: - return font, lines, total_h - font = load_font(font_path, MIN_FONT_SIZE) - lines, total_h, _ = wrap_text(draw, text, font, safe_w) - return font, lines, total_h + return font, lines, total_h, path_used + + font, path_used = load_font_from_candidates(font_candidates, MIN_FONT_SIZE) + lines, total_h = wrap_text_ellipse_aware(draw, text, font, safe_w, safe_h, tall_bubble=tall_bubble) + return font, lines, total_h, path_used + + +def draw_text_with_stroke(draw, pos, text, font, fill, stroke_fill): + x, y = pos + _, h = measure_text(draw, text, font) + stroke_width = 2 if h <= 11 else 1 + + for dx in range(-stroke_width, stroke_width + 1): + for dy in range(-stroke_width, stroke_width + 1): + if dx == 0 and dy == 0: + continue + draw.text((x + dx, y + dy), text, font=font, fill=stroke_fill) + + draw.text((x, y), text, font=font, fill=fill) # ───────────────────────────────────────────── -# PANEL DETECTION HELPERS +# CLEAN MASK BUILDERS +# ───────────────────────────────────────────── +def draw_rounded_rect_mask(mask, x1, y1, x2, y2, radius, color=255): + x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) + if x2 <= x1 or y2 <= y1: + return mask + + r = int(max(1, min(radius, (x2 - x1) // 2, (y2 - y1) // 2))) + + cv2.rectangle(mask, (x1 + r, y1), (x2 - r, y2), color, -1) + cv2.rectangle(mask, (x1, y1 + r), (x2, y2 - r), color, -1) + + cv2.circle(mask, (x1 + r, y1 + r), r, color, -1) + cv2.circle(mask, (x2 - r, y1 + r), r, color, -1) + cv2.circle(mask, (x1 + r, y2 - r), r, color, -1) + cv2.circle(mask, (x2 - r, y2 - r), r, color, -1) + return mask + + +def build_hybrid_clean_mask(img_h, img_w, cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, panel): + px1, py1, px2, py2 = panel + mask = np.zeros((img_h, img_w), dtype=np.uint8) + + rw = max(8, int(safe_w * CLEAN_MASK_RECT_SCALE_W)) + rh = max(8, int(safe_h * CLEAN_MASK_RECT_SCALE_H)) + x1 = int(cx - rw / 2) + y1 = int(cy - rh / 2) + x2 = int(cx + rw / 2) + y2 = int(cy + rh / 2) + rr = int(min(rw, rh) * 0.22) + + draw_rounded_rect_mask(mask, x1, y1, x2, y2, rr, color=255) + + e_sa = max(3, int(sa_fill * CLEAN_MASK_ELLIPSE_SCALE)) + e_sb = max(3, int(sb_fill * CLEAN_MASK_ELLIPSE_SCALE)) + cv2.ellipse(mask, (int(round(cx)), int(round(cy))), (e_sa, e_sb), angle, 0, 360, 255, -1) + + kernel = np.ones((3, 3), np.uint8) + mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=1) + + clip = np.zeros_like(mask) + clip[py1:py2, px1:px2] = 255 + mask = cv2.bitwise_and(mask, clip) + + if CLEAN_MASK_BLUR > 0: + mask = cv2.GaussianBlur(mask, (0, 0), CLEAN_MASK_BLUR) + _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY) + + return mask + + +def build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel): + """ + Build precise text cleanup mask from OCR quad bounding boxes. + """ + px1, py1, px2, py2 = panel + quad_boxes = bubble_data.get("quad_bboxes", []) + mask = np.zeros((img_h, img_w), dtype=np.uint8) + + if not quad_boxes: + return mask + + for qb in quad_boxes: + x = int(qb.get("x", 0)) + y = int(qb.get("y", 0)) + w = int(qb.get("w", 0)) + h = int(qb.get("h", 0)) + + if w <= 0 or h <= 0: + continue + + if w < OCR_CLEAN_MIN_W: + extra = (OCR_CLEAN_MIN_W - w) // 2 + x -= extra + w += 2 * extra + + if h < OCR_CLEAN_MIN_H: + extra = (OCR_CLEAN_MIN_H - h) // 2 + y -= extra + h += 2 * extra + + x1 = max(px1, x - OCR_CLEAN_PAD_X) + y1 = max(py1, y - OCR_CLEAN_PAD_Y) + x2 = min(px2, x + w + OCR_CLEAN_PAD_X) + y2 = min(py2, y + h + OCR_CLEAN_PAD_Y) + + if x2 > x1 and y2 > y1: + cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1) + + # Merge nearby fragments + ksize = max(3, int(OCR_CLEAN_CLOSE_KERNEL) | 1) # ensure odd and >=3 + k = np.ones((ksize, ksize), np.uint8) + mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, k, iterations=1) + + if OCR_CLEAN_DILATE > 0: + mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=OCR_CLEAN_DILATE) + + # Clip to panel bounds + clip = np.zeros_like(mask) + clip[py1:py2, px1:px2] = 255 + mask = cv2.bitwise_and(mask, clip) + + return mask + + +# ───────────────────────────────────────────── +# PANEL DETECTION # ───────────────────────────────────────────── def merge_nested_panels(panels): - """ - Removes panels that are >80% contained inside - a larger panel. Keeps the larger one. - """ if len(panels) <= 1: return panels - panels_sorted = sorted( - panels, - key=lambda p: (p[2] - p[0]) * (p[3] - p[1]), - reverse=True - ) - + panels_sorted = sorted(panels, key=lambda p: (p[2] - p[0]) * (p[3] - p[1]), reverse=True) keep = [] + for panel in panels_sorted: px1, py1, px2, py2 = panel - p_area = (px2 - px1) * (py2 - py1) + p_area = (px2 - px1) * (py2 - py1) dominated = False + for kept in keep: kx1, ky1, kx2, ky2 = kept - ix1 = max(px1, kx1); iy1 = max(py1, ky1) - ix2 = min(px2, kx2); iy2 = min(py2, ky2) + ix1 = max(px1, kx1) + iy1 = max(py1, ky1) + ix2 = min(px2, kx2) + iy2 = min(py2, ky2) if ix2 > ix1 and iy2 > iy1: inter = (ix2 - ix1) * (iy2 - iy1) - if inter / p_area > 0.80: + if inter / max(1, p_area) > 0.80: dominated = True break + if not dominated: keep.append(panel) return keep -def split_panels_on_internal_borders(panels, v_lines, - img_w, img_h): - """ - For each panel wider than 30% of the image, checks - whether a strong vertical border line runs through - its interior. If found, splits into two sub-panels. - """ +def split_panels_on_internal_borders(panels, v_lines, img_w, img_h): result = [] for (px1, py1, px2, py2) in panels: pw = px2 - px1 - if pw < img_w * 0.30: result.append((px1, py1, px2, py2)) continue - margin = int(pw * 0.20) + margin = int(pw * 0.20) search_x1 = px1 + margin search_x2 = px2 - margin + if search_x2 <= search_x1: + result.append((px1, py1, px2, py2)) + continue panel_vlines = v_lines[py1:py2, search_x1:search_x2] - col_sums = panel_vlines.sum(axis=0) + col_sums = panel_vlines.sum(axis=0) - panel_h = py2 - py1 + panel_h = py2 - py1 threshold = panel_h * 255 * 0.40 - split_cols = np.where(col_sums > threshold)[0] if len(split_cols) == 0: @@ -204,68 +444,39 @@ def split_panels_on_internal_borders(panels, v_lines, continue split_x = int(np.median(split_cols)) + search_x1 - left_w = split_x - px1 + left_w = split_x - px1 right_w = px2 - split_x if left_w > img_w * 0.10 and right_w > img_w * 0.10: - result.append((px1, py1, split_x, py2)) - result.append((split_x, py1, px2, py2)) - print(f" ✂️ Split ({px1},{py1})→({px2},{py2}) " - f"at x={split_x}") + result.append((px1, py1, split_x, py2)) + result.append((split_x, py1, px2, py2)) else: result.append((px1, py1, px2, py2)) return result -# ───────────────────────────────────────────── -# PANEL DETECTION (v2 — border-line based) -# ───────────────────────────────────────────── def detect_panels(img_bgr): - """ - Detects manga panel boundaries using morphological - line detection on dark border pixels. - - 1. Threshold dark pixels → border candidates - 2. Horizontal kernel → long horizontal lines - 3. Vertical kernel → long vertical lines - 4. Combine + dilate → closed border skeleton - 5. Invert → panel interior blobs - 6. connectedComponents → one blob per panel - 7. Filter by area, shape, minimum dimensions - 8. Merge nested panels - 9. Split wide panels on internal vertical borders - """ img_h, img_w = img_bgr.shape[:2] - total_area = img_h * img_w - min_area = total_area * MIN_PANEL_AREA_RATIO + total_area = img_h * img_w + min_area = total_area * MIN_PANEL_AREA_RATIO gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) + _, dark_mask = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY_INV) - _, dark_mask = cv2.threshold( - gray, 80, 255, cv2.THRESH_BINARY_INV) + h_len = max(40, img_w // 25) + h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (h_len, 1)) + h_lines = cv2.morphologyEx(dark_mask, cv2.MORPH_OPEN, h_kernel) - h_len = max(40, img_w // 25) - h_kernel = cv2.getStructuringElement( - cv2.MORPH_RECT, (h_len, 1)) - h_lines = cv2.morphologyEx( - dark_mask, cv2.MORPH_OPEN, h_kernel) + v_len = max(40, img_h // 25) + v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, v_len)) + v_lines = cv2.morphologyEx(dark_mask, cv2.MORPH_OPEN, v_kernel) - v_len = max(40, img_h // 25) - v_kernel = cv2.getStructuringElement( - cv2.MORPH_RECT, (1, v_len)) - v_lines = cv2.morphologyEx( - dark_mask, cv2.MORPH_OPEN, v_kernel) - - borders = cv2.bitwise_or(h_lines, v_lines) - close_kernel = np.ones((5, 5), np.uint8) - borders = cv2.dilate(borders, close_kernel, iterations=2) + borders = cv2.bitwise_or(h_lines, v_lines) + borders = cv2.dilate(borders, np.ones((5, 5), np.uint8), iterations=2) panel_interior = cv2.bitwise_not(borders) - - num_labels, labels, stats, centroids = \ - cv2.connectedComponentsWithStats( - panel_interior, connectivity=8) + num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(panel_interior, connectivity=8) panels = [] for label_id in range(1, num_labels): @@ -273,10 +484,10 @@ def detect_panels(img_bgr): if area < min_area: continue - x = stats[label_id, cv2.CC_STAT_LEFT] - y = stats[label_id, cv2.CC_STAT_TOP] - w = stats[label_id, cv2.CC_STAT_WIDTH] - h = stats[label_id, cv2.CC_STAT_HEIGHT] + x = stats[label_id, cv2.CC_STAT_LEFT] + y = stats[label_id, cv2.CC_STAT_TOP] + w = stats[label_id, cv2.CC_STAT_WIDTH] + h = stats[label_id, cv2.CC_STAT_HEIGHT] x2 = x + w y2 = y + h @@ -287,49 +498,40 @@ def detect_panels(img_bgr): if aspect > 15: continue - # Skip panels too narrow/short to be real panels if w < img_w * 0.05 or h < img_h * 0.05: continue panels.append((x, y, x2, y2)) panels = merge_nested_panels(panels) - panels = split_panels_on_internal_borders( - panels, v_lines, img_w, img_h) - + panels = split_panels_on_internal_borders(panels, v_lines, img_w, img_h) panels.sort(key=lambda p: (p[1] // 100, p[0])) if not panels: - print(" ⚠️ No panels detected — using full image as panel") panels = [(0, 0, img_w, img_h)] - print(f" 📐 {len(panels)} panel(s) detected:") - for i, (x1, y1, x2, y2) in enumerate(panels, 1): - pct = (x2 - x1) * (y2 - y1) / total_area * 100 - print(f" Panel {i}: ({x1},{y1})→({x2},{y2}) " - f"{x2-x1}×{y2-y1}px area={pct:.1f}%") - return panels # ───────────────────────────────────────────── -# BUBBLE → PANEL ASSIGNMENT +# BUBBLE GEOMETRY # ───────────────────────────────────────────── def assign_panel(bubble_data, panels, img_w, img_h): - bx = bubble_data["x"]; bw = bubble_data["w"] - by = bubble_data["y"]; bh = bubble_data["h"] - bcx = bx + bw / 2.0; bcy = by + bh / 2.0 + bx, bw = bubble_data["x"], bubble_data["w"] + by, bh = bubble_data["y"], bubble_data["h"] + bcx, bcy = bx + bw / 2.0, by + bh / 2.0 best_panel, best_overlap = None, 0 - for (px1, py1, px2, py2) in panels: - ix1 = max(bx, px1); iy1 = max(by, py1) - ix2 = min(bx+bw, px2); iy2 = min(by+bh, py2) + ix1 = max(bx, px1) + iy1 = max(by, py1) + ix2 = min(bx + bw, px2) + iy2 = min(by + bh, py2) if ix2 > ix1 and iy2 > iy1: overlap = (ix2 - ix1) * (iy2 - iy1) if overlap > best_overlap: best_overlap = overlap - best_panel = (px1, py1, px2, py2) + best_panel = (px1, py1, px2, py2) if best_panel is None: for (px1, py1, px2, py2) in panels: @@ -340,12 +542,9 @@ def assign_panel(bubble_data, panels, img_w, img_h): return best_panel -# ───────────────────────────────────────────── -# BUBBLE ELLIPSE DETECTION (flood-fill) -# ───────────────────────────────────────────── def detect_bubble_ellipse(img_bgr, bubble_data, panel): - x = bubble_data["x"]; w = bubble_data["w"] - y = bubble_data["y"]; h = bubble_data["h"] + x, w = bubble_data["x"], bubble_data["w"] + y, h = bubble_data["y"], bubble_data["h"] img_h, img_w = img_bgr.shape[:2] px1, py1, px2, py2 = panel @@ -361,31 +560,37 @@ def detect_bubble_ellipse(img_bgr, bubble_data, panel): if gray[seed_y, seed_x] < 150: found = False - for r in range(1, min(w, h) // 3): + for r in range(1, max(2, min(w, h) // 3)): for dy in range(-r, r + 1): for dx in range(-r, r + 1): nx, ny = seed_x + dx, seed_y + dy - if (px1 <= nx < px2 and py1 <= ny < py2 - and gray[ny, nx] >= 200): + if px1 <= nx < px2 and py1 <= ny < py2 and gray[ny, nx] >= 200: seed_x, seed_y = nx, ny found = True break - if found: break - if found: break + if found: + break + if found: + break if not found: return None - flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8) + flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8) flood_fill_img = panel_mask.copy() - cv2.floodFill(flood_fill_img, flood_mask, - (seed_x, seed_y), 255, - loDiff=FLOOD_TOLERANCE, upDiff=FLOOD_TOLERANCE, - flags=cv2.FLOODFILL_FIXED_RANGE) + cv2.floodFill( + flood_fill_img, + flood_mask, + (seed_x, seed_y), + 255, + loDiff=FLOOD_TOLERANCE, + upDiff=FLOOD_TOLERANCE, + flags=cv2.FLOODFILL_FIXED_RANGE + ) filled_region = flood_mask[1:-1, 1:-1] * 255 - contours, _ = cv2.findContours( - filled_region, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + filled_region = cv2.morphologyEx(filled_region, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=1) + contours, _ = cv2.findContours(filled_region, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if not contours: return None @@ -396,38 +601,21 @@ def detect_bubble_ellipse(img_bgr, bubble_data, panel): return None (ecx, ecy), (ew, eh), angle = cv2.fitEllipse(bubble_contour) - return float(ecx), float(ecy), float(ew/2), float(eh/2), float(angle) + return float(ecx), float(ecy), float(ew / 2), float(eh / 2), float(angle) -# ───────────────────────────────────────────── -# CLIP + NUDGE ELLIPSE TO PANEL -# ───────────────────────────────────────────── -def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, - shrink=BORDER_SHRINK_PX): - """ - Keeps the ellipse inside the panel by: - 1. Applying border shrink margin - 2. Nudging center inward (up to MAX_NUDGE_RATIO) - 3. Shrinking axes only for remaining overflow - - Returns (cx, cy, sa, sb) — center may be adjusted. - """ +def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, shrink=BORDER_SHRINK_PX): px1, py1, px2, py2 = panel - - inner_x1 = px1 + shrink - inner_y1 = py1 + shrink - inner_x2 = px2 - shrink - inner_y2 = py2 - shrink + inner_x1, inner_y1 = px1 + shrink, py1 + shrink + inner_x2, inner_y2 = px2 - shrink, py2 - shrink sa_s = max(sa - shrink, 1.0) sb_s = max(sb - shrink, 1.0) for _ in range(3): rad = math.radians(angle) - hw = math.sqrt((sa_s * math.cos(rad))**2 + - (sb_s * math.sin(rad))**2) - hh = math.sqrt((sa_s * math.sin(rad))**2 + - (sb_s * math.cos(rad))**2) + hw = math.sqrt((sa_s * math.cos(rad))**2 + (sb_s * math.sin(rad))**2) + hh = math.sqrt((sa_s * math.sin(rad))**2 + (sb_s * math.cos(rad))**2) ovf_l = max(0, inner_x1 - (cx - hw)) ovf_r = max(0, (cx + hw) - inner_x2) @@ -437,18 +625,14 @@ def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, if max(ovf_l, ovf_r, ovf_t, ovf_b) == 0: break - # Step 1: nudge center inward max_nx = sa_s * MAX_NUDGE_RATIO max_ny = sb_s * MAX_NUDGE_RATIO cx += min(ovf_l, max_nx) - min(ovf_r, max_nx) cy += min(ovf_t, max_ny) - min(ovf_b, max_ny) - # Step 2: recompute overflow after nudge rad = math.radians(angle) - hw = math.sqrt((sa_s * math.cos(rad))**2 + - (sb_s * math.sin(rad))**2) - hh = math.sqrt((sa_s * math.sin(rad))**2 + - (sb_s * math.cos(rad))**2) + hw = math.sqrt((sa_s * math.cos(rad))**2 + (sb_s * math.sin(rad))**2) + hh = math.sqrt((sa_s * math.sin(rad))**2 + (sb_s * math.cos(rad))**2) ovf_l = max(0, inner_x1 - (cx - hw)) ovf_r = max(0, (cx + hw) - inner_x2) @@ -456,7 +640,6 @@ def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, ovf_b = max(0, (cy + hh) - inner_y2) max_ovf = max(ovf_l, ovf_r, ovf_t, ovf_b) - # Step 3: shrink only remaining overflow if max_ovf > 0: sa_s = max(sa_s - max_ovf, 1.0) sb_s = max(sb_s - max_ovf, 1.0) @@ -464,164 +647,117 @@ def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, return cx, cy, sa_s, sb_s -# ───────────────────────────────────────────── -# GET FINAL RENDER ELLIPSE PARAMS -# ───────────────────────────────────────────── def get_render_ellipse(img_bgr, bubble_data, panel): - x = bubble_data["x"]; w = bubble_data["w"] - y = bubble_data["y"]; h = bubble_data["h"] + x, w = bubble_data["x"], bubble_data["w"] + y, h = bubble_data["y"], bubble_data["h"] detected = detect_bubble_ellipse(img_bgr, bubble_data, panel) - if detected is not None: ecx, ecy, sa, sb, angle = detected - ecx, ecy, sa_fill, sb_fill = clip_ellipse_to_panel( - ecx, ecy, sa, sb, angle, panel) + ecx, ecy, sa_fill, sb_fill = clip_ellipse_to_panel(ecx, ecy, sa, sb, angle, panel) safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO - return (ecx, ecy, sa_fill, sb_fill, angle, - sa, sb, safe_w, safe_h, "detected") + return (ecx, ecy, sa_fill, sb_fill, angle, safe_w, safe_h, "detected") else: - cx = x + w / 2.0; cy = y + h / 2.0 - sa = w / 2.0; sb = h / 2.0 - cx, cy, sa_fill, sb_fill = clip_ellipse_to_panel( - cx, cy, sa, sb, 0.0, panel) + cx, cy = x + w / 2.0, y + h / 2.0 + sa, sb = w / 2.0, h / 2.0 + cx, cy, sa_fill, sb_fill = clip_ellipse_to_panel(cx, cy, sa, sb, 0.0, panel) safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO - return (cx, cy, sa_fill, sb_fill, 0.0, - sa, sb, safe_w, safe_h, "fallback") + return (cx, cy, sa_fill, sb_fill, 0.0, safe_w, safe_h, "fallback") # ───────────────────────────────────────────── # DRAW ONE BUBBLE # ───────────────────────────────────────────── -def draw_bubble(pil_img, img_bgr, bubble_data, - translated_text, font_path, - font_color, panel): - (cx, cy, sa_fill, sb_fill, angle, - sa_det, sb_det, - safe_w, safe_h, method) = get_render_ellipse( - img_bgr, bubble_data, panel) +def draw_bubble( + pil_img, + img_bgr, + bubble_data, + original_text, + translated_text, + flags, + font_candidates, + font_color, + stroke_color, + panel +): + # skip unchanged SFX + if original_text and translated_text: + if normalize_text(original_text) == normalize_text(translated_text) and is_sfx_like(original_text): + return "skip_sfx", "NO_FONT" - cx_i = int(round(cx)) - cy_i = int(round(cy)) + (cx, cy, sa_fill, sb_fill, angle, safe_w, safe_h, method) = get_render_ellipse(img_bgr, bubble_data, panel) + cx_i, cy_i = int(round(cx)), int(round(cy)) img_h, img_w = img_bgr.shape[:2] - mask = np.zeros((img_h, img_w), dtype=np.uint8) - cv2.ellipse(mask, (cx_i, cy_i), - (int(math.ceil(sa_fill)), - int(math.ceil(sb_fill))), - angle, 0, 360, 255, -1) + # choose cleaning mask + if CLEAN_MODE == "ocr_union": + mask = build_ocr_union_clean_mask(img_h, img_w, bubble_data, panel) + # robust fallback + if mask is None or int(mask.sum()) == 0: + mask = build_hybrid_clean_mask( + img_h=img_h, img_w=img_w, + cx=cx, cy=cy, + sa_fill=sa_fill, sb_fill=sb_fill, angle=angle, + safe_w=safe_w, safe_h=safe_h, + panel=panel + ) + elif CLEAN_MODE == "hybrid": + mask = build_hybrid_clean_mask( + img_h=img_h, img_w=img_w, + cx=cx, cy=cy, + sa_fill=sa_fill, sb_fill=sb_fill, angle=angle, + safe_w=safe_w, safe_h=safe_h, + panel=panel + ) + else: # ellipse + mask = np.zeros((img_h, img_w), dtype=np.uint8) + cv2.ellipse(mask, (cx_i, cy_i), (int(math.ceil(sa_fill)), int(math.ceil(sb_fill))), angle, 0, 360, 255, -1) + + # paint white over mask img_np = np.array(pil_img) img_np[mask == 255] = [255, 255, 255] pil_img.paste(Image.fromarray(img_np)) if not translated_text: - return method + return method, "NO_FONT" - sw = max(int(safe_w), 1) - sh = max(int(safe_h), 1) draw = ImageDraw.Draw(pil_img) - font, lines, total_h = best_fit_font( - draw, translated_text, font_path, sw, sh) + # Center text in the cleaned region bbox (red-box style target) + ys, xs = np.where(mask > 0) + if len(xs) > 0 and len(ys) > 0: + mx1, my1, mx2, my2 = xs.min(), ys.min(), xs.max(), ys.max() + text_cx = int((mx1 + mx2) / 2) + text_cy = int((my1 + my2) / 2) + sw = max(20, int((mx2 - mx1) * 0.92)) + sh = max(20, int((my2 - my1) * 0.92)) + else: + text_cx, text_cy = cx_i, cy_i + sw, sh = max(int(safe_w), 1), max(int(safe_h), 1) + + bw = max(1, bubble_data.get("w", 1)) + bh = max(1, bubble_data.get("h", 1)) + tall_bubble = (bh / bw) > 1.25 + + font, lines, total_h, font_used = best_fit_font( + draw, translated_text, font_candidates, sw, sh, tall_bubble=tall_bubble + ) if not lines: - return method + return method, font_used + + y_cursor = int(round(text_cy - total_h / 2.0 - 0.02 * sh)) - y_cursor = cy_i - total_h // 2 for line in lines: lw, lh = measure_text(draw, line, font) - draw.text((cx_i - lw // 2, y_cursor), - line, font=font, fill=font_color) - y_cursor += lh + max(lh // 5, 2) + x = text_cx - lw // 2 + draw_text_with_stroke(draw, (x, y_cursor), line, font, fill=font_color, stroke_fill=stroke_color) + y_cursor += lh + max(lh // 5, 2) + (1 if tall_bubble else 0) - return method - - -# ───────────────────────────────────────────── -# DEBUG OVERLAY -# ───────────────────────────────────────────── -def save_debug_ellipses(input_image_path, bubbles, - translations, panels, output_path): - img = cv2.imread(input_image_path) - if img is None: - print(f" ⚠️ Debug: cannot load {input_image_path}") - return - - overlay = img.copy() - img_h, img_w = img.shape[:2] - - for i, (px1, py1, px2, py2) in enumerate(panels, 1): - cv2.rectangle(overlay, (px1, py1), (px2, py2), - DBG_COLOR_PANEL, 3) - cv2.putText(overlay, f"P{i}", - (px1 + 4, py1 + 22), - cv2.FONT_HERSHEY_SIMPLEX, - 0.65, DBG_COLOR_PANEL, 2) - - for bubble_id in sorted(translations.keys()): - if bubble_id not in bubbles: - continue - - bubble_data = bubbles[bubble_id] - panel = assign_panel(bubble_data, panels, img_w, img_h) - - x = bubble_data["x"]; w = bubble_data["w"] - y = bubble_data["y"]; h = bubble_data["h"] - - (cx, cy, sa_fill, sb_fill, angle, - sa_det, sb_det, - safe_w, safe_h, method) = get_render_ellipse( - img, bubble_data, panel) - - cx_i = int(round(cx)); cy_i = int(round(cy)) - sa_d_i = int(math.ceil(sa_det)) - sb_d_i = int(math.ceil(sb_det)) - sa_f_i = int(math.ceil(sa_fill)) - sb_f_i = int(math.ceil(sb_fill)) - sw_i = int(safe_w); sh_i = int(safe_h) - - cv2.ellipse(overlay, (cx_i, cy_i), - (sa_d_i, sb_d_i), angle, 0, 360, - DBG_COLOR_DETECTED, DBG_THICKNESS) - - cv2.ellipse(overlay, (cx_i, cy_i), - (sa_f_i, sb_f_i), angle, 0, 360, - DBG_COLOR_FILL, DBG_THICKNESS) - - cv2.rectangle(overlay, - (cx_i - sw_i//2, cy_i - sh_i//2), - (cx_i + sw_i//2, cy_i + sh_i//2), - DBG_COLOR_SAFE, DBG_THICKNESS) - - cv2.circle(overlay, (cx_i, cy_i), - DBG_CENTER_R, DBG_COLOR_CENTER, -1) - - cv2.circle(overlay, - (int(x + w/2), int(y + h/2)), - DBG_CENTER_R - 1, DBG_COLOR_SEED, -1) - - tag = "D" if method == "detected" else "F" - cv2.putText(overlay, f"#{bubble_id}({tag})", - (cx_i - sa_d_i, cy_i - sb_d_i - 6), - cv2.FONT_HERSHEY_SIMPLEX, - 0.50, DBG_COLOR_LABEL, 2) - - debug_img = cv2.addWeighted(overlay, 0.85, img, 0.15, 0) - cv2.imwrite(output_path, debug_img) - - print(f" 🐛 Debug saved → {output_path}") - print() - print(" Legend:") - print(" 🟡 YELLOW → Panel boundary") - print(" 🟢 GREEN → Detected bubble ellipse") - print(" 🔴 RED → Fill ellipse (nudged + clipped)") - print(" 🔵 BLUE → Text safe rectangle") - print(" 🔵 CYAN → Ellipse center (may be nudged)") - print(" 🟣 MAGENTA → Original flood seed point") - print(" (D) = contour detected | (F) = bbox fallback") + return method, font_used # ───────────────────────────────────────────── @@ -632,70 +768,64 @@ def render_translations( output_image, translations_file, bubbles_file, - font_path = DEFAULT_FONT_PATH, - font_color = DEFAULT_FONT_COLOR, - debug = False, - debug_path = "debug_ellipses.png", + font_candidates=DEFAULT_FONT_CANDIDATES, + font_color=DEFAULT_FONT_COLOR, + stroke_color=DEFAULT_STROKE_COLOR, ): img_bgr = cv2.imread(input_image) if img_bgr is None: - raise FileNotFoundError( - f"Cannot load image: {input_image}") + raise FileNotFoundError(f"Cannot load image: {input_image}") img_h, img_w = img_bgr.shape[:2] - img_pil = Image.fromarray( - cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)) + img_pil = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)) - translations = parse_translations(translations_file) - bubbles = parse_bubbles(bubbles_file) - - print("\n📐 Detecting panels...") + translations, originals, flags_map = parse_translations(translations_file) + bubbles = parse_bubbles(bubbles_file) panels = detect_panels(img_bgr) - print(f"\n🎨 Rendering {len(translations)} bubble(s)...") - print(f" Font : {font_path}") - print(f" Border shrink : -{BORDER_SHRINK_PX}px") - print(f" Max nudge ratio : {MAX_NUDGE_RATIO}") - print(f" Flood tolerance : {FLOOD_TOLERANCE}") - print(f" Text ratio : {TEXT_RATIO}") - if debug: - print(f" Debug mode : ON → {debug_path}") - save_debug_ellipses(input_image, bubbles, - translations, panels, debug_path) + rendered = 0 + skipped = 0 - rendered = 0; skipped = 0 - n_detect = 0; n_fallbk = 0 + def sort_key(item): + bid, _ = item + b = bubbles.get(bid, {}) + return int(b.get("reading_order", bid)) - for bubble_id, translated_text in sorted(translations.items()): + for bubble_id, translated_text in sorted(translations.items(), key=sort_key): if bubble_id not in bubbles: - print(f" ⚠️ #{bubble_id}: not in bubbles.json — skipped") skipped += 1 continue bubble_data = bubbles[bubble_id] - panel = assign_panel(bubble_data, panels, img_w, img_h) - method = draw_bubble( - img_pil, img_bgr, bubble_data, - translated_text, font_path, font_color, panel) + panel = assign_panel(bubble_data, panels, img_w, img_h) - tag = "🔍 detected" if method == "detected" else "📦 fallback" - if method == "detected": n_detect += 1 - else: n_fallbk += 1 + original_text = originals.get(bubble_id, "") + flags = flags_map.get(bubble_id, "-") - px1, py1, px2, py2 = panel - print(f" ✅ #{bubble_id} [{tag}] " - f"panel=({px1},{py1})→({px2},{py2}) " - f"→ \"{translated_text[:35]}\"") - rendered += 1 + method, font_used = draw_bubble( + pil_img=img_pil, + img_bgr=img_bgr, + bubble_data=bubble_data, + original_text=original_text, + translated_text=translated_text, + flags=flags, + font_candidates=font_candidates, + font_color=font_color, + stroke_color=stroke_color, + panel=panel + ) - result_cv = cv2.cvtColor(np.array(img_pil), - cv2.COLOR_RGB2BGR) + if method == "skip_sfx": + skipped += 1 + else: + rendered += 1 + + result_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR) cv2.imwrite(output_image, result_cv) - print(f"\n✅ Done — {rendered} rendered " - f"({n_detect} detected, {n_fallbk} fallback), " - f"{skipped} skipped.") + print(f"✅ Done — {rendered} rendered, {skipped} skipped.") print(f"📄 Output → {output_image}") + print(f"🧼 Clean mode: {CLEAN_MODE}") # ───────────────────────────────────────────── @@ -703,12 +833,11 @@ def render_translations( # ───────────────────────────────────────────── if __name__ == "__main__": render_translations( - input_image = "002-page.jpg", - output_image = "page_translated.png", - translations_file = "output.txt", - bubbles_file = "bubbles.json", - font_path = DEFAULT_FONT_PATH, - font_color = DEFAULT_FONT_COLOR, - debug = True, - debug_path = "debug_ellipses.png", + input_image="001-page.png", + output_image="page_translated.png", + translations_file="output.txt", + bubbles_file="bubbles.json", + font_candidates=DEFAULT_FONT_CANDIDATES, + font_color=DEFAULT_FONT_COLOR, + stroke_color=DEFAULT_STROKE_COLOR, ) diff --git a/manga-translator.py b/manga-translator.py index 8102ce8..8869f2b 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -1,6 +1,7 @@ import re import os import json +import difflib import cv2 import numpy as np import easyocr @@ -28,6 +29,37 @@ SUPPORTED_LANGUAGES = { "Catalan" : "ca", } +# ───────────────────────────────────────────── +# DOMAIN GLOSSARY +# ───────────────────────────────────────────── +GLOSSARY = { + "ANYA": "ANYA", + "STELLA STAR": "STELLA STAR", + "MR. HENDERSON": "MR. HENDERSON", + "STARLIGHT ANYA": "STARLIGHT ANYA", +} + +# Phrase-level fallback (source IT -> target CA) +PHRASE_MAP_IT_CA = { + "LA BAMBINA È ILLESA!": "LA NENA ESTÀ IL·LESA!", + "L'UOMO E LA DONNA SONO MORTI!": "L'HOME I LA DONA SÓN MORTS!", + "IL BAMBINO È FERITO GRAVEMENTE, MA È ANCORA VIVO!!": "EL NEN ESTÀ GREUMENT FERIT, PERÒ ENCARA ÉS VIU!!", + "UN CASO URGENTE...?": "UN CAS URGENT...?", + "UN CASO URGENTE,?": "UN CAS URGENT?", +} + +ITALIAN_OCR_FIXES = [ + (r"\bL'LOMO\b", "L'UOMO"), + (r"\bLOMO\b", "UOMO"), + (r"\bMORT I\b", "MORTI"), + (r"\bI[L1]LESA\b", "ILLESA"), + (r"\bBAM8INA\b", "BAMBINA"), + (r"\bBAM8INO\b", "BAMBINO"), + (r",\?", "?"), + (r"\?{2,}", "?"), + (r"\!{3,}", "!!"), +] + # ───────────────────────────────────────────── # SOUND EFFECT FILTER @@ -42,8 +74,7 @@ SOUND_EFFECT_PATTERNS = [ def is_sound_effect(text): cleaned = re.sub(r"[^a-z]", "", text.strip().lower()) - return any(re.fullmatch(p, cleaned, re.IGNORECASE) - for p in SOUND_EFFECT_PATTERNS) + return any(re.fullmatch(p, cleaned, re.IGNORECASE) for p in SOUND_EFFECT_PATTERNS) # ───────────────────────────────────────────── @@ -52,26 +83,23 @@ def is_sound_effect(text): TITLE_PATTERNS = [ r"^(mission|chapter|episode|vol\.?|volume)\s*\d+$", r"^(spy|family|spy.family)$", - r"^by\s+.+$", # "BY TATSUYA ENDO" - r"^[a-z]{1,4}\s+[a-z]+\s+[a-z]+$", # short author-style lines + r"^by\s+.+$", + r"^[a-z]{1,4}\s+[a-z]+\s+[a-z]+$", ] def is_title_text(text): cleaned = text.strip().lower() - return any(re.fullmatch(p, cleaned, re.IGNORECASE) - for p in TITLE_PATTERNS) + return any(re.fullmatch(p, cleaned, re.IGNORECASE) for p in TITLE_PATTERNS) # ───────────────────────────────────────────── # GARBAGE TOKEN FILTER -# Catches OCR misreads that are mostly -# non-alpha or suspiciously short/mangled # ───────────────────────────────────────────── GARBAGE_PATTERNS = [ - r"^[^a-zA-Z]*$", # no letters at all - r"^.{1,2}$", # 1-2 char tokens - r".*\d+.*", # contains digits (YO4, HLNGRY etc.) - r"^[A-Z]{1,4}$", # isolated caps abbreviations (IILK) + r"^[^a-zA-Z]*$", + r"^.{1,2}$", + r".*\d+.*", + r"^[A-Z]{1,4}$", ] def is_garbage(text): @@ -82,11 +110,7 @@ def is_garbage(text): # ───────────────────────────────────────────── # TOKEN CLASSIFIER # ───────────────────────────────────────────── -def classify_token(text, confidence, confidence_threshold, - min_text_length, filter_sound_effects): - """ - Returns one of: "alpha" | "punct" | "noise" - """ +def classify_token(text, confidence, confidence_threshold, min_text_length, filter_sound_effects): cleaned = text.strip() if confidence < confidence_threshold: @@ -107,34 +131,47 @@ def classify_token(text, confidence, confidence_threshold, return "punct" return "alpha" - -def should_keep_token(text, confidence, confidence_threshold, - min_text_length, filter_sound_effects): - cat = classify_token(text, confidence, confidence_threshold, - min_text_length, filter_sound_effects) +def should_keep_token(text, confidence, confidence_threshold, min_text_length, filter_sound_effects): + cat = classify_token(text, confidence, confidence_threshold, min_text_length, filter_sound_effects) return cat != "noise", cat # ───────────────────────────────────────────── -# QUAD HELPERS +# QUAD / BBOX HELPERS # ───────────────────────────────────────────── def quad_bbox(quad): xs = [pt[0] for pt in quad] ys = [pt[1] for pt in quad] return min(xs), min(ys), max(xs), max(ys) +def quad_center(quad): + x1, y1, x2, y2 = quad_bbox(quad) + return (x1 + x2) / 2.0, (y1 + y2) / 2.0 + +def quad_h(quad): + x1, y1, x2, y2 = quad_bbox(quad) + return max(1.0, y2 - y1) + +def bbox_center(b): + x1, y1, x2, y2 = b + return (x1 + x2) / 2.0, (y1 + y2) / 2.0 + +def bbox_h(b): + return max(1.0, b[3] - b[1]) + +def distance_pt(a, b): + return ((a[0]-b[0])**2 + (a[1]-b[1])**2) ** 0.5 def quads_bbox(quads, image_shape, padding_px=10): img_h, img_w = image_shape[:2] all_x = [pt[0] for quad in quads for pt in quad] all_y = [pt[1] for quad in quads for pt in quad] - x1 = max(0, min(all_x) - padding_px) - y1 = max(0, min(all_y) - padding_px) + x1 = max(0, min(all_x) - padding_px) + y1 = max(0, min(all_y) - padding_px) x2 = min(img_w, max(all_x) + padding_px) y2 = min(img_h, max(all_y) + padding_px) return x1, y1, x2, y2 - def bboxes_overlap_or_touch(a, b, gap_px=0): ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b @@ -144,16 +181,137 @@ def bboxes_overlap_or_touch(a, b, gap_px=0): # ───────────────────────────────────────────── -# OVERLAP-BASED GROUPING (Union-Find) +# TEXT NORMALIZATION # ───────────────────────────────────────────── -def group_quads_by_overlap(ocr_results, image_shape, - gap_px=18, bbox_padding=10): +def normalize_ocr_text(text): + t = text.strip().upper() + t = t.replace("“", "\"").replace("”", "\"") + t = t.replace("’", "'").replace("‘", "'") + t = t.replace("…", "...") + t = re.sub(r"\s+", " ", t) + t = re.sub(r"\s+([,.;:!?])", r"\1", t) + t = re.sub(r"\(\s+", "(", t) + t = re.sub(r"\s+\)", ")", t) + t = re.sub(r"\.{4,}", "...", t) + t = re.sub(r",\?", "?", t) + return t.strip() + +def italian_post_ocr_cleanup(text): + t = normalize_ocr_text(text) + for pat, rep in ITALIAN_OCR_FIXES: + t = re.sub(pat, rep, t, flags=re.IGNORECASE) + t = re.sub(r"\s{2,}", " ", t).strip().upper() + return t + +def fix_hyphens(lines): + if not lines: + return "" + merged = lines[0] + for line in lines[1:]: + line = line.strip() + if merged.endswith("-"): + merged = merged[:-1] + line + else: + merged = merged + " " + line + merged = re.sub(r" {2,}", " ", merged).strip() + return normalize_ocr_text(merged) + +def apply_glossary(text, glossary): + out = text + keys = sorted(glossary.keys(), key=len, reverse=True) + for k in keys: + v = glossary[k] + out = re.sub(rf"\b{re.escape(k)}\b", v, out, flags=re.IGNORECASE) + return out + + +# ───────────────────────────────────────────── +# TRANSLATION SAFETY +# ───────────────────────────────────────────── +def fuzzy_phrase_match(source_text, phrase_map, min_ratio=0.88): + if source_text in phrase_map: + return phrase_map[source_text], 1.0, source_text + + best_key, best_ratio = None, 0.0 + for k in phrase_map.keys(): + ratio = difflib.SequenceMatcher(None, source_text, k).ratio() + if ratio > best_ratio: + best_ratio = ratio + best_key = k + + if best_key and best_ratio >= min_ratio: + return phrase_map[best_key], best_ratio, best_key + return None, best_ratio, best_key + +def looks_suspicious_translation(src, tgt): + t = normalize_ocr_text(tgt) + bad_tokens = ["NEETA", "LOMO", "MORT I", "ESTA IL", "MORT I LA"] + if any(b in t for b in bad_tokens): + return True + if len(t) < 3: + return True + return False + + +# ───────────────────────────────────────────── +# LINE REBUILD (shared) +# ───────────────────────────────────────────── +def rebuild_bubble_lines_from_indices(indices, ocr_results): + if not indices: + return [] + + token_bboxes = [quad_bbox(ocr_results[i][0]) for i in indices] + items = [] + for i, bx in zip(indices, token_bboxes): + xc = (bx[0] + bx[2]) / 2.0 + yc = (bx[1] + bx[3]) / 2.0 + h = max(1.0, bx[3] - bx[1]) + items.append((i, xc, yc, h)) + + line_tol = max(6.0, float(np.median([it[3] for it in items])) * 0.6) + items.sort(key=lambda t: t[2]) + + lines = [] + for it in items: + i, xc, yc, h = it + placed = False + for ln in lines: + if abs(yc - ln["yc"]) <= line_tol: + ln["members"].append((i, xc, yc)) + ln["yc"] = np.mean([m[2] for m in ln["members"]]) + placed = True + break + if not placed: + lines.append({"yc": yc, "members": [(i, xc, yc)]}) + + lines.sort(key=lambda ln: ln["yc"]) + out = [] + for ln in lines: + mem = sorted(ln["members"], key=lambda m: m[1]) + toks = [ocr_results[i][1] for i, _, _ in mem] + line = " ".join(toks) + line = re.sub(r"\s+([,.;:!?])", r"\1", line) + line = re.sub(r"\(\s+", "(", line) + line = re.sub(r"\s+\)", ")", line) + out.append(normalize_ocr_text(line)) + return out + + +# ───────────────────────────────────────────── +# GROUPING (pass 1) +# ───────────────────────────────────────────── +def group_quads_by_overlap(ocr_results, image_shape, gap_px=18, bbox_padding=10): n = len(ocr_results) if n == 0: return {}, {}, {} token_bboxes = [quad_bbox(r[0]) for r in ocr_results] - parent = list(range(n)) + token_centers = [quad_center(r[0]) for r in ocr_results] + token_heights = [quad_h(r[0]) for r in ocr_results] + median_h = float(np.median(token_heights)) if token_heights else 12.0 + dist_thresh = max(20.0, median_h * 2.2) + + parent = list(range(n)) def find(x): while parent[x] != x: @@ -166,16 +324,20 @@ def group_quads_by_overlap(ocr_results, image_shape, for i in range(n): for j in range(i + 1, n): - if bboxes_overlap_or_touch( - token_bboxes[i], token_bboxes[j], - gap_px=gap_px): + ov = bboxes_overlap_or_touch(token_bboxes[i], token_bboxes[j], gap_px=gap_px) + if ov: + union(i, j) + continue + cx1, cy1 = token_centers[i] + cx2, cy2 = token_centers[j] + d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5 + if d <= dist_thresh and abs(cy1 - cy2) <= median_h * 3.0: union(i, j) groups = {} for i in range(n): root = find(i) - groups.setdefault(root, []) - groups[root].append(i) + groups.setdefault(root, []).append(i) def group_sort_key(indices): ys = [token_bboxes[i][1] for i in indices] @@ -185,119 +347,105 @@ def group_quads_by_overlap(ocr_results, image_shape, sorted_groups = sorted(groups.values(), key=group_sort_key) bubble_dict = {} - bbox_dict = {} - ocr_quads = {} + bbox_dict = {} + ocr_quads = {} + bubble_indices = {} for gid, indices in enumerate(sorted_groups, start=1): - indices_sorted = sorted( - indices, key=lambda i: token_bboxes[i][1]) + idxs = sorted(indices, key=lambda k: token_bboxes[k][1]) + lines = rebuild_bubble_lines_from_indices(idxs, ocr_results) + quads = [ocr_results[k][0] for k in idxs] + bb = quads_bbox(quads, image_shape, padding_px=bbox_padding) - quads = [ocr_results[i][0] for i in indices_sorted] - raw_texts = [ocr_results[i][1] for i in indices_sorted] + bubble_dict[gid] = lines + ocr_quads[gid] = quads + bbox_dict[gid] = bb + bubble_indices[gid] = idxs - alpha_lines = [] - punct_tokens = [] - - for i in indices_sorted: - _, text, _ = ocr_results[i] - yc = (token_bboxes[i][1] + token_bboxes[i][3]) / 2.0 - if any(ch.isalpha() for ch in text): - alpha_lines.append((yc, text)) - else: - punct_tokens.append((yc, text)) - - for pcy, ptext in punct_tokens: - if alpha_lines: - closest = min( - range(len(alpha_lines)), - key=lambda k: abs(alpha_lines[k][0] - pcy) - ) - yc_a, text_a = alpha_lines[closest] - alpha_lines[closest] = (yc_a, text_a + ptext) - - text_lines = [t for _, t in alpha_lines] or raw_texts - - bubble_dict[gid] = text_lines - ocr_quads[gid] = quads - bbox_dict[gid] = quads_bbox(quads, image_shape, - padding_px=bbox_padding) - - b = bbox_dict[gid] - print(f" Group #{gid}: {len(quads)} quad(s) " - f"bbox=({int(b[0])},{int(b[1])})→" - f"({int(b[2])},{int(b[3])}) " - f"w={int(b[2]-b[0])} h={int(b[3]-b[1])} " - f"text={text_lines}") - - return bubble_dict, bbox_dict, ocr_quads + return bubble_dict, bbox_dict, ocr_quads, bubble_indices # ───────────────────────────────────────────── -# HYPHEN REMOVAL +# ORPHAN ABSORPTION (pass 2) # ───────────────────────────────────────────── -def fix_hyphens(lines): - if not lines: - return "" - merged = lines[0] - for line in lines[1:]: - line = line.strip() - merged = (merged[:-1] + line if merged.endswith("-") - else merged + " " + line) - return re.sub(r" {2,}", " ", merged).strip().upper() +def absorb_orphan_tokens_into_bubbles( + ocr_results, + bubble_dict, + bbox_dict, + ocr_quads, + bubble_indices, + image_shape, + bbox_padding=2, + gap_factor=1.9, + max_center_dist_factor=3.2, +): + n = len(ocr_results) + token_bboxes = [quad_bbox(r[0]) for r in ocr_results] + token_centers = [bbox_center(b) for b in token_bboxes] + token_heights = [bbox_h(b) for b in token_bboxes] + median_h = float(np.median(token_heights)) if token_heights else 12.0 + used = set() + for bid, idxs in bubble_indices.items(): + for i in idxs: + used.add(i) -# ───────────────────────────────────────────── -# CROP-BASED OCR RE-READ -# ───────────────────────────────────────────── -def reread_cluster_crop(image, bbox, reader, - padding_px=20, upscale_factor=2.5): - img_h, img_w = image.shape[:2] - x1, y1, x2, y2 = bbox - x1 = max(0, int(x1) - padding_px) - y1 = max(0, int(y1) - padding_px) - x2 = min(img_w, int(x2) + padding_px) - y2 = min(img_h, int(y2) + padding_px) + orphan_indices = [i for i in range(n) if i not in used] - crop = image[y1:y2, x1:x2] - if crop.size == 0: - return None + for i in orphan_indices: + tb = token_bboxes[i] + tc = token_centers[i] - new_w = int(crop.shape[1] * upscale_factor) - new_h = int(crop.shape[0] * upscale_factor) - upscaled = cv2.resize(crop, (new_w, new_h), - interpolation=cv2.INTER_CUBIC) - kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]]) - sharpened = cv2.filter2D(upscaled, -1, kernel) + best_bid = None + best_score = 1e18 - temp_path = "_temp_crop_ocr.png" - cv2.imwrite(temp_path, sharpened) - try: - crop_results = reader.readtext(temp_path, paragraph=False) - finally: - if os.path.exists(temp_path): - os.remove(temp_path) + for bid, bb in bbox_dict.items(): + bc = bbox_center(bb) + dist = distance_pt(tc, bc) + bh = bbox_h(bb) - if not crop_results: - return None + max_dist = max(60.0, median_h * max_center_dist_factor + bh * 0.15) + if dist > max_dist: + continue - crop_results.sort(key=lambda r: r[0][0][1]) - lines = [t.strip().upper() for _, t, _ in crop_results - if t.strip()] - return fix_hyphens(lines) if lines else None + near = bboxes_overlap_or_touch(tb, bb, gap_px=int(median_h * gap_factor)) + y_ok = abs(tc[1] - bc[1]) <= max(bh * 0.65, median_h * 4.0) + if near or y_ok: + score = dist - (25.0 if near else 0.0) + if score < best_score: + best_score = score + best_bid = bid -# ───────────────────────────────────────────── -# AUTO GAP -# ───────────────────────────────────────────── -def compute_auto_gap(image_path, base_gap=18, - reference_width=750): - image = cv2.imread(image_path) - if image is None: - return base_gap - img_w = image.shape[1] - scaled = base_gap * (img_w / reference_width) - print(f" ℹ️ Image width: {img_w}px → auto gap: {scaled:.1f}px") - return scaled + if best_bid is not None: + bubble_indices.setdefault(best_bid, []) + bubble_indices[best_bid].append(i) + + # rebuild bubbles after absorption + new_bubble_dict = {} + new_ocr_quads = {} + new_bbox_dict = {} + new_bubble_indices = {} + + for bid in sorted(bubble_dict.keys()): + idxs = sorted(set(bubble_indices.get(bid, [])), key=lambda k: token_bboxes[k][1]) + if not idxs: + idxs = [] + + lines = rebuild_bubble_lines_from_indices(idxs, ocr_results) if idxs else bubble_dict.get(bid, []) + quads = [ocr_results[k][0] for k in idxs] if idxs else ocr_quads.get(bid, []) + + if quads: + bb = quads_bbox(quads, image_shape, padding_px=bbox_padding) + else: + bb = bbox_dict[bid] + + new_bubble_dict[bid] = lines + new_ocr_quads[bid] = quads + new_bbox_dict[bid] = bb + new_bubble_indices[bid] = idxs + + return new_bubble_dict, new_bbox_dict, new_ocr_quads, new_bubble_indices # ───────────────────────────────────────────── @@ -306,57 +454,162 @@ def compute_auto_gap(image_path, base_gap=18, def ocr_quality_score(text): if not text or len(text) < 2: return 0.0 - alpha_ratio = sum(1 for c in text if c.isalpha()) / len(text) - garbage = [r",,", r"\.\.-", r"[^\w\s\'\!\?\.,-]{2,}"] - penalty = sum(0.2 for p in garbage - if re.search(p, text)) - return max(0.0, min(1.0, alpha_ratio - penalty)) + alpha_ratio = sum(1 for c in text if c.isalpha()) / max(1, len(text)) + penalty = 0.0 + for p in [r",,", r"\.\.-", r"[^\w\s\'\!\?\.,\-]{2,}"]: + if re.search(p, text): + penalty += 0.2 + bonus = 0.05 if re.search(r"[.!?]$", text) else 0.0 + return max(0.0, min(1.0, alpha_ratio - penalty + bonus)) # ───────────────────────────────────────────── -# BUBBLE JSON EXPORT -# bbox_expand_ratio: grow bbox by this fraction -# of its own size in each direction to better -# approximate the full speech bubble boundary. +# OCR VARIANTS # ───────────────────────────────────────────── -def export_bubble_boxes(bbox_dict, ocr_quads_dict, - filepath="bubbles.json", - bbox_expand_ratio=0.35, - image_shape=None): +def preprocess_variant(crop_bgr, mode): + gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY) + if mode == "raw": + return gray + if mode == "clahe": + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + return clahe.apply(gray) + if mode == "adaptive": + den = cv2.GaussianBlur(gray, (3, 3), 0) + return cv2.adaptiveThreshold(den, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 11) + return gray + +def run_ocr_on_img_array(reader, img_arr): + temp_path = "_temp_crop_ocr.png" + cv2.imwrite(temp_path, img_arr) + try: + return reader.readtext(temp_path, paragraph=False) + finally: + if os.path.exists(temp_path): + os.remove(temp_path) + +def reread_cluster_crop(image, bbox, reader, source_lang="en", padding_px=20, upscale_factor=2.5): + img_h, img_w = image.shape[:2] + x1, y1, x2, y2 = bbox + x1 = max(0, int(x1) - padding_px) + y1 = max(0, int(y1) - padding_px) + x2 = min(img_w, int(x2) + padding_px) + y2 = min(img_h, int(y2) + padding_px) + + crop = image[y1:y2, x1:x2] + if crop.size == 0: + return None + + new_w = int(crop.shape[1] * upscale_factor) + new_h = int(crop.shape[0] * upscale_factor) + upscaled = cv2.resize(crop, (new_w, new_h), interpolation=cv2.INTER_CUBIC) + + candidates = [] + for mode in ("raw", "clahe", "adaptive"): + proc = preprocess_variant(upscaled, mode) + res = run_ocr_on_img_array(reader, proc) + if not res: + continue + res.sort(key=lambda r: (r[0][0][1], r[0][0][0])) + lines = [normalize_ocr_text(t) for _, t, _ in res if t.strip()] + merged = fix_hyphens(lines) if lines else "" + if source_lang == "it": + merged = italian_post_ocr_cleanup(merged) + score = ocr_quality_score(merged) + candidates.append((score, mode, merged)) + + if not candidates: + return None + candidates.sort(key=lambda x: x[0], reverse=True) + return candidates[0][2] if candidates[0][2] else None + + +# ───────────────────────────────────────────── +# AUTO GAP +# ───────────────────────────────────────────── +def compute_auto_gap(image_path, base_gap=18, reference_width=750): + image = cv2.imread(image_path) + if image is None: + return base_gap + img_w = image.shape[1] + return base_gap * (img_w / reference_width) + + +# ───────────────────────────────────────────── +# READING ORDER +# ───────────────────────────────────────────── +def estimate_reading_order(bbox_dict, mode="ltr"): + items = [] + for bid, (x1, y1, x2, y2) in bbox_dict.items(): + cx = (x1 + x2) / 2.0 + cy = (y1 + y2) / 2.0 + items.append((bid, x1, y1, x2, y2, cx, cy)) + + items.sort(key=lambda t: t[6]) + rows = [] + row_tol = 90 + for it in items: + placed = False + for row in rows: + if abs(it[6] - row["cy"]) <= row_tol: + row["items"].append(it) + row["cy"] = np.mean([x[6] for x in row["items"]]) + placed = True + break + if not placed: + rows.append({"cy": it[6], "items": [it]}) + + rows.sort(key=lambda r: r["cy"]) + order = [] + for r in rows: + if mode == "rtl": + r["items"].sort(key=lambda t: t[5], reverse=True) + else: + r["items"].sort(key=lambda t: t[5]) + order.extend([it[0] for it in r["items"]]) + + return {bid: idx + 1 for idx, bid in enumerate(order)} + + +# ───────────────────────────────────────────── +# EXPORTERS +# ───────────────────────────────────────────── +def export_bubble_boxes( + bbox_dict, + ocr_quads_dict, + reading_order_map, + filepath="bubbles.json", + bbox_expand_ratio=0.16, + image_shape=None, +): export = {} for bubble_id, (x1, y1, x2, y2) in bbox_dict.items(): quads = ocr_quads_dict.get(bubble_id, []) - # ── Expand bbox to approximate full bubble ──────────────── w_orig = x2 - x1 h_orig = y2 - y1 - pad_x = int(w_orig * bbox_expand_ratio) - pad_y = int(h_orig * bbox_expand_ratio) + pad_x = int(w_orig * bbox_expand_ratio) + pad_y = int(h_orig * bbox_expand_ratio) - # Clamp to image bounds if image_shape provided if image_shape is not None: img_h, img_w = image_shape[:2] - ex1 = max(0, x1 - pad_x) - ey1 = max(0, y1 - pad_y) + ex1 = max(0, x1 - pad_x) + ey1 = max(0, y1 - pad_y) ex2 = min(img_w, x2 + pad_x) ey2 = min(img_h, y2 + pad_y) else: - ex1 = x1 - pad_x - ey1 = y1 - pad_y - ex2 = x2 + pad_x - ey2 = y2 + pad_y + ex1, ey1, ex2, ey2 = x1 - pad_x, y1 - pad_y, x2 + pad_x, y2 + pad_y export[str(bubble_id)] = { - "x" : int(ex1), - "y" : int(ey1), - "w" : int(ex2 - ex1), - "h" : int(ey2 - ey1), - # Original tight bbox kept for reference - "x_tight" : int(x1), - "y_tight" : int(y1), - "w_tight" : int(w_orig), - "h_tight" : int(h_orig), - "quad_bboxes" : [ + "x": int(ex1), + "y": int(ey1), + "w": int(ex2 - ex1), + "h": int(ey2 - ey1), + "x_tight": int(x1), + "y_tight": int(y1), + "w_tight": int(w_orig), + "h_tight": int(h_orig), + "reading_order": int(reading_order_map.get(bubble_id, bubble_id)), + "quad_bboxes": [ { "x": int(quad_bbox(q)[0]), "y": int(quad_bbox(q)[1]), @@ -365,73 +618,42 @@ def export_bubble_boxes(bbox_dict, ocr_quads_dict, } for q in quads ], - "quads": [[[int(pt[0]), int(pt[1])] for pt in quad] - for quad in quads], + "quads": [[[int(pt[0]), int(pt[1])] for pt in quad] for quad in quads], } with open(filepath, "w", encoding="utf-8") as f: json.dump(export, f, indent=2, ensure_ascii=False) - print(f"\n📦 Bubble boxes saved → {filepath}") - for bid, v in export.items(): - print(f" #{bid}: expanded=({v['x']},{v['y']}) " - f"{v['w']}×{v['h']}px " - f"tight={v['w_tight']}×{v['h_tight']}px " - f"[{len(v['quads'])} quad(s)]") - -# ───────────────────────────────────────────── -# OUTPUT.TXT WRITER -# Uses a pipe | as unambiguous delimiter -# Format: #ID|ORIGINAL|TRANSLATED -# ───────────────────────────────────────────── def write_output(output_lines, filepath): with open(filepath, "w", encoding="utf-8") as f: f.write("\n".join(output_lines)) - print(f"📄 Translations saved → {filepath}") # ───────────────────────────────────────────── # DEBUG IMAGE # ───────────────────────────────────────────── -def save_debug_clusters(image_path, ocr_results, - bubble_dict, bbox_dict): +def save_debug_clusters(image_path, ocr_results, bubble_dict, bbox_dict): image = cv2.imread(image_path) if image is None: return np.random.seed(42) num_bubbles = max(bubble_dict.keys(), default=1) - colors = [ - tuple(int(c) for c in col) - for col in np.random.randint( - 50, 230, size=(num_bubbles + 2, 3)) - ] - - text_to_bubble = {} - for bubble_id, lines in bubble_dict.items(): - for line in lines: - text_to_bubble[line] = bubble_id + colors = [tuple(int(c) for c in col) for col in np.random.randint(50, 230, size=(num_bubbles + 2, 3))] + # draw all OCR quads lightly for bbox, text, _ in ocr_results: - bubble_id = text_to_bubble.get(text, 0) - color = colors[(bubble_id - 1) % len(colors)] - pts = np.array(bbox, dtype=np.int32) - cv2.polylines(image, [pts], isClosed=True, - color=color, thickness=1) + pts = np.array(bbox, dtype=np.int32) + cv2.polylines(image, [pts], isClosed=True, color=(180, 180, 180), thickness=1) + # draw bubble bboxes for bubble_id, (x1, y1, x2, y2) in bbox_dict.items(): color = colors[(bubble_id - 1) % len(colors)] - cv2.rectangle(image, - (int(x1), int(y1)), - (int(x2), int(y2)), - color, 2) - cv2.putText(image, f"BOX#{bubble_id}", - (int(x1) + 2, int(y1) + 16), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, color, 2) + cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2) + cv2.putText(image, f"BOX#{bubble_id}", (int(x1) + 2, int(y1) + 16), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) cv2.imwrite("debug_clusters.png", image) - print(" 🐛 debug_clusters.png saved") # ───────────────────────────────────────────── @@ -441,169 +663,203 @@ def translate_manga_text( image_path, source_lang="en", target_lang="ca", - confidence_threshold=0.10, + confidence_threshold=0.12, export_to_file=None, export_bubbles_to="bubbles.json", min_text_length=2, gap_px="auto", filter_sound_effects=True, - quality_threshold=0.5, + quality_threshold=0.62, upscale_factor=2.5, - bbox_padding=10, + bbox_padding=3, debug=False, + reading_mode="ltr", ): - # ── 1. Resolve gap ──────────────────────────────────────────── + # gap resolve if gap_px == "auto": resolved_gap = compute_auto_gap(image_path) else: resolved_gap = float(gap_px) - # ── 2. Load full image ──────────────────────────────────────── full_image = cv2.imread(image_path) if full_image is None: print(f"❌ Could not load image: {image_path}") return - # ── 3. Initialize OCR ───────────────────────────────────────── + # OCR init print("\nLoading OCR model...") - ocr_lang_list = ["en", "es"] if source_lang == "ca" \ - else [source_lang] + ocr_lang_list = ["en", "es"] if source_lang == "ca" else [source_lang] reader = easyocr.Reader(ocr_lang_list) - # ── 4. Initialize translator ────────────────────────────────── - translator = GoogleTranslator(source=source_lang, - target=target_lang) + # Translator init + translator = GoogleTranslator(source=source_lang, target=target_lang) - # ── 5. Run OCR ──────────────────────────────────────────────── + # OCR full image print(f"\nRunning OCR on: {image_path}") results = reader.readtext(image_path, paragraph=False) print(f" Raw detections: {len(results)}") - # ── 6. Filter tokens ────────────────────────────────────────── + # Filter tokens filtered = [] - skipped = 0 - + skipped = 0 for bbox, text, confidence in results: - cleaned = text.strip().upper() - keep, category = should_keep_token( - cleaned, confidence, - confidence_threshold, min_text_length, - filter_sound_effects - ) + cleaned = normalize_ocr_text(text) + keep, _ = should_keep_token(cleaned, confidence, confidence_threshold, min_text_length, filter_sound_effects) if keep: filtered.append((bbox, cleaned, confidence)) - if category == "punct": - print(f" ✔ Punct kept: '{cleaned}'") else: - tag = ("🔇 SFX" if is_sound_effect(cleaned) else - "🏷 Title" if is_title_text(cleaned) else - "🗑 Garbage" if is_garbage(cleaned) else - "✂️ Low-conf") - print(f" {tag} skipped: '{cleaned}'") skipped += 1 print(f" ✅ {len(filtered)} kept, {skipped} skipped.\n") - if not filtered: print("⚠️ No text detected after filtering.") return - # ── 7. Group by overlap ─────────────────────────────────────── - print(f"Grouping by overlap " - f"(gap_px={resolved_gap:.1f}, " - f"bbox_padding={bbox_padding}px)...") - - bubble_dict, bbox_dict, ocr_quads = group_quads_by_overlap( + # Pass 1 grouping + bubble_dict, bbox_dict, ocr_quads, bubble_indices = group_quads_by_overlap( filtered, - image_shape = full_image.shape, - gap_px = resolved_gap, - bbox_padding = bbox_padding, + image_shape=full_image.shape, + gap_px=resolved_gap, + bbox_padding=bbox_padding, ) - print(f" ✅ {len(bubble_dict)} bubble(s) detected.\n") - # ── 8. Debug ────────────────────────────────────────────────── + # Pass 2 orphan absorption + bubble_dict, bbox_dict, ocr_quads, bubble_indices = absorb_orphan_tokens_into_bubbles( + ocr_results=filtered, + bubble_dict=bubble_dict, + bbox_dict=bbox_dict, + ocr_quads=ocr_quads, + bubble_indices=bubble_indices, + image_shape=full_image.shape, + bbox_padding=bbox_padding, + ) + + print(f" ✅ {len(bubble_dict)} bubble(s) detected after absorption.\n") + if debug: - save_debug_clusters(image_path, filtered, - bubble_dict, bbox_dict) + save_debug_clusters(image_path, filtered, bubble_dict, bbox_dict) - # ── 9. Fix hyphens ──────────────────────────────────────────── - clean_bubbles = { - i: fix_hyphens(lines) - for i, lines in bubble_dict.items() - if lines - } + # merge lines + clean_bubbles = {i: fix_hyphens(lines) for i, lines in bubble_dict.items() if lines} - # ── 10. Quality check + crop re-read ────────────────────────── + # OCR quality + reread print("Checking OCR quality per bubble...") for i, text in clean_bubbles.items(): - score = ocr_quality_score(text) + if source_lang == "it": + text = italian_post_ocr_cleanup(text) + clean_bubbles[i] = text + + score = ocr_quality_score(text) status = "✅" if score >= quality_threshold else "🔁" - print(f" #{i}: score={score:.2f} {status} " - f"'{text[:55]}'") + print(f" #{i}: score={score:.2f} {status} '{text[:65]}'") if score < quality_threshold: - print(f" → Re-reading #{i} from crop...") reread = reread_cluster_crop( - full_image, bbox_dict[i], reader, + full_image, + bbox_dict[i], + reader, + source_lang=source_lang, upscale_factor=upscale_factor, ) if reread: - print(f" → '{reread}'") clean_bubbles[i] = reread - else: - print(f" → Nothing found, keeping original.") - # ── 11. Translate ───────────────────────────────────────────── - # Output format (pipe-delimited, unambiguous): - # #ID|ORIGINAL TEXT|TRANSLATED TEXT + # Reading order + glossary prepass + reading_order_map = estimate_reading_order(bbox_dict, mode=reading_mode) + for i in list(clean_bubbles.keys()): + clean_bubbles[i] = apply_glossary(clean_bubbles[i], GLOSSARY) + + # Translate + header = "BUBBLE|ORDER|ORIGINAL|TRANSLATED|FLAGS" + divider = "─" * 120 + output_lines = [header, divider] + print() - header = "BUBBLE|ORIGINAL|TRANSLATED" - divider = "─" * 80 - output_lines = [header, divider] - translations = {} - translated_count = 0 - - print(f"{'BUBBLE':<8} {'ORIGINAL':<45} {'TRANSLATED'}") + print(f"{'BUBBLE':<8} {'ORDER':<6} {'ORIGINAL':<50} {'TRANSLATED':<50} FLAGS") print(divider) - for i in sorted(clean_bubbles.keys()): - bubble_text = clean_bubbles[i].strip() - if not bubble_text: + ordered_ids = sorted(clean_bubbles.keys(), key=lambda b: reading_order_map.get(b, b)) + translated_count = 0 + + for i in ordered_ids: + src = clean_bubbles[i].strip() + if not src: continue - try: - result = translator.translate(bubble_text) - except Exception as e: - result = f"[Translation error: {e}]" - if result is None: - result = "[No translation returned]" - result = result.upper() - translations[i] = result + flags = [] + forced_translation = None + + # phrase-map pass + if source_lang == "it" and target_lang == "ca": + exact = PHRASE_MAP_IT_CA.get(src) + if exact: + forced_translation = exact + flags.append("PHRASE_EXACT") + else: + fuzzy, ratio, _ = fuzzy_phrase_match(src, PHRASE_MAP_IT_CA, min_ratio=0.88) + if fuzzy: + forced_translation = fuzzy + flags.append(f"PHRASE_FUZZY:{ratio:.2f}") + + if forced_translation is not None: + tgt = forced_translation + else: + try: + tgt = translator.translate(src) + except Exception as e: + tgt = f"[Translation error: {e}]" + + if tgt is None: + tgt = "[No translation returned]" + + tgt = normalize_ocr_text(tgt) + tgt = apply_glossary(tgt, GLOSSARY) + + # suspicious retry + if looks_suspicious_translation(src, tgt): + flags.append("SUSPICIOUS_RETRY") + retry_src = italian_post_ocr_cleanup(src) if source_lang == "it" else src + try: + retry_tgt = translator.translate(retry_src) + if retry_tgt: + retry_tgt = normalize_ocr_text(retry_tgt) + retry_tgt = apply_glossary(retry_tgt, GLOSSARY) + if not looks_suspicious_translation(src, retry_tgt): + tgt = retry_tgt + flags.append("RETRY_OK") + else: + if source_lang == "it" and target_lang == "ca": + fallback, ratio, _ = fuzzy_phrase_match(src, PHRASE_MAP_IT_CA, min_ratio=0.80) + if fallback: + tgt = fallback + flags.append(f"FALLBACK_MAP:{ratio:.2f}") + except Exception: + pass + + tgt = tgt.upper() translated_count += 1 + ro = reading_order_map.get(i, i) - # Pipe-delimited line — safe regardless of text content - output_lines.append(f"#{i}|{bubble_text}|{result}") - print(f"#{i:<7} {bubble_text:<45} {result}") + output_lines.append(f"#{i}|{ro}|{src}|{tgt}|{','.join(flags) if flags else '-'}") + print(f"#{i:<7} {ro:<6} {src:<50} {tgt:<50} {','.join(flags) if flags else '-'}") output_lines.append(divider) - summary = (f"✅ Done! {translated_count} bubble(s) " - f"translated, {skipped} detection(s) skipped.") + summary = f"✅ Done! {translated_count} bubble(s) translated, {skipped} detection(s) skipped." output_lines.append(summary) print(divider) print(summary) - # ── 12. Export translations ─────────────────────────────────── if export_to_file: write_output(output_lines, export_to_file) - # ── 13. Export bubble boxes ─────────────────────────────────── if export_bubbles_to: export_bubble_boxes( bbox_dict, ocr_quads, - filepath = export_bubbles_to, - bbox_expand_ratio = 0.1, # ← tune this - image_shape = full_image.shape, + reading_order_map=reading_order_map, + filepath=export_bubbles_to, + bbox_expand_ratio=0.16, + image_shape=full_image.shape, ) @@ -612,17 +868,18 @@ def translate_manga_text( # ───────────────────────────────────────────── if __name__ == "__main__": translate_manga_text( - image_path = "002-page.jpg", - source_lang = "en", - target_lang = "ca", - confidence_threshold = 0.10, - min_text_length = 2, - export_to_file = "output.txt", - export_bubbles_to = "bubbles.json", - gap_px = "auto", - filter_sound_effects = True, - quality_threshold = 0.5, - upscale_factor = 2.5, - bbox_padding = 1, - debug = True, + image_path="001-page.png", + source_lang="it", + target_lang="ca", + confidence_threshold=0.12, + min_text_length=2, + export_to_file="output.txt", + export_bubbles_to="bubbles.json", + gap_px="auto", + filter_sound_effects=True, + quality_threshold=0.62, + upscale_factor=2.5, + bbox_padding=3, + debug=True, + reading_mode="ltr", )