diff --git a/002-page.jpg b/002-page.jpg new file mode 100755 index 0000000..e8c059b Binary files /dev/null and b/002-page.jpg differ diff --git a/bubble-detection.jpg b/bubble-detection.jpg new file mode 100755 index 0000000..0c9637c Binary files /dev/null and b/bubble-detection.jpg differ diff --git a/bubbles.json b/bubbles.json index c1008d1..e6adfd7 100644 --- a/bubbles.json +++ b/bubbles.json @@ -1,408 +1,1488 @@ { "1": { - "x": 199, - "y": 132, - "w": 163, - "h": 92, + "x": 57, + "y": 106, + "w": 135, + "h": 115, + "x_tight": 68, + "y_tight": 115, + "w_tight": 113, + "h_tight": 97, + "quad_bboxes": [ + { + "x": 100, + "y": 116, + "w": 50, + "h": 24 + }, + { + "x": 80, + "y": 138, + "w": 88, + "h": 24 + }, + { + "x": 92, + "y": 160, + "w": 66, + "h": 26 + }, + { + "x": 69, + "y": 179, + "w": 111, + "h": 32 + } + ], "quads": [ [ [ - 204, - 172 + 100, + 116 ], [ - 348, - 137 + 150, + 116 ], [ - 358, - 185 + 150, + 140 ], [ - 215, - 220 + 100, + 140 + ] + ], + [ + [ + 80, + 138 + ], + [ + 168, + 138 + ], + [ + 168, + 162 + ], + [ + 80, + 162 + ] + ], + [ + [ + 92, + 160 + ], + [ + 158, + 160 + ], + [ + 158, + 186 + ], + [ + 92, + 186 + ] + ], + [ + [ + 69, + 179 + ], + [ + 180, + 179 + ], + [ + 180, + 211 + ], + [ + 69, + 211 ] ] ] }, "2": { - "x": 1162, - "y": 235, - "w": 142, - "h": 140, + "x": 164, + "y": 0, + "w": 210, + "h": 127, + "x_tight": 181, + "y_tight": 9, + "w_tight": 176, + "h_tight": 108, + "quad_bboxes": [ + { + "x": 182, + "y": 10, + "w": 174, + "h": 56 + }, + { + "x": 233, + "y": 66, + "w": 56, + "h": 26 + }, + { + "x": 214, + "y": 92, + "w": 54, + "h": 24 + } + ], "quads": [ [ [ - 1214, - 240 + 182, + 10 ], [ - 1252, - 240 + 356, + 10 ], [ - 1252, - 272 + 356, + 66 ], [ - 1214, - 272 + 182, + 66 ] ], [ [ - 1167, - 271 + 233, + 66 ], [ - 1299, - 271 + 289, + 66 ], [ - 1299, - 307 + 289, + 92 ], [ - 1167, - 307 + 233, + 92 ] ], [ [ - 1175, - 303 + 214, + 92 ], [ - 1289, - 303 + 268, + 92 ], [ - 1289, - 339 + 268, + 116 ], [ - 1175, - 339 - ] - ], - [ - [ - 1206, - 340 - ], - [ - 1260, - 340 - ], - [ - 1260, - 370 - ], - [ - 1206, - 370 + 214, + 116 ] ] ] }, "3": { - "x": 925, - "y": 373, - "w": 146, - "h": 138, + "x": 540, + "y": 90, + "w": 116, + "h": 112, + "x_tight": 549, + "y_tight": 99, + "w_tight": 98, + "h_tight": 94, + "quad_bboxes": [ + { + "x": 558, + "y": 100, + "w": 88, + "h": 24 + }, + { + "x": 550, + "y": 122, + "w": 70, + "h": 24 + }, + { + "x": 558, + "y": 144, + "w": 88, + "h": 26 + }, + { + "x": 550, + "y": 168, + "w": 70, + "h": 24 + } + ], "quads": [ [ [ - 930, - 378 + 558, + 100 ], [ - 1062, - 378 + 646, + 100 ], [ - 1062, - 410 + 646, + 124 ], [ - 930, - 410 + 558, + 124 ] ], [ [ - 930, - 410 + 550, + 122 ], [ - 1066, - 410 + 620, + 122 ], [ - 1066, - 442 + 620, + 146 ], [ - 930, - 442 + 550, + 146 ] ], [ [ - 954, - 439 + 558, + 144 ], [ - 1041, - 439 + 646, + 144 ], [ - 1041, - 475 + 646, + 170 ], [ - 954, - 475 + 558, + 170 ] ], [ [ - 946, - 474 + 550, + 168 ], [ - 1050, - 474 + 620, + 168 ], [ - 1050, - 506 + 620, + 192 ], [ - 946, - 506 + 550, + 192 ] ] ] }, "4": { - "x": 215, - "y": 481, - "w": 160, - "h": 210, + "x": 251, + "y": 377, + "w": 134, + "h": 138, + "x_tight": 262, + "y_tight": 388, + "w_tight": 112, + "h_tight": 116, + "quad_bboxes": [ + { + "x": 274, + "y": 389, + "w": 86, + "h": 20 + }, + { + "x": 263, + "y": 407, + "w": 110, + "h": 20 + }, + { + "x": 279, + "y": 445, + "w": 78, + "h": 20 + }, + { + "x": 267, + "y": 465, + "w": 102, + "h": 20 + }, + { + "x": 279, + "y": 483, + "w": 76, + "h": 20 + } + ], "quads": [ [ [ - 278, - 486 + 274, + 389 ], [ - 312, - 486 + 360, + 389 ], [ - 312, - 516 + 360, + 409 ], [ - 278, - 516 + 274, + 409 ] ], [ [ - 236, - 514 + 263, + 407 ], [ - 356, - 514 + 373, + 407 ], [ - 356, - 544 + 373, + 427 ], [ - 236, - 544 + 263, + 427 ] ], [ [ - 236, - 542 + 279, + 445 ], [ - 358, - 542 + 357, + 445 ], [ - 358, - 572 + 357, + 465 ], [ - 236, - 572 + 279, + 465 ] ], [ [ - 220, - 572 + 267, + 465 ], [ - 370, - 572 + 369, + 465 ], [ - 370, - 600 + 369, + 485 ], [ - 220, - 600 + 267, + 485 ] ], [ [ - 240, - 598 + 279, + 483 ], [ - 350, - 598 + 355, + 483 ], [ - 350, - 630 + 355, + 503 ], [ - 240, - 630 - ] - ], - [ - [ - 246, - 628 - ], - [ - 346, - 628 - ], - [ - 346, - 658 - ], - [ - 246, - 658 - ] - ], - [ - [ - 250, - 656 - ], - [ - 340, - 656 - ], - [ - 340, - 686 - ], - [ - 250, - 686 + 279, + 503 ] ] ] }, "5": { - "x": 349, - "y": 1127, - "w": 102, - "h": 108, + "x": 522, + "y": 468, + "w": 150, + "h": 98, + "x_tight": 534, + "y_tight": 476, + "w_tight": 126, + "h_tight": 82, + "quad_bboxes": [ + { + "x": 549, + "y": 477, + "w": 95, + "h": 26 + }, + { + "x": 546, + "y": 516, + "w": 104, + "h": 24 + }, + { + "x": 535, + "y": 537, + "w": 124, + "h": 20 + } + ], "quads": [ [ [ - 384, - 1132 + 549, + 477 ], [ - 418, - 1132 + 644, + 477 ], [ - 418, - 1156 + 644, + 503 ], [ - 384, - 1156 + 549, + 503 ] ], [ [ - 354, - 1154 + 546, + 516 ], [ - 446, - 1154 + 650, + 516 ], [ - 446, - 1208 + 650, + 540 ], [ - 354, - 1208 + 546, + 540 ] ], [ [ - 366, - 1206 + 535, + 537 ], [ - 412, - 1206 + 659, + 537 ], [ - 412, - 1230 + 659, + 557 ], [ - 366, - 1230 + 535, + 557 ] ] ] }, "6": { - "x": 735, - "y": 1319, - "w": 48, - "h": 34, + "x": 44, + "y": 607, + "w": 96, + "h": 108, + "x_tight": 52, + "y_tight": 616, + "w_tight": 80, + "h_tight": 90, + "quad_bboxes": [ + { + "x": 79, + "y": 617, + "w": 48, + "h": 20 + }, + { + "x": 75, + "y": 635, + "w": 56, + "h": 20 + }, + { + "x": 53, + "y": 669, + "w": 68, + "h": 16 + }, + { + "x": 65, + "y": 687, + "w": 46, + "h": 18 + } + ], "quads": [ [ [ - 740, - 1324 + 79, + 617 ], [ - 778, - 1324 + 127, + 617 ], [ - 778, - 1348 + 127, + 637 ], [ - 740, - 1348 + 79, + 637 + ] + ], + [ + [ + 75, + 635 + ], + [ + 131, + 635 + ], + [ + 131, + 655 + ], + [ + 75, + 655 + ] + ], + [ + [ + 53, + 669 + ], + [ + 121, + 669 + ], + [ + 121, + 685 + ], + [ + 53, + 685 + ] + ], + [ + [ + 65, + 687 + ], + [ + 111, + 687 + ], + [ + 111, + 705 + ], + [ + 65, + 705 + ] + ] + ] + }, + "7": { + "x": 72, + "y": 826, + "w": 78, + "h": 48, + "x_tight": 78, + "y_tight": 830, + "w_tight": 66, + "h_tight": 40, + "quad_bboxes": [ + { + "x": 79, + "y": 831, + "w": 64, + "h": 20 + }, + { + "x": 79, + "y": 849, + "w": 62, + "h": 20 + } + ], + "quads": [ + [ + [ + 79, + 831 + ], + [ + 143, + 831 + ], + [ + 143, + 851 + ], + [ + 79, + 851 + ] + ], + [ + [ + 79, + 849 + ], + [ + 141, + 849 + ], + [ + 141, + 869 + ], + [ + 79, + 869 + ] + ] + ] + }, + "8": { + "x": 192, + "y": 796, + "w": 102, + "h": 126, + "x_tight": 200, + "y_tight": 806, + "w_tight": 86, + "h_tight": 106, + "quad_bboxes": [ + { + "x": 205, + "y": 807, + "w": 76, + "h": 18 + }, + { + "x": 211, + "y": 843, + "w": 64, + "h": 16 + }, + { + "x": 209, + "y": 857, + "w": 68, + "h": 20 + }, + { + "x": 201, + "y": 875, + "w": 84, + "h": 18 + }, + { + "x": 203, + "y": 893, + "w": 70, + "h": 18 + } + ], + "quads": [ + [ + [ + 205, + 807 + ], + [ + 281, + 807 + ], + [ + 281, + 825 + ], + [ + 205, + 825 + ] + ], + [ + [ + 211, + 843 + ], + [ + 275, + 843 + ], + [ + 275, + 859 + ], + [ + 211, + 859 + ] + ], + [ + [ + 209, + 857 + ], + [ + 277, + 857 + ], + [ + 277, + 877 + ], + [ + 209, + 877 + ] + ], + [ + [ + 201, + 875 + ], + [ + 285, + 875 + ], + [ + 285, + 893 + ], + [ + 201, + 893 + ] + ], + [ + [ + 203, + 893 + ], + [ + 273, + 893 + ], + [ + 273, + 911 + ], + [ + 203, + 911 + ] + ] + ] + }, + "9": { + "x": 394, + "y": 817, + "w": 122, + "h": 88, + "x_tight": 404, + "y_tight": 824, + "w_tight": 102, + "h_tight": 74, + "quad_bboxes": [ + { + "x": 405, + "y": 825, + "w": 100, + "h": 20 + }, + { + "x": 419, + "y": 843, + "w": 74, + "h": 20 + }, + { + "x": 417, + "y": 863, + "w": 78, + "h": 16 + }, + { + "x": 409, + "y": 877, + "w": 94, + "h": 20 + } + ], + "quads": [ + [ + [ + 405, + 825 + ], + [ + 505, + 825 + ], + [ + 505, + 845 + ], + [ + 405, + 845 + ] + ], + [ + [ + 419, + 843 + ], + [ + 493, + 843 + ], + [ + 493, + 863 + ], + [ + 419, + 863 + ] + ], + [ + [ + 417, + 863 + ], + [ + 495, + 863 + ], + [ + 495, + 879 + ], + [ + 417, + 879 + ] + ], + [ + [ + 409, + 877 + ], + [ + 503, + 877 + ], + [ + 503, + 897 + ], + [ + 409, + 897 + ] + ] + ] + }, + "10": { + "x": 537, + "y": 775, + "w": 156, + "h": 206, + "x_tight": 550, + "y_tight": 792, + "w_tight": 130, + "h_tight": 172, + "quad_bboxes": [ + { + "x": 565, + "y": 793, + "w": 100, + "h": 18 + }, + { + "x": 569, + "y": 809, + "w": 92, + "h": 21 + }, + { + "x": 551, + "y": 827, + "w": 128, + "h": 20 + }, + { + "x": 613, + "y": 847, + "w": 48, + "h": 16 + }, + { + "x": 571, + "y": 861, + "w": 88, + "h": 18 + }, + { + "x": 561, + "y": 877, + "w": 106, + "h": 20 + }, + { + "x": 559, + "y": 911, + "w": 110, + "h": 20 + }, + { + "x": 577, + "y": 931, + "w": 74, + "h": 16 + }, + { + "x": 591, + "y": 947, + "w": 48, + "h": 16 + } + ], + "quads": [ + [ + [ + 565, + 793 + ], + [ + 665, + 793 + ], + [ + 665, + 811 + ], + [ + 565, + 811 + ] + ], + [ + [ + 569, + 809 + ], + [ + 661, + 809 + ], + [ + 661, + 830 + ], + [ + 569, + 830 + ] + ], + [ + [ + 551, + 827 + ], + [ + 679, + 827 + ], + [ + 679, + 847 + ], + [ + 551, + 847 + ] + ], + [ + [ + 613, + 847 + ], + [ + 661, + 847 + ], + [ + 661, + 863 + ], + [ + 613, + 863 + ] + ], + [ + [ + 571, + 861 + ], + [ + 659, + 861 + ], + [ + 659, + 879 + ], + [ + 571, + 879 + ] + ], + [ + [ + 561, + 877 + ], + [ + 667, + 877 + ], + [ + 667, + 897 + ], + [ + 561, + 897 + ] + ], + [ + [ + 559, + 911 + ], + [ + 669, + 911 + ], + [ + 669, + 931 + ], + [ + 559, + 931 + ] + ], + [ + [ + 577, + 931 + ], + [ + 651, + 931 + ], + [ + 651, + 947 + ], + [ + 577, + 947 + ] + ], + [ + [ + 591, + 947 + ], + [ + 639, + 947 + ], + [ + 639, + 963 + ], + [ + 591, + 963 + ] + ] + ] + }, + "11": { + "x": 28, + "y": 939, + "w": 144, + "h": 186, + "x_tight": 40, + "y_tight": 954, + "w_tight": 120, + "h_tight": 156, + "quad_bboxes": [ + { + "x": 71, + "y": 955, + "w": 60, + "h": 18 + }, + { + "x": 55, + "y": 971, + "w": 92, + "h": 20 + }, + { + "x": 53, + "y": 987, + "w": 96, + "h": 20 + }, + { + "x": 41, + "y": 1005, + "w": 118, + "h": 20 + }, + { + "x": 53, + "y": 1023, + "w": 94, + "h": 18 + }, + { + "x": 41, + "y": 1039, + "w": 118, + "h": 20 + }, + { + "x": 43, + "y": 1055, + "w": 114, + "h": 20 + }, + { + "x": 57, + "y": 1089, + "w": 84, + "h": 20 + } + ], + "quads": [ + [ + [ + 71, + 955 + ], + [ + 131, + 955 + ], + [ + 131, + 973 + ], + [ + 71, + 973 + ] + ], + [ + [ + 55, + 971 + ], + [ + 147, + 971 + ], + [ + 147, + 991 + ], + [ + 55, + 991 + ] + ], + [ + [ + 53, + 987 + ], + [ + 149, + 987 + ], + [ + 149, + 1007 + ], + [ + 53, + 1007 + ] + ], + [ + [ + 41, + 1005 + ], + [ + 159, + 1005 + ], + [ + 159, + 1025 + ], + [ + 41, + 1025 + ] + ], + [ + [ + 53, + 1023 + ], + [ + 147, + 1023 + ], + [ + 147, + 1041 + ], + [ + 53, + 1041 + ] + ], + [ + [ + 41, + 1039 + ], + [ + 159, + 1039 + ], + [ + 159, + 1059 + ], + [ + 41, + 1059 + ] + ], + [ + [ + 43, + 1055 + ], + [ + 157, + 1055 + ], + [ + 157, + 1075 + ], + [ + 43, + 1075 + ] + ], + [ + [ + 57, + 1089 + ], + [ + 141, + 1089 + ], + [ + 141, + 1109 + ], + [ + 57, + 1109 + ] + ] + ] + }, + "12": { + "x": 326, + "y": 928, + "w": 80, + "h": 102, + "x_tight": 332, + "y_tight": 936, + "w_tight": 68, + "h_tight": 86, + "quad_bboxes": [ + { + "x": 363, + "y": 937, + "w": 36, + "h": 16 + }, + { + "x": 335, + "y": 951, + "w": 60, + "h": 20 + }, + { + "x": 333, + "y": 969, + "w": 66, + "h": 18 + }, + { + "x": 337, + "y": 1005, + "w": 52, + "h": 16 + } + ], + "quads": [ + [ + [ + 363, + 937 + ], + [ + 399, + 937 + ], + [ + 399, + 953 + ], + [ + 363, + 953 + ] + ], + [ + [ + 335, + 951 + ], + [ + 395, + 951 + ], + [ + 395, + 971 + ], + [ + 335, + 971 + ] + ], + [ + [ + 333, + 969 + ], + [ + 399, + 969 + ], + [ + 399, + 987 + ], + [ + 333, + 987 + ] + ], + [ + [ + 337, + 1005 + ], + [ + 389, + 1005 + ], + [ + 389, + 1021 + ], + [ + 337, + 1021 ] ] ] diff --git a/manga-renderer.py b/manga-renderer.py index 0acb369..68987f8 100644 --- a/manga-renderer.py +++ b/manga-renderer.py @@ -1,457 +1,714 @@ -import re +""" +manga-renderer.py +───────────────────────────────────────────────────────────────── +Pipeline: + 1. Detect panel boundaries using border-line detection + 2. Split wide panels that contain internal vertical borders + 3. For each bubble: + a. Detect real bubble ellipse via flood-fill + contour + b. Assign bubble to its panel (max overlap) + c. Clip + nudge ellipse to stay inside panel bounds + d. White-fill the clipped rotated ellipse + e. Fit + centre translated text inside safe area +""" + +import os +import math import json + import cv2 import numpy as np from PIL import Image, ImageDraw, ImageFont -import os # ───────────────────────────────────────────── -# CONFIG +# CONSTANTS # ───────────────────────────────────────────── -INPUT_IMAGE = "page.png" -OUTPUT_IMAGE = "page_translated.png" -TRANSLATIONS_FILE = "output.txt" -BUBBLES_FILE = "bubbles.json" -FONT_PATH = "font.ttf" -FONT_FALLBACK = "/System/Library/Fonts/Helvetica.ttc" -FONT_COLOR = (0, 0, 0) +DEFAULT_FONT_PATH = "fonts/ComicRelief-Regular.ttf" +DEFAULT_FONT_COLOR = (0, 0, 0) +WHITE = (255, 255, 255) + +MAX_FONT_SIZE = 22 +MIN_FONT_SIZE = 6 +FONT_SIZE_STEP = 1 +TEXT_RATIO = 0.82 + +FLOOD_TOLERANCE = 30 +BORDER_SHRINK_PX = 4 +MIN_PANEL_AREA_RATIO = 0.02 + +# How far the center can be nudged as a fraction +# of the semi-axis before we resort to shrinking +MAX_NUDGE_RATIO = 0.30 + +# Debug colors (BGR) +DBG_COLOR_PANEL = (200, 200, 0) +DBG_COLOR_DETECTED = (0, 200, 0) +DBG_COLOR_FILL = (0, 0, 255) +DBG_COLOR_SAFE = (255, 120, 0) +DBG_COLOR_CENTER = (255, 255, 0) +DBG_COLOR_SEED = (255, 0, 255) +DBG_COLOR_LABEL = (80, 80, 200) +DBG_THICKNESS = 2 +DBG_CENTER_R = 5 # ───────────────────────────────────────────── -# WORD-ONLY WRAP -# -# Breaks ONLY at space boundaries. -# Returns (lines, overflow) where overflow=True -# means a single word is wider than max_w at -# this font size → caller must try smaller. +# PARSERS # ───────────────────────────────────────────── -def wrap_text_words(draw, text, max_w, font): - """ - Word-wraps text to fit within max_w pixels. - Never inserts hyphens or breaks mid-word. - - Returns: - (lines, overflow) - lines : list of strings, each ≤ max_w px wide - overflow : True if any single word exceeds max_w - """ - def measure(s): - bb = draw.textbbox((0, 0), s, font=font) - return bb[2] - bb[0] - - words = text.split() - lines = [] - current = "" - overflow = False - - for word in words: - if measure(word) > max_w: - overflow = True - break - test = (current + " " + word).strip() - if measure(test) <= max_w: - current = test - else: - if current: - lines.append(current) - current = word - - if not overflow and current: - lines.append(current) - - return lines, overflow - - -# ───────────────────────────────────────────── -# PARSE output.txt -# ───────────────────────────────────────────── -def parse_translations(filepath): - """ - Parses output.txt → {bubble_id: translated_text}. - Uses header line as column ruler to find the exact - char position of the TRANSLATED column. - Immune to commas, ellipses, spaces in translated text. - """ +def parse_translations(translations_file): translations = {} - header_pos = None - - with open(filepath, "r", encoding="utf-8") as f: - lines = f.readlines() - - for raw_line in lines: - line = raw_line.rstrip("\n") - - if re.match(r"^BUBBLE\s+ORIGINAL", line): - m = re.search(r"TRANSLATED", line) - if m: - header_pos = m.start() - print(f" ℹ️ TRANSLATED column at char {header_pos}") - continue - - stripped = line.strip() - if re.match(r"^[─\-=]{3,}$", stripped): - continue - if stripped.startswith("✅") or stripped.startswith("Done"): - continue - if not re.match(r"^\s*#\d+", line): - continue - - m_id = re.match(r"^\s*#(\d+)", line) - if not m_id: - continue - bubble_id = int(m_id.group(1)) - - if header_pos is not None and len(line) > header_pos: - translated = line[header_pos:].strip() - else: - parts = re.split(r" {2,}", stripped) - translated = parts[-1].strip() if len(parts) >= 3 else "" - - if not translated or translated.startswith("["): - print(f" ⚠️ #{bubble_id}: no translation found") - continue - - translations[bubble_id] = translated - - print(f" ✅ {len(translations)} bubble(s) to translate: " - f"{sorted(translations.keys())}") - for bid, text in sorted(translations.items()): - print(f" #{bid}: {text}") + with open(translations_file, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line.startswith("#"): + continue + parts = line.split("|") + if len(parts) < 3: + continue + try: + bubble_id = int(parts[0].lstrip("#")) + translated = parts[2].strip() + if translated.startswith("["): + continue + translations[bubble_id] = translated + except ValueError: + continue return translations -# ───────────────────────────────────────────── -# LOAD bubbles.json -# ───────────────────────────────────────────── -def load_bubble_boxes(filepath): - with open(filepath, "r", encoding="utf-8") as f: +def parse_bubbles(bubbles_file): + with open(bubbles_file, "r", encoding="utf-8") as f: raw = json.load(f) - boxes = {int(k): v for k, v in raw.items()} - print(f" ✅ Loaded {len(boxes)} bubble(s)") - for bid, val in sorted(boxes.items()): - print(f" #{bid}: ({val['x']},{val['y']}) " - f"{val['w']}×{val['h']}px") - return boxes + return {int(k): v for k, v in raw.items()} # ───────────────────────────────────────────── -# SAMPLE BACKGROUND COLOR +# FONT HELPERS # ───────────────────────────────────────────── -def sample_bubble_background(cv_image, bubble_data): - x = max(0, bubble_data["x"]) - y = max(0, bubble_data["y"]) - x2 = min(cv_image.shape[1], x + bubble_data["w"]) - y2 = min(cv_image.shape[0], y + bubble_data["h"]) - - region = cv_image[y:y2, x:x2] - if region.size == 0: - return (255, 255, 255) - - gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY) - threshold = np.percentile(gray, 90) - bg_mask = gray >= threshold - if not np.any(bg_mask): - return (255, 255, 255) - - return tuple(int(c) for c in region[bg_mask].mean(axis=0)) - - -# ───────────────────────────────────────────── -# ERASE ORIGINAL TEXT -# ───────────────────────────────────────────── -def erase_bubble_text(cv_image, bubble_data, - bg_color=(255, 255, 255)): - img_h, img_w = cv_image.shape[:2] - x = max(0, bubble_data["x"]) - y = max(0, bubble_data["y"]) - x2 = min(img_w, bubble_data["x"] + bubble_data["w"]) - y2 = min(img_h, bubble_data["y"] + bubble_data["h"]) - cv_image[y:y2, x:x2] = list(bg_color) - - -# ───────────────────────────────────────────── -# LINE HEIGHT (tight) -# -# Uses actual ascender+descender of the font -# at the given size, with a minimal 1px gap. -# Much tighter than the old flat "+2" approach. -# ───────────────────────────────────────────── -def get_line_height(draw, font): - """ - Returns the line height in pixels for the given font. - Measured from actual glyph bounds of "Ay" (covers - ascenders and descenders) plus 1px breathing room. - """ - bb = draw.textbbox((0, 0), "Ay", font=font) - return (bb[3] - bb[1]) + 1 - - -# ───────────────────────────────────────────── -# FIT FONT SIZE (dynamic ceiling, word-wrap) -# -# max_size is derived from the box itself: -# min(MAX_FONT_CAP, inner_h) -# so a tall box can use a large font and a -# small box won't waste iterations on huge sizes. -# -# Rejects a size if: -# • any single word is wider than inner_w, OR -# • total wrapped height exceeds inner_h -# ───────────────────────────────────────────── -MAX_FONT_CAP = 120 # absolute ceiling across all boxes - -def fit_font_size(draw, text, max_w, max_h, font_path, - min_size=7): - """ - Finds the largest font size where word-wrapped text - fits inside max_w × max_h with NO mid-word breaking. - - max_size is computed dynamically as min(MAX_FONT_CAP, max_h) - so the search always starts from a sensible upper bound - relative to the actual box height. - - Args: - draw : ImageDraw instance - text : Full text string - max_w : Available width in pixels - max_h : Available height in pixels - font_path : Path to .ttf (or None for PIL default) - min_size : Minimum font pt (default: 7) - - Returns: - (font, lines) - """ - # Dynamic ceiling: no point trying a font taller than the box - max_size = min(MAX_FONT_CAP, max_h) - max_size = max(max_size, min_size) # safety: never below min - - best_font = None - best_lines = [text] - - for size in range(max_size, min_size - 1, -1): +def load_font(font_path, size): + if font_path and os.path.exists(font_path): try: - font = (ImageFont.truetype(font_path, size) - if font_path else ImageFont.load_default()) + return ImageFont.truetype(font_path, size) except Exception: - font = ImageFont.load_default() + pass + return ImageFont.load_default() - lines, overflow = wrap_text_words(draw, text, max_w, font) - if overflow: - continue # a word is wider than the box → too big +def measure_text(draw, text, font): + bbox = draw.textbbox((0, 0), text, font=font) + return bbox[2] - bbox[0], bbox[3] - bbox[1] - line_h = get_line_height(draw, font) - total_h = line_h * len(lines) - if total_h <= max_h: - best_font = font - best_lines = lines - break # largest size that fits — done +def wrap_text(draw, text, font, max_width): + words, lines, current = text.split(), [], "" + for word in words: + test = (current + " " + word).strip() + w, _ = measure_text(draw, test, font) + if w <= max_width or not current: + current = test + else: + lines.append(current) + current = word + if current: + lines.append(current) + if not lines: + return [""], 0, 0 + heights, widths = [], [] + for line in lines: + w, h = measure_text(draw, line, font) + widths.append(w) + heights.append(h) + line_gap = max(heights[0] // 5, 2) if heights else 2 + total_height = sum(heights) + line_gap * (len(lines) - 1) + return lines, total_height, max(widths) if widths else 0 - # Guaranteed fallback at min_size - if best_font is None: - try: - best_font = (ImageFont.truetype(font_path, min_size) - if font_path else ImageFont.load_default()) - except Exception: - best_font = ImageFont.load_default() - best_lines, _ = wrap_text_words( - draw, text, max_w, best_font) - if not best_lines: - best_lines = [text] - return best_font, best_lines +def best_fit_font(draw, text, font_path, safe_w, safe_h): + for size in range(MAX_FONT_SIZE, MIN_FONT_SIZE - 1, -FONT_SIZE_STEP): + font = load_font(font_path, size) + lines, total_h, max_lw = wrap_text(draw, text, font, safe_w) + if total_h <= safe_h and max_lw <= safe_w: + return font, lines, total_h + font = load_font(font_path, MIN_FONT_SIZE) + lines, total_h, _ = wrap_text(draw, text, font, safe_w) + return font, lines, total_h # ───────────────────────────────────────────── -# RENDER TEXT INTO BUBBLE -# -# Text is centered both horizontally and -# vertically inside the padded bbox. -# Line height uses get_line_height() (tight). +# PANEL DETECTION HELPERS # ───────────────────────────────────────────── -def render_text_in_bubble(pil_image, bubble_data, text, - font_path, padding=6, - font_color=(0, 0, 0)): +def merge_nested_panels(panels): """ - Renders translated text centered inside the bbox. - Font auto-sizes to fill the box as much as possible. - Word-wrap only — no mid-word hyphens. + Removes panels that are >80% contained inside + a larger panel. Keeps the larger one. """ - x, y = bubble_data["x"], bubble_data["y"] - w, h = bubble_data["w"], bubble_data["h"] + if len(panels) <= 1: + return panels - draw = ImageDraw.Draw(pil_image) - inner_w = max(1, w - padding * 2) - inner_h = max(1, h - padding * 2) - - font, lines = fit_font_size( - draw, text, inner_w, inner_h, font_path + panels_sorted = sorted( + panels, + key=lambda p: (p[2] - p[0]) * (p[3] - p[1]), + reverse=True ) - line_h = get_line_height(draw, font) - total_h = line_h * len(lines) + keep = [] + for panel in panels_sorted: + px1, py1, px2, py2 = panel + p_area = (px2 - px1) * (py2 - py1) + dominated = False + for kept in keep: + kx1, ky1, kx2, ky2 = kept + ix1 = max(px1, kx1); iy1 = max(py1, ky1) + ix2 = min(px2, kx2); iy2 = min(py2, ky2) + if ix2 > ix1 and iy2 > iy1: + inter = (ix2 - ix1) * (iy2 - iy1) + if inter / p_area > 0.80: + dominated = True + break + if not dominated: + keep.append(panel) - # Center block vertically - start_y = y + padding + max(0, (inner_h - total_h) // 2) + return keep + +def split_panels_on_internal_borders(panels, v_lines, + img_w, img_h): + """ + For each panel wider than 30% of the image, checks + whether a strong vertical border line runs through + its interior. If found, splits into two sub-panels. + """ + result = [] + for (px1, py1, px2, py2) in panels: + pw = px2 - px1 + + if pw < img_w * 0.30: + result.append((px1, py1, px2, py2)) + continue + + margin = int(pw * 0.20) + search_x1 = px1 + margin + search_x2 = px2 - margin + + panel_vlines = v_lines[py1:py2, search_x1:search_x2] + col_sums = panel_vlines.sum(axis=0) + + panel_h = py2 - py1 + threshold = panel_h * 255 * 0.40 + + split_cols = np.where(col_sums > threshold)[0] + + if len(split_cols) == 0: + result.append((px1, py1, px2, py2)) + continue + + split_x = int(np.median(split_cols)) + search_x1 + left_w = split_x - px1 + right_w = px2 - split_x + + if left_w > img_w * 0.10 and right_w > img_w * 0.10: + result.append((px1, py1, split_x, py2)) + result.append((split_x, py1, px2, py2)) + print(f" ✂️ Split ({px1},{py1})→({px2},{py2}) " + f"at x={split_x}") + else: + result.append((px1, py1, px2, py2)) + + return result + + +# ───────────────────────────────────────────── +# PANEL DETECTION (v2 — border-line based) +# ───────────────────────────────────────────── +def detect_panels(img_bgr): + """ + Detects manga panel boundaries using morphological + line detection on dark border pixels. + + 1. Threshold dark pixels → border candidates + 2. Horizontal kernel → long horizontal lines + 3. Vertical kernel → long vertical lines + 4. Combine + dilate → closed border skeleton + 5. Invert → panel interior blobs + 6. connectedComponents → one blob per panel + 7. Filter by area, shape, minimum dimensions + 8. Merge nested panels + 9. Split wide panels on internal vertical borders + """ + img_h, img_w = img_bgr.shape[:2] + total_area = img_h * img_w + min_area = total_area * MIN_PANEL_AREA_RATIO + + gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) + + _, dark_mask = cv2.threshold( + gray, 80, 255, cv2.THRESH_BINARY_INV) + + h_len = max(40, img_w // 25) + h_kernel = cv2.getStructuringElement( + cv2.MORPH_RECT, (h_len, 1)) + h_lines = cv2.morphologyEx( + dark_mask, cv2.MORPH_OPEN, h_kernel) + + v_len = max(40, img_h // 25) + v_kernel = cv2.getStructuringElement( + cv2.MORPH_RECT, (1, v_len)) + v_lines = cv2.morphologyEx( + dark_mask, cv2.MORPH_OPEN, v_kernel) + + borders = cv2.bitwise_or(h_lines, v_lines) + close_kernel = np.ones((5, 5), np.uint8) + borders = cv2.dilate(borders, close_kernel, iterations=2) + + panel_interior = cv2.bitwise_not(borders) + + num_labels, labels, stats, centroids = \ + cv2.connectedComponentsWithStats( + panel_interior, connectivity=8) + + panels = [] + for label_id in range(1, num_labels): + area = stats[label_id, cv2.CC_STAT_AREA] + if area < min_area: + continue + + x = stats[label_id, cv2.CC_STAT_LEFT] + y = stats[label_id, cv2.CC_STAT_TOP] + w = stats[label_id, cv2.CC_STAT_WIDTH] + h = stats[label_id, cv2.CC_STAT_HEIGHT] + x2 = x + w + y2 = y + h + + if w * h > total_area * 0.90: + continue + + aspect = max(w, h) / max(min(w, h), 1) + if aspect > 15: + continue + + # Skip panels too narrow/short to be real panels + if w < img_w * 0.05 or h < img_h * 0.05: + continue + + panels.append((x, y, x2, y2)) + + panels = merge_nested_panels(panels) + panels = split_panels_on_internal_borders( + panels, v_lines, img_w, img_h) + + panels.sort(key=lambda p: (p[1] // 100, p[0])) + + if not panels: + print(" ⚠️ No panels detected — using full image as panel") + panels = [(0, 0, img_w, img_h)] + + print(f" 📐 {len(panels)} panel(s) detected:") + for i, (x1, y1, x2, y2) in enumerate(panels, 1): + pct = (x2 - x1) * (y2 - y1) / total_area * 100 + print(f" Panel {i}: ({x1},{y1})→({x2},{y2}) " + f"{x2-x1}×{y2-y1}px area={pct:.1f}%") + + return panels + + +# ───────────────────────────────────────────── +# BUBBLE → PANEL ASSIGNMENT +# ───────────────────────────────────────────── +def assign_panel(bubble_data, panels, img_w, img_h): + bx = bubble_data["x"]; bw = bubble_data["w"] + by = bubble_data["y"]; bh = bubble_data["h"] + bcx = bx + bw / 2.0; bcy = by + bh / 2.0 + + best_panel, best_overlap = None, 0 + + for (px1, py1, px2, py2) in panels: + ix1 = max(bx, px1); iy1 = max(by, py1) + ix2 = min(bx+bw, px2); iy2 = min(by+bh, py2) + if ix2 > ix1 and iy2 > iy1: + overlap = (ix2 - ix1) * (iy2 - iy1) + if overlap > best_overlap: + best_overlap = overlap + best_panel = (px1, py1, px2, py2) + + if best_panel is None: + for (px1, py1, px2, py2) in panels: + if px1 <= bcx <= px2 and py1 <= bcy <= py2: + return (px1, py1, px2, py2) + return (0, 0, img_w, img_h) + + return best_panel + + +# ───────────────────────────────────────────── +# BUBBLE ELLIPSE DETECTION (flood-fill) +# ───────────────────────────────────────────── +def detect_bubble_ellipse(img_bgr, bubble_data, panel): + x = bubble_data["x"]; w = bubble_data["w"] + y = bubble_data["y"]; h = bubble_data["h"] + + img_h, img_w = img_bgr.shape[:2] + px1, py1, px2, py2 = panel + + seed_x = max(1, min(img_w - 2, int(x + w / 2.0))) + seed_y = max(1, min(img_h - 2, int(y + h / 2.0))) + + gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) + _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY) + + panel_mask = np.zeros_like(binary) + panel_mask[py1:py2, px1:px2] = binary[py1:py2, px1:px2] + + if gray[seed_y, seed_x] < 150: + found = False + for r in range(1, min(w, h) // 3): + for dy in range(-r, r + 1): + for dx in range(-r, r + 1): + nx, ny = seed_x + dx, seed_y + dy + if (px1 <= nx < px2 and py1 <= ny < py2 + and gray[ny, nx] >= 200): + seed_x, seed_y = nx, ny + found = True + break + if found: break + if found: break + if not found: + return None + + flood_mask = np.zeros((img_h + 2, img_w + 2), dtype=np.uint8) + flood_fill_img = panel_mask.copy() + cv2.floodFill(flood_fill_img, flood_mask, + (seed_x, seed_y), 255, + loDiff=FLOOD_TOLERANCE, upDiff=FLOOD_TOLERANCE, + flags=cv2.FLOODFILL_FIXED_RANGE) + + filled_region = flood_mask[1:-1, 1:-1] * 255 + contours, _ = cv2.findContours( + filled_region, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + if not contours: + return None + + bubble_contour = max(contours, key=cv2.contourArea) + if len(bubble_contour) < 5: + return None + if cv2.contourArea(bubble_contour) < 100: + return None + + (ecx, ecy), (ew, eh), angle = cv2.fitEllipse(bubble_contour) + return float(ecx), float(ecy), float(ew/2), float(eh/2), float(angle) + + +# ───────────────────────────────────────────── +# CLIP + NUDGE ELLIPSE TO PANEL +# ───────────────────────────────────────────── +def clip_ellipse_to_panel(cx, cy, sa, sb, angle, panel, + shrink=BORDER_SHRINK_PX): + """ + Keeps the ellipse inside the panel by: + 1. Applying border shrink margin + 2. Nudging center inward (up to MAX_NUDGE_RATIO) + 3. Shrinking axes only for remaining overflow + + Returns (cx, cy, sa, sb) — center may be adjusted. + """ + px1, py1, px2, py2 = panel + + inner_x1 = px1 + shrink + inner_y1 = py1 + shrink + inner_x2 = px2 - shrink + inner_y2 = py2 - shrink + + sa_s = max(sa - shrink, 1.0) + sb_s = max(sb - shrink, 1.0) + + for _ in range(3): + rad = math.radians(angle) + hw = math.sqrt((sa_s * math.cos(rad))**2 + + (sb_s * math.sin(rad))**2) + hh = math.sqrt((sa_s * math.sin(rad))**2 + + (sb_s * math.cos(rad))**2) + + ovf_l = max(0, inner_x1 - (cx - hw)) + ovf_r = max(0, (cx + hw) - inner_x2) + ovf_t = max(0, inner_y1 - (cy - hh)) + ovf_b = max(0, (cy + hh) - inner_y2) + + if max(ovf_l, ovf_r, ovf_t, ovf_b) == 0: + break + + # Step 1: nudge center inward + max_nx = sa_s * MAX_NUDGE_RATIO + max_ny = sb_s * MAX_NUDGE_RATIO + cx += min(ovf_l, max_nx) - min(ovf_r, max_nx) + cy += min(ovf_t, max_ny) - min(ovf_b, max_ny) + + # Step 2: recompute overflow after nudge + rad = math.radians(angle) + hw = math.sqrt((sa_s * math.cos(rad))**2 + + (sb_s * math.sin(rad))**2) + hh = math.sqrt((sa_s * math.sin(rad))**2 + + (sb_s * math.cos(rad))**2) + + ovf_l = max(0, inner_x1 - (cx - hw)) + ovf_r = max(0, (cx + hw) - inner_x2) + ovf_t = max(0, inner_y1 - (cy - hh)) + ovf_b = max(0, (cy + hh) - inner_y2) + max_ovf = max(ovf_l, ovf_r, ovf_t, ovf_b) + + # Step 3: shrink only remaining overflow + if max_ovf > 0: + sa_s = max(sa_s - max_ovf, 1.0) + sb_s = max(sb_s - max_ovf, 1.0) + + return cx, cy, sa_s, sb_s + + +# ───────────────────────────────────────────── +# GET FINAL RENDER ELLIPSE PARAMS +# ───────────────────────────────────────────── +def get_render_ellipse(img_bgr, bubble_data, panel): + x = bubble_data["x"]; w = bubble_data["w"] + y = bubble_data["y"]; h = bubble_data["h"] + + detected = detect_bubble_ellipse(img_bgr, bubble_data, panel) + + if detected is not None: + ecx, ecy, sa, sb, angle = detected + ecx, ecy, sa_fill, sb_fill = clip_ellipse_to_panel( + ecx, ecy, sa, sb, angle, panel) + safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO + safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO + return (ecx, ecy, sa_fill, sb_fill, angle, + sa, sb, safe_w, safe_h, "detected") + else: + cx = x + w / 2.0; cy = y + h / 2.0 + sa = w / 2.0; sb = h / 2.0 + cx, cy, sa_fill, sb_fill = clip_ellipse_to_panel( + cx, cy, sa, sb, 0.0, panel) + safe_w = sa_fill * math.sqrt(2) * TEXT_RATIO + safe_h = sb_fill * math.sqrt(2) * TEXT_RATIO + return (cx, cy, sa_fill, sb_fill, 0.0, + sa, sb, safe_w, safe_h, "fallback") + + +# ───────────────────────────────────────────── +# DRAW ONE BUBBLE +# ───────────────────────────────────────────── +def draw_bubble(pil_img, img_bgr, bubble_data, + translated_text, font_path, + font_color, panel): + (cx, cy, sa_fill, sb_fill, angle, + sa_det, sb_det, + safe_w, safe_h, method) = get_render_ellipse( + img_bgr, bubble_data, panel) + + cx_i = int(round(cx)) + cy_i = int(round(cy)) + + img_h, img_w = img_bgr.shape[:2] + mask = np.zeros((img_h, img_w), dtype=np.uint8) + cv2.ellipse(mask, (cx_i, cy_i), + (int(math.ceil(sa_fill)), + int(math.ceil(sb_fill))), + angle, 0, 360, 255, -1) + + img_np = np.array(pil_img) + img_np[mask == 255] = [255, 255, 255] + pil_img.paste(Image.fromarray(img_np)) + + if not translated_text: + return method + + sw = max(int(safe_w), 1) + sh = max(int(safe_h), 1) + draw = ImageDraw.Draw(pil_img) + + font, lines, total_h = best_fit_font( + draw, translated_text, font_path, sw, sh) + + if not lines: + return method + + y_cursor = cy_i - total_h // 2 for line in lines: - bb = draw.textbbox((0, 0), line, font=font) - line_w = bb[2] - bb[0] - # Center each line horizontally - start_x = x + padding + max(0, (inner_w - line_w) // 2) - draw.text((start_x, start_y), line, - font=font, fill=font_color) - start_y += line_h + lw, lh = measure_text(draw, line, font) + draw.text((cx_i - lw // 2, y_cursor), + line, font=font, fill=font_color) + y_cursor += lh + max(lh // 5, 2) + + return method # ───────────────────────────────────────────── -# RESOLVE FONT +# DEBUG OVERLAY # ───────────────────────────────────────────── -def resolve_font(font_path, fallback): - if font_path and os.path.exists(font_path): - print(f" ✅ Using font: {font_path}") - return font_path - if fallback and os.path.exists(fallback): - print(f" ⚠️ Fallback: {fallback}") - return fallback - print(" ⚠️ No font found. Using PIL default.") - return None +def save_debug_ellipses(input_image_path, bubbles, + translations, panels, output_path): + img = cv2.imread(input_image_path) + if img is None: + print(f" ⚠️ Debug: cannot load {input_image_path}") + return + + overlay = img.copy() + img_h, img_w = img.shape[:2] + + for i, (px1, py1, px2, py2) in enumerate(panels, 1): + cv2.rectangle(overlay, (px1, py1), (px2, py2), + DBG_COLOR_PANEL, 3) + cv2.putText(overlay, f"P{i}", + (px1 + 4, py1 + 22), + cv2.FONT_HERSHEY_SIMPLEX, + 0.65, DBG_COLOR_PANEL, 2) + + for bubble_id in sorted(translations.keys()): + if bubble_id not in bubbles: + continue + + bubble_data = bubbles[bubble_id] + panel = assign_panel(bubble_data, panels, img_w, img_h) + + x = bubble_data["x"]; w = bubble_data["w"] + y = bubble_data["y"]; h = bubble_data["h"] + + (cx, cy, sa_fill, sb_fill, angle, + sa_det, sb_det, + safe_w, safe_h, method) = get_render_ellipse( + img, bubble_data, panel) + + cx_i = int(round(cx)); cy_i = int(round(cy)) + sa_d_i = int(math.ceil(sa_det)) + sb_d_i = int(math.ceil(sb_det)) + sa_f_i = int(math.ceil(sa_fill)) + sb_f_i = int(math.ceil(sb_fill)) + sw_i = int(safe_w); sh_i = int(safe_h) + + cv2.ellipse(overlay, (cx_i, cy_i), + (sa_d_i, sb_d_i), angle, 0, 360, + DBG_COLOR_DETECTED, DBG_THICKNESS) + + cv2.ellipse(overlay, (cx_i, cy_i), + (sa_f_i, sb_f_i), angle, 0, 360, + DBG_COLOR_FILL, DBG_THICKNESS) + + cv2.rectangle(overlay, + (cx_i - sw_i//2, cy_i - sh_i//2), + (cx_i + sw_i//2, cy_i + sh_i//2), + DBG_COLOR_SAFE, DBG_THICKNESS) + + cv2.circle(overlay, (cx_i, cy_i), + DBG_CENTER_R, DBG_COLOR_CENTER, -1) + + cv2.circle(overlay, + (int(x + w/2), int(y + h/2)), + DBG_CENTER_R - 1, DBG_COLOR_SEED, -1) + + tag = "D" if method == "detected" else "F" + cv2.putText(overlay, f"#{bubble_id}({tag})", + (cx_i - sa_d_i, cy_i - sb_d_i - 6), + cv2.FONT_HERSHEY_SIMPLEX, + 0.50, DBG_COLOR_LABEL, 2) + + debug_img = cv2.addWeighted(overlay, 0.85, img, 0.15, 0) + cv2.imwrite(output_path, debug_img) + + print(f" 🐛 Debug saved → {output_path}") + print() + print(" Legend:") + print(" 🟡 YELLOW → Panel boundary") + print(" 🟢 GREEN → Detected bubble ellipse") + print(" 🔴 RED → Fill ellipse (nudged + clipped)") + print(" 🔵 BLUE → Text safe rectangle") + print(" 🔵 CYAN → Ellipse center (may be nudged)") + print(" 🟣 MAGENTA → Original flood seed point") + print(" (D) = contour detected | (F) = bbox fallback") # ───────────────────────────────────────────── -# MAIN RENDERER +# MAIN RENDER FUNCTION # ───────────────────────────────────────────── -def render_translated_page( - input_image = INPUT_IMAGE, - output_image = OUTPUT_IMAGE, - translations_file = TRANSLATIONS_FILE, - bubbles_file = BUBBLES_FILE, - font_path = FONT_PATH, - font_fallback = FONT_FALLBACK, - font_color = FONT_COLOR, - text_padding = 6, - debug = False, +def render_translations( + input_image, + output_image, + translations_file, + bubbles_file, + font_path = DEFAULT_FONT_PATH, + font_color = DEFAULT_FONT_COLOR, + debug = False, + debug_path = "debug_ellipses.png", ): - print("=" * 55) - print(" MANGA TRANSLATOR — RENDERER") - print("=" * 55) + img_bgr = cv2.imread(input_image) + if img_bgr is None: + raise FileNotFoundError( + f"Cannot load image: {input_image}") + + img_h, img_w = img_bgr.shape[:2] + img_pil = Image.fromarray( + cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)) - print("\n📄 Parsing translations...") translations = parse_translations(translations_file) - if not translations: - print("❌ No translations found. Aborting.") - return + bubbles = parse_bubbles(bubbles_file) - print(f"\n📦 Loading bubble data...") - bubble_boxes = load_bubble_boxes(bubbles_file) - if not bubble_boxes: - print("❌ No bubble data. Re-run manga-translator.py.") - return - - translate_ids = set(translations.keys()) - box_ids = set(bubble_boxes.keys()) - to_process = sorted(translate_ids & box_ids) - untouched = sorted(box_ids - translate_ids) - missing = sorted(translate_ids - box_ids) - - print(f"\n🔗 To process : {to_process}") - print(f" Untouched : {untouched}") - if missing: - print(f" ⚠️ In output.txt but no box: {missing}") - - if not to_process: - print("❌ No matching IDs. Aborting.") - return - - print(f"\n🖼️ Loading: {input_image}") - cv_image = cv2.imread(input_image) - if cv_image is None: - print(f"❌ Could not load: {input_image}") - return - print(f" {cv_image.shape[1]}×{cv_image.shape[0]}px") - - # Sample backgrounds BEFORE erasing - print("\n🎨 Sampling backgrounds...") - bg_colors = {} - for bid in to_process: - bg_bgr = sample_bubble_background( - cv_image, bubble_boxes[bid]) - bg_colors[bid] = bg_bgr - bg_rgb = (bg_bgr[2], bg_bgr[1], bg_bgr[0]) - brightness = sum(bg_rgb) / 3 - ink = "black" if brightness > 128 else "white" - print(f" #{bid}: RGB{bg_rgb} ink→{ink}") - - # Erase - print("\n🧹 Erasing original text...") - for bid in to_process: - bd = bubble_boxes[bid] - erase_bubble_text(cv_image, bd, bg_color=bg_colors[bid]) - print(f" ✅ #{bid} ({bd['w']}×{bd['h']}px)") - - pil_image = Image.fromarray( - cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)) - - print("\n🔤 Resolving font...") - resolved_font = resolve_font(font_path, font_fallback) - - # Render - print("\n✍️ Rendering...") - for bid in to_process: - text = translations[bid] - bd = bubble_boxes[bid] - bg_rgb = (bg_colors[bid][2], - bg_colors[bid][1], - bg_colors[bid][0]) - brightness = sum(bg_rgb) / 3 - txt_color = (0, 0, 0) if brightness > 128 \ - else (255, 255, 255) - - render_text_in_bubble( - pil_image, bd, text, - font_path = resolved_font, - padding = text_padding, - font_color = txt_color, - ) - print(f" ✅ #{bid}: '{text}' " - f"({bd['x']},{bd['y']}) {bd['w']}×{bd['h']}px") + print("\n📐 Detecting panels...") + panels = detect_panels(img_bgr) + print(f"\n🎨 Rendering {len(translations)} bubble(s)...") + print(f" Font : {font_path}") + print(f" Border shrink : -{BORDER_SHRINK_PX}px") + print(f" Max nudge ratio : {MAX_NUDGE_RATIO}") + print(f" Flood tolerance : {FLOOD_TOLERANCE}") + print(f" Text ratio : {TEXT_RATIO}") if debug: - dbg = pil_image.copy() - dbg_draw = ImageDraw.Draw(dbg) - for bid, bd in sorted(bubble_boxes.items()): - color = (0, 200, 0) if bid in translate_ids \ - else (160, 160, 160) - dbg_draw.rectangle( - [bd["x"], bd["y"], - bd["x"] + bd["w"], bd["y"] + bd["h"]], - outline=color, width=2) - dbg_draw.text((bd["x"] + 3, bd["y"] + 3), - f"#{bid}", fill=color) - dbg.save("debug_render.png") - print("\n 🐛 debug_render.png saved " - "(green=translated, grey=untouched)") + print(f" Debug mode : ON → {debug_path}") + save_debug_ellipses(input_image, bubbles, + translations, panels, debug_path) - print(f"\n💾 Saving → {output_image}") - pil_image.save(output_image, "PNG") - print(" ✅ Done!") - print("=" * 55) + rendered = 0; skipped = 0 + n_detect = 0; n_fallbk = 0 + + for bubble_id, translated_text in sorted(translations.items()): + if bubble_id not in bubbles: + print(f" ⚠️ #{bubble_id}: not in bubbles.json — skipped") + skipped += 1 + continue + + bubble_data = bubbles[bubble_id] + panel = assign_panel(bubble_data, panels, img_w, img_h) + method = draw_bubble( + img_pil, img_bgr, bubble_data, + translated_text, font_path, font_color, panel) + + tag = "🔍 detected" if method == "detected" else "📦 fallback" + if method == "detected": n_detect += 1 + else: n_fallbk += 1 + + px1, py1, px2, py2 = panel + print(f" ✅ #{bubble_id} [{tag}] " + f"panel=({px1},{py1})→({px2},{py2}) " + f"→ \"{translated_text[:35]}\"") + rendered += 1 + + result_cv = cv2.cvtColor(np.array(img_pil), + cv2.COLOR_RGB2BGR) + cv2.imwrite(output_image, result_cv) + + print(f"\n✅ Done — {rendered} rendered " + f"({n_detect} detected, {n_fallbk} fallback), " + f"{skipped} skipped.") + print(f"📄 Output → {output_image}") # ───────────────────────────────────────────── # ENTRY POINT # ───────────────────────────────────────────── if __name__ == "__main__": - render_translated_page( - input_image = "page.png", + render_translations( + input_image = "002-page.jpg", output_image = "page_translated.png", translations_file = "output.txt", bubbles_file = "bubbles.json", - font_path = "fonts/ComicRelief-Regular.ttf", - font_fallback = "/System/Library/Fonts/Helvetica.ttc", - font_color = (0, 0, 0), - text_padding = 6, + font_path = DEFAULT_FONT_PATH, + font_color = DEFAULT_FONT_COLOR, debug = True, + debug_path = "debug_ellipses.png", ) diff --git a/manga-translator.py b/manga-translator.py index 33b3a9c..8102ce8 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -5,7 +5,6 @@ import cv2 import numpy as np import easyocr from deep_translator import GoogleTranslator -from sklearn.cluster import DBSCAN # ───────────────────────────────────────────── @@ -38,7 +37,7 @@ SOUND_EFFECT_PATTERNS = [ r"^oh+$", r"^ugh+$", r"^gr+$", r"^bam+$", r"^pow+$", r"^crash+$", r"^boom+$", r"^bang+$", r"^crack+$", r"^whoosh+$", r"^thud+$", r"^snap+$", - r"^zip+$", r"^swoosh+$", + r"^zip+$", r"^swoosh+$", r"^chirp+$", r"^tweet+$", ] def is_sound_effect(text): @@ -47,6 +46,39 @@ def is_sound_effect(text): for p in SOUND_EFFECT_PATTERNS) +# ───────────────────────────────────────────── +# TITLE / LOGO / AUTHOR FILTER +# ───────────────────────────────────────────── +TITLE_PATTERNS = [ + r"^(mission|chapter|episode|vol\.?|volume)\s*\d+$", + r"^(spy|family|spy.family)$", + r"^by\s+.+$", # "BY TATSUYA ENDO" + r"^[a-z]{1,4}\s+[a-z]+\s+[a-z]+$", # short author-style lines +] + +def is_title_text(text): + cleaned = text.strip().lower() + return any(re.fullmatch(p, cleaned, re.IGNORECASE) + for p in TITLE_PATTERNS) + + +# ───────────────────────────────────────────── +# GARBAGE TOKEN FILTER +# Catches OCR misreads that are mostly +# non-alpha or suspiciously short/mangled +# ───────────────────────────────────────────── +GARBAGE_PATTERNS = [ + r"^[^a-zA-Z]*$", # no letters at all + r"^.{1,2}$", # 1-2 char tokens + r".*\d+.*", # contains digits (YO4, HLNGRY etc.) + r"^[A-Z]{1,4}$", # isolated caps abbreviations (IILK) +] + +def is_garbage(text): + t = text.strip() + return any(re.fullmatch(p, t) for p in GARBAGE_PATTERNS) + + # ───────────────────────────────────────────── # TOKEN CLASSIFIER # ───────────────────────────────────────────── @@ -54,15 +86,6 @@ def classify_token(text, confidence, confidence_threshold, min_text_length, filter_sound_effects): """ Returns one of: "alpha" | "punct" | "noise" - - Rules (in order): - 1. confidence below threshold → noise - 2. shorter than min_text_length → noise - 3. pure digit string → noise - 4. single non-alpha character → noise - 5. sound effect (if filter enabled) → noise - 6. 2+ chars with no letters → punct - 7. has at least one letter → alpha """ cleaned = text.strip() @@ -76,90 +99,61 @@ def classify_token(text, confidence, confidence_threshold, return "noise" if filter_sound_effects and is_sound_effect(cleaned): return "noise" + if is_title_text(cleaned): + return "noise" + if is_garbage(cleaned): + return "noise" if not any(ch.isalpha() for ch in cleaned): return "punct" - return "alpha" def should_keep_token(text, confidence, confidence_threshold, min_text_length, filter_sound_effects): - """ - Backward-compatible wrapper. - Returns (keep: bool, category: str). - """ cat = classify_token(text, confidence, confidence_threshold, min_text_length, filter_sound_effects) return cat != "noise", cat # ───────────────────────────────────────────── -# BOUNDING BOX -# -# Flat union of ALL quad corners. -# Handles every layout correctly: -# • "HN" + "..." same line → horizontal union -# • Multi-line bubbles → vertical union -# • Rotated/skewed quads → all 4 corners included +# QUAD HELPERS # ───────────────────────────────────────────── -def get_cluster_bbox_from_ocr(ocr_bboxes, image_shape, - padding_px=10): - """ - Computes the bubble erase bbox by taking the flat union - of ALL quad corners. +def quad_bbox(quad): + xs = [pt[0] for pt in quad] + ys = [pt[1] for pt in quad] + return min(xs), min(ys), max(xs), max(ys) - Args: - ocr_bboxes : List of EasyOCR quad bboxes - Each = [[x0,y0],[x1,y1],[x2,y2],[x3,y3]] - image_shape : (height, width) for clamping - padding_px : Expansion on each side (default: 10) - Returns: - (x1, y1, x2, y2) clamped to image bounds - """ +def quads_bbox(quads, image_shape, padding_px=10): img_h, img_w = image_shape[:2] - - if not ocr_bboxes: - return 0, 0, 0, 0 - - all_x = [pt[0] for quad in ocr_bboxes for pt in quad] - all_y = [pt[1] for quad in ocr_bboxes for pt in quad] - + all_x = [pt[0] for quad in quads for pt in quad] + all_y = [pt[1] for quad in quads for pt in quad] x1 = max(0, min(all_x) - padding_px) y1 = max(0, min(all_y) - padding_px) x2 = min(img_w, max(all_x) + padding_px) y2 = min(img_h, max(all_y) + padding_px) - return x1, y1, x2, y2 -def get_cluster_bbox(items): - """Fallback center-point bbox — used only during merge step.""" - half = 30 - x1 = min(cx for _, cx, _ in items) - half - y1 = min(cy for cy, _, _ in items) - half - x2 = max(cx for _, cx, _ in items) + half - y2 = max(cy for cy, _, _ in items) + half - return x1, y1, x2, y2 - - -def boxes_are_close(bbox_a, bbox_b, proximity_px=80): - ax1, ay1, ax2, ay2 = bbox_a - bx1, by1, bx2, by2 = bbox_b - ax1 -= proximity_px; ay1 -= proximity_px - ax2 += proximity_px; ay2 += proximity_px - return not (ax2 < bx1 or bx2 < ax1 or ay2 < by1 or by2 < ay1) +def bboxes_overlap_or_touch(a, b, gap_px=0): + ax1, ay1, ax2, ay2 = a + bx1, by1, bx2, by2 = b + gap_x = max(0, max(ax1, bx1) - min(ax2, bx2)) + gap_y = max(0, max(ay1, by1) - min(ay2, by2)) + return gap_x <= gap_px and gap_y <= gap_px # ───────────────────────────────────────────── -# POST-CLUSTER MERGE (Union-Find) +# OVERLAP-BASED GROUPING (Union-Find) # ───────────────────────────────────────────── -def merge_nearby_clusters(raw_clusters, raw_quads, - proximity_px=80): - labels = list(raw_clusters.keys()) - bboxes = {lbl: get_cluster_bbox(raw_clusters[lbl]) - for lbl in labels} - parent = {lbl: lbl for lbl in labels} +def group_quads_by_overlap(ocr_results, image_shape, + gap_px=18, bbox_padding=10): + n = len(ocr_results) + if n == 0: + return {}, {}, {} + + token_bboxes = [quad_bbox(r[0]) for r in ocr_results] + parent = list(range(n)) def find(x): while parent[x] != x: @@ -170,32 +164,95 @@ def merge_nearby_clusters(raw_clusters, raw_quads, def union(x, y): parent[find(x)] = find(y) - for i in range(len(labels)): - for j in range(i + 1, len(labels)): - a, b = labels[i], labels[j] - if boxes_are_close(bboxes[a], bboxes[b], proximity_px): - union(a, b) + for i in range(n): + for j in range(i + 1, n): + if bboxes_overlap_or_touch( + token_bboxes[i], token_bboxes[j], + gap_px=gap_px): + union(i, j) - merged_clusters = {} - merged_quads = {} - for lbl in labels: - root = find(lbl) - merged_clusters.setdefault(root, []) - merged_quads.setdefault(root, []) - merged_clusters[root].extend(raw_clusters[lbl]) - merged_quads[root].extend(raw_quads[lbl]) + groups = {} + for i in range(n): + root = find(i) + groups.setdefault(root, []) + groups[root].append(i) - return merged_clusters, merged_quads + def group_sort_key(indices): + ys = [token_bboxes[i][1] for i in indices] + xs = [token_bboxes[i][0] for i in indices] + return (min(ys) // 150, min(xs)) + + sorted_groups = sorted(groups.values(), key=group_sort_key) + + bubble_dict = {} + bbox_dict = {} + ocr_quads = {} + + for gid, indices in enumerate(sorted_groups, start=1): + indices_sorted = sorted( + indices, key=lambda i: token_bboxes[i][1]) + + quads = [ocr_results[i][0] for i in indices_sorted] + raw_texts = [ocr_results[i][1] for i in indices_sorted] + + alpha_lines = [] + punct_tokens = [] + + for i in indices_sorted: + _, text, _ = ocr_results[i] + yc = (token_bboxes[i][1] + token_bboxes[i][3]) / 2.0 + if any(ch.isalpha() for ch in text): + alpha_lines.append((yc, text)) + else: + punct_tokens.append((yc, text)) + + for pcy, ptext in punct_tokens: + if alpha_lines: + closest = min( + range(len(alpha_lines)), + key=lambda k: abs(alpha_lines[k][0] - pcy) + ) + yc_a, text_a = alpha_lines[closest] + alpha_lines[closest] = (yc_a, text_a + ptext) + + text_lines = [t for _, t in alpha_lines] or raw_texts + + bubble_dict[gid] = text_lines + ocr_quads[gid] = quads + bbox_dict[gid] = quads_bbox(quads, image_shape, + padding_px=bbox_padding) + + b = bbox_dict[gid] + print(f" Group #{gid}: {len(quads)} quad(s) " + f"bbox=({int(b[0])},{int(b[1])})→" + f"({int(b[2])},{int(b[3])}) " + f"w={int(b[2]-b[0])} h={int(b[3]-b[1])} " + f"text={text_lines}") + + return bubble_dict, bbox_dict, ocr_quads + + +# ───────────────────────────────────────────── +# HYPHEN REMOVAL +# ───────────────────────────────────────────── +def fix_hyphens(lines): + if not lines: + return "" + merged = lines[0] + for line in lines[1:]: + line = line.strip() + merged = (merged[:-1] + line if merged.endswith("-") + else merged + " " + line) + return re.sub(r" {2,}", " ", merged).strip().upper() # ───────────────────────────────────────────── # CROP-BASED OCR RE-READ # ───────────────────────────────────────────── -def reread_cluster_crop(image, bbox, reader, source_lang, +def reread_cluster_crop(image, bbox, reader, padding_px=20, upscale_factor=2.5): img_h, img_w = image.shape[:2] x1, y1, x2, y2 = bbox - x1 = max(0, int(x1) - padding_px) y1 = max(0, int(y1) - padding_px) x2 = min(img_w, int(x2) + padding_px) @@ -224,164 +281,22 @@ def reread_cluster_crop(image, bbox, reader, source_lang, return None crop_results.sort(key=lambda r: r[0][0][1]) - lines = [t.strip() for _, t, _ in crop_results if t.strip()] + lines = [t.strip().upper() for _, t, _ in crop_results + if t.strip()] return fix_hyphens(lines) if lines else None # ───────────────────────────────────────────── -# DBSCAN BUBBLE CLUSTERING +# AUTO GAP # ───────────────────────────────────────────── -def cluster_into_bubbles(ocr_results, image_shape, - eps=80, min_samples=1, - proximity_px=80, bbox_padding=10): - """ - Two-pass clustering: - Pass 1 — DBSCAN on center points - Pass 2 — Bounding-box proximity merge - - Token handling per cluster: - "alpha" tokens → translation text + bbox - "punct" tokens → bbox included, appended to nearest - alpha line by Y distance - (e.g. "..." joins "HN" → "HN...") - - Bbox uses flat union of ALL quad corners: - min/max of all x,y across every quad in the cluster. - - Returns: - bubble_dict : cluster_id → list of text lines - bbox_dict : cluster_id → (x1, y1, x2, y2) - ocr_quads : cluster_id → list of ALL raw quads - """ - if not ocr_results: - return {}, {}, {} - - centers = [] - for bbox, text, confidence in ocr_results: - xs = [pt[0] for pt in bbox] - ys = [pt[1] for pt in bbox] - centers.append([sum(xs) / 4, sum(ys) / 4]) - - centers_array = np.array(centers, dtype=np.float32) - db = DBSCAN(eps=eps, min_samples=min_samples, - metric="euclidean") - labels = db.fit_predict(centers_array) - - raw_clusters = {} - raw_quads = {} - noise_counter = int(max(labels, default=0)) + 1 - - for idx, label in enumerate(labels): - if label == -1: - label = noise_counter - noise_counter += 1 - raw_clusters.setdefault(label, []) - raw_quads.setdefault(label, []) - bbox, text, _ = ocr_results[idx] - raw_clusters[label].append( - (centers[idx][1], centers[idx][0], text)) - raw_quads[label].append(bbox) - - print(f" DBSCAN pass: {len(raw_clusters)} cluster(s)") - - merged_clusters, merged_quads = merge_nearby_clusters( - raw_clusters, raw_quads, proximity_px=proximity_px - ) - print(f" After merge: {len(merged_clusters)} cluster(s)") - - row_band_px = 150 - - def cluster_sort_key(items): - return (min(cy for cy, cx, _ in items) // row_band_px, - min(cx for cy, cx, _ in items)) - - sorted_labels = sorted( - merged_clusters.keys(), - key=lambda lbl: cluster_sort_key(merged_clusters[lbl]) - ) - - bubble_dict = {} - bbox_dict = {} - ocr_quads = {} - - for i, lbl in enumerate(sorted_labels, start=1): - items = merged_clusters[lbl] - quads = merged_quads[lbl] - - items_sorted = sorted(items, key=lambda t: t[0]) - - # ── Separate alpha and punct tokens ─────────────────────── - alpha_lines = [] # (cy, text) - punct_tokens = [] # (cy, text) - - for cy, cx, text in items_sorted: - if any(ch.isalpha() for ch in text): - alpha_lines.append((cy, text)) - else: - punct_tokens.append((cy, text)) - - # ── Append punct to closest alpha line by Y ─────────────── - for pcy, ptext in punct_tokens: - if alpha_lines: - closest_idx = min( - range(len(alpha_lines)), - key=lambda k: abs(alpha_lines[k][0] - pcy) - ) - cy_a, text_a = alpha_lines[closest_idx] - alpha_lines[closest_idx] = (cy_a, text_a + ptext) - - text_lines = [t for _, t in alpha_lines] - - # Fallback: no alpha at all → keep everything as-is - if not text_lines: - text_lines = [text for _, _, text in items_sorted] - - bubble_dict[i] = text_lines - ocr_quads[i] = quads # ALL quads → full bbox coverage - - bbox_dict[i] = get_cluster_bbox_from_ocr( - quads, image_shape, padding_px=bbox_padding - ) - - b = bbox_dict[i] - print(f" Cluster #{i}: {len(quads)} quad(s) " - f"bbox=({int(b[0])},{int(b[1])})→" - f"({int(b[2])},{int(b[3])}) " - f"w={int(b[2]-b[0])} h={int(b[3]-b[1])} " - f"text={text_lines}") - - return bubble_dict, bbox_dict, ocr_quads - - -# ───────────────────────────────────────────── -# HYPHEN REMOVAL -# ───────────────────────────────────────────── -def fix_hyphens(lines): - """ - Joins lines, merging mid-word hyphens. - e.g. ["GRAVEMEN-", "TE"] → "GRAVEMENTE" - """ - if not lines: - return "" - merged = lines[0] - for line in lines[1:]: - line = line.strip() - merged = (merged[:-1] + line if merged.endswith("-") - else merged + " " + line) - return re.sub(r" {2,}", " ", merged).strip() - - -# ───────────────────────────────────────────── -# AUTO EPS -# ───────────────────────────────────────────── -def compute_auto_eps(image_path, base_eps=80, +def compute_auto_gap(image_path, base_gap=18, reference_width=750): image = cv2.imread(image_path) if image is None: - return base_eps + return base_gap img_w = image.shape[1] - scaled = base_eps * (img_w / reference_width) - print(f" ℹ️ Image width: {img_w}px → auto eps: {scaled:.1f}px") + scaled = base_gap * (img_w / reference_width) + print(f" ℹ️ Image width: {img_w}px → auto gap: {scaled:.1f}px") return scaled @@ -400,17 +315,56 @@ def ocr_quality_score(text): # ───────────────────────────────────────────── # BUBBLE JSON EXPORT +# bbox_expand_ratio: grow bbox by this fraction +# of its own size in each direction to better +# approximate the full speech bubble boundary. # ───────────────────────────────────────────── def export_bubble_boxes(bbox_dict, ocr_quads_dict, - filepath="bubbles.json"): + filepath="bubbles.json", + bbox_expand_ratio=0.35, + image_shape=None): export = {} for bubble_id, (x1, y1, x2, y2) in bbox_dict.items(): quads = ocr_quads_dict.get(bubble_id, []) + + # ── Expand bbox to approximate full bubble ──────────────── + w_orig = x2 - x1 + h_orig = y2 - y1 + pad_x = int(w_orig * bbox_expand_ratio) + pad_y = int(h_orig * bbox_expand_ratio) + + # Clamp to image bounds if image_shape provided + if image_shape is not None: + img_h, img_w = image_shape[:2] + ex1 = max(0, x1 - pad_x) + ey1 = max(0, y1 - pad_y) + ex2 = min(img_w, x2 + pad_x) + ey2 = min(img_h, y2 + pad_y) + else: + ex1 = x1 - pad_x + ey1 = y1 - pad_y + ex2 = x2 + pad_x + ey2 = y2 + pad_y + export[str(bubble_id)] = { - "x" : int(x1), - "y" : int(y1), - "w" : int(x2 - x1), - "h" : int(y2 - y1), + "x" : int(ex1), + "y" : int(ey1), + "w" : int(ex2 - ex1), + "h" : int(ey2 - ey1), + # Original tight bbox kept for reference + "x_tight" : int(x1), + "y_tight" : int(y1), + "w_tight" : int(w_orig), + "h_tight" : int(h_orig), + "quad_bboxes" : [ + { + "x": int(quad_bbox(q)[0]), + "y": int(quad_bbox(q)[1]), + "w": int(quad_bbox(q)[2] - quad_bbox(q)[0]), + "h": int(quad_bbox(q)[3] - quad_bbox(q)[1]), + } + for q in quads + ], "quads": [[[int(pt[0]), int(pt[1])] for pt in quad] for quad in quads], } @@ -420,13 +374,24 @@ def export_bubble_boxes(bbox_dict, ocr_quads_dict, print(f"\n📦 Bubble boxes saved → {filepath}") for bid, v in export.items(): - print(f" #{bid}: ({v['x']},{v['y']}) " + print(f" #{bid}: expanded=({v['x']},{v['y']}) " f"{v['w']}×{v['h']}px " + f"tight={v['w_tight']}×{v['h_tight']}px " f"[{len(v['quads'])} quad(s)]") +# ───────────────────────────────────────────── +# OUTPUT.TXT WRITER +# Uses a pipe | as unambiguous delimiter +# Format: #ID|ORIGINAL|TRANSLATED +# ───────────────────────────────────────────── +def write_output(output_lines, filepath): + with open(filepath, "w", encoding="utf-8") as f: + f.write("\n".join(output_lines)) + print(f"📄 Translations saved → {filepath}") + # ───────────────────────────────────────────── -# DEBUG CLUSTER IMAGE +# DEBUG IMAGE # ───────────────────────────────────────────── def save_debug_clusters(image_path, ocr_results, bubble_dict, bbox_dict): @@ -474,26 +439,24 @@ def save_debug_clusters(image_path, ocr_results, # ───────────────────────────────────────────── def translate_manga_text( image_path, - source_lang="it", + source_lang="en", target_lang="ca", confidence_threshold=0.10, export_to_file=None, export_bubbles_to="bubbles.json", min_text_length=2, - cluster_eps="auto", - proximity_px=80, + gap_px="auto", filter_sound_effects=True, quality_threshold=0.5, upscale_factor=2.5, bbox_padding=10, debug=False, ): - # ── 1. Resolve eps ──────────────────────────────────────────── - if cluster_eps == "auto": - print("Computing auto eps...") - eps = compute_auto_eps(image_path) + # ── 1. Resolve gap ──────────────────────────────────────────── + if gap_px == "auto": + resolved_gap = compute_auto_gap(image_path) else: - eps = float(cluster_eps) + resolved_gap = float(gap_px) # ── 2. Load full image ──────────────────────────────────────── full_image = cv2.imread(image_path) @@ -521,7 +484,7 @@ def translate_manga_text( skipped = 0 for bbox, text, confidence in results: - cleaned = text.strip() + cleaned = text.strip().upper() keep, category = should_keep_token( cleaned, confidence, confidence_threshold, min_text_length, @@ -530,10 +493,13 @@ def translate_manga_text( if keep: filtered.append((bbox, cleaned, confidence)) if category == "punct": - print(f" ✔ Punct kept: '{cleaned}'") + print(f" ✔ Punct kept: '{cleaned}'") else: - if is_sound_effect(cleaned): - print(f" 🔇 SFX skipped: '{cleaned}'") + tag = ("🔇 SFX" if is_sound_effect(cleaned) else + "🏷 Title" if is_title_text(cleaned) else + "🗑 Garbage" if is_garbage(cleaned) else + "✂️ Low-conf") + print(f" {tag} skipped: '{cleaned}'") skipped += 1 print(f" ✅ {len(filtered)} kept, {skipped} skipped.\n") @@ -542,21 +508,20 @@ def translate_manga_text( print("⚠️ No text detected after filtering.") return - # ── 7. Cluster + merge ──────────────────────────────────────── - print(f"Clustering (eps={eps:.1f}px, " - f"proximity={proximity_px}px, " + # ── 7. Group by overlap ─────────────────────────────────────── + print(f"Grouping by overlap " + f"(gap_px={resolved_gap:.1f}, " f"bbox_padding={bbox_padding}px)...") - bubble_dict, bbox_dict, ocr_quads = cluster_into_bubbles( + bubble_dict, bbox_dict, ocr_quads = group_quads_by_overlap( filtered, image_shape = full_image.shape, - eps = eps, - proximity_px = proximity_px, + gap_px = resolved_gap, bbox_padding = bbox_padding, ) - print(f" ✅ {len(bubble_dict)} bubble(s) after merge.\n") + print(f" ✅ {len(bubble_dict)} bubble(s) detected.\n") - # ── 8. Debug clusters ───────────────────────────────────────── + # ── 8. Debug ────────────────────────────────────────────────── if debug: save_debug_clusters(image_path, filtered, bubble_dict, bbox_dict) @@ -579,7 +544,7 @@ def translate_manga_text( if score < quality_threshold: print(f" → Re-reading #{i} from crop...") reread = reread_cluster_crop( - full_image, bbox_dict[i], reader, source_lang, + full_image, bbox_dict[i], reader, upscale_factor=upscale_factor, ) if reread: @@ -588,32 +553,37 @@ def translate_manga_text( else: print(f" → Nothing found, keeping original.") - # ── 11. Translate & print ───────────────────────────────────── + # ── 11. Translate ───────────────────────────────────────────── + # Output format (pipe-delimited, unambiguous): + # #ID|ORIGINAL TEXT|TRANSLATED TEXT print() - header = (f"{'BUBBLE':<8} " - f"{'ORIGINAL (Italian)':<50} " - f"{'TRANSLATED (Catalan)'}") - divider = "─" * 105 - output_lines = [header, divider] - print(header) + header = "BUBBLE|ORIGINAL|TRANSLATED" + divider = "─" * 80 + output_lines = [header, divider] + translations = {} + translated_count = 0 + + print(f"{'BUBBLE':<8} {'ORIGINAL':<45} {'TRANSLATED'}") print(divider) - translated_count = 0 for i in sorted(clean_bubbles.keys()): bubble_text = clean_bubbles[i].strip() if not bubble_text: continue try: - translated = translator.translate(bubble_text) + result = translator.translate(bubble_text) except Exception as e: - translated = f"[Translation error: {e}]" - if translated is None: - translated = "[No translation returned]" + result = f"[Translation error: {e}]" + if result is None: + result = "[No translation returned]" + result = result.upper() + translations[i] = result translated_count += 1 - line = f"#{i:<7} {bubble_text:<50} {translated}" - print(line) - output_lines.append(line) + + # Pipe-delimited line — safe regardless of text content + output_lines.append(f"#{i}|{bubble_text}|{result}") + print(f"#{i:<7} {bubble_text:<45} {result}") output_lines.append(divider) summary = (f"✅ Done! {translated_count} bubble(s) " @@ -624,25 +594,17 @@ def translate_manga_text( # ── 12. Export translations ─────────────────────────────────── if export_to_file: - with open(export_to_file, "w", encoding="utf-8") as f: - f.write("\n".join(output_lines)) - print(f"📄 Translations saved → {export_to_file}") + write_output(output_lines, export_to_file) # ── 13. Export bubble boxes ─────────────────────────────────── if export_bubbles_to: - export_bubble_boxes(bbox_dict, ocr_quads, - filepath=export_bubbles_to) - - -# ───────────────────────────────────────────── -# HELPER -# ───────────────────────────────────────────── -def list_languages(): - print(f"\n{'LANGUAGE':<30} {'CODE'}") - print("─" * 40) - for name, code in SUPPORTED_LANGUAGES.items(): - print(f"{name:<30} {code}") - print("─" * 40) + export_bubble_boxes( + bbox_dict, + ocr_quads, + filepath = export_bubbles_to, + bbox_expand_ratio = 0.1, # ← tune this + image_shape = full_image.shape, + ) # ───────────────────────────────────────────── @@ -650,18 +612,17 @@ def list_languages(): # ───────────────────────────────────────────── if __name__ == "__main__": translate_manga_text( - image_path = "page.png", - source_lang = "it", + image_path = "002-page.jpg", + source_lang = "en", target_lang = "ca", confidence_threshold = 0.10, min_text_length = 2, export_to_file = "output.txt", export_bubbles_to = "bubbles.json", - cluster_eps = "auto", - proximity_px = 80, + gap_px = "auto", filter_sound_effects = True, quality_threshold = 0.5, upscale_factor = 2.5, - bbox_padding = 5, + bbox_padding = 1, debug = True, - ) \ No newline at end of file + )