From f753a78ba41f4c3a9adf29719d911c25fdc5bb1e Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Tue, 21 Apr 2026 17:12:32 +0200 Subject: [PATCH] Split --- analyze_box5.py | 37 ++++++++++++++ analyze_box7_split.py | 95 +++++++++++++++++++++++++++++++++++ analyze_grouping.py | 55 +++++++++++++++++++++ check_box7.py | 77 +++++++++++++++++++++++++++++ check_grouping_logic.py | 68 +++++++++++++++++++++++++ debug_split_phase.py | 107 ++++++++++++++++++++++++++++++++++++++++ manga-translator.py | 67 ++++++++++++++++++++----- test_panel_split.py | 75 ++++++++++++++++++++++++++++ 8 files changed, 569 insertions(+), 12 deletions(-) create mode 100644 analyze_box5.py create mode 100644 analyze_box7_split.py create mode 100644 analyze_grouping.py create mode 100644 check_box7.py create mode 100644 check_grouping_logic.py create mode 100644 debug_split_phase.py create mode 100644 test_panel_split.py diff --git a/analyze_box5.py b/analyze_box5.py new file mode 100644 index 0000000..022ffa1 --- /dev/null +++ b/analyze_box5.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import json +import numpy as np + +# Import functions from manga-translator.py +import importlib.util +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) + +# Load current bubbles to see what box 5 contains +with open('bubbles.json') as f: + bubbles_data = json.load(f) + box5_data = bubbles_data['5'] + box5_bounds = (box5_data['x'], box5_data['y'], box5_data['x'] + box5_data['w'], box5_data['y'] + box5_data['h']) + print(f'Box 5 bounds (xyxy): {box5_bounds}') + print() + +# Print all detections sorted by position +print('All raw detections:') +for i, (bbox, text, conf) in enumerate(sorted(raw, key=lambda x: (mt.quad_bbox(x[0])[1], mt.quad_bbox(x[0])[0]))): + b = mt.quad_bbox(bbox) + t_norm = mt.normalize_text(text) + print(f'{i:2d}. [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] conf={conf:.2f} text="{t_norm}"') + + # Check if this overlaps with box 5 + b5_x1, b5_y1, b5_x2, b5_y2 = box5_bounds + if not (b[2] < b5_x1 or b[0] > b5_x2 or b[3] < b5_y1 or b[1] > b5_y2): + print(f' ^ OVERLAPS with Box 5!') diff --git a/analyze_box7_split.py b/analyze_box7_split.py new file mode 100644 index 0000000..0f399cf --- /dev/null +++ b/analyze_box7_split.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import json +import numpy as np +import importlib.util + +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) +image = cv2.imread(image_path) + +# Filter as the pipeline does +filtered = [] +for bbox, text, conf in raw: + t = mt.normalize_text(text) + qb = mt.quad_bbox(bbox) + + if conf < 0.12: + continue + if len(t) < 1: + continue + if mt.is_noise_text(t): + continue + if mt.is_sound_effect(t): + continue + if mt.is_title_text(t): + continue + + filtered.append((bbox, t, conf)) + +# Run grouping +bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens( + filtered, image.shape, gap_px=18, bbox_padding=3 +) + +print("=== BUBBLE 7 & 8 ANALYSIS ===\n") +print("Current Bubble 7 (right side content):") +for bid in [7]: + if bid in bubble_indices: + box = bubble_boxes[bid] + print(f" Box: {box}") + print(f" Indices: {bubble_indices[bid]}") + indices = bubble_indices[bid] + boxes = [mt.quad_bbox(filtered[i][0]) for i in indices] + min_x = min(b[0] for b in boxes) + max_x = max(b[2] for b in boxes) + print(f" X range: {min_x} - {max_x}") + for idx in indices: + b = mt.quad_bbox(filtered[idx][0]) + print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}") + +print("\nCurrent Bubble 8 (left side content):") +for bid in [8]: + if bid in bubble_indices: + box = bubble_boxes[bid] + print(f" Box: {box}") + print(f" Indices: {bubble_indices[bid]}") + indices = bubble_indices[bid] + boxes = [mt.quad_bbox(filtered[i][0]) for i in indices] + min_x = min(b[0] for b in boxes) + max_x = max(b[2] for b in boxes) + print(f" X range: {min_x} - {max_x}") + for idx in indices: + b = mt.quad_bbox(filtered[idx][0]) + print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}") + +# Check the horizontal gap between them +print("\n=== GAP ANALYSIS ===") +if 7 in bubble_indices and 8 in bubble_indices: + boxes7 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[7]] + boxes8 = [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[8]] + + max_x7 = max(b[2] for b in boxes7) + min_x8 = min(b[0] for b in boxes8) + + print(f"Bubble 7 max X: {max_x7}") + print(f"Bubble 8 min X: {min_x8}") + print(f"Horizontal gap: {min_x8 - max_x7}") + + # Check Y overlap + min_y7 = min(b[1] for b in boxes7) + max_y7 = max(b[3] for b in boxes7) + min_y8 = min(b[1] for b in boxes8) + max_y8 = max(b[3] for b in boxes8) + + print(f"\nBubble 7 Y range: {min_y7} - {max_y7}") + print(f"Bubble 8 Y range: {min_y8} - {max_y8}") + print(f"Y overlap: {max(0, min(max_y7, max_y8) - max(min_y7, min_y8))} pixels") diff --git a/analyze_grouping.py b/analyze_grouping.py new file mode 100644 index 0000000..50d0ef2 --- /dev/null +++ b/analyze_grouping.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import json +import numpy as np +import importlib.util + +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) +image = cv2.imread(image_path) + +# Filter as the pipeline does +filtered = [] +for bbox, text, conf in raw: + t = mt.normalize_text(text) + qb = mt.quad_bbox(bbox) + + if conf < 0.12: + continue + if len(t) < 1: + continue + if mt.is_noise_text(t): + continue + if mt.is_sound_effect(t): + continue + if mt.is_title_text(t): + continue + + filtered.append((bbox, t, conf)) + +print(f"Filtered {len(filtered)} detections") + +# Now run grouping +bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens( + filtered, image.shape, gap_px=18, bbox_padding=3 +) + +# Find which bubble contains box 5 +box5_region = (378, 570, 536, 753) + +print("\n=== BUBBLES ===") +for bid, box in bubble_boxes.items(): + print(f"Bubble {bid}: {box}") + print(f" Indices: {bubble_indices[bid]}") + print(f" Detections:") + for idx in bubble_indices[bid]: + b = mt.quad_bbox(filtered[idx][0]) + print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}") diff --git a/check_box7.py b/check_box7.py new file mode 100644 index 0000000..66e57c7 --- /dev/null +++ b/check_box7.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import json +import numpy as np +import importlib.util + +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) +image = cv2.imread(image_path) + +# Filter as the pipeline does +filtered = [] +for bbox, text, conf in raw: + t = mt.normalize_text(text) + qb = mt.quad_bbox(bbox) + + if conf < 0.12: + continue + if len(t) < 1: + continue + if mt.is_noise_text(t): + continue + if mt.is_sound_effect(t): + continue + if mt.is_title_text(t): + continue + + filtered.append((bbox, t, conf)) + +# Now run grouping +bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens( + filtered, image.shape, gap_px=18, bbox_padding=3 +) + +# Check current bubbles.json for reference +with open('bubbles.json') as f: + old_bubbles = json.load(f) + +print("=== BOX 5 ===") +print(f"Old bounds (from bubbles.json): x={old_bubbles['5']['x']}, y={old_bubbles['5']['y']}, w={old_bubbles['5']['w']}, h={old_bubbles['5']['h']}") +print(f" (xyxy): ({old_bubbles['5']['x']}, {old_bubbles['5']['y']}, {old_bubbles['5']['x'] + old_bubbles['5']['w']}, {old_bubbles['5']['y'] + old_bubbles['5']['h']})") + +# Find bubble at that location in current grouping +for bid, box in bubble_boxes.items(): + if box[0] == 371 and box[1] == 563: # New box 5 location + print(f"Current bubble {bid}: {box}") + print(f" Detections: {bubble_indices[bid]}") + for idx in bubble_indices[bid]: + b = mt.quad_bbox(filtered[idx][0]) + print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}") + +print("\n=== BOX 7 ===") +print(f"Old bounds (from bubbles.json): x={old_bubbles['7']['x']}, y={old_bubbles['7']['y']}, w={old_bubbles['7']['w']}, h={old_bubbles['7']['h']}") +print(f" (xyxy): ({old_bubbles['7']['x']}, {old_bubbles['7']['y']}, {old_bubbles['7']['x'] + old_bubbles['7']['w']}, {old_bubbles['7']['y'] + old_bubbles['7']['h']})") + +# Find corresponding bubble +for bid, box in bubble_boxes.items(): + x1, y1, x2, y2 = box + # Check if this overlaps with old box 7 + old_x1, old_y1 = old_bubbles['7']['x'], old_bubbles['7']['y'] + old_x2 = old_x1 + old_bubbles['7']['w'] + old_y2 = old_y1 + old_bubbles['7']['h'] + + if not (x2 < old_x1 or x1 > old_x2 or y2 < old_y1 or y1 > old_y2): + print(f"Current bubble {bid}: {box}") + print(f" Detections: {bubble_indices[bid]}") + for idx in bubble_indices[bid]: + b = mt.quad_bbox(filtered[idx][0]) + print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}") diff --git a/check_grouping_logic.py b/check_grouping_logic.py new file mode 100644 index 0000000..35ef669 --- /dev/null +++ b/check_grouping_logic.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import numpy as np +import importlib.util + +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) +image = cv2.imread(image_path) + +# Filter +filtered = [] +for bbox, text, conf in raw: + t = mt.normalize_text(text) + if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t): + continue + filtered.append((bbox, t, conf)) + +# Get the indices we're interested in (left and right bubbles) +left_indices = [41, 42, 43, 44, 45, 46] # LET, GO, OFF, ME, AL-, REA- +right_indices = [47, 48, 49, 50, 51, 52, 53, 54] # DON'T, WORRY!, HARUKO, ... + +print("=== CHECKING GROUPING CONDITIONS ===\n") + +# Check if they would be united in group_tokens +boxes_left = [mt.quad_bbox(filtered[i][0]) for i in left_indices] +boxes_right = [mt.quad_bbox(filtered[i][0]) for i in right_indices] + +# Check overlap_or_near +print("Checking overlap_or_near with gap=18:") +for li, bi in enumerate(left_indices): + for ri, bj in enumerate(right_indices): + b_left = boxes_left[li] + b_right = boxes_right[ri] + gap_x = max(0, max(b_left[0], b_right[0]) - min(b_left[2], b_right[2])) + gap_y = max(0, max(b_left[1], b_right[1]) - min(b_left[3], b_right[3])) + overlaps = gap_x <= 18 and gap_y <= 18 + if overlaps: + print(f" {bi} and {bj} overlap/near: gap_x={gap_x}, gap_y={gap_y}") + +# Check distance check +hs = [max(1.0, b[3] - b[1]) for b in [*boxes_left, *boxes_right]] +med_h = float(np.median(hs)) if hs else 12.0 +dist_thresh = max(20.0, med_h * 2.2) + +print(f"\nMedian height: {med_h}") +print(f"Distance threshold: {dist_thresh}") + +print("\nChecking distance check:") +for li, bi in enumerate(left_indices[:1]): # Just check first from each + for ri, bj in enumerate(right_indices[:1]): + b_left = boxes_left[li] + b_right = boxes_right[ri] + cx_left = (b_left[0] + b_left[2]) / 2.0 + cy_left = (b_left[1] + b_left[3]) / 2.0 + cx_right = (b_right[0] + b_right[2]) / 2.0 + cy_right = (b_right[1] + b_right[3]) / 2.0 + d = ((cx_left - cx_right) ** 2 + (cy_left - cy_right) ** 2) ** 0.5 + within_dist = d <= dist_thresh + within_y = abs(cy_left - cy_right) <= med_h * 3.0 + print(f" {bi} to {bj}: distance={d:.1f}, within_dist={within_dist}, within_y_tol={within_y}") diff --git a/debug_split_phase.py b/debug_split_phase.py new file mode 100644 index 0000000..d037fdd --- /dev/null +++ b/debug_split_phase.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# Debug script to see what bubbles are produced after splitting + +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import json +import numpy as np +import importlib.util + +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) +image = cv2.imread(image_path) + +# Full filtering as pipeline does +filtered = [] +skipped = 0 +ih, iw = image.shape[:2] + +for bbox, text, conf in raw: + t = mt.normalize_text(text) + qb = mt.quad_bbox(bbox) + + if conf < 0.12: + skipped += 1 + continue + if len(t) < 1: + skipped += 1 + continue + if mt.is_noise_text(t): + skipped += 1 + continue + if mt.is_sound_effect(t): + skipped += 1 + continue + if mt.is_title_text(t): + skipped += 1 + continue + if qb[1] < int(ih * mt.TOP_BAND_RATIO): + if conf < 0.70 and len(t) >= 5: + skipped += 1 + continue + + filtered.append((bbox, t, conf)) + +resolved_gap = mt.auto_gap(image_path) +bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens( + filtered, image.shape, gap_px=resolved_gap, bbox_padding=3 +) + +print("=== AFTER GROUPING ===") +print(f"Bubbles dict keys: {sorted(bubbles.keys())}") +for bid in [7, 8]: + if bid in bubbles: + print(f"\nBubble {bid}:") + print(f" Box: {bubble_boxes[bid]}") + print(f" Indices ({len(bubble_indices[bid])}): {bubble_indices[bid]}") + print(f" Quads ({len(bubble_quads[bid])})") + +# Now simulate the split logic +new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {} +next_bid = max(bubbles.keys()) + 1 if bubbles else 1 +splits_performed = [] + +for bid in list(bubbles.keys()): + box = bubble_boxes[bid] + bubble_split = None + + # Try split + split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid]) + if split_result: + box_left, box_right, split_x = split_result + # ... split logic ... + bubble_split = "panel_split" + + if bubble_split is None: + col_split = mt.split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid) + if col_split: + bubble_split = "column_split" + + if bubble_split: + splits_performed.append(f"Bubble {bid}: {bubble_split}") + # Don't actually split here, just mark it + else: + # No split + new_bubbles[bid] = bubbles[bid] + new_bubble_boxes[bid] = bubble_boxes[bid] + new_bubble_quads[bid] = bubble_quads[bid] + new_bubble_indices[bid] = bubble_indices[bid] + +print("\n=== AFTER SPLIT LOGIC ===") +print(f"Splits detected: {len(splits_performed)}") +for s in splits_performed: + print(f" {s}") + +print(f"\nBubbles dict keys: {sorted(new_bubbles.keys())}") +for bid in [7, 8]: + if bid in new_bubbles: + print(f"\nBubble {bid}:") + print(f" Box: {new_bubble_boxes[bid]}") + print(f" Indices ({len(new_bubble_indices[bid])}): {new_bubble_indices[bid][:3]}...") diff --git a/manga-translator.py b/manga-translator.py index 8c4360a..ce832f2 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -238,8 +238,31 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None): peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start peak_val = vertical_projection[peak_x_relative] - # Detect panel border with more sensitive threshold (0.40 instead of 0.60) - # This catches boxes with vertical lines even if they're not super dark + # Find ALL significant peaks, not just the first + # This helps detect internal gaps between text regions (left vs right bubbles) + threshold_val = h * 255 * 0.25 # Very sensitive threshold + significant_peaks = [] + + for x_rel in range(search_start, search_end): + if vertical_projection[x_rel] > threshold_val: + significant_peaks.append((x_rel, vertical_projection[x_rel])) + + # If we have multiple peaks, find the largest gap between text regions + # by looking for the valley (lowest projection value) between peaks + if len(significant_peaks) > 1: + # Find the minimum value (gap) between the first and last peak + min_proj_val = np.min(vertical_projection[search_start:search_end]) + min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start + + # Use the valley point as the split, not the peak + # This more accurately separates left-aligned vs right-aligned content + if min_proj_val < threshold_val * 0.6: # Valley is clearly a gap + split_x_absolute = x1 + min_proj_idx + box_left = (x1, y1, split_x_absolute, y2) + box_right = (split_x_absolute, y1, x2, y2) + return box_left, box_right, split_x_absolute + + # Fallback: if the main peak is significant enough, use it if peak_val > (h * 255 * 0.40): split_x_absolute = x1 + peak_x_relative box_left = (x1, y1, split_x_absolute, y2) @@ -289,14 +312,14 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre if use_aggressive_thresholds: # Relaxed thresholds for fallback splitting after failed panel border - threshold1 = 10.0 # Very low absolute threshold for fallback - threshold2 = med_h * 0.8 # Even lower relative threshold - min_gap = 5.0 + threshold1 = 80.0 # Increased from 10 to require significant gaps + threshold2 = med_h * 1.2 # Increased from 0.8 + min_gap = 40.0 # Increased from 5 else: - # Normal thresholds - threshold1 = 50.0 # Absolute threshold: 50 pixels - threshold2 = med_h * 2.0 # Relative threshold: 2x line height - min_gap = 25.0 + # Normal thresholds - very conservative to avoid breaking valid bubbles + threshold1 = 120.0 # Increased from 50 - require very large gaps + threshold2 = med_h * 3.0 # Increased from 2.0 - require 3x line height + min_gap = 60.0 # Increased from 25 @@ -304,6 +327,12 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre split_idx = max_gap_idx left_indices = [item[0] for item in sorted_items[:split_idx]] right_indices = [item[0] for item in sorted_items[split_idx:]] + + # Additional safety: don't split if one side only has 1 detection + # This prevents breaking up valid bubbles with just a few words + if len(left_indices) < 2 or len(right_indices) < 2: + return None + return left_indices, right_indices return None @@ -637,6 +666,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): hs = [max(1.0, b[3] - b[1]) for b in boxes] med_h = float(np.median(hs)) if hs else 12.0 dist_thresh = max(20.0, med_h * 2.2) + + # Adaptive vertical gap: allow up to 1.0x median line height for vertical gaps + # This handles cases where lines are spaced further apart (e.g., multi-line bubbles) + adaptive_gap_y = max(gap_px, med_h * 1.0) p = list(range(n)) @@ -651,7 +684,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): for i in range(n): for j in range(i + 1, n): - if overlap_or_near(boxes[i], boxes[j], gap=gap_px): + # Use adaptive gap for vertical spacing, fixed gap for horizontal + ax1, ay1, ax2, ay2 = boxes[i] + bx1, by1, bx2, by2 = boxes[j] + gap_x = max(0, max(ax1, bx1) - min(ax2, bx2)) + gap_y = max(0, max(ay1, by1) - min(ay2, by2)) + + if gap_x <= gap_px and gap_y <= adaptive_gap_y: unite(i, j) continue cx1, cy1 = centers[i] @@ -681,8 +720,12 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): continue x1, y1, x2, y2 = ub - x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding) - x2 = min(iw - 1, x2 + bbox_padding); y2 = min(ih - 1, y2 + bbox_padding) + + # Adaptive padding: scale with median line height to ensure all boundary chars are captured + # Use max of fixed padding or line-height-based padding + adaptive_pad = max(bbox_padding, int(round(med_h * 0.35))) + x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad) + x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad) bubbles[bid] = lines bubble_boxes[bid] = (x1, y1, x2, y2) diff --git a/test_panel_split.py b/test_panel_split.py new file mode 100644 index 0000000..5a5a488 --- /dev/null +++ b/test_panel_split.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator') + +import cv2 +import numpy as np +import importlib.util + +spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py") +mt = importlib.util.module_from_spec(spec) +spec.loader.exec_module(mt) + +image_path = '004.png' +detector = mt.MacVisionDetector(source_lang='en') +raw = detector.read(image_path) +image = cv2.imread(image_path) + +# Full filtering +filtered = [] +for bbox, text, conf in raw: + t = mt.normalize_text(text) + qb = mt.quad_bbox(bbox) + + if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t): + continue + if qb[1] < int(image.shape[0] * mt.TOP_BAND_RATIO): + if conf < 0.70 and len(t) >= 5: + continue + + filtered.append((bbox, t, conf)) + +# Get grouping +bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens( + filtered, image.shape, gap_px=mt.auto_gap(image_path), bbox_padding=3 +) + +print("=== TESTING PANEL SPLIT ON BUBBLE 7 ===\n") + +bid = 7 +box = bubble_boxes[bid] +print(f"Bubble {bid} box: {box}") +print(f"Bubble {bid} quads: {len(bubble_quads[bid])}") +print(f"Bubble {bid} indices: {len(bubble_indices[bid])}") + +# Test split_panel_box +split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid]) + +if split_result: + box_left, box_right, split_x = split_result + print(f"\n✓ Panel split detected!") + print(f" Split X: {split_x}") + print(f" Left box: {box_left}") + print(f" Right box: {box_right}") + + # Simulate index split + left_idxs, right_idxs = [], [] + for idx in bubble_indices[bid]: + cx, cy = mt.quad_center(filtered[idx][0]) + if cx < split_x: + left_idxs.append(idx) + else: + right_idxs.append(idx) + + print(f"\n Left indices ({len(left_idxs)}): {left_idxs}") + print(f" Right indices ({len(right_idxs)}): {right_idxs}") + + if left_idxs and right_idxs: + print(f"\n✓ Split is valid (both sides have content)") + else: + print(f"\n✗ Split is invalid (one side is empty)") +else: + print(f"\n✗ No panel split detected") + print(f" Threshold would be: quads >= 10? {len(bubble_quads[bid]) >= 10}") + print(f" Width >= 50? {box[2] - box[0] >= 50}") + print(f" Height >= 50? {box[3] - box[1] >= 50}")