Split
This commit is contained in:
37
analyze_box5.py
Normal file
37
analyze_box5.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env python3
"""List every raw OCR detection on the page and flag those touching saved box 5.

Reads the detections for ``004.png`` through the project's detector, loads the
stored geometry of bubble ``'5'`` from ``bubbles.json`` and prints each
detection in top-to-bottom, left-to-right order, marking the ones whose
bounding box intersects (or touches) box 5.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np

# The pipeline file has a hyphen in its name, so it is loaded from its path
# instead of through a regular ``import`` statement.
import importlib.util
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)

# Stored geometry of box 5 is x/y/w/h; convert it to corner form once.
with open('bubbles.json') as f:
    bubbles_data = json.load(f)
box5_data = bubbles_data['5']
box5_bounds = (
    box5_data['x'],
    box5_data['y'],
    box5_data['x'] + box5_data['w'],
    box5_data['y'] + box5_data['h'],
)
print(f'Box 5 bounds (xyxy): {box5_bounds}')
print()

b5_x1, b5_y1, b5_x2, b5_y2 = box5_bounds


def _top_left_key(detection):
    """Sort key: (top, left) of the detection's axis-aligned bounding box."""
    rect = mt.quad_bbox(detection[0])
    return (rect[1], rect[0])


print('All raw detections:')
for rank, (quad, text, conf) in enumerate(sorted(raw, key=_top_left_key)):
    rect = mt.quad_bbox(quad)
    label = mt.normalize_text(text)
    print(f'{rank:2d}. [{rect[0]:3d},{rect[1]:3d} -> {rect[2]:3d},{rect[3]:3d}] conf={conf:.2f} text="{label}"')

    # Two rectangles intersect (edges included) when neither lies entirely
    # to one side of the other.
    touches_box5 = (
        rect[2] >= b5_x1
        and rect[0] <= b5_x2
        and rect[3] >= b5_y1
        and rect[1] <= b5_y2
    )
    if touches_box5:
        print(' ^ OVERLAPS with Box 5!')
|
||||||
95
analyze_box7_split.py
Normal file
95
analyze_box7_split.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
#!/usr/bin/env python3
"""Inspect how bubbles 7 and 8 are grouped and how far apart they are.

Runs detection on ``004.png``, applies the same text filters the other
analysis scripts use, groups the surviving tokens with ``mt.group_tokens``
and prints the member tokens of bubbles 7 and 8 followed by the horizontal
gap and vertical overlap between the two.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# The pipeline file has a hyphen in its name, so it is loaded from its path.
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than on image.shape.
    sys.exit(f"Could not read image: {image_path}")

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    if conf < 0.12 or len(t) < 1:
        continue
    if mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))

# Run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)


def _token_boxes(bid):
    """Return the bounding box of every token assigned to bubble ``bid``."""
    return [mt.quad_bbox(filtered[i][0]) for i in bubble_indices[bid]]


def _dump_bubble(bid):
    """Print the box, member indices, X extent and tokens of one bubble.

    Prints nothing when the grouping produced no bubble with this id.
    """
    if bid not in bubble_indices:
        return
    indices = bubble_indices[bid]
    boxes = _token_boxes(bid)
    print(f" Box: {bubble_boxes[bid]}")
    print(f" Indices: {indices}")
    min_x = min(b[0] for b in boxes)
    max_x = max(b[2] for b in boxes)
    print(f" X range: {min_x} - {max_x}")
    for idx, b in zip(indices, boxes):
        print(f" {idx}: x=[{b[0]:3d},{b[2]:3d}] y=[{b[1]:3d},{b[3]:3d}] = {filtered[idx][1]}")


print("=== BUBBLE 7 & 8 ANALYSIS ===\n")
print("Current Bubble 7 (right side content):")
_dump_bubble(7)

print("\nCurrent Bubble 8 (left side content):")
_dump_bubble(8)

# Check the horizontal gap between them
print("\n=== GAP ANALYSIS ===")
if 7 in bubble_indices and 8 in bubble_indices:
    boxes7 = _token_boxes(7)
    boxes8 = _token_boxes(8)

    max_x7 = max(b[2] for b in boxes7)
    min_x8 = min(b[0] for b in boxes8)

    print(f"Bubble 7 max X: {max_x7}")
    print(f"Bubble 8 min X: {min_x8}")
    print(f"Horizontal gap: {min_x8 - max_x7}")

    # Check Y overlap
    min_y7 = min(b[1] for b in boxes7)
    max_y7 = max(b[3] for b in boxes7)
    min_y8 = min(b[1] for b in boxes8)
    max_y8 = max(b[3] for b in boxes8)

    print(f"\nBubble 7 Y range: {min_y7} - {max_y7}")
    print(f"Bubble 8 Y range: {min_y8} - {max_y8}")
    # Length of the shared vertical interval, clamped at zero when disjoint.
    print(f"Y overlap: {max(0, min(max_y7, max_y8) - max(min_y7, min_y8))} pixels")
|
||||||
55
analyze_grouping.py
Normal file
55
analyze_grouping.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python3
"""Print every bubble produced by token grouping, flagging the box 5 area.

Runs detection on ``004.png``, applies the text filters, groups the surviving
tokens with ``mt.group_tokens`` and lists each bubble with its member tokens.
Bubbles whose box intersects ``box5_region`` are marked so the bubble that
now covers the old box 5 can be spotted in the output.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# The pipeline file has a hyphen in its name, so it is loaded from its path.
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than on image.shape.
    sys.exit(f"Could not read image: {image_path}")

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    if conf < 0.12 or len(t) < 1:
        continue
    if mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))

print(f"Filtered {len(filtered)} detections")

# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

# Find which bubble contains box 5
# NOTE(review): the region is taken to be (x1, y1, x2, y2), the same corner
# form the bubble boxes are unpacked as elsewhere — confirm.
box5_region = (378, 570, 536, 753)
r_x1, r_y1, r_x2, r_y2 = box5_region

print("\n=== BUBBLES ===")
for bid, box in bubble_boxes.items():
    print(f"Bubble {bid}: {box}")
    # Rectangles intersect unless one lies entirely to one side of the other.
    if not (box[2] < r_x1 or box[0] > r_x2 or box[3] < r_y1 or box[1] > r_y2):
        print(f" ^ OVERLAPS box 5 region {box5_region}")
    print(f" Indices: {bubble_indices[bid]}")
    print(" Detections:")
    for idx in bubble_indices[bid]:
        b = mt.quad_bbox(filtered[idx][0])
        print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")
|
||||||
77
check_box7.py
Normal file
77
check_box7.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
#!/usr/bin/env python3
"""Compare saved boxes 5 and 7 from bubbles.json with the current grouping.

For each of the two saved boxes the script prints the stored bounds and then
every bubble of the freshly computed grouping whose box intersects them,
together with that bubble's member tokens.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# The pipeline file has a hyphen in its name, so it is loaded from its path.
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than on image.shape.
    sys.exit(f"Could not read image: {image_path}")

# Filter as the pipeline does
filtered = []
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    if conf < 0.12 or len(t) < 1:
        continue
    if mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    filtered.append((bbox, t, conf))

# Now run grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=18, bbox_padding=3
)

# Check current bubbles.json for reference
with open('bubbles.json') as f:
    old_bubbles = json.load(f)


def _report_old_box(key):
    """Print saved box ``key`` and every current bubble intersecting it.

    The match is by rectangle intersection, not by exact corner coordinates,
    so it keeps working when padding or grouping shifts the new box.
    """
    old = old_bubbles[key]
    old_x1, old_y1 = old['x'], old['y']
    old_x2 = old_x1 + old['w']
    old_y2 = old_y1 + old['h']

    print(f"Old bounds (from bubbles.json): x={old['x']}, y={old['y']}, w={old['w']}, h={old['h']}")
    print(f" (xyxy): ({old_x1}, {old_y1}, {old_x2}, {old_y2})")

    for bid, box in bubble_boxes.items():
        x1, y1, x2, y2 = box
        # Skip bubbles lying entirely to one side of the saved box.
        if x2 < old_x1 or x1 > old_x2 or y2 < old_y1 or y1 > old_y2:
            continue
        print(f"Current bubble {bid}: {box}")
        print(f" Detections: {bubble_indices[bid]}")
        for idx in bubble_indices[bid]:
            b = mt.quad_bbox(filtered[idx][0])
            print(f" {idx}: [{b[0]:3d},{b[1]:3d} -> {b[2]:3d},{b[3]:3d}] = {filtered[idx][1]}")


print("=== BOX 5 ===")
_report_old_box('5')

print("\n=== BOX 7 ===")
_report_old_box('7')
|
||||||
68
check_grouping_logic.py
Normal file
68
check_grouping_logic.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env python3
"""Replay the grouping conditions between two hand-picked token sets.

Takes fixed index lists for a left and a right bubble, then prints which
left/right token pairs are within the 18 px overlap-or-near gap and how the
first pair fares against the centre-distance and vertical-tolerance checks.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import numpy as np
import importlib.util

# The pipeline file has a hyphen in its name, so it is loaded from its path.
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)

# Filter
filtered = []
for quad, text, conf in raw:
    cleaned = mt.normalize_text(text)
    rejected = (
        conf < 0.12
        or len(cleaned) < 1
        or mt.is_noise_text(cleaned)
        or mt.is_sound_effect(cleaned)
        or mt.is_title_text(cleaned)
    )
    if not rejected:
        filtered.append((quad, cleaned, conf))

# Get the indices we're interested in (left and right bubbles)
left_indices = [41, 42, 43, 44, 45, 46]  # LET, GO, OFF, ME, AL-, REA-
right_indices = [47, 48, 49, 50, 51, 52, 53, 54]  # DON'T, WORRY!, HARUKO, ...

print("=== CHECKING GROUPING CONDITIONS ===\n")

# Check if they would be united in group_tokens
boxes_left = [mt.quad_bbox(filtered[i][0]) for i in left_indices]
boxes_right = [mt.quad_bbox(filtered[i][0]) for i in right_indices]


def _center(rect):
    """Return the (cx, cy) midpoint of an (x1, y1, x2, y2) rectangle."""
    return (rect[0] + rect[2]) / 2.0, (rect[1] + rect[3]) / 2.0


# Check overlap_or_near
print("Checking overlap_or_near with gap=18:")
for bi, b_left in zip(left_indices, boxes_left):
    for bj, b_right in zip(right_indices, boxes_right):
        # Axis-wise separation between the rectangles; zero when they overlap.
        gap_x = max(0, max(b_left[0], b_right[0]) - min(b_left[2], b_right[2]))
        gap_y = max(0, max(b_left[1], b_right[1]) - min(b_left[3], b_right[3]))
        if gap_x <= 18 and gap_y <= 18:
            print(f" {bi} and {bj} overlap/near: gap_x={gap_x}, gap_y={gap_y}")

# Check distance check
hs = [max(1.0, rect[3] - rect[1]) for rect in boxes_left + boxes_right]
med_h = float(np.median(hs)) if hs else 12.0
dist_thresh = max(20.0, med_h * 2.2)

print(f"\nMedian height: {med_h}")
print(f"Distance threshold: {dist_thresh}")

print("\nChecking distance check:")
# Just check first from each
for bi, b_left in zip(left_indices[:1], boxes_left):
    for bj, b_right in zip(right_indices[:1], boxes_right):
        cx_left, cy_left = _center(b_left)
        cx_right, cy_right = _center(b_right)
        d = ((cx_left - cx_right) ** 2 + (cy_left - cy_right) ** 2) ** 0.5
        within_dist = d <= dist_thresh
        within_y = abs(cy_left - cy_right) <= med_h * 3.0
        print(f" {bi} to {bj}: distance={d:.1f}, within_dist={within_dist}, within_y_tol={within_y}")
|
||||||
107
debug_split_phase.py
Normal file
107
debug_split_phase.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
#!/usr/bin/env python3
"""Debug script to see what bubbles are produced after splitting.

Runs detection and the full filter chain on ``004.png``, groups the tokens,
then dry-runs the split decision for every bubble: a panel split via
``mt.split_panel_box`` is tried first and a column split via
``mt.split_bubble_if_multiple_columns`` second. Nothing is actually split;
bubbles that would be split are only listed, the rest are carried over
unchanged so the state of bubbles 7 and 8 can be compared before and after.
"""

import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import json
import numpy as np
import importlib.util

# The pipeline file has a hyphen in its name, so it is loaded from its path.
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than on image.shape.
    sys.exit(f"Could not read image: {image_path}")

# Full filtering as pipeline does
filtered = []
skipped = 0
ih, iw = image.shape[:2]
top_band_limit = int(ih * mt.TOP_BAND_RATIO)

for bbox, text, conf in raw:
    t = mt.normalize_text(text)

    if (
        conf < 0.12
        or len(t) < 1
        or mt.is_noise_text(t)
        or mt.is_sound_effect(t)
        or mt.is_title_text(t)
    ):
        skipped += 1
        continue

    # Inside the top band, longer strings need a higher confidence to survive.
    qb = mt.quad_bbox(bbox)
    if qb[1] < top_band_limit and conf < 0.70 and len(t) >= 5:
        skipped += 1
        continue

    filtered.append((bbox, t, conf))

print(f"Kept {len(filtered)} detections, skipped {skipped}")

resolved_gap = mt.auto_gap(image_path)
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
)

print("=== AFTER GROUPING ===")
print(f"Bubbles dict keys: {sorted(bubbles.keys())}")
for bid in [7, 8]:
    if bid in bubbles:
        print(f"\nBubble {bid}:")
        print(f" Box: {bubble_boxes[bid]}")
        print(f" Indices ({len(bubble_indices[bid])}): {bubble_indices[bid]}")
        print(f" Quads ({len(bubble_quads[bid])})")

# Now simulate the split logic
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
splits_performed = []

for bid in list(bubbles.keys()):
    bubble_split = None

    # Try the panel split first; the column split is only a fallback.
    if mt.split_panel_box(image, bubble_boxes[bid], bubble_quads=bubble_quads[bid]):
        bubble_split = "panel_split"
    elif mt.split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid):
        bubble_split = "column_split"

    if bubble_split:
        # Don't actually split here, just mark it
        splits_performed.append(f"Bubble {bid}: {bubble_split}")
    else:
        # No split
        new_bubbles[bid] = bubbles[bid]
        new_bubble_boxes[bid] = bubble_boxes[bid]
        new_bubble_quads[bid] = bubble_quads[bid]
        new_bubble_indices[bid] = bubble_indices[bid]

print("\n=== AFTER SPLIT LOGIC ===")
print(f"Splits detected: {len(splits_performed)}")
for s in splits_performed:
    print(f" {s}")

print(f"\nBubbles dict keys: {sorted(new_bubbles.keys())}")
for bid in [7, 8]:
    if bid in new_bubbles:
        print(f"\nBubble {bid}:")
        print(f" Box: {new_bubble_boxes[bid]}")
        print(f" Indices ({len(new_bubble_indices[bid])}): {new_bubble_indices[bid][:3]}...")
|
||||||
@@ -238,8 +238,31 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
|||||||
peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start
|
peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start
|
||||||
peak_val = vertical_projection[peak_x_relative]
|
peak_val = vertical_projection[peak_x_relative]
|
||||||
|
|
||||||
# Detect panel border with more sensitive threshold (0.40 instead of 0.60)
|
# Find ALL significant peaks, not just the first
|
||||||
# This catches boxes with vertical lines even if they're not super dark
|
# This helps detect internal gaps between text regions (left vs right bubbles)
|
||||||
|
threshold_val = h * 255 * 0.25 # Very sensitive threshold
|
||||||
|
significant_peaks = []
|
||||||
|
|
||||||
|
for x_rel in range(search_start, search_end):
|
||||||
|
if vertical_projection[x_rel] > threshold_val:
|
||||||
|
significant_peaks.append((x_rel, vertical_projection[x_rel]))
|
||||||
|
|
||||||
|
# If we have multiple peaks, find the largest gap between text regions
|
||||||
|
# by looking for the valley (lowest projection value) between peaks
|
||||||
|
if len(significant_peaks) > 1:
|
||||||
|
# Find the minimum value (gap) between the first and last peak
|
||||||
|
min_proj_val = np.min(vertical_projection[search_start:search_end])
|
||||||
|
min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start
|
||||||
|
|
||||||
|
# Use the valley point as the split, not the peak
|
||||||
|
# This more accurately separates left-aligned vs right-aligned content
|
||||||
|
if min_proj_val < threshold_val * 0.6: # Valley is clearly a gap
|
||||||
|
split_x_absolute = x1 + min_proj_idx
|
||||||
|
box_left = (x1, y1, split_x_absolute, y2)
|
||||||
|
box_right = (split_x_absolute, y1, x2, y2)
|
||||||
|
return box_left, box_right, split_x_absolute
|
||||||
|
|
||||||
|
# Fallback: if the main peak is significant enough, use it
|
||||||
if peak_val > (h * 255 * 0.40):
|
if peak_val > (h * 255 * 0.40):
|
||||||
split_x_absolute = x1 + peak_x_relative
|
split_x_absolute = x1 + peak_x_relative
|
||||||
box_left = (x1, y1, split_x_absolute, y2)
|
box_left = (x1, y1, split_x_absolute, y2)
|
||||||
@@ -289,14 +312,14 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
|||||||
|
|
||||||
if use_aggressive_thresholds:
|
if use_aggressive_thresholds:
|
||||||
# Relaxed thresholds for fallback splitting after failed panel border
|
# Relaxed thresholds for fallback splitting after failed panel border
|
||||||
threshold1 = 10.0 # Very low absolute threshold for fallback
|
threshold1 = 80.0 # Increased from 10 to require significant gaps
|
||||||
threshold2 = med_h * 0.8 # Even lower relative threshold
|
threshold2 = med_h * 1.2 # Increased from 0.8
|
||||||
min_gap = 5.0
|
min_gap = 40.0 # Increased from 5
|
||||||
else:
|
else:
|
||||||
# Normal thresholds
|
# Normal thresholds - very conservative to avoid breaking valid bubbles
|
||||||
threshold1 = 50.0 # Absolute threshold: 50 pixels
|
threshold1 = 120.0 # Increased from 50 - require very large gaps
|
||||||
threshold2 = med_h * 2.0 # Relative threshold: 2x line height
|
threshold2 = med_h * 3.0 # Increased from 2.0 - require 3x line height
|
||||||
min_gap = 25.0
|
min_gap = 60.0 # Increased from 25
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -304,6 +327,12 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
|||||||
split_idx = max_gap_idx
|
split_idx = max_gap_idx
|
||||||
left_indices = [item[0] for item in sorted_items[:split_idx]]
|
left_indices = [item[0] for item in sorted_items[:split_idx]]
|
||||||
right_indices = [item[0] for item in sorted_items[split_idx:]]
|
right_indices = [item[0] for item in sorted_items[split_idx:]]
|
||||||
|
|
||||||
|
# Additional safety: don't split if one side only has 1 detection
|
||||||
|
# This prevents breaking up valid bubbles with just a few words
|
||||||
|
if len(left_indices) < 2 or len(right_indices) < 2:
|
||||||
|
return None
|
||||||
|
|
||||||
return left_indices, right_indices
|
return left_indices, right_indices
|
||||||
|
|
||||||
return None
|
return None
|
||||||
@@ -637,6 +666,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
hs = [max(1.0, b[3] - b[1]) for b in boxes]
|
hs = [max(1.0, b[3] - b[1]) for b in boxes]
|
||||||
med_h = float(np.median(hs)) if hs else 12.0
|
med_h = float(np.median(hs)) if hs else 12.0
|
||||||
dist_thresh = max(20.0, med_h * 2.2)
|
dist_thresh = max(20.0, med_h * 2.2)
|
||||||
|
|
||||||
|
# Adaptive vertical gap: allow up to 1.0x median line height for vertical gaps
|
||||||
|
# This handles cases where lines are spaced further apart (e.g., multi-line bubbles)
|
||||||
|
adaptive_gap_y = max(gap_px, med_h * 1.0)
|
||||||
|
|
||||||
p = list(range(n))
|
p = list(range(n))
|
||||||
|
|
||||||
@@ -651,7 +684,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
|
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
for j in range(i + 1, n):
|
for j in range(i + 1, n):
|
||||||
if overlap_or_near(boxes[i], boxes[j], gap=gap_px):
|
# Use adaptive gap for vertical spacing, fixed gap for horizontal
|
||||||
|
ax1, ay1, ax2, ay2 = boxes[i]
|
||||||
|
bx1, by1, bx2, by2 = boxes[j]
|
||||||
|
gap_x = max(0, max(ax1, bx1) - min(ax2, bx2))
|
||||||
|
gap_y = max(0, max(ay1, by1) - min(ay2, by2))
|
||||||
|
|
||||||
|
if gap_x <= gap_px and gap_y <= adaptive_gap_y:
|
||||||
unite(i, j)
|
unite(i, j)
|
||||||
continue
|
continue
|
||||||
cx1, cy1 = centers[i]
|
cx1, cy1 = centers[i]
|
||||||
@@ -681,8 +720,12 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
x1, y1, x2, y2 = ub
|
x1, y1, x2, y2 = ub
|
||||||
x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
|
|
||||||
x2 = min(iw - 1, x2 + bbox_padding); y2 = min(ih - 1, y2 + bbox_padding)
|
# Adaptive padding: scale with median line height to ensure all boundary chars are captured
|
||||||
|
# Use max of fixed padding or line-height-based padding
|
||||||
|
adaptive_pad = max(bbox_padding, int(round(med_h * 0.35)))
|
||||||
|
x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad)
|
||||||
|
x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad)
|
||||||
|
|
||||||
bubbles[bid] = lines
|
bubbles[bid] = lines
|
||||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||||
|
|||||||
75
test_panel_split.py
Normal file
75
test_panel_split.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
#!/usr/bin/env python3
"""Exercise mt.split_panel_box on bubble 7 and show the resulting partition.

Runs detection and filtering on ``004.png``, groups the tokens, then calls
``mt.split_panel_box`` for bubble 7. When a split is returned, the bubble's
tokens are assigned to the left or right side by their centre X and both
sides are printed; otherwise a few size facts about the bubble are printed.
"""
import sys
sys.path.insert(0, '/Users/guillemhernandezsola/code/manga-translator')

import cv2
import numpy as np
import importlib.util

# The pipeline file has a hyphen in its name, so it is loaded from its path.
spec = importlib.util.spec_from_file_location("manga_translator", "/Users/guillemhernandezsola/code/manga-translator/manga-translator.py")
mt = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mt)

image_path = '004.png'
detector = mt.MacVisionDetector(source_lang='en')
raw = detector.read(image_path)
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than on image.shape.
    sys.exit(f"Could not read image: {image_path}")

# Full filtering
filtered = []
top_band_limit = int(image.shape[0] * mt.TOP_BAND_RATIO)
for bbox, text, conf in raw:
    t = mt.normalize_text(text)
    qb = mt.quad_bbox(bbox)

    if conf < 0.12 or len(t) < 1 or mt.is_noise_text(t) or mt.is_sound_effect(t) or mt.is_title_text(t):
        continue
    # Inside the top band, longer strings need a higher confidence to survive.
    if qb[1] < top_band_limit and conf < 0.70 and len(t) >= 5:
        continue

    filtered.append((bbox, t, conf))

# Get grouping
bubbles, bubble_boxes, bubble_quads, bubble_indices = mt.group_tokens(
    filtered, image.shape, gap_px=mt.auto_gap(image_path), bbox_padding=3
)

print("=== TESTING PANEL SPLIT ON BUBBLE 7 ===\n")

bid = 7
if bid not in bubble_boxes:
    # The grouping may not produce this id; report that instead of a KeyError.
    sys.exit(f"Bubble {bid} not found; available bubbles: {list(bubble_boxes)}")

box = bubble_boxes[bid]
print(f"Bubble {bid} box: {box}")
print(f"Bubble {bid} quads: {len(bubble_quads[bid])}")
print(f"Bubble {bid} indices: {len(bubble_indices[bid])}")

# Test split_panel_box
split_result = mt.split_panel_box(image, box, bubble_quads=bubble_quads[bid])

if split_result:
    box_left, box_right, split_x = split_result
    print("\n✓ Panel split detected!")
    print(f" Split X: {split_x}")
    print(f" Left box: {box_left}")
    print(f" Right box: {box_right}")

    # Simulate index split: a token goes left when its centre X is left of
    # the split line, right otherwise.
    left_idxs, right_idxs = [], []
    for idx in bubble_indices[bid]:
        cx, _ = mt.quad_center(filtered[idx][0])
        if cx < split_x:
            left_idxs.append(idx)
        else:
            right_idxs.append(idx)

    print(f"\n Left indices ({len(left_idxs)}): {left_idxs}")
    print(f" Right indices ({len(right_idxs)}): {right_idxs}")

    if left_idxs and right_idxs:
        print("\n✓ Split is valid (both sides have content)")
    else:
        print("\n✗ Split is invalid (one side is empty)")
else:
    print("\n✗ No panel split detected")
    print(f" Threshold would be: quads >= 10? {len(bubble_quads[bid]) >= 10}")
    print(f" Width >= 50? {box[2] - box[0] >= 50}")
    print(f" Height >= 50? {box[3] - box[1] >= 50}")
|
||||||
Reference in New Issue
Block a user