Added new rendered

This commit is contained in:
Guillem Hernandez Sola
2026-04-21 18:53:34 +02:00
parent bd475d8f01
commit dfa52f54eb
2 changed files with 277 additions and 267 deletions

View File

@@ -301,6 +301,54 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
return None
def split_bubble_if_multiple_rows(indices, ocr, bid=None, gap_ratio=2.5, min_gap_px=40.0):
    """
    Split a bubble into a top part and a bottom part at its largest vertical gap.

    A large empty band between consecutive text boxes suggests that two
    separate bubbles were merged into one. The split happens only when the
    largest gap exceeds BOTH ``gap_ratio`` times the median box height and
    ``min_gap_px``.

    Args:
        indices: Indices into ``ocr`` of the entries belonging to this bubble.
        ocr: Sequence of OCR entries; ``ocr[i][0]`` is the quad passed to
            ``quad_bbox``.
        bid: Bubble id. Accepted so callers can pass it, but not used here.
        gap_ratio: Multiplier applied to the median box height to obtain the
            relative gap threshold.
        min_gap_px: Absolute minimum gap, which prevents micro-splits when the
            text boxes are very short.

    Returns:
        ``(top_indices, bottom_indices)`` when a split is detected, otherwise
        ``None``. Both lists are ordered by the top edge of their boxes and
        are always non-empty.
    """
    if len(indices) < 2:
        return None

    boxes = [quad_bbox(ocr[i][0]) for i in indices]

    # Order the boxes by their top edge (element 1 of each box).
    ordered = sorted(zip(indices, boxes), key=lambda item: item[1][1])

    # Measure the gap between each box's top edge and the lowest bottom edge
    # (element 3) seen so far. Tracking the running maximum means a tall box
    # that overlaps later ones does not produce a false gap.
    gaps = []
    lowest_bottom = ordered[0][1][3]
    for pos, (_, box) in enumerate(ordered[1:], start=1):
        gaps.append((pos, box[1] - lowest_bottom))
        lowest_bottom = max(lowest_bottom, box[3])

    # With at least two boxes there is always at least one gap. On ties,
    # max() keeps the first (topmost) candidate.
    split_pos, largest_gap = max(gaps, key=lambda entry: entry[1])

    # The median box height defines what counts as a "large" gap.
    median_height = float(np.median([box[3] - box[1] for box in boxes]))
    threshold = median_height * gap_ratio

    if largest_gap > threshold and largest_gap > min_gap_px:
        # split_pos is in 1..len(ordered)-1, so both halves are non-empty.
        top_indices = [item[0] for item in ordered[:split_pos]]
        bottom_indices = [item[0] for item in ordered[split_pos:]]
        return top_indices, bottom_indices

    return None
# ============================================================
# OCR ENGINES (Apple Native Vision)
# ============================================================
@@ -886,6 +934,7 @@ def translate_manga_text(
box = bubble_boxes[bid]
bubble_split = None
# 1. Panel border split
split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
if split_result:
box_left, box_right, split_x = split_result
@@ -909,27 +958,37 @@ def translate_manga_text(
bubble_split = (left_idxs, right_idxs)
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
# 2. Check for vertical columns (left/right split)
if bubble_split is None:
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
if col_split:
left_idxs, right_idxs = col_split
if left_idxs and right_idxs:
bubble_split = (left_idxs, right_idxs)
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(left_idxs)} | {len(right_idxs)} quads)")
# 3. Check for horizontal rows (top/bottom split)
if bubble_split is None:
row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid)
if row_split:
top_idxs, bottom_idxs = row_split
if top_idxs and bottom_idxs:
bubble_split = (top_idxs, bottom_idxs)
splits_performed.append(f"BOX#{bid} (Horizontal Row Split: {len(top_idxs)} | {len(bottom_idxs)} quads)")
if bubble_split:
left_idxs, right_idxs = bubble_split
new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
new_bubble_indices[bid] = left_idxs
part1_idxs, part2_idxs = bubble_split
new_bubbles[bid] = build_lines_from_indices(part1_idxs, filtered)
ub_1 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part1_idxs])
new_bubble_boxes[bid] = (max(0, ub_1[0]-3), max(0, ub_1[1]-3), min(iw-1, ub_1[2]+3), min(ih-1, ub_1[3]+3))
new_bubble_quads[bid] = [filtered[i][0] for i in part1_idxs]
new_bubble_indices[bid] = part1_idxs
new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs]
new_bubble_indices[next_bid] = right_idxs
new_bubbles[next_bid] = build_lines_from_indices(part2_idxs, filtered)
ub_2 = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in part2_idxs])
new_bubble_boxes[next_bid] = (max(0, ub_2[0]-3), max(0, ub_2[1]-3), min(iw-1, ub_2[2]+3), min(ih-1, ub_2[3]+3))
new_bubble_quads[next_bid] = [filtered[i][0] for i in part2_idxs]
new_bubble_indices[next_bid] = part2_idxs
next_bid += 1
else:
new_bubbles[bid] = bubbles[bid]
@@ -938,7 +997,7 @@ def translate_manga_text(
new_bubble_indices[bid] = bubble_indices[bid]
if splits_performed:
print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
print(f"\n🔀 Multi-column/row bubble splits detected: {len(splits_performed)}")
for split_info in splits_performed:
print(f" ✓ Split {split_info}")
@@ -1049,8 +1108,8 @@ def translate_manga_text(
if __name__ == "__main__":
translate_manga_text(
image_path="004.png",
source_lang="en",
image_path="003.jpg",
source_lang="es",
target_lang="ca",
confidence_threshold=0.05,
min_text_length=1,