184 lines
6.6 KiB
Python
184 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Helper script to split bubbles that contain multiple separate text regions.

Run this to manually split Box 2 and Box 7 from debug_clusters.png.
|
|
"""
|
|
|
|
import json
|
|
import numpy as np
|
|
from typing import List, Tuple, Dict
|
|
|
|
def quad_bbox(quad):
    """Return the axis-aligned bounds (x_min, y_min, x_max, y_max) of a quad's points."""
    x_values = []
    y_values = []
    # Each point is indexed as point[0] (x) and point[1] (y).
    for point in quad:
        x_values.append(point[0])
        y_values.append(point[1])
    return (min(x_values), min(y_values), max(x_values), max(y_values))
|
|
|
|
def boxes_union_xyxy(boxes):
    """
    Return the smallest xyxy box enclosing every non-None box in ``boxes``.

    ``None`` entries are skipped. The result is a 4-tuple of ints
    ``(x1, y1, x2, y2)``, or ``None`` when no usable box remains.
    """
    usable = [box for box in boxes if box is not None]
    if not usable:
        return None

    left = min(box[0] for box in usable)
    top = min(box[1] for box in usable)
    right = max(box[2] for box in usable)
    bottom = max(box[3] for box in usable)
    # Coordinates are truncated to int only after the extremes are found.
    return (int(left), int(top), int(right), int(bottom))
|
|
|
|
def xyxy_to_xywh(bbox):
    """
    Convert an ``(x1, y1, x2, y2)`` box into an ``{"x", "y", "w", "h"}`` dict.

    All four values are converted with ``int``; width and height are computed
    before truncation. ``None`` is passed through unchanged.
    """
    if bbox is None:
        return None

    left, top, right, bottom = bbox
    width = right - left
    height = bottom - top
    return {
        "x": int(left),
        "y": int(top),
        "w": int(width),
        "h": int(height),
    }
|
|
|
|
def bbox_area_xyxy(b):
    """Return width times height of an ``(x1, y1, x2, y2)`` box, or 0 when ``b`` is None."""
    if b is None:
        return 0

    left, top, right, bottom = b
    width = right - left
    height = bottom - top
    return width * height
|
|
|
|
def split_bubble_by_vertical_gap(
    bubble_id: int,
    bubble_data: Dict,
    filtered_indices_map: Dict,
    min_gap_threshold: float = 80,
):
    """
    Try to split a bubble in two at the widest horizontal gap between its quads.

    Each quad is reduced to the x-coordinate of the center of its horizontal
    extent. The centers are sorted, and the largest difference between two
    consecutive centers is the candidate split point.

    Args:
        bubble_id: Accepted for interface compatibility; not used here.
        bubble_data: Mapping with a ``'quads'`` entry: a sequence of quads,
            each a sequence of points indexed as ``p[0]`` for x.
        filtered_indices_map: Accepted for interface compatibility; not used
            here.
        min_gap_threshold: Center-to-center distance in pixels that the
            widest gap must exceed for the bubble to be split.

    Returns:
        ``(left_indices, right_indices, gap_size)``, where both lists hold
        positions into ``bubble_data['quads']`` ordered by x center, or
        ``None`` when there are fewer than two quads or the widest gap does
        not exceed ``min_gap_threshold``.
    """
    quads = bubble_data['quads']
    if len(quads) < 2:
        return None

    # Pair every quad's original index with its x center. Only the
    # horizontal extent matters for this split, so y is not inspected.
    centers = []
    for index, quad in enumerate(quads):
        xs = [p[0] for p in quad]
        centers.append((index, (min(xs) + max(xs)) / 2.0))

    # sorted() is stable, so quads with equal centers keep their input order.
    ordered = sorted(centers, key=lambda item: item[1])

    # Find the first widest gap between neighbouring centers. The strict
    # comparison means a zero gap never selects a split position.
    widest_gap = 0
    split_pos = -1
    for pos, (current, following) in enumerate(zip(ordered, ordered[1:])):
        gap = following[1] - current[1]
        if gap > widest_gap:
            widest_gap = gap
            split_pos = pos

    if split_pos != -1 and widest_gap > min_gap_threshold:
        # Report ORIGINAL quad indices, not positions in the sorted list.
        left_indices = [index for index, _ in ordered[:split_pos + 1]]
        right_indices = [index for index, _ in ordered[split_pos + 1:]]
        return (left_indices, right_indices, widest_gap)

    return None
|
|
|
|
def _build_split_bubble(quads, quad_bboxes, reading_order, padding):
    """
    Build the JSON record for one half of a split bubble.

    Returns ``(text_bbox, padded_bbox, record)`` where ``text_bbox`` is the
    xyxy union of the quads, ``padded_bbox`` is that union grown by
    ``padding`` on every side (top-left clamped at 0), and ``record`` is the
    dict stored in the output JSON.
    """
    text_bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    padded_bbox = (
        max(0, text_bbox[0] - padding),
        max(0, text_bbox[1] - padding),
        text_bbox[2] + padding,
        text_bbox[3] + padding,
    )
    record = {
        "x": padded_bbox[0],
        "y": padded_bbox[1],
        "w": padded_bbox[2] - padded_bbox[0],
        "h": padded_bbox[3] - padded_bbox[1],
        "reading_order": reading_order,
        "quad_bboxes": quad_bboxes,
        "quads": quads,
        "text_bbox": xyxy_to_xywh(text_bbox),
        "line_bboxes": [],
        # The line union is the same quad union as text_bbox; a separate
        # dict is built so the two entries do not alias each other.
        "line_union_bbox": xyxy_to_xywh(text_bbox),
        "line_union_area": int(bbox_area_xyxy(text_bbox)),
    }
    return text_bbox, padded_bbox, record


def split_bubbles_in_json(
    input_file="bubbles.json",
    output_file="bubbles_split.json",
    bubble_ids_to_split=(2, 7),
    padding=3,
):
    """
    Split the specified bubbles of a bubbles JSON file and write the result.

    The input JSON is an object keyed by integer-like bubble IDs. Bubbles
    whose ID is not listed, or for which ``split_bubble_by_vertical_gap``
    finds no split, are copied through unchanged. A split bubble is replaced
    by two records: the left half keeps the original ID and the right half
    gets a fresh ID above the largest ID in the input.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
        bubble_ids_to_split: Iterable of integer bubble IDs to try to split.
        padding: Pixels added around each half's text box to form its
            ``x``/``y``/``w``/``h`` rectangle.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    ids_to_split = set(bubble_ids_to_split)
    new_data = {}
    # default=-1 keeps an empty input file from raising; IDs then start at 0.
    next_bid = max((int(k) for k in data), default=-1) + 1

    for bid_str, bubble_data in data.items():
        bid = int(bid_str)

        split_result = None
        if bid in ids_to_split:
            split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})

        if not split_result:
            # Not requested, or no usable gap: keep the original record.
            new_data[bid_str] = bubble_data
            continue

        left_indices, right_indices, gap_size = split_result

        print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
        print(f" Left indices: {left_indices}")
        print(f" Right indices: {right_indices}")

        # Left half keeps the original bubble ID. Both halves inherit the
        # source bubble's reading_order when it has one.
        left_bbox, left_bbox_padded, left_record = _build_split_bubble(
            [bubble_data['quads'][i] for i in left_indices],
            [bubble_data['quad_bboxes'][i] for i in left_indices],
            bubble_data.get("reading_order", bid),
            padding,
        )
        print(f" Left bbox: {left_bbox} -> padded: {left_bbox_padded}")
        new_data[str(bid)] = left_record

        # Right half is stored under a new ID.
        right_bbox, right_bbox_padded, right_record = _build_split_bubble(
            [bubble_data['quads'][i] for i in right_indices],
            [bubble_data['quad_bboxes'][i] for i in right_indices],
            bubble_data.get("reading_order", next_bid),
            padding,
        )
        print(f" Right bbox: {right_bbox} -> padded: {right_bbox_padded}")
        new_data[str(next_bid)] = right_record

        next_bid += 1

    # Write output
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Saved to {output_file}")
|
|
|
|
if __name__ == "__main__":
    # Always read from the original file.
    source_path = "bubbles_original.json"
    target_path = "bubbles_split.json"
    ids_to_split = [2, 7]

    split_bubbles_in_json(
        input_file=source_path,
        output_file=target_path,
        bubble_ids_to_split=ids_to_split,
    )
|