Added bubble split
This commit is contained in:
154
split_final.py
Normal file
154
split_final.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Split bubbles 2 and 7 of bubbles_original.json into left/right parts by quad index (the right parts become bubbles 8 and 9) and write the result to bubbles.json.
|
||||
"""
|
||||
|
||||
import json
|
||||
import copy
|
||||
|
||||
def quad_bbox(quad):
    """Return the axis-aligned bounding box of a quad.

    Args:
        quad: Sequence of points; element 0 of each point is read as x and
            element 1 as y.

    Returns:
        Tuple ``(min_x, min_y, max_x, max_y)`` over all points.
    """
    x_values = []
    y_values = []
    for point in quad:
        x_values.append(point[0])
        y_values.append(point[1])
    return (min(x_values), min(y_values), max(x_values), max(y_values))
|
||||
|
||||
def boxes_union_xyxy(boxes):
    """Return the smallest box enclosing every non-None box in ``boxes``.

    Args:
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes; ``None`` entries are
            ignored.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` with each coordinate passed through
        ``int()``, or ``None`` when no non-None box is given.
    """
    present = [box for box in boxes if box is not None]
    if len(present) == 0:
        return None

    left = min(box[0] for box in present)
    top = min(box[1] for box in present)
    right = max(box[2] for box in present)
    bottom = max(box[3] for box in present)
    return (int(left), int(top), int(right), int(bottom))
|
||||
|
||||
def xyxy_to_xywh(bbox):
    """Convert an ``(x1, y1, x2, y2)`` box to an x/y/w/h dict.

    Args:
        bbox: Four-element box, or ``None``.

    Returns:
        Dict with integer ``"x"``, ``"y"``, ``"w"`` and ``"h"`` entries, or
        ``None`` when ``bbox`` is ``None``.
    """
    if bbox is not None:
        left, top, right, bottom = bbox
        return dict(
            x=int(left),
            y=int(top),
            w=int(right - left),
            h=int(bottom - top),
        )
    return None
|
||||
|
||||
def bbox_area_xyxy(b):
    """Return the area of an ``(x1, y1, x2, y2)`` box.

    Args:
        b: Four-element box, or ``None``.

    Returns:
        ``(x2 - x1) * (y2 - y1)``, or ``0`` when ``b`` is ``None``.
    """
    if b is None:
        return 0
    left, top, right, bottom = b
    width = right - left
    height = bottom - top
    return width * height
|
||||
|
||||
# Pixels of padding added on every side of a part's quad union to form the
# bubble's outer x/y/w/h box (the top-left corner is clamped at 0).
SPLIT_PADDING = 3

# One entry per bubble to split:
#   (source ID, quad indices kept under the source ID,
#    new ID,    quad indices moved to the new ID)
SPLIT_PLAN = [
    ("2", [10, 1, 2, 4, 8, 0, 3, 6, 11, 12], "8", [5, 7, 9]),
    ("7", [8, 13, 4, 11, 2, 6], "9", [0, 5, 1, 3, 7, 10, 12, 9]),
]


def _build_split_bubble(source, indices, default_order):
    """Build one bubble entry from the quads of ``source`` picked by ``indices``.

    Args:
        source: Original bubble dict; its ``"quads"`` and ``"quad_bboxes"``
            lists are indexed with ``indices``.
        indices: Positions of the quads that belong to this part.
        default_order: ``reading_order`` used when ``source`` has none.

    Returns:
        A new bubble dict with the padded outer box (x/y/w/h), the selected
        quads and quad bboxes, and the tight union box of those quads.

    Raises:
        IndexError: If an index is outside the source's quad lists.
    """
    quads = [source["quads"][i] for i in indices]
    quad_bboxes = [source["quad_bboxes"][i] for i in indices]

    # Tight union of the selected quads; computed once and reused for the
    # padded box, text_bbox, line_union_bbox and line_union_area.
    union = boxes_union_xyxy([quad_bbox(q) for q in quads])
    x1 = max(0, union[0] - SPLIT_PADDING)
    y1 = max(0, union[1] - SPLIT_PADDING)
    x2 = union[2] + SPLIT_PADDING
    y2 = union[3] + SPLIT_PADDING

    return {
        "x": x1,
        "y": y1,
        "w": x2 - x1,
        "h": y2 - y1,
        # NOTE(review): when the source has a reading_order, both parts of a
        # split inherit the same value -- confirm whether the new bubble
        # should get its own.
        "reading_order": source.get("reading_order", default_order),
        "quad_bboxes": quad_bboxes,
        "quads": [[list(p) for p in quad] for quad in quads],  # Explicit list conversion
        "text_bbox": xyxy_to_xywh(union),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(union),
        "line_union_area": int(bbox_area_xyxy(union)),
    }


# Load original (maps bubble ID strings to bubble dicts)
with open("bubbles_original.json", "r", encoding="utf-8") as f:
    original = json.load(f)

# Copy all non-split bubbles
split_source_ids = {int(source_id) for source_id, _, _, _ in SPLIT_PLAN}
new_data = {
    bid_str: copy.deepcopy(bubble_data)
    for bid_str, bubble_data in original.items()
    if int(bid_str) not in split_source_ids
}

# Split each planned bubble: the left part keeps the source ID, the right
# part is stored under the new ID.
for position, (source_id, left_indices, new_id, right_indices) in enumerate(SPLIT_PLAN):
    if position:
        print()  # blank line between the reports of consecutive splits
    print(f"🔀 Splitting Box {source_id}...")
    source = original[source_id]

    left_part = _build_split_bubble(source, left_indices, int(source_id))
    new_data[source_id] = left_part
    print(f" Left: y={left_part['y']}, h={left_part['h']}, quads={len(left_indices)}")

    if new_id in new_data:
        # The new ID is hard-coded; make an accidental overwrite of a bubble
        # copied from the original visible instead of silent.
        print(f"⚠️ Bubble {new_id} already exists in the original and will be replaced")
    right_part = _build_split_bubble(source, right_indices, int(new_id))
    new_data[new_id] = right_part
    print(f" Right: y={right_part['y']}, h={right_part['h']}, quads={len(right_indices)}")

# Sort by numeric ID for output
new_data_sorted = {key: new_data[key] for key in sorted(new_data, key=int)}

with open("bubbles.json", "w", encoding="utf-8") as f:
    json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)

print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")
|
||||
Reference in New Issue
Block a user