#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON

The script reads ``bubbles_original.json``, copies every bubble that is not
being split, replaces each split bubble with two bubbles built from disjoint
subsets of its quads, and writes the result (ordered by numeric bubble ID) to
``bubbles.json``.
"""

import json
import copy

INPUT_PATH = "bubbles_original.json"
OUTPUT_PATH = "bubbles.json"

# Margin added on every side of the quad union to obtain a bubble's x/y/w/h.
BBOX_PADDING = 3

# One entry per bubble to split.  ``keep_indices`` are the quad indices that
# stay under ``source_id`` (the "left" part); ``move_indices`` are the quad
# indices that go to the freshly created ``new_id`` (the "right" part).
SPLITS = (
    {
        "source_id": 2,
        "keep_indices": [10, 1, 2, 4, 8, 0, 3, 6, 11, 12],
        "new_id": 8,
        "move_indices": [5, 7, 9],
    },
    {
        "source_id": 7,
        "keep_indices": [8, 13, 4, 11, 2, 6],
        "new_id": 9,
        "move_indices": [0, 5, 1, 3, 7, 10, 12, 9],
    },
)


def quad_bbox(quad):
    """Return the axis-aligned bounds ``(x1, y1, x2, y2)`` of ``quad``.

    ``quad`` is an iterable of points where ``p[0]`` is x and ``p[1]`` is y.
    """
    xs = [p[0] for p in quad]
    ys = [p[1] for p in quad]
    return (min(xs), min(ys), max(xs), max(ys))


def boxes_union_xyxy(boxes):
    """Return the union of ``(x1, y1, x2, y2)`` boxes with int coordinates.

    ``None`` entries are ignored.  Returns ``None`` when no box remains.
    """
    boxes = [b for b in boxes if b is not None]
    if not boxes:
        return None
    return (
        int(min(b[0] for b in boxes)),
        int(min(b[1] for b in boxes)),
        int(max(b[2] for b in boxes)),
        int(max(b[3] for b in boxes)),
    )


def xyxy_to_xywh(bbox):
    """Convert ``(x1, y1, x2, y2)`` to ``{"x", "y", "w", "h"}``; ``None`` passes through."""
    if bbox is None:
        return None
    x1, y1, x2, y2 = bbox
    return {"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1)}


def bbox_area_xyxy(b):
    """Return the area of an ``(x1, y1, x2, y2)`` box, or 0 for ``None``."""
    if b is None:
        return 0
    x1, y1, x2, y2 = b
    return (x2 - x1) * (y2 - y1)


def build_bubble(source, indices, default_reading_order, padding=BBOX_PADDING):
    """Build a bubble record from the quads of ``source`` picked by ``indices``.

    Args:
        source: Bubble dict providing parallel ``"quads"`` and
            ``"quad_bboxes"`` lists and, optionally, ``"reading_order"``.
        indices: Positions in those lists to include, in output order.
        default_reading_order: Used only when ``source`` has no
            ``"reading_order"`` key.
        padding: Margin added around the quad union for ``x``/``y``/``w``/``h``;
            the top-left corner is clamped at 0.

    Returns:
        A new bubble dict.  ``text_bbox`` and ``line_union_bbox`` are both the
        unpadded quad union, and ``line_bboxes`` is always empty.

    Raises:
        ValueError: If ``indices`` selects no quads.
        IndexError: If an index is out of range for ``source``.
    """
    quads = [source["quads"][i] for i in indices]
    quad_bboxes = [source["quad_bboxes"][i] for i in indices]

    # Computed once; it feeds the padded box, text_bbox and the line union.
    text_bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    if text_bbox is None:
        raise ValueError("cannot build a bubble from an empty quad selection")

    x1, y1, x2, y2 = text_bbox
    left = max(0, x1 - padding)
    top = max(0, y1 - padding)
    right = x2 + padding
    bottom = y2 + padding

    return {
        "x": left,
        "y": top,
        "w": right - left,
        "h": bottom - top,
        "reading_order": source.get("reading_order", default_reading_order),
        "quad_bboxes": quad_bboxes,
        "quads": [[list(p) for p in quad] for quad in quads],  # Explicit list conversion
        "text_bbox": xyxy_to_xywh(text_bbox),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(text_bbox),
        "line_union_area": int(bbox_area_xyxy(text_bbox)),
    }


def split_bubbles(original, splits=SPLITS):
    """Return a new bubble mapping with every bubble in ``splits`` split in two.

    Args:
        original: Mapping of bubble-ID strings to bubble dicts.  It is not
            modified; bubbles that are not split are deep-copied.
        splits: Iterable of dicts with ``source_id``, ``keep_indices``,
            ``new_id`` and ``move_indices`` keys.

    Returns:
        A dict keyed by bubble-ID string, ordered by numeric ID.

    Raises:
        KeyError: If a ``source_id`` is missing from ``original``.
        ValueError: If a target ID is already taken, which would otherwise
            silently discard an existing bubble.
    """
    split_ids = {spec["source_id"] for spec in splits}
    result = {
        bid_str: copy.deepcopy(bubble)
        for bid_str, bubble in original.items()
        if int(bid_str) not in split_ids
    }

    for position, spec in enumerate(splits):
        source_key = str(spec["source_id"])
        new_key = str(spec["new_id"])

        # Blank line between consecutive splits, none before the first one.
        lead = "\n" if position else ""
        print(f"{lead}🔀 Splitting Box {source_key}...")

        for key in (source_key, new_key):
            if key in result:
                raise ValueError(
                    f"bubble ID {key} already exists; refusing to overwrite it "
                    f"while splitting box {source_key}"
                )

        source = original[source_key]

        # Left part keeps the source ID
        kept = build_bubble(source, spec["keep_indices"], spec["source_id"])
        result[source_key] = kept
        print(f" Left: y={kept['y']}, h={kept['h']}, quads={len(spec['keep_indices'])}")

        # Right part gets the new ID
        moved = build_bubble(source, spec["move_indices"], spec["new_id"])
        result[new_key] = moved
        print(f" Right: y={moved['y']}, h={moved['h']}, quads={len(spec['move_indices'])}")

    # Sort by ID for output
    return {key: result[key] for key in sorted(result, key=int)}


def main():
    """Load the original bubbles, apply ``SPLITS`` and write the new JSON file."""
    # Load original
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        original = json.load(f)

    new_data_sorted = split_bubbles(original)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()