#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Helper script to split bubbles with multiple separate text regions.

Run this to manually split Box 2 and Box 7 from debug_clusters.png.
"""

import json
from typing import Any, Dict, Iterable, List, Optional, Tuple

import numpy as np  # noqa: F401  (unused here; kept from the original import block)

# Minimum distance (pixels) between neighbouring quad x-centres that counts
# as a column break.
DEFAULT_MIN_GAP_PX = 80

# Margin (pixels) added around a text bbox to obtain the bubble rectangle.
BBOX_PADDING_PX = 3

# Bubbles split when the caller does not name any.
DEFAULT_BUBBLE_IDS = (2, 7)


def quad_bbox(quad):
    """Return the axis-aligned bounding box (x1, y1, x2, y2) of a quad.

    ``quad`` is a sequence of points; only ``p[0]`` (x) and ``p[1]`` (y) of
    each point are read.
    """
    xs = [p[0] for p in quad]
    ys = [p[1] for p in quad]
    return (min(xs), min(ys), max(xs), max(ys))


def boxes_union_xyxy(boxes):
    """Return the smallest xyxy box containing all ``boxes``.

    ``None`` entries are ignored. Returns ``None`` when no box is left.
    Coordinates of the result are truncated to ``int``.
    """
    boxes = [b for b in boxes if b is not None]
    if not boxes:
        return None
    return (
        int(min(b[0] for b in boxes)),
        int(min(b[1] for b in boxes)),
        int(max(b[2] for b in boxes)),
        int(max(b[3] for b in boxes)),
    )


def xyxy_to_xywh(bbox):
    """Convert an xyxy box to an ``{"x", "y", "w", "h"}`` dict of ints.

    Returns ``None`` when ``bbox`` is ``None``.
    """
    if bbox is None:
        return None
    x1, y1, x2, y2 = bbox
    return {"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1)}


def bbox_area_xyxy(b):
    """Return the area of an xyxy box, or 0 when ``b`` is ``None``."""
    if b is None:
        return 0
    x1, y1, x2, y2 = b
    return (x2 - x1) * (y2 - y1)


def split_bubble_by_vertical_gap(
    bubble_id: int,
    bubble_data: Dict,
    filtered_indices_map: Dict,
    min_gap_threshold: float = DEFAULT_MIN_GAP_PX,
) -> Optional[Tuple[List[int], List[int], float]]:
    """Try to split a bubble into a left and a right column of quads.

    The quads are ordered by the x-centre of their bounding boxes and the
    largest distance between two neighbouring centres is located. If that
    distance exceeds ``min_gap_threshold`` the quads are partitioned at that
    position.

    Args:
        bubble_id: Identifier of the bubble. Not used by the computation;
            kept for interface compatibility.
        bubble_data: Bubble record; only ``bubble_data["quads"]`` is read.
        filtered_indices_map: Not used; kept for interface compatibility.
        min_gap_threshold: Gap in pixels that must be exceeded to split.

    Returns:
        ``(left_indices, right_indices, gap_size)`` where the index lists
        refer to positions in ``bubble_data["quads"]`` (ordered by
        x-centre), or ``None`` if there are fewer than two quads or no gap
        is large enough.
    """
    quads = bubble_data["quads"]
    if len(quads) < 2:
        return None

    # (original index, x-centre) pairs, ordered left to right. ``sorted`` is
    # stable, so quads sharing a centre keep their original relative order.
    centres = []
    for idx, quad in enumerate(quads):
        x1, _, x2, _ = quad_bbox(quad)
        centres.append((idx, (x1 + x2) / 2.0))
    centres.sort(key=lambda item: item[1])

    # Locate the first largest gap between neighbouring centres.
    max_gap = 0
    split_pos = -1
    for pos, (left, right) in enumerate(zip(centres, centres[1:])):
        gap = right[1] - left[1]
        if gap > max_gap:
            max_gap = gap
            split_pos = pos

    if split_pos == -1 or max_gap <= min_gap_threshold:
        return None

    left_indices = [idx for idx, _ in centres[: split_pos + 1]]
    right_indices = [idx for idx, _ in centres[split_pos + 1:]]
    return (left_indices, right_indices, max_gap)


def _build_sub_bubble(
    bubble_data: Dict,
    indices: List[int],
    fallback_reading_order: int,
    padding: int = BBOX_PADDING_PX,
) -> Tuple[Dict[str, Any], Tuple[int, int, int, int], Tuple[int, int, int, int]]:
    """Build the record for one half of a split bubble.

    Returns ``(record, text_bbox, padded_bbox)``; both boxes are xyxy. The
    padded box is clamped at 0 on its top-left corner only.
    """
    quads = [bubble_data["quads"][i] for i in indices]
    quad_bboxes = [bubble_data["quad_bboxes"][i] for i in indices]

    text_bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    padded = (
        max(0, text_bbox[0] - padding),
        max(0, text_bbox[1] - padding),
        text_bbox[2] + padding,
        text_bbox[3] + padding,
    )

    record = {
        "x": padded[0],
        "y": padded[1],
        "w": padded[2] - padded[0],
        "h": padded[3] - padded[1],
        # Both halves inherit the reading order of the bubble they came from.
        "reading_order": bubble_data.get("reading_order", fallback_reading_order),
        "quad_bboxes": quad_bboxes,
        "quads": quads,
        "text_bbox": xyxy_to_xywh(text_bbox),
        # Line boxes of the original bubble are not carried over.
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(text_bbox),
        "line_union_area": int(bbox_area_xyxy(text_bbox)),
    }
    return record, text_bbox, padded


def split_bubbles_in_json(
    input_file="bubbles.json",
    output_file="bubbles_split.json",
    bubble_ids_to_split: Optional[Iterable[int]] = None,
):
    """Split the specified bubbles of a bubble JSON file and save the result.

    The input must be a JSON object mapping integer-like string ids to bubble
    records. Each requested bubble that can be split keeps its id for the
    left half; the right half is stored under a fresh id (one above the
    largest id in the input, then counting up). All other bubbles are copied
    unchanged.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
        bubble_ids_to_split: Ids of the bubbles to split. ``None`` selects
            the default ids ``(2, 7)``.
    """
    if bubble_ids_to_split is None:
        bubble_ids_to_split = DEFAULT_BUBBLE_IDS
    ids_to_split = set(bubble_ids_to_split)

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    new_data = {}
    # ``default`` keeps an empty input file from raising ValueError.
    next_bid = max((int(k) for k in data), default=-1) + 1

    for bid_str, bubble_data in data.items():
        bid = int(bid_str)

        split_result = None
        if bid in ids_to_split:
            split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})

        if not split_result:
            # Not requested, or no sufficiently large gap: keep the original.
            new_data[bid_str] = bubble_data
            continue

        left_indices, right_indices, gap_size = split_result
        print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
        print(f" Left indices: {left_indices}")
        print(f" Right indices: {right_indices}")

        # Left half keeps the original bubble id.
        left_record, left_bbox, left_padded = _build_sub_bubble(
            bubble_data, left_indices, bid
        )
        print(f" Left bbox: {left_bbox} -> padded: {left_padded}")
        new_data[str(bid)] = left_record

        # Right half gets a new id.
        right_record, right_bbox, right_padded = _build_sub_bubble(
            bubble_data, right_indices, next_bid
        )
        print(f" Right bbox: {right_bbox} -> padded: {right_padded}")
        new_data[str(next_bid)] = right_record
        next_bid += 1

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Saved to {output_file}")


if __name__ == "__main__":
    split_bubbles_in_json(
        input_file="bubbles_original.json",  # Always read from original
        output_file="bubbles_split.json",
        bubble_ids_to_split=[2, 7],
    )