From 3800f6cf3f26ad98ed57221a8ef1a58710a5159b Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Mon, 20 Apr 2026 19:02:24 +0200 Subject: [PATCH] Added bubble split --- manga-translator.py | 213 +++++++++++++++++++++++++++++++++++++++++--- regenerate_debug.py | 56 ++++++++++++ split_bubbles.py | 183 +++++++++++++++++++++++++++++++++++++ split_final.py | 154 ++++++++++++++++++++++++++++++++ 4 files changed, 596 insertions(+), 10 deletions(-) create mode 100644 regenerate_debug.py create mode 100644 split_bubbles.py create mode 100644 split_final.py diff --git a/manga-translator.py b/manga-translator.py index 64b1652..8c4360a 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -104,7 +104,6 @@ def looks_like_box_tag(t: str) -> bool: def is_noise_text(text: str) -> bool: t = (text or "").strip() - # Explicitly allow standalone punctuation like ? or ! if re.fullmatch(r"[\?\!]+", t): return False @@ -190,6 +189,126 @@ def ocr_candidate_score(text: str) -> float: return max(0.0, min(1.0, score)) +# ============================================================ +# SPLITTERS +# ============================================================ +def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None): + """ + Checks if a bounding box crosses a solid vertical panel border. + + This detects vertical lines/borders within the bubble that might indicate + the bubble should be split into left/right columns. + + Only splits if: + - Bubble has 10+ quads (real multi-column layouts have many quads, while nested bubbles have fewer) + - Height is sufficient (50+ pixels) + + Nested bubbles typically have 1-6 quads, while true multi-column bubbles have 8+. + + Returns: + Tuple (box_left, box_right, split_x_absolute) if split found, else None + """ + x1, y1, x2, y2 = bbox_xyxy + w = x2 - x1 + h = y2 - y1 + + # Prevent false splits: require substantial number of quads + # Nested bubbles have 1-6 quads, true multi-column layouts have 8+ + if bubble_quads is not None and len(bubble_quads) < 10: + return None + + if w < 50 or h < 50: + return None + + roi = image_bgr[y1:y2, x1:x2] + if roi.size == 0: + return None + + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV) + + vertical_projection = np.sum(thresh, axis=0) + + search_start = int(w * 0.25) + search_end = int(w * 0.75) + + if search_start >= search_end: + return None + + peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start + peak_val = vertical_projection[peak_x_relative] + + # Detect panel border with more sensitive threshold (0.40 instead of 0.60) + # This catches boxes with vertical lines even if they're not super dark + if peak_val > (h * 255 * 0.40): + split_x_absolute = x1 + peak_x_relative + box_left = (x1, y1, split_x_absolute, y2) + box_right = (split_x_absolute, y1, x2, y2) + return box_left, box_right, split_x_absolute + + return None + + +def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False): + """ + Checks if the text inside a bubble is actually two separate columns of text + by looking for a clean vertical gap between the bounding boxes. + + Improved to detect large column gaps (e.g., 100+ pixels between text regions). + + Args: + use_aggressive_thresholds: If True, use lower thresholds (25px, 1.5x line height) + for fallback splitting after failed panel border detection + """ + if len(indices) < 2: + return None + + boxes = [quad_bbox(ocr[i][0]) for i in indices] + sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][0]) + + # Calculate gaps between consecutive elements sorted by x-coordinate + gaps = [] + current_max_x = sorted_items[0][1][2] + + for i in range(1, len(sorted_items)): + idx, b = sorted_items[i] + x1 = b[0] + gap = x1 - current_max_x + gaps.append((i, gap, current_max_x, x1)) + current_max_x = max(current_max_x, b[2]) + + if not gaps: + return None + + # Find the largest gap + max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1]) + + # Calculate thresholds based on line height + hs = [b[3] - b[1] for b in boxes] + med_h = float(np.median(hs)) if hs else 15.0 + + if use_aggressive_thresholds: + # Relaxed thresholds for fallback splitting after failed panel border + threshold1 = 10.0 # Very low absolute threshold for fallback + threshold2 = med_h * 0.8 # Even lower relative threshold + min_gap = 5.0 + else: + # Normal thresholds + threshold1 = 50.0 # Absolute threshold: 50 pixels + threshold2 = med_h * 2.0 # Relative threshold: 2x line height + min_gap = 25.0 + + + + if max_gap_size > threshold1 or (max_gap_size > threshold2 and max_gap_size > min_gap): + split_idx = max_gap_idx + left_indices = [item[0] for item in sorted_items[:split_idx]] + right_indices = [item[0] for item in sorted_items[split_idx:]] + return left_indices, right_indices + + return None + + # ============================================================ # OCR ENGINES (Apple Native Vision) # ============================================================ @@ -588,23 +707,17 @@ def save_debug_clusters( if img is None: return - # ── FIX 1: white-fill each OCR quad before drawing its outline ── for bbox, txt, conf in ocr: pts = np.array(bbox, dtype=np.int32) - cv2.fillPoly(img, [pts], (255, 255, 255)) # ← white background - cv2.polylines(img, [pts], True, (180, 180, 180), 1) # ← grey outline + cv2.fillPoly(img, [pts], (255, 255, 255)) + cv2.polylines(img, [pts], True, (180, 180, 180), 1) for bid, bb in bubble_boxes.items(): x1, y1, x2, y2 = bb - - # Draw green bubble bounding box + ID label cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2) cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2) - # ── FIX 2: yellow line-box drawing loop removed entirely ──── - - # Draw translated text overlay below each bubble box if clean_lines and bid in clean_lines: text = clean_lines[bid] words = text.split() @@ -757,6 +870,86 @@ def translate_manga_text( filtered, image.shape, gap_px=resolved_gap, bbox_padding=3 ) + # ── NEW: SPLIT MULTI-PANEL & MULTI-COLUMN BUBBLES ── + new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {} + next_bid = max(bubbles.keys()) + 1 if bubbles else 1 + splits_performed = [] + + for bid in list(bubbles.keys()): + box = bubble_boxes[bid] + bubble_split = None # Will hold (left_idxs, right_idxs) if a split is detected + + # 1. Try Image-based Panel Border Split + split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) + if split_result: + box_left, box_right, split_x = split_result + left_idxs, right_idxs = [], [] + + for idx in bubble_indices[bid]: + cx, cy = quad_center(filtered[idx][0]) + if cx < split_x: + left_idxs.append(idx) + else: + right_idxs.append(idx) + + if left_idxs and right_idxs: + bubble_split = (left_idxs, right_idxs) + splits_performed.append(f"BOX#{bid} (panel border at x={split_x})") + elif len(bubble_quads[bid]) >= 10: + # Panel border split failed (all quads on one side). Try text column split for large bubbles + col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True) + if col_split: + left_idxs, right_idxs = col_split + if left_idxs and right_idxs: + bubble_split = (left_idxs, right_idxs) + splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") + + # 2. If no panel border split, try text column split + if bubble_split is None: + col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid) + if col_split: + left_idxs, right_idxs = col_split + if left_idxs and right_idxs: + bubble_split = (left_idxs, right_idxs) + splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") + + # 3. Create split bubbles if detected + if bubble_split: + left_idxs, right_idxs = bubble_split + # Create Left Bubble + new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered) + ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs]) + new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3)) + new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs] + new_bubble_indices[bid] = left_idxs + + # Create Right Bubble + new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered) + ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs]) + new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3)) + new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs] + new_bubble_indices[next_bid] = right_idxs + next_bid += 1 + else: + # No split needed, keep original + new_bubbles[bid] = bubbles[bid] + new_bubble_boxes[bid] = bubble_boxes[bid] + new_bubble_quads[bid] = bubble_quads[bid] + new_bubble_indices[bid] = bubble_indices[bid] + + # Print split summary + if splits_performed: + print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}") + for split_info in splits_performed: + print(f" ✓ Split {split_info}") + + # Overwrite old dictionaries with the newly split ones + bubbles = new_bubbles + bubble_boxes = new_bubble_boxes + bubble_quads = new_bubble_quads + bubble_indices = new_bubble_indices + # ─────────────────────────────────────────────────── + translator = GoogleTranslator(source=source_lang, target=target_lang) clean_lines: Dict[int, str] = {} @@ -871,4 +1064,4 @@ if __name__ == "__main__": export_bubbles_to="bubbles.json", reading_mode="ltr", debug=True - ) + ) \ No newline at end of file diff --git a/regenerate_debug.py b/regenerate_debug.py new file mode 100644 index 0000000..fc1a8c9 --- /dev/null +++ b/regenerate_debug.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Regenerate debug_clusters.png with the new split bubbles.json +""" + +import json +import cv2 +import numpy as np + +def quad_bbox(quad): + """Convert quad to bounding box""" + xs = [p[0] for p in quad] + ys = [p[1] for p in quad] + return (min(xs), min(ys), max(xs), max(ys)) + +def save_debug_clusters_from_json( + image_path="004.png", + bubbles_path="bubbles.json", + out_path="debug_clusters.png" +): + img = cv2.imread(image_path) + if img is None: + print(f"❌ Cannot load image: {image_path}") + return + + # Load bubbles.json + with open(bubbles_path, "r", encoding="utf-8") as f: + bubbles_data = json.load(f) + + # Draw all quad polygons in white (erasing original text) + for bid_str, bubble_info in bubbles_data.items(): + for quad in bubble_info.get("quads", []): + pts = np.array(quad, dtype=np.int32) + cv2.fillPoly(img, [pts], (255, 255, 255)) + cv2.polylines(img, [pts], True, (180, 180, 180), 1) + + # Draw bounding boxes with labels + for bid_str, bubble_info in bubbles_data.items(): + bid = int(bid_str) + x = bubble_info["x"] + y = bubble_info["y"] + w = bubble_info["w"] + h = bubble_info["h"] + x2 = x + w + y2 = y + h + + cv2.rectangle(img, (x, y), (x2, y2), (0, 220, 0), 2) + cv2.putText(img, f"BOX#{bid}", (x + 2, max(15, y + 16)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2) + + cv2.imwrite(out_path, img) + print(f"✅ Saved: {out_path}") + +if __name__ == "__main__": + save_debug_clusters_from_json() diff --git a/split_bubbles.py b/split_bubbles.py new file mode 100644 index 0000000..06cb1ae --- /dev/null +++ b/split_bubbles.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Helper script to split bubbles with multiple separate text regions. +Run this to manually split Box 2 and Box 7 from debug_clusters.png +""" + +import json +import numpy as np +from typing import List, Tuple, Dict + +def quad_bbox(quad): + """Convert quad to bounding box""" + xs = [p[0] for p in quad] + ys = [p[1] for p in quad] + return (min(xs), min(ys), max(xs), max(ys)) + +def boxes_union_xyxy(boxes): + """Union of multiple boxes""" + boxes = [b for b in boxes if b is not None] + if not boxes: + return None + return ( + int(min(b[0] for b in boxes)), + int(min(b[1] for b in boxes)), + int(max(b[2] for b in boxes)), + int(max(b[3] for b in boxes)), + ) + +def xyxy_to_xywh(bbox): + """Convert xyxy format to xywh""" + if bbox is None: + return None + x1, y1, x2, y2 = bbox + return {"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1)} + +def bbox_area_xyxy(b): + """Calculate area of a bounding box in xyxy format""" + if b is None: + return 0 + x1, y1, x2, y2 = b + return (x2 - x1) * (y2 - y1) + +def split_bubble_by_vertical_gap(bubble_id: int, bubble_data: Dict, filtered_indices_map: Dict): + """ + Attempt to split a bubble by detecting a significant vertical gap between columns of text. + Returns: (left_indices, right_indices, gap_size) or None if no split + """ + quad_bboxes = bubble_data['quad_bboxes'] + quads = bubble_data['quads'] + + if len(quads) < 2: + return None + + # Get x-coordinates with original indices + x_coords = [] + for i, quad in enumerate(quads): + bbox = quad_bbox(quad) + x_center = (bbox[0] + bbox[2]) / 2.0 + x_coords.append((i, x_center, bbox)) + + # Sort by x-coordinate + x_coords_sorted = sorted(x_coords, key=lambda t: t[1]) + + # Find the largest gap between consecutive x positions + max_gap = 0 + split_pos = -1 + + for i in range(len(x_coords_sorted) - 1): + gap = x_coords_sorted[i + 1][1] - x_coords_sorted[i][1] + if gap > max_gap: + max_gap = gap + split_pos = i + + # If gap is large enough, split + min_gap_threshold = 80 # pixels + if split_pos != -1 and max_gap > min_gap_threshold: + # Get ORIGINAL indices for left and right + left_indices = [x_coords_sorted[j][0] for j in range(split_pos + 1)] + right_indices = [x_coords_sorted[j][0] for j in range(split_pos + 1, len(x_coords_sorted))] + + return (left_indices, right_indices, max_gap) + + return None + +def split_bubbles_in_json(input_file="bubbles.json", output_file="bubbles_split.json", bubble_ids_to_split=[2, 7]): + """Split specified bubbles in the JSON file""" + + with open(input_file, "r", encoding="utf-8") as f: + data = json.load(f) + + new_data = {} + next_bid = max(int(k) for k in data.keys()) + 1 + + for bid_str, bubble_data in data.items(): + bid = int(bid_str) + + if bid not in bubble_ids_to_split: + # Keep original + new_data[bid_str] = bubble_data + continue + + # Try to split + split_result = split_bubble_by_vertical_gap(bid, bubble_data, {}) + + if split_result: + left_indices, right_indices, gap_size = split_result + + print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)") + print(f" Left indices: {left_indices}") + print(f" Right indices: {right_indices}") + + # Create left bubble - keep the original bubble ID + left_quads = [bubble_data['quads'][i] for i in left_indices] + left_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in left_indices] + left_bbox = boxes_union_xyxy([quad_bbox(q) for q in left_quads]) + left_bbox_padded = ( + max(0, left_bbox[0] - 3), + max(0, left_bbox[1] - 3), + left_bbox[2] + 3, + left_bbox[3] + 3 + ) + + print(f" Left bbox: {left_bbox} -> padded: {left_bbox_padded}") + + new_data[str(bid)] = { + "x": left_bbox_padded[0], + "y": left_bbox_padded[1], + "w": left_bbox_padded[2] - left_bbox_padded[0], + "h": left_bbox_padded[3] - left_bbox_padded[1], + "reading_order": bubble_data.get("reading_order", bid), + "quad_bboxes": left_quad_bboxes, + "quads": left_quads, + "text_bbox": xyxy_to_xywh(left_bbox), + "line_bboxes": [], + "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads])), + "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads]))), + } + + # Create right bubble - with new ID + right_quads = [bubble_data['quads'][i] for i in right_indices] + right_quad_bboxes = [bubble_data['quad_bboxes'][i] for i in right_indices] + right_bbox = boxes_union_xyxy([quad_bbox(q) for q in right_quads]) + right_bbox_padded = ( + max(0, right_bbox[0] - 3), + max(0, right_bbox[1] - 3), + right_bbox[2] + 3, + right_bbox[3] + 3 + ) + + print(f" Right bbox: {right_bbox} -> padded: {right_bbox_padded}") + + new_data[str(next_bid)] = { + "x": right_bbox_padded[0], + "y": right_bbox_padded[1], + "w": right_bbox_padded[2] - right_bbox_padded[0], + "h": right_bbox_padded[3] - right_bbox_padded[1], + "reading_order": bubble_data.get("reading_order", next_bid), + "quad_bboxes": right_quad_bboxes, + "quads": right_quads, + "text_bbox": xyxy_to_xywh(right_bbox), + "line_bboxes": [], + "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads])), + "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads]))), + } + + next_bid += 1 + else: + # No split needed + new_data[bid_str] = bubble_data + + # Write output + with open(output_file, "w", encoding="utf-8") as f: + json.dump(new_data, f, indent=2, ensure_ascii=False) + + print(f"\n✅ Saved to {output_file}") + +if __name__ == "__main__": + split_bubbles_in_json( + input_file="bubbles_original.json", # Always read from original + output_file="bubbles_split.json", + bubble_ids_to_split=[2, 7] + ) diff --git a/split_final.py b/split_final.py new file mode 100644 index 0000000..2648158 --- /dev/null +++ b/split_final.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON +""" + +import json +import copy + +def quad_bbox(quad): + xs = [p[0] for p in quad] + ys = [p[1] for p in quad] + return (min(xs), min(ys), max(xs), max(ys)) + +def boxes_union_xyxy(boxes): + boxes = [b for b in boxes if b is not None] + if not boxes: + return None + return ( + int(min(b[0] for b in boxes)), + int(min(b[1] for b in boxes)), + int(max(b[2] for b in boxes)), + int(max(b[3] for b in boxes)), + ) + +def xyxy_to_xywh(bbox): + if bbox is None: + return None + x1, y1, x2, y2 = bbox + return {"x": int(x1), "y": int(y1), "w": int(x2 - x1), "h": int(y2 - y1)} + +def bbox_area_xyxy(b): + if b is None: + return 0 + x1, y1, x2, y2 = b + return (x2 - x1) * (y2 - y1) + +# Load original +with open("bubbles_original.json", "r", encoding="utf-8") as f: + original = json.load(f) + +new_data = {} + +# Copy all non-split bubbles +for bid_str, bubble_data in original.items(): + bid = int(bid_str) + if bid not in [2, 7]: + new_data[bid_str] = copy.deepcopy(bubble_data) + +# Split Box 2 +print("🔀 Splitting Box 2...") +box2_data = original["2"] +left_indices_2 = [10, 1, 2, 4, 8, 0, 3, 6, 11, 12] +right_indices_2 = [5, 7, 9] + +# Left part keeps ID 2 +left_quads_2 = [box2_data['quads'][i] for i in left_indices_2] +left_quad_bboxes_2 = [box2_data['quad_bboxes'][i] for i in left_indices_2] +left_bbox_2 = boxes_union_xyxy([quad_bbox(q) for q in left_quads_2]) +left_bbox_2_padded = (max(0, left_bbox_2[0]-3), max(0, left_bbox_2[1]-3), left_bbox_2[2]+3, left_bbox_2[3]+3) + +new_data["2"] = { + "x": left_bbox_2_padded[0], + "y": left_bbox_2_padded[1], + "w": left_bbox_2_padded[2] - left_bbox_2_padded[0], + "h": left_bbox_2_padded[3] - left_bbox_2_padded[1], + "reading_order": box2_data.get("reading_order", 2), + "quad_bboxes": left_quad_bboxes_2, + "quads": [[list(p) for p in quad] for quad in left_quads_2], # Explicit list conversion + "text_bbox": xyxy_to_xywh(left_bbox_2), + "line_bboxes": [], + "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads_2])), + "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads_2]))), +} +print(f" Left: y={new_data['2']['y']}, h={new_data['2']['h']}, quads={len(left_quads_2)}") + +# Right part gets new ID 8 +right_quads_2 = [box2_data['quads'][i] for i in right_indices_2] +right_quad_bboxes_2 = [box2_data['quad_bboxes'][i] for i in right_indices_2] +right_bbox_2 = boxes_union_xyxy([quad_bbox(q) for q in right_quads_2]) +right_bbox_2_padded = (max(0, right_bbox_2[0]-3), max(0, right_bbox_2[1]-3), right_bbox_2[2]+3, right_bbox_2[3]+3) + +new_data["8"] = { + "x": right_bbox_2_padded[0], + "y": right_bbox_2_padded[1], + "w": right_bbox_2_padded[2] - right_bbox_2_padded[0], + "h": right_bbox_2_padded[3] - right_bbox_2_padded[1], + "reading_order": box2_data.get("reading_order", 8), + "quad_bboxes": right_quad_bboxes_2, + "quads": [[list(p) for p in quad] for quad in right_quads_2], # Explicit list conversion + "text_bbox": xyxy_to_xywh(right_bbox_2), + "line_bboxes": [], + "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads_2])), + "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads_2]))), +} +print(f" Right: y={new_data['8']['y']}, h={new_data['8']['h']}, quads={len(right_quads_2)}") + +# Split Box 7 +print("\n🔀 Splitting Box 7...") +box7_data = original["7"] +left_indices_7 = [8, 13, 4, 11, 2, 6] +right_indices_7 = [0, 5, 1, 3, 7, 10, 12, 9] + +# Left part keeps ID 7 +left_quads_7 = [box7_data['quads'][i] for i in left_indices_7] +left_quad_bboxes_7 = [box7_data['quad_bboxes'][i] for i in left_indices_7] +left_bbox_7 = boxes_union_xyxy([quad_bbox(q) for q in left_quads_7]) +left_bbox_7_padded = (max(0, left_bbox_7[0]-3), max(0, left_bbox_7[1]-3), left_bbox_7[2]+3, left_bbox_7[3]+3) + +new_data["7"] = { + "x": left_bbox_7_padded[0], + "y": left_bbox_7_padded[1], + "w": left_bbox_7_padded[2] - left_bbox_7_padded[0], + "h": left_bbox_7_padded[3] - left_bbox_7_padded[1], + "reading_order": box7_data.get("reading_order", 7), + "quad_bboxes": left_quad_bboxes_7, + "quads": [[list(p) for p in quad] for quad in left_quads_7], # Explicit list conversion + "text_bbox": xyxy_to_xywh(left_bbox_7), + "line_bboxes": [], + "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in left_quads_7])), + "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in left_quads_7]))), +} +print(f" Left: y={new_data['7']['y']}, h={new_data['7']['h']}, quads={len(left_quads_7)}") + +# Right part gets new ID 9 +right_quads_7 = [box7_data['quads'][i] for i in right_indices_7] +right_quad_bboxes_7 = [box7_data['quad_bboxes'][i] for i in right_indices_7] +right_bbox_7 = boxes_union_xyxy([quad_bbox(q) for q in right_quads_7]) +right_bbox_7_padded = (max(0, right_bbox_7[0]-3), max(0, right_bbox_7[1]-3), right_bbox_7[2]+3, right_bbox_7[3]+3) + +new_data["9"] = { + "x": right_bbox_7_padded[0], + "y": right_bbox_7_padded[1], + "w": right_bbox_7_padded[2] - right_bbox_7_padded[0], + "h": right_bbox_7_padded[3] - right_bbox_7_padded[1], + "reading_order": box7_data.get("reading_order", 9), + "quad_bboxes": right_quad_bboxes_7, + "quads": [[list(p) for p in quad] for quad in right_quads_7], # Explicit list conversion + "text_bbox": xyxy_to_xywh(right_bbox_7), + "line_bboxes": [], + "line_union_bbox": xyxy_to_xywh(boxes_union_xyxy([quad_bbox(q) for q in right_quads_7])), + "line_union_area": int(bbox_area_xyxy(boxes_union_xyxy([quad_bbox(q) for q in right_quads_7]))), +} +print(f" Right: y={new_data['9']['y']}, h={new_data['9']['h']}, quads={len(right_quads_7)}") + +# Sort by ID for output +new_data_sorted = {} +for bid in sorted([int(k) for k in new_data.keys()]): + new_data_sorted[str(bid)] = new_data[str(bid)] + +with open("bubbles.json", "w", encoding="utf-8") as f: + json.dump(new_data_sorted, f, indent=2, ensure_ascii=False) + +print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")