# manga-translator/split_bubbles.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Helper script to split bubbles with multiple separate text regions.
Run this to manually split Box 2 and Box 7 from debug_clusters.png
"""

import json
import numpy as np
from typing import List, Tuple, Dict

def quad_bbox(quad):
    """Return the axis-aligned bounding box of a quad as (min_x, min_y, max_x, max_y).

    Only the first two components of each point (x, then y) are read.
    """
    x_values = []
    y_values = []
    for point in quad:
        x_values.append(point[0])
        y_values.append(point[1])
    return (min(x_values), min(y_values), max(x_values), max(y_values))

def boxes_union_xyxy(boxes):
    """Return the smallest xyxy box enclosing every non-None box in ``boxes``.

    ``None`` entries are skipped. The result is ``None`` when nothing is left,
    otherwise a 4-tuple whose coordinates are truncated to ``int``.
    """
    present = list(filter(lambda box: box is not None, boxes))
    if len(present) == 0:
        return None

    left = min(box[0] for box in present)
    top = min(box[1] for box in present)
    right = max(box[2] for box in present)
    bottom = max(box[3] for box in present)
    return (int(left), int(top), int(right), int(bottom))

def xyxy_to_xywh(bbox):
    """Turn an (x1, y1, x2, y2) box into an ``{"x", "y", "w", "h"}`` dict.

    ``None`` is passed through unchanged. Width and height are computed from
    the raw coordinates first and only then truncated to ``int``.
    """
    if bbox is not None:
        left, top, right, bottom = bbox
        width = right - left
        height = bottom - top
        return dict(x=int(left), y=int(top), w=int(width), h=int(height))
    return None

def bbox_area_xyxy(b):
    """Return width * height of an (x1, y1, x2, y2) box, or 0 when ``b`` is None."""
    if b is not None:
        left, top, right, bottom = b
        width = right - left
        height = bottom - top
        return width * height
    return 0

def split_bubble_by_vertical_gap(
    bubble_id: int,
    bubble_data: Dict,
    filtered_indices_map: Dict,
    min_gap_threshold: float = 80,
):
    """
    Try to split a bubble's quads into a left group and a right group.

    Each quad is reduced to the horizontal centre of its bounding box. The
    centres are sorted left to right and the widest distance between two
    neighbouring centres is taken as the candidate split point. Note that the
    distance is measured along the x-axis between *centres*, not between the
    facing edges of the boxes.

    Args:
        bubble_id: Identifier of the bubble. Not used by the computation.
        bubble_data: Bubble record; only ``bubble_data['quads']`` is read.
            Each quad is a sequence of points whose first component is x.
        filtered_indices_map: Not used by the computation; kept so existing
            callers keep working.
        min_gap_threshold: The widest centre-to-centre distance (in pixels)
            must be strictly greater than this for a split to happen.

    Returns:
        ``(left_indices, right_indices, gap_size)`` where the index lists
        refer to positions in ``bubble_data['quads']`` (ordered by x-centre),
        or ``None`` when there are fewer than two quads, when all centres
        coincide, or when the widest gap does not exceed the threshold.
    """
    quads = bubble_data['quads']
    if len(quads) < 2:
        return None

    # Pair every original index with the x-centre of its quad. Only the
    # horizontal extent matters here, so it is computed directly.
    centers = []
    for index, quad in enumerate(quads):
        xs = [point[0] for point in quad]
        centers.append((index, (min(xs) + max(xs)) / 2.0))

    # sorted() is stable, so quads sharing a centre keep their input order.
    centers.sort(key=lambda item: item[1])

    # Find the first widest gap between neighbouring centres. A gap has to be
    # strictly positive to count, so identical centres never produce a split.
    max_gap = 0
    split_pos = -1
    neighbours = zip(centers, centers[1:])
    for pos, ((_, left_x), (_, right_x)) in enumerate(neighbours):
        gap = right_x - left_x
        if gap > max_gap:
            max_gap = gap
            split_pos = pos

    if split_pos == -1 or max_gap <= min_gap_threshold:
        return None

    # Map the sorted positions back to ORIGINAL quad indices.
    left_indices = [index for index, _ in centers[:split_pos + 1]]
    right_indices = [index for index, _ in centers[split_pos + 1:]]
    return (left_indices, right_indices, max_gap)

def _build_split_bubble(bubble_data, indices, default_reading_order, label, padding=3):
    """Build the output record for one half ("Left"/"Right") of a split bubble.

    ``indices`` selects the quads (and matching ``quad_bboxes``) that belong
    to this half. The bubble rectangle is the union of the selected quads
    grown by ``padding`` pixels on every side, with the top-left corner
    clamped at 0.
    """
    quads = [bubble_data['quads'][i] for i in indices]
    quad_bboxes = [bubble_data['quad_bboxes'][i] for i in indices]

    # Tight union of the selected quads; reused for every derived field.
    bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    padded = (
        max(0, bbox[0] - padding),
        max(0, bbox[1] - padding),
        bbox[2] + padding,
        bbox[3] + padding,
    )

    print(f"   {label} bbox: {bbox} -> padded: {padded}")

    # NOTE: only the keys below are written; any other keys present on the
    # original bubble record are not carried over to the halves.
    return {
        "x": padded[0],
        "y": padded[1],
        "w": padded[2] - padded[0],
        "h": padded[3] - padded[1],
        "reading_order": bubble_data.get("reading_order", default_reading_order),
        "quad_bboxes": quad_bboxes,
        "quads": quads,
        "text_bbox": xyxy_to_xywh(bbox),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(bbox),
        "line_union_area": int(bbox_area_xyxy(bbox)),
    }


def split_bubbles_in_json(
    input_file: str = "bubbles.json",
    output_file: str = "bubbles_split.json",
    bubble_ids_to_split=(2, 7),
) -> None:
    """Split selected bubbles of a bubbles JSON file into left/right halves.

    The input is a JSON object mapping bubble-id strings to bubble records.
    Bubbles whose integer id is listed in ``bubble_ids_to_split`` are passed
    to ``split_bubble_by_vertical_gap``; when that returns a split, the left
    half keeps the original id and the right half receives a fresh id above
    the largest id in the input. Both halves inherit the original bubble's
    ``reading_order`` when it has one. All other bubbles, and bubbles that
    cannot be split, are copied through unchanged.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
        bubble_ids_to_split: Iterable of integer bubble ids to try to split.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    ids_to_split = set(bubble_ids_to_split)

    new_data = {}
    # default=-1 keeps an empty input file from crashing max().
    next_bid = max((int(k) for k in data), default=-1) + 1

    for bid_str, bubble_data in data.items():
        bid = int(bid_str)

        split_result = None
        if bid in ids_to_split:
            split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})

        if not split_result:
            # Not selected, or no usable gap: keep the original record.
            new_data[bid_str] = bubble_data
            continue

        left_indices, right_indices, gap_size = split_result

        print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
        print(f"   Left indices: {left_indices}")
        print(f"   Right indices: {right_indices}")

        # Left half keeps the original bubble id, right half gets a new one.
        new_data[str(bid)] = _build_split_bubble(
            bubble_data, left_indices, bid, "Left"
        )
        new_data[str(next_bid)] = _build_split_bubble(
            bubble_data, right_indices, next_bid, "Right"
        )
        next_bid += 1

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Saved to {output_file}")

if __name__ == "__main__":
    # Arguments are (input_file, output_file, bubble_ids_to_split).
    # The input is always the original file, never a previously split one.
    split_bubbles_in_json("bubbles_original.json", "bubbles_split.json", [2, 7])