Added bubble split
This commit is contained in:
@@ -104,7 +104,6 @@ def looks_like_box_tag(t: str) -> bool:
|
|||||||
def is_noise_text(text: str) -> bool:
|
def is_noise_text(text: str) -> bool:
|
||||||
t = (text or "").strip()
|
t = (text or "").strip()
|
||||||
|
|
||||||
# Explicitly allow standalone punctuation like ? or !
|
|
||||||
if re.fullmatch(r"[\?\!]+", t):
|
if re.fullmatch(r"[\?\!]+", t):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -190,6 +189,126 @@ def ocr_candidate_score(text: str) -> float:
|
|||||||
return max(0.0, min(1.0, score))
|
return max(0.0, min(1.0, score))
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# SPLITTERS
|
||||||
|
# ============================================================
|
||||||
|
def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
    """
    Split a bubble box at a solid vertical panel border, if one crosses it.

    The box region is converted to grayscale and binarised so that pixels
    darker than 150 become foreground (255). Foreground is summed per column,
    and the strongest column inside the central half of the box (25%-75% of
    its width) is treated as a panel border when its sum exceeds 40% of a
    fully dark column.

    No split is attempted (None is returned) when:
      - ``bubble_quads`` is given and holds fewer than 10 quads
        (guard against splitting small or nested bubbles),
      - the box is narrower or shorter than 50 pixels,
      - the box does not overlap the image.

    Args:
        image_bgr: Image array indexed as [row, column, ...]; it is handed to
            cv2.cvtColor with COLOR_BGR2GRAY.
        bbox_xyxy: (x1, y1, x2, y2) box in image pixel coordinates. Values are
            truncated to int before use.
        bubble_quads: Optional sequence of quads; only its length is used.

    Returns:
        Tuple (box_left, box_right, split_x_absolute) if split found, else None.
        All coordinates are plain Python ints.
    """
    # Coerce once so float / numpy coordinates can be used for slicing and so
    # the returned boxes never carry numpy scalar types.
    x1, y1, x2, y2 = (int(v) for v in bbox_xyxy)

    # Prevent false splits: require a substantial number of quads.
    if bubble_quads is not None and len(bubble_quads) < 10:
        return None

    if (x2 - x1) < 50 or (y2 - y1) < 50:
        return None

    # Clamp the region to the image: a negative start would wrap around when
    # slicing, and an overshooting end would silently shorten the ROI while
    # the thresholds below still assumed the nominal box size.
    img_h, img_w = image_bgr.shape[:2]
    rx1, ry1 = max(0, x1), max(0, y1)
    rx2, ry2 = min(img_w, x2), min(img_h, y2)
    if rx2 <= rx1 or ry2 <= ry1:
        return None

    roi = image_bgr[ry1:ry2, rx1:rx2]
    if roi.size == 0:
        return None
    roi_h, roi_w = roi.shape[:2]

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)

    # Per-column sum of dark pixels (each dark pixel contributes 255).
    vertical_projection = np.sum(thresh, axis=0)

    # Only look for a border in the central half of the box.
    search_start = int(roi_w * 0.25)
    search_end = int(roi_w * 0.75)
    if search_start >= search_end:
        return None

    peak_x_relative = int(np.argmax(vertical_projection[search_start:search_end])) + search_start
    peak_val = vertical_projection[peak_x_relative]

    # A column counts as a border when at least 40% of its height is dark.
    if peak_val > (roi_h * 255 * 0.40):
        split_x_absolute = rx1 + peak_x_relative
        box_left = (x1, y1, split_x_absolute, y2)
        box_right = (split_x_absolute, y1, x2, y2)
        return box_left, box_right, split_x_absolute

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False):
    """
    Split a bubble's OCR items into a left and a right column when a wide
    horizontal gap separates them.

    Items are sorted by the left edge of their bounding box. Each item's left
    edge is compared with the furthest right edge seen so far, and the widest
    such gap is the split candidate. The split is accepted when the gap
    exceeds an absolute threshold, or exceeds both a multiple of the median
    box height and a minimum gap.

    Args:
        indices: Indices into ``ocr`` of the items belonging to the bubble.
        ocr: Sequence whose entries carry a quad at position 0; the quad is
            passed to ``quad_bbox`` (assumed to return (x1, y1, x2, y2) --
            confirm against its definition).
        bid: Unused; accepted for call-site compatibility.
        use_aggressive_thresholds: If True, use the relaxed thresholds
            (10px absolute, 0.8x median height, 5px minimum gap) instead of
            the normal ones (50px absolute, 2.0x median height, 25px minimum
            gap).

    Returns:
        Tuple (left_indices, right_indices) in left-to-right order, or None
        when there are fewer than two items or no gap is wide enough.
    """
    if len(indices) < 2:
        return None

    boxes = [quad_bbox(ocr[i][0]) for i in indices]
    sorted_items = sorted(zip(indices, boxes), key=lambda item: item[1][0])

    # Track the widest gap between an item's left edge and the running right
    # edge of everything before it. Strict '>' keeps the first of equal gaps.
    split_idx = None
    max_gap_size = None
    current_max_x = sorted_items[0][1][2]
    for pos in range(1, len(sorted_items)):
        box = sorted_items[pos][1]
        gap = box[0] - current_max_x
        if max_gap_size is None or gap > max_gap_size:
            split_idx, max_gap_size = pos, gap
        current_max_x = max(current_max_x, box[2])

    # Thresholds scale with the typical line height of the bubble.
    med_h = float(np.median([b[3] - b[1] for b in boxes]))

    if use_aggressive_thresholds:
        # Relaxed thresholds for fallback splitting after a failed panel
        # border split.
        threshold1 = 10.0
        threshold2 = med_h * 0.8
        min_gap = 5.0
    else:
        threshold1 = 50.0
        threshold2 = med_h * 2.0
        min_gap = 25.0

    if max_gap_size > threshold1 or (max_gap_size > threshold2 and max_gap_size > min_gap):
        left_indices = [item[0] for item in sorted_items[:split_idx]]
        right_indices = [item[0] for item in sorted_items[split_idx:]]
        return left_indices, right_indices

    return None
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# OCR ENGINES (Apple Native Vision)
|
# OCR ENGINES (Apple Native Vision)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -588,23 +707,17 @@ def save_debug_clusters(
|
|||||||
if img is None:
|
if img is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
# ── FIX 1: white-fill each OCR quad before drawing its outline ──
|
|
||||||
for bbox, txt, conf in ocr:
|
for bbox, txt, conf in ocr:
|
||||||
pts = np.array(bbox, dtype=np.int32)
|
pts = np.array(bbox, dtype=np.int32)
|
||||||
cv2.fillPoly(img, [pts], (255, 255, 255)) # ← white background
|
cv2.fillPoly(img, [pts], (255, 255, 255))
|
||||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1) # ← grey outline
|
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
|
||||||
|
|
||||||
for bid, bb in bubble_boxes.items():
|
for bid, bb in bubble_boxes.items():
|
||||||
x1, y1, x2, y2 = bb
|
x1, y1, x2, y2 = bb
|
||||||
|
|
||||||
# Draw green bubble bounding box + ID label
|
|
||||||
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
||||||
cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
|
cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
|
||||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
||||||
|
|
||||||
# ── FIX 2: yellow line-box drawing loop removed entirely ────
|
|
||||||
|
|
||||||
# Draw translated text overlay below each bubble box
|
|
||||||
if clean_lines and bid in clean_lines:
|
if clean_lines and bid in clean_lines:
|
||||||
text = clean_lines[bid]
|
text = clean_lines[bid]
|
||||||
words = text.split()
|
words = text.split()
|
||||||
@@ -757,6 +870,86 @@ def translate_manga_text(
|
|||||||
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ── NEW: SPLIT MULTI-PANEL & MULTI-COLUMN BUBBLES ──
|
||||||
|
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
|
||||||
|
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
|
||||||
|
splits_performed = []
|
||||||
|
|
||||||
|
for bid in list(bubbles.keys()):
|
||||||
|
box = bubble_boxes[bid]
|
||||||
|
bubble_split = None # Will hold (left_idxs, right_idxs) if a split is detected
|
||||||
|
|
||||||
|
# 1. Try Image-based Panel Border Split
|
||||||
|
split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
|
||||||
|
if split_result:
|
||||||
|
box_left, box_right, split_x = split_result
|
||||||
|
left_idxs, right_idxs = [], []
|
||||||
|
|
||||||
|
for idx in bubble_indices[bid]:
|
||||||
|
cx, cy = quad_center(filtered[idx][0])
|
||||||
|
if cx < split_x:
|
||||||
|
left_idxs.append(idx)
|
||||||
|
else:
|
||||||
|
right_idxs.append(idx)
|
||||||
|
|
||||||
|
if left_idxs and right_idxs:
|
||||||
|
bubble_split = (left_idxs, right_idxs)
|
||||||
|
splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
|
||||||
|
elif len(bubble_quads[bid]) >= 10:
|
||||||
|
# Panel border split failed (all quads on one side). Try text column split for large bubbles
|
||||||
|
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
|
||||||
|
if col_split:
|
||||||
|
left_idxs, right_idxs = col_split
|
||||||
|
if left_idxs and right_idxs:
|
||||||
|
bubble_split = (left_idxs, right_idxs)
|
||||||
|
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||||
|
|
||||||
|
# 2. If no panel border split, try text column split
|
||||||
|
if bubble_split is None:
|
||||||
|
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
|
||||||
|
if col_split:
|
||||||
|
left_idxs, right_idxs = col_split
|
||||||
|
if left_idxs and right_idxs:
|
||||||
|
bubble_split = (left_idxs, right_idxs)
|
||||||
|
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||||
|
|
||||||
|
# 3. Create split bubbles if detected
|
||||||
|
if bubble_split:
|
||||||
|
left_idxs, right_idxs = bubble_split
|
||||||
|
# Create Left Bubble
|
||||||
|
new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
|
||||||
|
ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
|
||||||
|
new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
|
||||||
|
new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
|
||||||
|
new_bubble_indices[bid] = left_idxs
|
||||||
|
|
||||||
|
# Create Right Bubble
|
||||||
|
new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
|
||||||
|
ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
|
||||||
|
new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
|
||||||
|
new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs]
|
||||||
|
new_bubble_indices[next_bid] = right_idxs
|
||||||
|
next_bid += 1
|
||||||
|
else:
|
||||||
|
# No split needed, keep original
|
||||||
|
new_bubbles[bid] = bubbles[bid]
|
||||||
|
new_bubble_boxes[bid] = bubble_boxes[bid]
|
||||||
|
new_bubble_quads[bid] = bubble_quads[bid]
|
||||||
|
new_bubble_indices[bid] = bubble_indices[bid]
|
||||||
|
|
||||||
|
# Print split summary
|
||||||
|
if splits_performed:
|
||||||
|
print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
|
||||||
|
for split_info in splits_performed:
|
||||||
|
print(f" ✓ Split {split_info}")
|
||||||
|
|
||||||
|
# Overwrite old dictionaries with the newly split ones
|
||||||
|
bubbles = new_bubbles
|
||||||
|
bubble_boxes = new_bubble_boxes
|
||||||
|
bubble_quads = new_bubble_quads
|
||||||
|
bubble_indices = new_bubble_indices
|
||||||
|
# ───────────────────────────────────────────────────
|
||||||
|
|
||||||
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
||||||
|
|
||||||
clean_lines: Dict[int, str] = {}
|
clean_lines: Dict[int, str] = {}
|
||||||
@@ -871,4 +1064,4 @@ if __name__ == "__main__":
|
|||||||
export_bubbles_to="bubbles.json",
|
export_bubbles_to="bubbles.json",
|
||||||
reading_mode="ltr",
|
reading_mode="ltr",
|
||||||
debug=True
|
debug=True
|
||||||
)
|
)
|
||||||
56
regenerate_debug.py
Normal file
56
regenerate_debug.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Regenerate debug_clusters.png with the new split bubbles.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def quad_bbox(quad):
    """Return the axis-aligned (min_x, min_y, max_x, max_y) box around a quad's points."""
    xs, ys = [], []
    for point in quad:
        xs.append(point[0])
        ys.append(point[1])
    return (min(xs), min(ys), max(xs), max(ys))
|
||||||
|
|
||||||
|
def save_debug_clusters_from_json(
    image_path="004.png",
    bubbles_path="bubbles.json",
    out_path="debug_clusters.png"
):
    """
    Redraw the debug overlay image from an exported bubbles JSON file.

    Every quad of every bubble is filled white and outlined in grey, then each
    bubble's box is drawn in green with a ``BOX#<id>`` label.

    Args:
        image_path: Image to draw on; read with cv2.imread.
        bubbles_path: JSON file mapping bubble id strings to objects holding
            "x", "y", "w", "h" and optionally "quads".
        out_path: Where the annotated image is written.

    Returns:
        None. Prints a status line; returns early if the image cannot be
        loaded or the result cannot be written.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ Cannot load image: {image_path}")
        return

    # Load bubbles.json
    with open(bubbles_path, "r", encoding="utf-8") as f:
        bubbles_data = json.load(f)

    # Draw all quad polygons in white (erasing original text)
    for bubble_info in bubbles_data.values():
        for quad in bubble_info.get("quads", []):
            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(img, [pts], (255, 255, 255))
            cv2.polylines(img, [pts], True, (180, 180, 180), 1)

    # Draw bounding boxes with labels
    for bid_str, bubble_info in bubbles_data.items():
        bid = int(bid_str)
        # JSON numbers may come back as floats; the drawing calls need ints.
        x = int(bubble_info["x"])
        y = int(bubble_info["y"])
        x2 = x + int(bubble_info["w"])
        y2 = y + int(bubble_info["h"])

        cv2.rectangle(img, (x, y), (x2, y2), (0, 220, 0), 2)
        cv2.putText(img, f"BOX#{bid}", (x + 2, max(15, y + 16)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)

    # cv2.imwrite reports failure through its return value, so do not claim
    # success unless it returned True.
    if not cv2.imwrite(out_path, img):
        print(f"❌ Cannot write image: {out_path}")
        return
    print(f"✅ Saved: {out_path}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: redraw the overlay using the default file names.
    save_debug_clusters_from_json()
|
||||||
183
split_bubbles.py
Normal file
183
split_bubbles.py
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Helper script to split bubbles with multiple separate text regions.
|
||||||
|
Run this to manually split Box 2 and Box 7 from debug_clusters.png
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
from typing import List, Tuple, Dict
|
||||||
|
|
||||||
|
def quad_bbox(quad):
    """Return (min_x, min_y, max_x, max_y) enclosing every point of the quad."""
    xs, ys = zip(*[(point[0], point[1]) for point in quad])
    return (min(xs), min(ys), max(xs), max(ys))
|
||||||
|
|
||||||
|
def boxes_union_xyxy(boxes):
    """Return the integer (x1, y1, x2, y2) box enclosing all non-None boxes, or None if there are none."""
    present = [box for box in boxes if box is not None]
    if len(present) == 0:
        return None

    def column(k):
        # k-th coordinate of every remaining box.
        return [box[k] for box in present]

    left = int(min(column(0)))
    top = int(min(column(1)))
    right = int(max(column(2)))
    bottom = int(max(column(3)))
    return (left, top, right, bottom)
|
||||||
|
|
||||||
|
def xyxy_to_xywh(bbox):
    """Convert an (x1, y1, x2, y2) box to an {"x", "y", "w", "h"} dict of ints; None passes through."""
    if bbox is not None:
        left, top, right, bottom = bbox
        width = right - left
        height = bottom - top
        return {"x": int(left), "y": int(top), "w": int(width), "h": int(height)}
    return None
|
||||||
|
|
||||||
|
def bbox_area_xyxy(b):
    """
    Calculate the area of a bounding box in xyxy format.

    Args:
        b: (x1, y1, x2, y2) box, or None.

    Returns:
        Width times height, or 0 for None and for degenerate or inverted boxes.
    """
    if b is None:
        return 0
    x1, y1, x2, y2 = b
    # Clamp each side at zero: a box inverted on both axes would otherwise
    # multiply two negatives into a bogus positive area.
    return max(0, x2 - x1) * max(0, y2 - y1)
|
||||||
|
|
||||||
|
def split_bubble_by_vertical_gap(bubble_id: int, bubble_data: Dict, filtered_indices_map: Dict,
                                 min_gap_threshold: float = 80):
    """
    Attempt to split a bubble into a left and a right group of quads.

    The "vertical gap" is a vertical strip of empty space: quads are ordered
    by the x centre of their bounding box, and the widest distance between
    two neighbouring centres is the split candidate. Note the distance is
    measured between centres, not between box edges.

    Args:
        bubble_id: Unused; accepted for call-site compatibility.
        bubble_data: Bubble entry; only its 'quads' list (points indexable as
            [x, y]) is read.
        filtered_indices_map: Unused; accepted for call-site compatibility.
        min_gap_threshold: Minimum centre-to-centre distance, in pixels, that
            triggers a split. Defaults to 80.

    Returns:
        (left_indices, right_indices, gap_size), where the indices refer to
        positions in bubble_data['quads'], or None if no split.
    """
    quads = bubble_data['quads']

    if len(quads) < 2:
        return None

    # (original index, x centre of the quad's bounding box)
    centers = []
    for i, quad in enumerate(quads):
        xs = [p[0] for p in quad]
        centers.append((i, (min(xs) + max(xs)) / 2.0))

    # Stable sort, so quads with equal centres keep their original order.
    centers.sort(key=lambda t: t[1])

    # Find the largest gap between consecutive x centres.
    max_gap = 0
    split_pos = -1
    for pos, (left_item, right_item) in enumerate(zip(centers, centers[1:])):
        gap = right_item[1] - left_item[1]
        if gap > max_gap:
            max_gap = gap
            split_pos = pos

    if split_pos != -1 and max_gap > min_gap_threshold:
        # Report ORIGINAL quad indices on each side of the gap.
        left_indices = [item[0] for item in centers[:split_pos + 1]]
        right_indices = [item[0] for item in centers[split_pos + 1:]]
        return (left_indices, right_indices, max_gap)

    return None
|
||||||
|
|
||||||
|
def _build_half_bubble(bubble_data, indices, fallback_order, label):
    """Build the JSON entry for one half of a split bubble from its quad indices."""
    quads = [bubble_data['quads'][i] for i in indices]
    quad_bboxes = [bubble_data['quad_bboxes'][i] for i in indices]
    bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    # 3px padding; only the top-left corner is clamped because the image size
    # is not available here.
    padded = (
        max(0, bbox[0] - 3),
        max(0, bbox[1] - 3),
        bbox[2] + 3,
        bbox[3] + 3
    )

    print(f" {label} bbox: {bbox} -> padded: {padded}")

    return {
        "x": padded[0],
        "y": padded[1],
        "w": padded[2] - padded[0],
        "h": padded[3] - padded[1],
        # NOTE(review): when the source bubble has a reading_order, both
        # halves inherit the same value -- confirm that is intended.
        "reading_order": bubble_data.get("reading_order", fallback_order),
        "quad_bboxes": quad_bboxes,
        "quads": quads,
        "text_bbox": xyxy_to_xywh(bbox),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(bbox),
        "line_union_area": int(bbox_area_xyxy(bbox)),
    }


def split_bubbles_in_json(input_file="bubbles.json", output_file="bubbles_split.json", bubble_ids_to_split=None):
    """
    Split specified bubbles in the JSON file.

    Each listed bubble is passed to split_bubble_by_vertical_gap. When a split
    is found, the left half keeps the original id and the right half gets the
    next unused id; otherwise the bubble is copied unchanged. Bubbles not
    listed are always copied unchanged.

    Args:
        input_file: JSON file mapping bubble id strings to bubble entries.
        output_file: Where the resulting mapping is written.
        bubble_ids_to_split: Iterable of integer bubble ids to try to split.
            Defaults to (2, 7).

    Returns:
        None. Prints progress and writes ``output_file``.
    """
    # A None default avoids sharing one mutable list between calls.
    ids_to_split = {2, 7} if bubble_ids_to_split is None else set(bubble_ids_to_split)

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    new_data = {}
    # default=0 keeps an empty input file from raising in max().
    next_bid = max((int(k) for k in data), default=0) + 1

    for bid_str, bubble_data in data.items():
        bid = int(bid_str)

        if bid not in ids_to_split:
            # Keep original
            new_data[bid_str] = bubble_data
            continue

        # Try to split
        split_result = split_bubble_by_vertical_gap(bid, bubble_data, {})
        if not split_result:
            # No split needed
            new_data[bid_str] = bubble_data
            continue

        left_indices, right_indices, gap_size = split_result

        print(f"\n🔀 Splitting BOX#{bid} (gap={gap_size:.1f}px)")
        print(f" Left indices: {left_indices}")
        print(f" Right indices: {right_indices}")

        # Left half keeps the original bubble id, right half gets a new one.
        new_data[str(bid)] = _build_half_bubble(bubble_data, left_indices, bid, "Left")
        new_data[str(next_bid)] = _build_half_bubble(bubble_data, right_indices, next_bid, "Right")
        next_bid += 1

    # Write output
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Saved to {output_file}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Always read from the original export so repeated runs start from the
    # same input.
    split_bubbles_in_json("bubbles_original.json", "bubbles_split.json", [2, 7])
|
||||||
154
split_final.py
Normal file
154
split_final.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Properly split Box 2 and Box 7 by extracting quads from original and writing to new JSON
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import copy
|
||||||
|
|
||||||
|
def quad_bbox(quad):
    """Return the (min_x, min_y, max_x, max_y) bounding box of a quad's points."""
    min_x = min(quad, key=lambda p: p[0])[0]
    min_y = min(quad, key=lambda p: p[1])[1]
    max_x = max(quad, key=lambda p: p[0])[0]
    max_y = max(quad, key=lambda p: p[1])[1]
    return (min_x, min_y, max_x, max_y)
|
||||||
|
|
||||||
|
def boxes_union_xyxy(boxes):
    """Return the integer (x1, y1, x2, y2) union of all non-None boxes, or None when none remain."""
    kept = [b for b in boxes if b is not None]
    if not kept:
        return None
    x1s = [b[0] for b in kept]
    y1s = [b[1] for b in kept]
    x2s = [b[2] for b in kept]
    y2s = [b[3] for b in kept]
    return (int(min(x1s)), int(min(y1s)), int(max(x2s)), int(max(y2s)))
|
||||||
|
|
||||||
|
def xyxy_to_xywh(bbox):
    """Convert an (x1, y1, x2, y2) box into an {"x", "y", "w", "h"} dict of ints; None stays None."""
    if bbox is None:
        return None
    x_min, y_min, x_max, y_max = bbox
    converted = {}
    converted["x"] = int(x_min)
    converted["y"] = int(y_min)
    converted["w"] = int(x_max - x_min)
    converted["h"] = int(y_max - y_min)
    return converted
|
||||||
|
|
||||||
|
def bbox_area_xyxy(b):
    """
    Return the area of an (x1, y1, x2, y2) box.

    None and degenerate or inverted boxes have area 0.
    """
    if b is None:
        return 0
    x1, y1, x2, y2 = b
    # Clamp each side at zero so a box inverted on both axes cannot produce a
    # positive area from two negative extents.
    return max(0, x2 - x1) * max(0, y2 - y1)
|
||||||
|
|
||||||
|
def _half_bubble(source, indices, fallback_order):
    """Build the JSON entry for one half of a split bubble from its quad indices."""
    quads = [source['quads'][i] for i in indices]
    quad_bboxes = [source['quad_bboxes'][i] for i in indices]
    bbox = boxes_union_xyxy([quad_bbox(q) for q in quads])
    # 3px padding; only the top-left corner is clamped because the image size
    # is not known here.
    padded = (max(0, bbox[0] - 3), max(0, bbox[1] - 3), bbox[2] + 3, bbox[3] + 3)
    return {
        "x": padded[0],
        "y": padded[1],
        "w": padded[2] - padded[0],
        "h": padded[3] - padded[1],
        # NOTE(review): when the source bubble has a reading_order, both
        # halves inherit the same value -- confirm that is intended.
        "reading_order": source.get("reading_order", fallback_order),
        "quad_bboxes": quad_bboxes,
        "quads": [[list(p) for p in quad] for quad in quads],  # Explicit list conversion
        "text_bbox": xyxy_to_xywh(bbox),
        "line_bboxes": [],
        "line_union_bbox": xyxy_to_xywh(bbox),
        "line_union_area": int(bbox_area_xyxy(bbox)),
    }


# Manual split table:
# (source id, id given to the right half, left quad indices, right quad indices).
# The left half keeps the source id.
_SPLIT_PLAN = (
    ("2", "8", [10, 1, 2, 4, 8, 0, 3, 6, 11, 12], [5, 7, 9]),
    ("7", "9", [8, 13, 4, 11, 2, 6], [0, 5, 1, 3, 7, 10, 12, 9]),
)
_SPLIT_SOURCE_IDS = {int(plan[0]) for plan in _SPLIT_PLAN}

# Load original
with open("bubbles_original.json", "r", encoding="utf-8") as f:
    original = json.load(f)

new_data = {}

# Copy all non-split bubbles
for bid_str, bubble_data in original.items():
    if int(bid_str) not in _SPLIT_SOURCE_IDS:
        new_data[bid_str] = copy.deepcopy(bubble_data)

for _position, (_src_id, _new_id, _left_indices, _right_indices) in enumerate(_SPLIT_PLAN):
    # The new ids are hard-coded; refuse to silently overwrite a bubble that
    # already exists under one of them.
    if _new_id in new_data:
        raise ValueError(
            f"Cannot split Box {_src_id}: bubble id {_new_id} already exists in bubbles_original.json"
        )

    print(("\n" if _position else "") + f"🔀 Splitting Box {_src_id}...")
    _source = original[_src_id]

    # Left part keeps the source id
    new_data[_src_id] = _half_bubble(_source, _left_indices, int(_src_id))
    print(f" Left: y={new_data[_src_id]['y']}, h={new_data[_src_id]['h']}, quads={len(_left_indices)}")

    # Right part gets the new id
    new_data[_new_id] = _half_bubble(_source, _right_indices, int(_new_id))
    print(f" Right: y={new_data[_new_id]['y']}, h={new_data[_new_id]['h']}, quads={len(_right_indices)}")

# Sort by ID for output
new_data_sorted = {}
for bid in sorted([int(k) for k in new_data.keys()]):
    new_data_sorted[str(bid)] = new_data[str(bid)]

with open("bubbles.json", "w", encoding="utf-8") as f:
    json.dump(new_data_sorted, f, indent=2, ensure_ascii=False)

print(f"\n✅ Done! Saved {len(new_data_sorted)} bubbles to bubbles.json")
|
||||||
Reference in New Issue
Block a user