Added bubble split
This commit is contained in:
@@ -104,7 +104,6 @@ def looks_like_box_tag(t: str) -> bool:
|
||||
def is_noise_text(text: str) -> bool:
|
||||
t = (text or "").strip()
|
||||
|
||||
# Explicitly allow standalone punctuation like ? or !
|
||||
if re.fullmatch(r"[\?\!]+", t):
|
||||
return False
|
||||
|
||||
@@ -190,6 +189,126 @@ def ocr_candidate_score(text: str) -> float:
|
||||
return max(0.0, min(1.0, score))
|
||||
|
||||
|
||||
# ============================================================
|
||||
# SPLITTERS
|
||||
# ============================================================
|
||||
def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None, *, min_quads=10,
                    min_size=50, dark_threshold=150, border_ratio=0.40):
    """
    Look for a solid vertical panel border crossing a bubble's bounding box.

    The box is cropped from the image, converted to grayscale and inverse
    thresholded so that pixels at or below ``dark_threshold`` count as "dark".
    Dark pixels are summed per column, and the darkest column inside the
    central half of the box (25%-75% of its width) is taken as the border
    candidate. The box is split there when that column is dark over more than
    ``border_ratio`` of the box height.

    A split is only attempted when:
      - ``bubble_quads`` is None, or it holds at least ``min_quads`` quads
        (the original author's heuristic: nested bubbles contain few quads,
        real multi-column layouts contain many), and
      - the part of the box that lies inside the image is at least
        ``min_size`` pixels wide and tall.

    Args:
        image_bgr: Image as a numpy array; either 3-channel BGR or an
            already-grayscale 2-D array.
        bbox_xyxy: (x1, y1, x2, y2) box in image coordinates. Values are
            rounded to int and clamped to the image so that negative
            coordinates cannot wrap around to the opposite image edge.
        bubble_quads: Optional collection of OCR quads in the bubble; only
            its length is used.
        min_quads: Minimum quad count required when ``bubble_quads`` is given.
        min_size: Minimum width and height (pixels) of the clamped box.
        dark_threshold: Gray level at or below which a pixel counts as dark.
        border_ratio: Fraction of the box height that must be dark in the
            peak column for it to count as a border.

    Returns:
        Tuple (box_left, box_right, split_x_absolute) of plain Python ints
        if a border is found, else None. The boxes use the clamped
        coordinates.
    """
    # Cheapest rejection first: too few quads to be a multi-column layout.
    if bubble_quads is not None and len(bubble_quads) < min_quads:
        return None

    img_h, img_w = image_bgr.shape[:2]
    x1, y1, x2, y2 = (int(round(v)) for v in bbox_xyxy)

    # Clamp to the image. Without this a negative coordinate would be
    # interpreted by numpy slicing as "count from the far edge" and the
    # function would analyse the wrong pixels.
    x1 = min(max(x1, 0), img_w)
    x2 = min(max(x2, 0), img_w)
    y1 = min(max(y1, 0), img_h)
    y2 = min(max(y2, 0), img_h)

    w = x2 - x1
    h = y2 - y1
    if w < min_size or h < min_size:
        return None

    roi = image_bgr[y1:y2, x1:x2]
    if roi.size == 0:
        return None

    # BGR2GRAY rejects single-channel input, so pass grayscale through as-is.
    gray = roi if roi.ndim == 2 else cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Inverse threshold: dark pixels become 255, bright pixels become 0.
    _, thresh = cv2.threshold(gray, dark_threshold, 255, cv2.THRESH_BINARY_INV)

    # Per-column darkness; explicit wide dtype so the sum cannot overflow.
    vertical_projection = np.sum(thresh, axis=0, dtype=np.int64)

    # Only look in the central half: a border near the edge is the bubble's
    # own outline, not a divider between two columns.
    search_start = int(w * 0.25)
    search_end = int(w * 0.75)
    if search_start >= search_end:
        return None

    window = vertical_projection[search_start:search_end]
    peak_x_relative = int(np.argmax(window)) + search_start
    peak_val = int(vertical_projection[peak_x_relative])

    # A fully dark column sums to h * 255; require border_ratio of that.
    if peak_val <= h * 255 * border_ratio:
        return None

    split_x_absolute = x1 + peak_x_relative
    box_left = (x1, y1, split_x_absolute, y2)
    box_right = (split_x_absolute, y1, x2, y2)
    return box_left, box_right, split_x_absolute
|
||||
|
||||
|
||||
def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False):
    """
    Decide whether the OCR items of one bubble form two side-by-side columns.

    The items' bounding boxes are ordered by their left edge. Walking that
    order, the horizontal gap between each box's left edge and the furthest
    right edge seen so far is measured, and the widest such gap becomes the
    split candidate. The bubble is split there if the gap is wide enough.

    Thresholds (``h`` is the median box height):
      - normal:     gap > 50 px, or gap > 2.0 * h and gap > 25 px
      - aggressive: gap > 10 px, or gap > 0.8 * h and gap > 5 px

    Args:
        indices: Positions into ``ocr`` of the items belonging to the bubble.
        ocr: Sequence of OCR entries; element ``[0]`` of each entry is handed
            to ``quad_bbox``. (Assumes ``quad_bbox`` returns
            (x1, y1, x2, y2) -- confirm against its definition.)
        bid: Bubble id; accepted for call-site compatibility, not used here.
        use_aggressive_thresholds: Use the lower "aggressive" thresholds
            listed above instead of the normal ones.

    Returns:
        (left_indices, right_indices), each ordered by left edge, or None
        when there are fewer than two items or no gap is wide enough.
    """
    if len(indices) < 2:
        return None

    boxes = [quad_bbox(ocr[i][0]) for i in indices]
    ordered = sorted(zip(indices, boxes), key=lambda pair: pair[1][0])

    # Track the widest gap while sweeping left to right. Strict ">" keeps
    # the first occurrence when several gaps tie for the maximum.
    widest_gap = None
    split_at = None
    right_edge = ordered[0][1][2]
    for position, (_, box) in enumerate(ordered[1:], start=1):
        gap = box[0] - right_edge
        if widest_gap is None or gap > widest_gap:
            widest_gap = gap
            split_at = position
        right_edge = max(right_edge, box[2])

    # Median box height stands in for the line height.
    median_height = float(np.median([box[3] - box[1] for box in boxes]))

    if use_aggressive_thresholds:
        absolute_limit, relative_limit, floor = 10.0, median_height * 0.8, 5.0
    else:
        absolute_limit, relative_limit, floor = 50.0, median_height * 2.0, 25.0

    clears_absolute = widest_gap > absolute_limit
    clears_relative = widest_gap > relative_limit and widest_gap > floor
    if not (clears_absolute or clears_relative):
        return None

    left_indices = [item_index for item_index, _ in ordered[:split_at]]
    right_indices = [item_index for item_index, _ in ordered[split_at:]]
    return left_indices, right_indices
|
||||
|
||||
|
||||
# ============================================================
|
||||
# OCR ENGINES (Apple Native Vision)
|
||||
# ============================================================
|
||||
@@ -588,23 +707,17 @@ def save_debug_clusters(
|
||||
if img is None:
|
||||
return
|
||||
|
||||
# ── FIX 1: white-fill each OCR quad before drawing its outline ──
|
||||
for bbox, txt, conf in ocr:
|
||||
pts = np.array(bbox, dtype=np.int32)
|
||||
cv2.fillPoly(img, [pts], (255, 255, 255)) # ← white background
|
||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1) # ← grey outline
|
||||
cv2.fillPoly(img, [pts], (255, 255, 255))
|
||||
cv2.polylines(img, [pts], True, (180, 180, 180), 1)
|
||||
|
||||
for bid, bb in bubble_boxes.items():
|
||||
x1, y1, x2, y2 = bb
|
||||
|
||||
# Draw green bubble bounding box + ID label
|
||||
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 220, 0), 2)
|
||||
cv2.putText(img, f"BOX#{bid}", (x1 + 2, max(15, y1 + 16)),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 220, 0), 2)
|
||||
|
||||
# ── FIX 2: yellow line-box drawing loop removed entirely ────
|
||||
|
||||
# Draw translated text overlay below each bubble box
|
||||
if clean_lines and bid in clean_lines:
|
||||
text = clean_lines[bid]
|
||||
words = text.split()
|
||||
@@ -757,6 +870,86 @@ def translate_manga_text(
|
||||
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
||||
)
|
||||
|
||||
# ── NEW: SPLIT MULTI-PANEL & MULTI-COLUMN BUBBLES ──
|
||||
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
|
||||
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
|
||||
splits_performed = []
|
||||
|
||||
for bid in list(bubbles.keys()):
|
||||
box = bubble_boxes[bid]
|
||||
bubble_split = None # Will hold (left_idxs, right_idxs) if a split is detected
|
||||
|
||||
# 1. Try Image-based Panel Border Split
|
||||
split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
|
||||
if split_result:
|
||||
box_left, box_right, split_x = split_result
|
||||
left_idxs, right_idxs = [], []
|
||||
|
||||
for idx in bubble_indices[bid]:
|
||||
cx, cy = quad_center(filtered[idx][0])
|
||||
if cx < split_x:
|
||||
left_idxs.append(idx)
|
||||
else:
|
||||
right_idxs.append(idx)
|
||||
|
||||
if left_idxs and right_idxs:
|
||||
bubble_split = (left_idxs, right_idxs)
|
||||
splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
|
||||
elif len(bubble_quads[bid]) >= 10:
|
||||
# Panel border split failed (all quads on one side). Try text column split for large bubbles
|
||||
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
|
||||
if col_split:
|
||||
left_idxs, right_idxs = col_split
|
||||
if left_idxs and right_idxs:
|
||||
bubble_split = (left_idxs, right_idxs)
|
||||
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||
|
||||
# 2. If no panel border split, try text column split
|
||||
if bubble_split is None:
|
||||
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
|
||||
if col_split:
|
||||
left_idxs, right_idxs = col_split
|
||||
if left_idxs and right_idxs:
|
||||
bubble_split = (left_idxs, right_idxs)
|
||||
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||
|
||||
# 3. Create split bubbles if detected
|
||||
if bubble_split:
|
||||
left_idxs, right_idxs = bubble_split
|
||||
# Create Left Bubble
|
||||
new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
|
||||
ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
|
||||
new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
|
||||
new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
|
||||
new_bubble_indices[bid] = left_idxs
|
||||
|
||||
# Create Right Bubble
|
||||
new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
|
||||
ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
|
||||
new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
|
||||
new_bubble_quads[next_bid] = [filtered[i][0] for i in right_idxs]
|
||||
new_bubble_indices[next_bid] = right_idxs
|
||||
next_bid += 1
|
||||
else:
|
||||
# No split needed, keep original
|
||||
new_bubbles[bid] = bubbles[bid]
|
||||
new_bubble_boxes[bid] = bubble_boxes[bid]
|
||||
new_bubble_quads[bid] = bubble_quads[bid]
|
||||
new_bubble_indices[bid] = bubble_indices[bid]
|
||||
|
||||
# Print split summary
|
||||
if splits_performed:
|
||||
print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
|
||||
for split_info in splits_performed:
|
||||
print(f" ✓ Split {split_info}")
|
||||
|
||||
# Overwrite old dictionaries with the newly split ones
|
||||
bubbles = new_bubbles
|
||||
bubble_boxes = new_bubble_boxes
|
||||
bubble_quads = new_bubble_quads
|
||||
bubble_indices = new_bubble_indices
|
||||
# ───────────────────────────────────────────────────
|
||||
|
||||
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
||||
|
||||
clean_lines: Dict[int, str] = {}
|
||||
@@ -871,4 +1064,4 @@ if __name__ == "__main__":
|
||||
export_bubbles_to="bubbles.json",
|
||||
reading_mode="ltr",
|
||||
debug=True
|
||||
)
|
||||
)
|
||||
Reference in New Issue
Block a user