Split
This commit is contained in:
@@ -238,8 +238,31 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
||||
peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start
|
||||
peak_val = vertical_projection[peak_x_relative]
|
||||
|
||||
# Detect panel border with more sensitive threshold (0.40 instead of 0.60)
|
||||
# This catches boxes with vertical lines even if they're not super dark
|
||||
# Find ALL significant peaks, not just the first
|
||||
# This helps detect internal gaps between text regions (left vs right bubbles)
|
||||
threshold_val = h * 255 * 0.25 # Very sensitive threshold
|
||||
significant_peaks = []
|
||||
|
||||
for x_rel in range(search_start, search_end):
|
||||
if vertical_projection[x_rel] > threshold_val:
|
||||
significant_peaks.append((x_rel, vertical_projection[x_rel]))
|
||||
|
||||
# If we have multiple peaks, find the largest gap between text regions
|
||||
# by looking for the valley (lowest projection value) between peaks
|
||||
if len(significant_peaks) > 1:
|
||||
# Find the minimum value (gap) across the whole search window (not only between the first and last peak)
|
||||
min_proj_val = np.min(vertical_projection[search_start:search_end])
|
||||
min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start
|
||||
|
||||
# Use the valley point as the split, not the peak
|
||||
# This more accurately separates left-aligned vs right-aligned content
|
||||
if min_proj_val < threshold_val * 0.6: # Valley is clearly a gap
|
||||
split_x_absolute = x1 + min_proj_idx
|
||||
box_left = (x1, y1, split_x_absolute, y2)
|
||||
box_right = (split_x_absolute, y1, x2, y2)
|
||||
return box_left, box_right, split_x_absolute
|
||||
|
||||
# Fallback: if the main peak is significant enough, use it
|
||||
if peak_val > (h * 255 * 0.40):
|
||||
split_x_absolute = x1 + peak_x_relative
|
||||
box_left = (x1, y1, split_x_absolute, y2)
|
||||
@@ -289,14 +312,14 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
||||
|
||||
if use_aggressive_thresholds:
|
||||
# Relaxed thresholds for fallback splitting after failed panel border
|
||||
threshold1 = 10.0 # Very low absolute threshold for fallback
|
||||
threshold2 = med_h * 0.8 # Even lower relative threshold
|
||||
min_gap = 5.0
|
||||
threshold1 = 80.0 # Increased from 10 to require significant gaps
|
||||
threshold2 = med_h * 1.2 # Increased from 0.8
|
||||
min_gap = 40.0 # Increased from 5
|
||||
else:
|
||||
# Normal thresholds
|
||||
threshold1 = 50.0 # Absolute threshold: 50 pixels
|
||||
threshold2 = med_h * 2.0 # Relative threshold: 2x line height
|
||||
min_gap = 25.0
|
||||
# Normal thresholds - very conservative to avoid breaking valid bubbles
|
||||
threshold1 = 120.0 # Increased from 50 - require very large gaps
|
||||
threshold2 = med_h * 3.0 # Increased from 2.0 - require 3x line height
|
||||
min_gap = 60.0 # Increased from 25
|
||||
|
||||
|
||||
|
||||
@@ -304,6 +327,12 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
||||
split_idx = max_gap_idx
|
||||
left_indices = [item[0] for item in sorted_items[:split_idx]]
|
||||
right_indices = [item[0] for item in sorted_items[split_idx:]]
|
||||
|
||||
# Additional safety: don't split if either side has fewer than 2 detections
|
||||
# This prevents breaking up valid bubbles with just a few words
|
||||
if len(left_indices) < 2 or len(right_indices) < 2:
|
||||
return None
|
||||
|
||||
return left_indices, right_indices
|
||||
|
||||
return None
|
||||
@@ -637,6 +666,10 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
hs = [max(1.0, b[3] - b[1]) for b in boxes]
|
||||
med_h = float(np.median(hs)) if hs else 12.0
|
||||
dist_thresh = max(20.0, med_h * 2.2)
|
||||
|
||||
# Adaptive vertical gap: use the larger of gap_px and 1.0x median line height for vertical gaps
|
||||
# This handles cases where lines are spaced further apart (e.g., multi-line bubbles)
|
||||
adaptive_gap_y = max(gap_px, med_h * 1.0)
|
||||
|
||||
p = list(range(n))
|
||||
|
||||
@@ -651,7 +684,13 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i + 1, n):
|
||||
if overlap_or_near(boxes[i], boxes[j], gap=gap_px):
|
||||
# Use adaptive gap for vertical spacing, fixed gap for horizontal
|
||||
ax1, ay1, ax2, ay2 = boxes[i]
|
||||
bx1, by1, bx2, by2 = boxes[j]
|
||||
gap_x = max(0, max(ax1, bx1) - min(ax2, bx2))
|
||||
gap_y = max(0, max(ay1, by1) - min(ay2, by2))
|
||||
|
||||
if gap_x <= gap_px and gap_y <= adaptive_gap_y:
|
||||
unite(i, j)
|
||||
continue
|
||||
cx1, cy1 = centers[i]
|
||||
@@ -681,8 +720,12 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
continue
|
||||
|
||||
x1, y1, x2, y2 = ub
|
||||
x1 = max(0, x1 - bbox_padding); y1 = max(0, y1 - bbox_padding)
|
||||
x2 = min(iw - 1, x2 + bbox_padding); y2 = min(ih - 1, y2 + bbox_padding)
|
||||
|
||||
# Adaptive padding: scale with median line height to ensure all boundary chars are captured
|
||||
# Use max of fixed padding or line-height-based padding
|
||||
adaptive_pad = max(bbox_padding, int(round(med_h * 0.35)))
|
||||
x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad)
|
||||
x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad)
|
||||
|
||||
bubbles[bid] = lines
|
||||
bubble_boxes[bid] = (x1, y1, x2, y2)
|
||||
|
||||
Reference in New Issue
Block a user