Added all
This commit is contained in:
@@ -104,14 +104,21 @@ def looks_like_box_tag(t: str) -> bool:
|
||||
def is_noise_text(text: str) -> bool:
|
||||
t = (text or "").strip()
|
||||
|
||||
if re.fullmatch(r"[\?\!]+", t):
|
||||
# ALLOW pure punctuation clusters like "...", "!!", "?!"
|
||||
if re.fullmatch(r"[\?\!\.]+", t):
|
||||
return False
|
||||
|
||||
# ALLOW single alphabetical characters (crucial for vertical text)
|
||||
if len(t) == 1 and t.isalpha():
|
||||
return False
|
||||
|
||||
if any(re.fullmatch(p, t) for p in NOISE_PATTERNS):
|
||||
return True
|
||||
if looks_like_box_tag(t):
|
||||
return True
|
||||
if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!]", t):
|
||||
|
||||
# Relaxed the length check to allow 1-2 letter words and punctuation
|
||||
if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
|
||||
return True
|
||||
|
||||
symbol_ratio = sum(1 for c in t if not c.isalnum() and not c.isspace()) / max(1, len(t))
|
||||
@@ -193,27 +200,10 @@ def ocr_candidate_score(text: str) -> float:
|
||||
# SPLITTERS
|
||||
# ============================================================
|
||||
def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
||||
"""
|
||||
Checks if a bounding box crosses a solid vertical panel border.
|
||||
|
||||
This detects vertical lines/borders within the bubble that might indicate
|
||||
the bubble should be split into left/right columns.
|
||||
|
||||
Only splits if:
|
||||
- Bubble has 10+ quads (real multi-column layouts have many quads, while nested bubbles have fewer)
|
||||
- Height is sufficient (50+ pixels)
|
||||
|
||||
Nested bubbles typically have 1-6 quads; the 10-quad minimum leaves a margin above that range.
|
||||
|
||||
Returns:
|
||||
Tuple (box_left, box_right, split_x_absolute) if split found, else None
|
||||
"""
|
||||
x1, y1, x2, y2 = bbox_xyxy
|
||||
w = x2 - x1
|
||||
h = y2 - y1
|
||||
|
||||
# Prevent false splits: require substantial number of quads
|
||||
# Nested bubbles have 1-6 quads; only bubbles with at least 10 quads are considered for splitting
|
||||
if bubble_quads is not None and len(bubble_quads) < 10:
|
||||
return None
|
||||
|
||||
@@ -238,31 +228,23 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
||||
peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start
|
||||
peak_val = vertical_projection[peak_x_relative]
|
||||
|
||||
# Find ALL significant peaks, not just the first
|
||||
# This helps detect internal gaps between text regions (left vs right bubbles)
|
||||
threshold_val = h * 255 * 0.25 # Very sensitive threshold
|
||||
threshold_val = h * 255 * 0.25
|
||||
significant_peaks = []
|
||||
|
||||
for x_rel in range(search_start, search_end):
|
||||
if vertical_projection[x_rel] > threshold_val:
|
||||
significant_peaks.append((x_rel, vertical_projection[x_rel]))
|
||||
|
||||
# If we have multiple peaks, find the largest gap between text regions
|
||||
# by looking for the valley (lowest projection value) between peaks
|
||||
if len(significant_peaks) > 1:
|
||||
# Find the minimum projection value (gap) across the whole search window
|
||||
min_proj_val = np.min(vertical_projection[search_start:search_end])
|
||||
min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start
|
||||
|
||||
# Use the valley point as the split, not the peak
|
||||
# This more accurately separates left-aligned vs right-aligned content
|
||||
if min_proj_val < threshold_val * 0.6: # Valley is clearly a gap
|
||||
if min_proj_val < threshold_val * 0.6:
|
||||
split_x_absolute = x1 + min_proj_idx
|
||||
box_left = (x1, y1, split_x_absolute, y2)
|
||||
box_right = (split_x_absolute, y1, x2, y2)
|
||||
return box_left, box_right, split_x_absolute
|
||||
|
||||
# Fallback: if the main peak is significant enough, use it
|
||||
if peak_val > (h * 255 * 0.40):
|
||||
split_x_absolute = x1 + peak_x_relative
|
||||
box_left = (x1, y1, split_x_absolute, y2)
|
||||
@@ -273,23 +255,12 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
|
||||
|
||||
|
||||
def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False):
|
||||
"""
|
||||
Checks if the text inside a bubble is actually two separate columns of text
|
||||
by looking for a clean vertical gap between the bounding boxes.
|
||||
|
||||
Improved to detect large column gaps (e.g., 100+ pixels between text regions).
|
||||
|
||||
Args:
|
||||
use_aggressive_thresholds: If True, use lower thresholds (80px gap, or 1.2x median line height with a 40px minimum gap)
|
||||
for fallback splitting after failed panel border detection
|
||||
"""
|
||||
if len(indices) < 2:
|
||||
return None
|
||||
|
||||
boxes = [quad_bbox(ocr[i][0]) for i in indices]
|
||||
sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][0])
|
||||
|
||||
# Calculate gaps between consecutive elements sorted by x-coordinate
|
||||
gaps = []
|
||||
current_max_x = sorted_items[0][1][2]
|
||||
|
||||
@@ -303,33 +274,25 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
||||
if not gaps:
|
||||
return None
|
||||
|
||||
# Find the largest gap
|
||||
max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
|
||||
|
||||
# Calculate thresholds based on line height
|
||||
hs = [b[3] - b[1] for b in boxes]
|
||||
med_h = float(np.median(hs)) if hs else 15.0
|
||||
|
||||
if use_aggressive_thresholds:
|
||||
# Relaxed thresholds for fallback splitting after failed panel border
|
||||
threshold1 = 80.0 # Increased from 10 to require significant gaps
|
||||
threshold2 = med_h * 1.2 # Increased from 0.8
|
||||
min_gap = 40.0 # Increased from 5
|
||||
threshold1 = 80.0
|
||||
threshold2 = med_h * 1.2
|
||||
min_gap = 40.0
|
||||
else:
|
||||
# Normal thresholds - very conservative to avoid breaking valid bubbles
|
||||
threshold1 = 120.0 # Increased from 50 - require very large gaps
|
||||
threshold2 = med_h * 3.0 # Increased from 2.0 - require 3x line height
|
||||
min_gap = 60.0 # Increased from 25
|
||||
|
||||
|
||||
threshold1 = 120.0
|
||||
threshold2 = med_h * 3.0
|
||||
min_gap = 60.0
|
||||
|
||||
if max_gap_size > threshold1 or (max_gap_size > threshold2 and max_gap_size > min_gap):
|
||||
split_idx = max_gap_idx
|
||||
left_indices = [item[0] for item in sorted_items[:split_idx]]
|
||||
right_indices = [item[0] for item in sorted_items[split_idx:]]
|
||||
|
||||
# Additional safety: don't split if one side only has 1 detection
|
||||
# This prevents breaking up valid bubbles with just a few words
|
||||
if len(left_indices) < 2 or len(right_indices) < 2:
|
||||
return None
|
||||
|
||||
@@ -667,9 +630,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
med_h = float(np.median(hs)) if hs else 12.0
|
||||
dist_thresh = max(20.0, med_h * 2.2)
|
||||
|
||||
# Adaptive vertical gap: allow up to 1.0x median line height for vertical gaps
|
||||
# This handles cases where lines are spaced further apart (e.g., multi-line bubbles)
|
||||
adaptive_gap_y = max(gap_px, med_h * 1.0)
|
||||
adaptive_gap_y = max(gap_px, med_h * 2.5)
|
||||
|
||||
p = list(range(n))
|
||||
|
||||
@@ -684,17 +645,23 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i + 1, n):
|
||||
# Use adaptive gap for vertical spacing, fixed gap for horizontal
|
||||
ax1, ay1, ax2, ay2 = boxes[i]
|
||||
bx1, by1, bx2, by2 = boxes[j]
|
||||
gap_x = max(0, max(ax1, bx1) - min(ax2, bx2))
|
||||
gap_y = max(0, max(ay1, by1) - min(ay2, by2))
|
||||
|
||||
cx1, cy1 = centers[i]
|
||||
cx2, cy2 = centers[j]
|
||||
is_vertically_aligned = abs(cx1 - cx2) < (med_h * 1.5)
|
||||
|
||||
if is_vertically_aligned and gap_y <= (med_h * 4.0):
|
||||
unite(i, j)
|
||||
continue
|
||||
|
||||
if gap_x <= gap_px and gap_y <= adaptive_gap_y:
|
||||
unite(i, j)
|
||||
continue
|
||||
cx1, cy1 = centers[i]
|
||||
cx2, cy2 = centers[j]
|
||||
|
||||
d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5
|
||||
if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 3.0:
|
||||
unite(i, j)
|
||||
@@ -721,8 +688,6 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3):
|
||||
|
||||
x1, y1, x2, y2 = ub
|
||||
|
||||
# Adaptive padding: scale with median line height to ensure all boundary chars are captured
|
||||
# Use max of fixed padding or line-height-based padding
|
||||
adaptive_pad = max(bbox_padding, int(round(med_h * 0.35)))
|
||||
x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad)
|
||||
x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad)
|
||||
@@ -856,7 +821,7 @@ def translate_manga_text(
|
||||
image_path="001-page.png",
|
||||
source_lang="en",
|
||||
target_lang="ca",
|
||||
confidence_threshold=0.12,
|
||||
confidence_threshold=0.05,
|
||||
min_text_length=1,
|
||||
gap_px="auto",
|
||||
filter_sound_effects=True,
|
||||
@@ -913,16 +878,14 @@ def translate_manga_text(
|
||||
filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
|
||||
)
|
||||
|
||||
# ── NEW: SPLIT MULTI-PANEL & MULTI-COLUMN BUBBLES ──
|
||||
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
|
||||
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
|
||||
splits_performed = []
|
||||
|
||||
for bid in list(bubbles.keys()):
|
||||
box = bubble_boxes[bid]
|
||||
bubble_split = None # Will hold (left_idxs, right_idxs) if a split is detected
|
||||
bubble_split = None
|
||||
|
||||
# 1. Try Image-based Panel Border Split
|
||||
split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid])
|
||||
if split_result:
|
||||
box_left, box_right, split_x = split_result
|
||||
@@ -939,7 +902,6 @@ def translate_manga_text(
|
||||
bubble_split = (left_idxs, right_idxs)
|
||||
splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
|
||||
elif len(bubble_quads[bid]) >= 10:
|
||||
# Panel border split failed (all quads on one side). Try text column split for large bubbles
|
||||
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
|
||||
if col_split:
|
||||
left_idxs, right_idxs = col_split
|
||||
@@ -947,7 +909,6 @@ def translate_manga_text(
|
||||
bubble_split = (left_idxs, right_idxs)
|
||||
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||
|
||||
# 2. If no panel border split, try text column split
|
||||
if bubble_split is None:
|
||||
col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid)
|
||||
if col_split:
|
||||
@@ -956,17 +917,14 @@ def translate_manga_text(
|
||||
bubble_split = (left_idxs, right_idxs)
|
||||
splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)")
|
||||
|
||||
# 3. Create split bubbles if detected
|
||||
if bubble_split:
|
||||
left_idxs, right_idxs = bubble_split
|
||||
# Create Left Bubble
|
||||
new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered)
|
||||
ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs])
|
||||
new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3))
|
||||
new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs]
|
||||
new_bubble_indices[bid] = left_idxs
|
||||
|
||||
# Create Right Bubble
|
||||
new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered)
|
||||
ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs])
|
||||
new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3))
|
||||
@@ -974,24 +932,20 @@ def translate_manga_text(
|
||||
new_bubble_indices[next_bid] = right_idxs
|
||||
next_bid += 1
|
||||
else:
|
||||
# No split needed, keep original
|
||||
new_bubbles[bid] = bubbles[bid]
|
||||
new_bubble_boxes[bid] = bubble_boxes[bid]
|
||||
new_bubble_quads[bid] = bubble_quads[bid]
|
||||
new_bubble_indices[bid] = bubble_indices[bid]
|
||||
|
||||
# Print split summary
|
||||
if splits_performed:
|
||||
print(f"\n🔀 Multi-column bubble splits detected: {len(splits_performed)}")
|
||||
for split_info in splits_performed:
|
||||
print(f" ✓ Split {split_info}")
|
||||
|
||||
# Overwrite old dictionaries with the newly split ones
|
||||
bubbles = new_bubbles
|
||||
bubble_boxes = new_bubble_boxes
|
||||
bubble_quads = new_bubble_quads
|
||||
bubble_indices = new_bubble_indices
|
||||
# ───────────────────────────────────────────────────
|
||||
|
||||
translator = GoogleTranslator(source=source_lang, target=target_lang)
|
||||
|
||||
@@ -1098,8 +1052,8 @@ if __name__ == "__main__":
|
||||
image_path="004.png",
|
||||
source_lang="en",
|
||||
target_lang="ca",
|
||||
confidence_threshold=0.12,
|
||||
min_text_length=2,
|
||||
confidence_threshold=0.05,
|
||||
min_text_length=1,
|
||||
gap_px="auto",
|
||||
filter_sound_effects=True,
|
||||
quality_threshold=0.62,
|
||||
|
||||
Reference in New Issue
Block a user