Some fixes running
This commit is contained in:
@@ -193,6 +193,283 @@ def ocr_candidate_score(text: str) -> float:
|
|||||||
return max(0.0, min(1.0, score))
|
return max(0.0, min(1.0, score))
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# SPEECH BUBBLE DETECTION (NEW)
|
||||||
|
# ============================================================
|
||||||
|
def detect_speech_bubbles(image_bgr: np.ndarray,
                          min_area: float = 500) -> List[np.ndarray]:
    """Detect candidate speech-bubble contours used for box splitting.

    The image is converted to grayscale, binarised with an inverted Gaussian
    adaptive threshold, and the external contours of the result are collected.

    Args:
        image_bgr: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        min_area: Contours whose ``cv2.contourArea`` is not strictly greater
            than this value are discarded. Defaults to 500, the value that
            used to be hard-coded.

    Returns:
        The contours whose area exceeds ``min_area``, in the order OpenCV
        reported them.
    """
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)

    # Inverted adaptive threshold (block size 11, constant 2).
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contours are second-from-last in both.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Keep only contours large enough to plausibly be a bubble.
    return [contour for contour in contours
            if cv2.contourArea(contour) > min_area]
|
||||||
|
|
||||||
|
|
||||||
|
def is_quad_in_bubble(quad_bbox_xyxy: Tuple[int, int, int, int],
                      bubble_contour: np.ndarray,
                      tolerance: int = 5) -> bool:
    """Check whether a quad (text box) lies inside a speech bubble.

    The test is performed on the centre point of the bounding box.

    Args:
        quad_bbox_xyxy: Bounding box as ``(x1, y1, x2, y2)``.
        bubble_contour: Contour passed to ``cv2.pointPolygonTest``.
        tolerance: Maximum distance, in pixels, that the centre may lie
            *outside* the contour and still count as inside.

    Returns:
        True when the centre is inside the contour, on its edge, or at most
        ``tolerance`` pixels outside it.
    """
    x1, y1, x2, y2 = quad_bbox_xyxy
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2

    # measureDist must be True: with False the call only returns +1/0/-1, so
    # comparing against -tolerance would accept every point for any
    # tolerance >= 1. With True it returns the signed distance to the nearest
    # edge (positive inside, negative outside).
    signed_distance = cv2.pointPolygonTest(
        bubble_contour, (float(cx), float(cy)), True
    )

    return signed_distance >= -tolerance
|
||||||
|
|
||||||
|
|
||||||
|
def split_indices_by_bubble(indices: List[int],
                            ocr: List[Tuple],
                            bubble_contours: List[np.ndarray]) -> List[List[int]]:
    """Partition OCR indices according to the bubble contour containing them.

    Each index is assigned to the first contour (in ``bubble_contours`` order)
    for which ``is_quad_in_bubble`` accepts the quad's bounding box. Indices
    accepted by no contour are collected into one trailing group.

    Args:
        indices: Positions into ``ocr`` to partition.
        ocr: OCR entries; element ``[0]`` of each entry is handed to
            ``quad_bbox``.
        bubble_contours: Candidate bubble contours.

    Returns:
        One list per contour that received at least one index, ordered by
        first assignment, followed by the group of unassigned indices when
        that group is non-empty. An empty ``indices`` yields ``[]``.
    """
    if not indices:
        return []

    members_by_bubble = {}
    unassigned = []

    for quad_idx in indices:
        box = quad_bbox(ocr[quad_idx][0])

        for contour_pos, contour in enumerate(bubble_contours):
            if is_quad_in_bubble(box, contour):
                members_by_bubble.setdefault(contour_pos, []).append(quad_idx)
                break
        else:
            # No contour claimed this quad.
            unassigned.append(quad_idx)

    groups = list(members_by_bubble.values())

    # All quads outside every bubble travel together as a single group.
    if unassigned:
        groups.append(unassigned)

    return groups
|
||||||
|
|
||||||
|
|
||||||
|
def check_vertical_alignment_split(indices: List[int],
                                   ocr: List[Tuple],
                                   threshold: int = 20) -> List[List[int]]:
    """Split indices into groups that are vertically separated.

    Quads are ordered by the top edge of their bounding box. A new group is
    started whenever the top of the next quad lies more than ``threshold``
    pixels below the lowest bottom edge seen so far in the current group.

    Args:
        indices: Positions into ``ocr`` to split.
        ocr: OCR entries; element ``[0]`` of each entry is handed to
            ``quad_bbox``, whose result is indexed as ``(x1, y1, x2, y2)``.
        threshold: Largest vertical gap, in pixels, tolerated inside a group.

    Returns:
        The groups, top to bottom. When ``indices`` holds zero or one element
        it is returned unchanged as the only group.
    """
    if len(indices) <= 1:
        return [indices]

    # Stable sort by top edge (y1).
    items = sorted(
        ((idx, quad_bbox(ocr[idx][0])) for idx in indices),
        key=lambda item: item[1][1],
    )

    first_idx, first_box = items[0]
    groups = []
    current_group = [first_idx]
    # Track the lowest bottom edge of the whole group rather than only the
    # previous quad's: a short quad sitting beside a tall one would otherwise
    # make the next quad look separated even though it still overlaps the
    # tall quad vertically.
    group_bottom = first_box[3]

    for idx, box in items[1:]:
        if box[1] - group_bottom > threshold:
            # Gap is large enough: close the current group and start a new one.
            groups.append(current_group)
            current_group = [idx]
            group_bottom = box[3]
        else:
            current_group.append(idx)
            group_bottom = max(group_bottom, box[3])

    groups.append(current_group)

    return groups
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# BOX FIXING FUNCTIONS (NEW)
|
||||||
|
# ============================================================
|
||||||
|
def _page_number_matches(page_identifier: str, page_number: int) -> bool:
    """Return True when a whole digit run in the file stem equals page_number."""
    import os
    import re

    stem = os.path.splitext(os.path.basename(page_identifier))[0]
    # Compare whole digit runs so that "115" or "150" does not match page 15,
    # while "15", "015" and "page_15" still do.
    return any(int(token) == page_number for token in re.findall(r"\d+", stem))


def _assign_box_group(bid: int,
                      group: List[int],
                      bubbles: Dict[int, List[str]],
                      bubble_boxes: Dict[int, Tuple],
                      bubble_quads: Dict[int, List],
                      bubble_indices: Dict[int, List[int]],
                      ocr: List[Tuple]) -> None:
    """Rebuild all four per-box tables for box ``bid`` from OCR indices ``group``."""
    bubbles[bid] = build_lines_from_indices(group, ocr)
    bubble_boxes[bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in group])
    bubble_quads[bid] = [ocr[i][0] for i in group]
    bubble_indices[bid] = group


def _split_box_into_groups(bid: int,
                           groups: List[List[int]],
                           bubbles: Dict[int, List[str]],
                           bubble_boxes: Dict[int, Tuple],
                           bubble_quads: Dict[int, List],
                           bubble_indices: Dict[int, List[int]],
                           ocr: List[Tuple]) -> bool:
    """Keep ``groups[0]`` in box ``bid`` and give every other group a new box id.

    Returns False, leaving the tables untouched, when there are fewer than
    two groups.
    """
    if len(groups) <= 1:
        return False

    tables = (bubbles, bubble_boxes, bubble_quads, bubble_indices)
    _assign_box_group(bid, groups[0], *tables, ocr)

    # New boxes are numbered after the current highest box id.
    next_bid = max(bubbles) + 1
    for group in groups[1:]:
        _assign_box_group(next_bid, group, *tables, ocr)
        next_bid += 1
    return True


def apply_page_specific_fixes(bubbles: Dict[int, List[str]],
                              bubble_boxes: Dict[int, Tuple],
                              bubble_quads: Dict[int, List],
                              bubble_indices: Dict[int, List[int]],
                              ocr: List[Tuple],
                              image_bgr: np.ndarray,
                              page_identifier: str) -> Tuple[Dict, Dict, Dict, Dict]:
    """Apply hand-written, page-specific corrections to the detected boxes.

    The four dictionaries are keyed by box id and are modified in place; the
    same objects are also returned for convenience.

    Fixes:
        * Page 15: merge box 16 into box 12.
        * Page 16: split boxes 15 and 8 by bubble membership; split box 18 by
          bubble membership, falling back to a vertical-gap split.
        * Page 19: split box 5 by bubble membership and then by vertical gap;
          split box 11 by bubble membership.

    Args:
        bubbles: Box id -> text lines.
        bubble_boxes: Box id -> bounding box.
        bubble_quads: Box id -> OCR quads.
        bubble_indices: Box id -> indices into ``ocr``.
        ocr: OCR entries; element ``[0]`` of each entry is its quad.
        image_bgr: Page image, used only for speech-bubble detection.
        page_identifier: File name or base name of the page (e.g. "15",
            "16.jpg"). A page matches when a whole digit run in the file stem
            equals the page number, so "115.png" does not match page 15.

    Returns:
        ``(bubbles, bubble_boxes, bubble_quads, bubble_indices)``.
    """
    tables = (bubbles, bubble_boxes, bubble_quads, bubble_indices)
    fixes_applied: List[str] = []

    on_page_15 = _page_number_matches(page_identifier, 15)
    on_page_16 = _page_number_matches(page_identifier, 16)
    on_page_19 = _page_number_matches(page_identifier, 19)

    # Contour detection is only needed by the page 16 / 19 split fixes, so
    # skip the image processing for every other page.
    bubble_contours = (
        detect_speech_bubbles(image_bgr) if (on_page_16 or on_page_19) else []
    )

    # PAGE 15 FIXES
    if on_page_15:
        # Fix: merge Box 12 and Box 16 into one box.
        if 12 in bubbles and 16 in bubbles:
            merged_indices = sorted(set(bubble_indices[12] + bubble_indices[16]))
            _assign_box_group(12, merged_indices, *tables, ocr)

            # Box 16 now lives inside box 12.
            for table in tables:
                del table[16]

            fixes_applied.append("Page 15: Merged BOX#12 and BOX#16")

    # PAGE 16 FIXES
    if on_page_16:
        # Fix Box 15 and Box 8: split quads outside the bubble.
        for bid in (15, 8):
            if bid in bubbles:
                split_groups = split_indices_by_bubble(
                    bubble_indices[bid], ocr, bubble_contours
                )
                if _split_box_into_groups(bid, split_groups, *tables, ocr):
                    fixes_applied.append(
                        f"Page 16: Split BOX#{bid} into {len(split_groups)} parts"
                    )

        # Fix Box 18: split into separate boxes.
        if 18 in bubbles:
            # Try the bubble-based split first.
            split_groups = split_indices_by_bubble(
                bubble_indices[18], ocr, bubble_contours
            )
            if len(split_groups) == 1:
                # Bubble detection kept everything together; fall back to
                # splitting on vertical gaps.
                split_groups = check_vertical_alignment_split(
                    bubble_indices[18], ocr, threshold=30
                )
            if _split_box_into_groups(18, split_groups, *tables, ocr):
                fixes_applied.append(
                    f"Page 16: Split BOX#18 into {len(split_groups)} parts"
                )

    # PAGE 19 FIXES
    if on_page_19:
        # Fix Box 5: split by bubble, then each part by vertical gap.
        if 5 in bubbles:
            split_groups = split_indices_by_bubble(
                bubble_indices[5], ocr, bubble_contours
            )
            final_groups = []
            for group in split_groups:
                final_groups.extend(
                    check_vertical_alignment_split(group, ocr, threshold=25)
                )
            if _split_box_into_groups(5, final_groups, *tables, ocr):
                fixes_applied.append(
                    f"Page 19: Split BOX#5 into {len(final_groups)} parts"
                )

        # Fix Box 11: split by bubble.
        if 11 in bubbles:
            split_groups = split_indices_by_bubble(
                bubble_indices[11], ocr, bubble_contours
            )
            if _split_box_into_groups(11, split_groups, *tables, ocr):
                fixes_applied.append(
                    f"Page 19: Split BOX#11 into {len(split_groups)} parts"
                )

    # Report what was changed.
    if fixes_applied:
        print("\n🔧 Page-specific fixes applied:")
        for fix in fixes_applied:
            print(f" ✓ {fix}")

    return bubbles, bubble_boxes, bubble_quads, bubble_indices
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# SPLITTERS + QUAD NORMALIZATION
|
# SPLITTERS + QUAD NORMALIZATION
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -1273,6 +1550,15 @@ def translate_manga_text(
|
|||||||
bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered
|
bubbles, bubble_boxes, bubble_quads, bubble_indices, filtered
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# APPLY PAGE-SPECIFIC FIXES (NEW)
|
||||||
|
# ============================================================
|
||||||
|
page_identifier = os.path.basename(image_path)
|
||||||
|
bubbles, bubble_boxes, bubble_quads, bubble_indices = apply_page_specific_fixes(
|
||||||
|
bubbles, bubble_boxes, bubble_quads, bubble_indices,
|
||||||
|
filtered, image, page_identifier
|
||||||
|
)
|
||||||
|
|
||||||
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
|
new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {}
|
||||||
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
|
next_bid = max(bubbles.keys()) + 1 if bubbles else 1
|
||||||
splits_performed = []
|
splits_performed = []
|
||||||
@@ -1474,7 +1760,7 @@ def translate_manga_text(
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
translate_manga_text(
|
translate_manga_text(
|
||||||
image_path="16.jpg",
|
image_path="15.png",
|
||||||
source_lang="english",
|
source_lang="english",
|
||||||
target_lang="ca",
|
target_lang="ca",
|
||||||
confidence_threshold=0.05,
|
confidence_threshold=0.05,
|
||||||
|
|||||||
Reference in New Issue
Block a user