# ============================================================
# SPEECH BUBBLE DETECTION (NEW)
# ============================================================
def detect_speech_bubbles(image_bgr: np.ndarray) -> List[np.ndarray]:
    """Detect candidate speech-bubble contours used for box splitting.

    Args:
        image_bgr: Page image in BGR channel order (as loaded by cv2).

    Returns:
        External contours whose area exceeds a minimum threshold;
        small noise contours are discarded.
    """
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding copes with uneven scan lighting better than
    # a single global threshold would.
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2,
    )

    contours, _ = cv2.findContours(
        thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Keep only contours large enough to plausibly be a bubble.
    min_bubble_area = 500
    return [c for c in contours if cv2.contourArea(c) > min_bubble_area]


def is_quad_in_bubble(quad_bbox_xyxy: Tuple[int, int, int, int],
                      bubble_contour: np.ndarray,
                      tolerance: int = 5) -> bool:
    """Check whether a quad's (text box's) center lies inside a bubble.

    Args:
        quad_bbox_xyxy: Axis-aligned (x1, y1, x2, y2) box of the quad.
        bubble_contour: Contour as returned by detect_speech_bubbles().
        tolerance: Pixels the center may fall OUTSIDE the contour and
            still count as inside (absorbs contour jaggedness).

    Returns:
        True if the quad center is inside, or within `tolerance` pixels
        outside, the bubble contour.
    """
    x1, y1, x2, y2 = quad_bbox_xyxy
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2

    # BUGFIX: measureDist must be True so pointPolygonTest returns the
    # SIGNED pixel distance (negative = outside).  With False it only
    # returns -1/0/+1, so any tolerance > 1 made every point count as
    # "inside" every bubble and the split logic silently misbehaved.
    dist = cv2.pointPolygonTest(bubble_contour, (float(cx), float(cy)), True)

    return dist >= -tolerance


def split_indices_by_bubble(indices: List[int],
                            ocr: List[Tuple],
                            bubble_contours: List[np.ndarray]) -> List[List[int]]:
    """Split OCR indices into groups according to bubble membership.

    Each index is assigned to the first bubble whose contour contains
    its quad center; indices matching no bubble are collected into one
    trailing "outside" group.

    Returns:
        A list of index groups (bubble groups first, then the outside
        group if any).  Empty input yields an empty list.
    """
    if not indices:
        return []

    bubble_groups: Dict[int, List[int]] = {}
    outside_group: List[int] = []

    for idx in indices:
        bbox = quad_bbox(ocr[idx][0])
        for bubble_idx, bubble in enumerate(bubble_contours):
            if is_quad_in_bubble(bbox, bubble):
                bubble_groups.setdefault(bubble_idx, []).append(idx)
                break
        else:
            # No bubble claimed this quad.
            outside_group.append(idx)

    result = list(bubble_groups.values())
    # All non-bubble quads travel together as a single extra group.
    if outside_group:
        result.append(outside_group)
    return result


def check_vertical_alignment_split(indices: List[int],
                                   ocr: List[Tuple],
                                   threshold: int = 20) -> List[List[int]]:
    """Split indices whose quads are separated by a large vertical gap.

    Quads are sorted by their top edge; a new group starts whenever the
    gap between the previous quad's bottom and the next quad's top
    exceeds `threshold` pixels.

    Returns:
        A list of index groups (at least one, unless input is empty).
    """
    if len(indices) <= 1:
        return [indices]

    items = sorted(
        ((idx, quad_bbox(ocr[idx][0])) for idx in indices),
        key=lambda item: item[1][1],  # sort by top y
    )

    groups: List[List[int]] = []
    current_group = [items[0][0]]

    for (prev_idx, prev_bbox), (curr_idx, curr_bbox) in zip(items, items[1:]):
        gap = curr_bbox[1] - prev_bbox[3]  # top of current - bottom of previous
        if gap > threshold:
            groups.append(current_group)
            current_group = [curr_idx]
        else:
            current_group.append(curr_idx)

    groups.append(current_group)
    return groups


# ============================================================
# BOX FIXING FUNCTIONS (NEW)
# ============================================================
def apply_page_specific_fixes(bubbles: Dict[int, List[str]],
                              bubble_boxes: Dict[int, Tuple],
                              bubble_quads: Dict[int, List],
                              bubble_indices: Dict[int, List[int]],
                              ocr: List[Tuple],
                              image_bgr: np.ndarray,
                              page_identifier: str) -> Tuple[Dict, Dict, Dict, Dict]:
    """Apply hand-tuned fixes for known problem pages.

    Merges or splits specific bubble boxes that the generic grouping
    gets wrong on specific pages.  All four dicts are mutated in place
    and also returned for convenience.

    Args:
        bubbles: box-id -> text lines.
        bubble_boxes: box-id -> union (x1, y1, x2, y2) bbox.
        bubble_quads: box-id -> list of OCR quads.
        bubble_indices: box-id -> list of OCR indices.
        ocr: Full OCR result list; ocr[i][0] is the quad.
        image_bgr: Page image (used for bubble contour detection).
        page_identifier: Base filename, e.g. "15.png".

    Returns:
        (bubbles, bubble_boxes, bubble_quads, bubble_indices).
    """
    bubble_contours = detect_speech_bubbles(image_bgr)
    fixes_applied: List[str] = []

    # BUGFIX: compare against the filename stem, not a substring — the
    # original `"15" in page_identifier` also matched "150.jpg",
    # "115.png", etc., applying fixes to the wrong pages.
    page_stem = os.path.splitext(page_identifier)[0]

    def _assign(bid: int, indices: List[int]) -> None:
        # (Re)build every per-box structure for `bid` from `indices`.
        bubbles[bid] = build_lines_from_indices(indices, ocr)
        bubble_boxes[bid] = boxes_union_xyxy([quad_bbox(ocr[i][0]) for i in indices])
        bubble_quads[bid] = [ocr[i][0] for i in indices]
        bubble_indices[bid] = indices

    def _spill_split(bid: int, groups: List[List[int]], next_bid: int) -> int:
        # Keep groups[0] in `bid`; spill remaining groups into fresh ids.
        _assign(bid, groups[0])
        for group in groups[1:]:
            _assign(next_bid, group)
            next_bid += 1
        return next_bid

    # PAGE 15 FIXES
    if page_stem == "15":
        # Fix: Merge Box 12 and Box 16 into one box.
        if 12 in bubbles and 16 in bubbles:
            merged_indices = sorted(set(bubble_indices[12] + bubble_indices[16]))
            _assign(12, merged_indices)
            for d in (bubbles, bubble_boxes, bubble_quads, bubble_indices):
                del d[16]
            fixes_applied.append("Page 15: Merged BOX#12 and BOX#16")

    # PAGE 16 FIXES
    if page_stem == "16":
        next_bid = max(bubbles.keys()) + 1 if bubbles else 100

        # Fix Boxes 15 and 8: split quads that fall outside their bubble.
        for bid in (15, 8):
            if bid not in bubbles:
                continue
            split_groups = split_indices_by_bubble(
                bubble_indices[bid], ocr, bubble_contours
            )
            if len(split_groups) > 1:
                next_bid = _spill_split(bid, split_groups, next_bid)
                fixes_applied.append(
                    f"Page 16: Split BOX#{bid} into {len(split_groups)} parts"
                )

        # Fix Box 18: bubble-based split, falling back to vertical gaps.
        if 18 in bubbles:
            split_groups = split_indices_by_bubble(
                bubble_indices[18], ocr, bubble_contours
            )
            if len(split_groups) == 1:
                split_groups = check_vertical_alignment_split(
                    bubble_indices[18], ocr, threshold=30
                )
            if len(split_groups) > 1:
                next_bid = _spill_split(18, split_groups, next_bid)
                fixes_applied.append(
                    f"Page 16: Split BOX#18 into {len(split_groups)} parts"
                )

    # PAGE 19 FIXES
    if page_stem == "19":
        next_bid = max(bubbles.keys()) + 1 if bubbles else 100

        # Fix Box 5: split by bubble first, then each group by vertical gap.
        if 5 in bubbles:
            split_groups = split_indices_by_bubble(
                bubble_indices[5], ocr, bubble_contours
            )
            final_groups: List[List[int]] = []
            for group in split_groups:
                final_groups.extend(
                    check_vertical_alignment_split(group, ocr, threshold=25)
                )
            if len(final_groups) > 1:
                next_bid = _spill_split(5, final_groups, next_bid)
                fixes_applied.append(
                    f"Page 19: Split BOX#5 into {len(final_groups)} parts"
                )

        # Fix Box 11: split into separate bubbles.
        if 11 in bubbles:
            split_groups = split_indices_by_bubble(
                bubble_indices[11], ocr, bubble_contours
            )
            if len(split_groups) > 1:
                next_bid = _spill_split(11, split_groups, next_bid)
                fixes_applied.append(
                    f"Page 19: Split BOX#11 into {len(split_groups)} parts"
                )

    if fixes_applied:
        print(f"\nšŸ”§ Page-specific fixes applied:")
        for fix in fixes_applied:
            print(f"   āœ“ {fix}")

    return bubbles, bubble_boxes, bubble_quads, bubble_indices


# NOTE(review): the remainder of the original patch wires this into
# translate_manga_text() — after the existing grouping step:
#
#     page_identifier = os.path.basename(image_path)
#     bubbles, bubble_boxes, bubble_quads, bubble_indices = \
#         apply_page_specific_fixes(
#             bubbles, bubble_boxes, bubble_quads, bubble_indices,
#             filtered, image, page_identifier,
#         )
#
# and changes the __main__ entry to image_path="15.png".  The enclosing
# function is not visible in this chunk, so it is left untouched here.